diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -10,1639 +10,1639 @@ { "epoch": 0.0, "learning_rate": 1.9998144540309864e-05, - "loss": 0.6623, + "loss": 0.6522, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.9996289080619725e-05, - "loss": 0.5762, + "loss": 0.5273, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.9994433620929587e-05, - "loss": 0.483, + "loss": 0.4584, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.999257816123945e-05, - "loss": 0.3882, + "loss": 0.3972, "step": 4 }, { "epoch": 0.0, "learning_rate": 1.999072270154931e-05, - "loss": 0.3943, + "loss": 0.295, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.998886724185917e-05, - "loss": 0.2874, + "loss": 0.2801, "step": 6 }, { "epoch": 0.0, "learning_rate": 1.9987011782169035e-05, - "loss": 0.259, + "loss": 0.2672, "step": 7 }, { "epoch": 0.0, "learning_rate": 1.9985156322478897e-05, - "loss": 0.2403, + "loss": 0.2612, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.998330086278876e-05, - "loss": 0.1743, + "loss": 0.1866, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.9981445403098617e-05, - "loss": 0.2289, + "loss": 0.1299, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.9979589943408482e-05, - "loss": 0.1836, + "loss": 0.1314, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.9977734483718344e-05, - "loss": 0.2114, + "loss": 0.1452, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.9975879024028203e-05, - "loss": 0.133, + "loss": 0.1988, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.9974023564338064e-05, - "loss": 0.1171, + "loss": 0.2823, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.997216810464793e-05, - "loss": 0.0704, + "loss": 0.2078, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.997031264495779e-05, - "loss": 0.1956, + "loss": 0.069, "step": 16 }, { "epoch": 0.0, "learning_rate": 1.996845718526765e-05, - "loss": 0.1071, + "loss": 0.1251, "step": 17 }, { "epoch": 0.01, "learning_rate": 1.9966601725577512e-05, - "loss": 0.4079, + "loss": 0.2065, "step": 18 }, { "epoch": 0.01, "learning_rate": 1.9964746265887377e-05, - "loss": 0.1876, + "loss": 0.1458, "step": 19 }, { "epoch": 0.01, "learning_rate": 1.9962890806197236e-05, - "loss": 0.1395, + "loss": 0.1095, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.9961035346507098e-05, - "loss": 0.049, + "loss": 0.0496, "step": 21 }, { "epoch": 0.01, "learning_rate": 1.995917988681696e-05, - "loss": 0.1874, + "loss": 0.0455, "step": 22 }, { "epoch": 0.01, "learning_rate": 1.995732442712682e-05, - "loss": 0.3381, + "loss": 0.1868, "step": 23 }, { "epoch": 0.01, "learning_rate": 1.9955468967436683e-05, - "loss": 0.0913, + "loss": 0.1877, "step": 24 }, { "epoch": 0.01, "learning_rate": 1.9953613507746545e-05, - "loss": 0.1867, + "loss": 0.0915, "step": 25 }, { "epoch": 0.01, "learning_rate": 1.9951758048056407e-05, - "loss": 0.29, + "loss": 0.0918, "step": 26 }, { "epoch": 0.01, "learning_rate": 1.994990258836627e-05, - "loss": 0.0973, + "loss": 0.1434, "step": 27 }, { "epoch": 0.01, "learning_rate": 1.994804712867613e-05, - "loss": 0.1436, + "loss": 0.1397, "step": 28 }, { "epoch": 0.01, "learning_rate": 1.9946191668985992e-05, - "loss": 0.286, + "loss": 0.2614, "step": 29 }, { "epoch": 0.01, "learning_rate": 1.9944336209295854e-05, - "loss": 0.1422, + "loss": 0.16, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.9942480749605716e-05, - "loss": 0.2818, + "loss": 0.1976, "step": 31 }, { "epoch": 0.01, "learning_rate": 1.9940625289915578e-05, - "loss": 0.2774, + "loss": 0.0534, "step": 32 }, { "epoch": 0.01, "learning_rate": 1.993876983022544e-05, - "loss": 0.2312, + "loss": 0.0352, "step": 33 }, { "epoch": 0.01, "learning_rate": 1.9936914370535302e-05, - "loss": 0.234, + "loss": 0.2084, "step": 34 }, { "epoch": 0.01, "learning_rate": 1.9935058910845164e-05, - "loss": 0.1564, + "loss": 0.4373, "step": 35 }, { "epoch": 0.01, "learning_rate": 1.9933203451155026e-05, - "loss": 0.1598, + "loss": 0.0411, "step": 36 }, { "epoch": 0.01, "learning_rate": 1.9931347991464887e-05, - "loss": 0.0659, + "loss": 0.2488, "step": 37 }, { "epoch": 0.01, "learning_rate": 1.992949253177475e-05, - "loss": 0.0995, + "loss": 0.1537, "step": 38 }, { "epoch": 0.01, "learning_rate": 1.992763707208461e-05, - "loss": 0.2888, + "loss": 0.0384, "step": 39 }, { "epoch": 0.01, "learning_rate": 1.9925781612394473e-05, - "loss": 0.0935, + "loss": 0.0461, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.9923926152704335e-05, - "loss": 0.1428, + "loss": 0.0319, "step": 41 }, { "epoch": 0.01, "learning_rate": 1.9922070693014193e-05, - "loss": 0.0899, + "loss": 0.1531, "step": 42 }, { "epoch": 0.01, "learning_rate": 1.992021523332406e-05, - "loss": 0.1406, + "loss": 0.2518, "step": 43 }, { "epoch": 0.01, "learning_rate": 1.991835977363392e-05, - "loss": 0.1905, + "loss": 0.0546, "step": 44 }, { "epoch": 0.01, "learning_rate": 1.9916504313943782e-05, - "loss": 0.1399, + "loss": 0.1454, "step": 45 }, { "epoch": 0.01, "learning_rate": 1.991464885425364e-05, - "loss": 0.2441, + "loss": 0.0933, "step": 46 }, { "epoch": 0.01, "learning_rate": 1.9912793394563506e-05, - "loss": 0.2373, + "loss": 0.207, "step": 47 }, { "epoch": 0.01, "learning_rate": 1.9910937934873368e-05, - "loss": 0.091, + "loss": 0.309, "step": 48 }, { "epoch": 0.01, "learning_rate": 1.9909082475183227e-05, - "loss": 0.1377, + "loss": 0.0894, "step": 49 }, { "epoch": 0.01, "learning_rate": 1.990722701549309e-05, - "loss": 0.1371, + "loss": 0.1448, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.9905371555802954e-05, - "loss": 0.0897, + "loss": 0.1871, "step": 51 }, { "epoch": 0.01, "learning_rate": 1.9903516096112812e-05, - "loss": 0.239, + "loss": 0.0539, "step": 52 }, { "epoch": 0.01, "learning_rate": 1.9901660636422674e-05, - "loss": 0.1415, + "loss": 0.0892, "step": 53 }, { "epoch": 0.02, "learning_rate": 1.9899805176732536e-05, - "loss": 0.1898, + "loss": 0.1967, "step": 54 }, { "epoch": 0.02, "learning_rate": 1.98979497170424e-05, - "loss": 0.1907, + "loss": 0.0312, "step": 55 }, { "epoch": 0.02, "learning_rate": 1.989609425735226e-05, - "loss": 0.142, + "loss": 0.2541, "step": 56 }, { "epoch": 0.02, "learning_rate": 1.989423879766212e-05, - "loss": 0.1422, + "loss": 0.1481, "step": 57 }, { "epoch": 0.02, "learning_rate": 1.9892383337971983e-05, - "loss": 0.1345, + "loss": 0.2065, "step": 58 }, { "epoch": 0.02, "learning_rate": 1.9890527878281845e-05, - "loss": 0.2453, + "loss": 0.1939, "step": 59 }, { "epoch": 0.02, "learning_rate": 1.9888672418591707e-05, - "loss": 0.0894, + "loss": 0.032, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.988681695890157e-05, - "loss": 0.0883, + "loss": 0.0341, "step": 61 }, { "epoch": 0.02, "learning_rate": 1.988496149921143e-05, - "loss": 0.1406, + "loss": 0.0836, "step": 62 }, { "epoch": 0.02, "learning_rate": 1.9883106039521293e-05, - "loss": 0.1939, + "loss": 0.1426, "step": 63 }, { "epoch": 0.02, "learning_rate": 1.9881250579831155e-05, - "loss": 0.1909, + "loss": 0.1994, "step": 64 }, { "epoch": 0.02, "learning_rate": 1.9879395120141016e-05, - "loss": 0.2411, + "loss": 0.0891, "step": 65 }, { "epoch": 0.02, "learning_rate": 1.987753966045088e-05, - "loss": 0.1899, + "loss": 0.0891, "step": 66 }, { "epoch": 0.02, "learning_rate": 1.987568420076074e-05, - "loss": 0.2437, + "loss": 0.0889, "step": 67 }, { "epoch": 0.02, "learning_rate": 1.9873828741070602e-05, - "loss": 0.1405, + "loss": 0.0262, "step": 68 }, { "epoch": 0.02, "learning_rate": 1.9871973281380464e-05, - "loss": 0.1918, + "loss": 0.1396, "step": 69 }, { "epoch": 0.02, "learning_rate": 1.9870117821690326e-05, - "loss": 0.1902, + "loss": 0.0289, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.9868262362000188e-05, - "loss": 0.2397, + "loss": 0.1448, "step": 71 }, { "epoch": 0.02, "learning_rate": 1.986640690231005e-05, - "loss": 0.2399, + "loss": 0.2549, "step": 72 }, { "epoch": 0.02, "learning_rate": 1.986455144261991e-05, - "loss": 0.1598, + "loss": 0.1409, "step": 73 }, { "epoch": 0.02, "learning_rate": 1.9862695982929773e-05, - "loss": 0.1514, + "loss": 0.0871, "step": 74 }, { "epoch": 0.02, "learning_rate": 1.9860840523239635e-05, - "loss": 0.2817, + "loss": 0.0849, "step": 75 }, { "epoch": 0.02, "learning_rate": 1.9858985063549497e-05, - "loss": 0.1347, + "loss": 0.0267, "step": 76 }, { "epoch": 0.02, "learning_rate": 1.985712960385936e-05, - "loss": 0.0976, + "loss": 0.0782, "step": 77 }, { "epoch": 0.02, "learning_rate": 1.9855274144169217e-05, - "loss": 0.1868, + "loss": 0.0278, "step": 78 }, { "epoch": 0.02, "learning_rate": 1.9853418684479083e-05, - "loss": 0.1408, + "loss": 0.079, "step": 79 }, { "epoch": 0.02, "learning_rate": 1.9851563224788945e-05, - "loss": 0.195, + "loss": 0.1958, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.9849707765098803e-05, - "loss": 0.1992, + "loss": 0.0887, "step": 81 }, { "epoch": 0.02, "learning_rate": 1.9847852305408665e-05, - "loss": 0.185, + "loss": 0.0243, "step": 82 }, { "epoch": 0.02, "learning_rate": 1.984599684571853e-05, - "loss": 0.1391, + "loss": 0.0219, "step": 83 }, { "epoch": 0.02, "learning_rate": 1.9844141386028392e-05, - "loss": 0.1921, + "loss": 0.2795, "step": 84 }, { "epoch": 0.02, "learning_rate": 1.984228592633825e-05, - "loss": 0.0905, + "loss": 0.2091, "step": 85 }, { "epoch": 0.02, "learning_rate": 1.9840430466648112e-05, - "loss": 0.1446, + "loss": 0.2703, "step": 86 }, { "epoch": 0.02, "learning_rate": 1.9838575006957978e-05, - "loss": 0.2937, + "loss": 0.0845, "step": 87 }, { "epoch": 0.02, "learning_rate": 1.9836719547267836e-05, - "loss": 0.0917, + "loss": 0.1998, "step": 88 }, { "epoch": 0.02, "learning_rate": 1.9834864087577698e-05, - "loss": 0.1356, + "loss": 0.084, "step": 89 }, { "epoch": 0.03, "learning_rate": 1.983300862788756e-05, - "loss": 0.0898, + "loss": 0.0957, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.9831153168197425e-05, - "loss": 0.0892, + "loss": 0.0352, "step": 91 }, { "epoch": 0.03, "learning_rate": 1.9829297708507284e-05, - "loss": 0.035, + "loss": 0.1384, "step": 92 }, { "epoch": 0.03, "learning_rate": 1.9827442248817145e-05, - "loss": 0.0877, + "loss": 0.1998, "step": 93 }, { "epoch": 0.03, "learning_rate": 1.9825586789127007e-05, - "loss": 0.2562, + "loss": 0.0249, "step": 94 }, { "epoch": 0.03, "learning_rate": 1.982373132943687e-05, - "loss": 0.1944, + "loss": 0.0863, "step": 95 }, { "epoch": 0.03, "learning_rate": 1.982187586974673e-05, - "loss": 0.1342, + "loss": 0.0846, "step": 96 }, { "epoch": 0.03, "learning_rate": 1.9820020410056593e-05, - "loss": 0.0841, + "loss": 0.0829, "step": 97 }, { "epoch": 0.03, "learning_rate": 1.9818164950366455e-05, - "loss": 0.3032, + "loss": 0.1405, "step": 98 }, { "epoch": 0.03, "learning_rate": 1.9816309490676317e-05, - "loss": 0.1462, + "loss": 0.1946, "step": 99 }, { "epoch": 0.03, "learning_rate": 1.981445403098618e-05, - "loss": 0.139, + "loss": 0.1475, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.981259857129604e-05, - "loss": 0.0901, + "loss": 0.2533, "step": 101 }, { "epoch": 0.03, "learning_rate": 1.9810743111605902e-05, - "loss": 0.1938, + "loss": 0.027, "step": 102 }, { "epoch": 0.03, "learning_rate": 1.9808887651915764e-05, - "loss": 0.138, + "loss": 0.1396, "step": 103 }, { "epoch": 0.03, "learning_rate": 1.9807032192225626e-05, - "loss": 0.0392, + "loss": 0.1327, "step": 104 }, { "epoch": 0.03, "learning_rate": 1.9805176732535488e-05, - "loss": 0.1977, + "loss": 0.0332, "step": 105 }, { "epoch": 0.03, "learning_rate": 1.980332127284535e-05, - "loss": 0.1946, + "loss": 0.2037, "step": 106 }, { "epoch": 0.03, "learning_rate": 1.980146581315521e-05, - "loss": 0.2936, + "loss": 0.1393, "step": 107 }, { "epoch": 0.03, "learning_rate": 1.9799610353465073e-05, - "loss": 0.1395, + "loss": 0.1898, "step": 108 }, { "epoch": 0.03, "learning_rate": 1.9797754893774935e-05, - "loss": 0.0837, + "loss": 0.1315, "step": 109 }, { "epoch": 0.03, "learning_rate": 1.9795899434084794e-05, - "loss": 0.2466, + "loss": 0.0926, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.979404397439466e-05, - "loss": 0.1951, + "loss": 0.0362, "step": 111 }, { "epoch": 0.03, "learning_rate": 1.979218851470452e-05, - "loss": 0.0896, + "loss": 0.2515, "step": 112 }, { "epoch": 0.03, "learning_rate": 1.9790333055014383e-05, - "loss": 0.1839, + "loss": 0.0879, "step": 113 }, { "epoch": 0.03, "learning_rate": 1.978847759532424e-05, - "loss": 0.0901, + "loss": 0.082, "step": 114 }, { "epoch": 0.03, "learning_rate": 1.9786622135634107e-05, - "loss": 0.2419, + "loss": 0.1467, "step": 115 }, { "epoch": 0.03, "learning_rate": 1.978476667594397e-05, - "loss": 0.1944, + "loss": 0.0854, "step": 116 }, { "epoch": 0.03, "learning_rate": 1.9782911216253827e-05, - "loss": 0.2416, + "loss": 0.0859, "step": 117 }, { "epoch": 0.03, "learning_rate": 1.978105575656369e-05, - "loss": 0.2341, + "loss": 0.0269, "step": 118 }, { "epoch": 0.03, "learning_rate": 1.9779200296873554e-05, - "loss": 0.1424, + "loss": 0.1417, "step": 119 }, { "epoch": 0.03, "learning_rate": 1.9777344837183416e-05, - "loss": 0.1417, + "loss": 0.0839, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.9775489377493274e-05, - "loss": 0.1892, + "loss": 0.0821, "step": 121 }, { "epoch": 0.03, "learning_rate": 1.9773633917803136e-05, - "loss": 0.3231, + "loss": 0.199, "step": 122 }, { "epoch": 0.03, "learning_rate": 1.9771778458113e-05, - "loss": 0.1028, + "loss": 0.1366, "step": 123 }, { "epoch": 0.03, "learning_rate": 1.976992299842286e-05, - "loss": 0.1439, + "loss": 0.1965, "step": 124 }, { "epoch": 0.03, "learning_rate": 1.9768067538732722e-05, - "loss": 0.2259, + "loss": 0.1412, "step": 125 }, { "epoch": 0.04, "learning_rate": 1.9766212079042584e-05, - "loss": 0.1401, + "loss": 0.086, "step": 126 }, { "epoch": 0.04, "learning_rate": 1.9764356619352446e-05, - "loss": 0.1904, + "loss": 0.0835, "step": 127 }, { "epoch": 0.04, "learning_rate": 1.9762501159662308e-05, - "loss": 0.0931, + "loss": 0.1986, "step": 128 }, { "epoch": 0.04, "learning_rate": 1.976064569997217e-05, - "loss": 0.0438, + "loss": 0.3112, "step": 129 }, { "epoch": 0.04, "learning_rate": 1.975879024028203e-05, - "loss": 0.1865, + "loss": 0.0838, "step": 130 }, { "epoch": 0.04, "learning_rate": 1.9756934780591893e-05, - "loss": 0.1949, + "loss": 0.1383, "step": 131 }, { "epoch": 0.04, "learning_rate": 1.9755079320901755e-05, - "loss": 0.2473, + "loss": 0.1411, "step": 132 }, { "epoch": 0.04, "learning_rate": 1.9753223861211617e-05, - "loss": 0.1929, + "loss": 0.1392, "step": 133 }, { "epoch": 0.04, "learning_rate": 1.975136840152148e-05, - "loss": 0.2499, + "loss": 0.0481, "step": 134 }, { "epoch": 0.04, "learning_rate": 1.974951294183134e-05, - "loss": 0.0857, + "loss": 0.0395, "step": 135 }, { "epoch": 0.04, "learning_rate": 1.9747657482141202e-05, - "loss": 0.2468, + "loss": 0.0305, "step": 136 }, { "epoch": 0.04, "learning_rate": 1.9745802022451064e-05, - "loss": 0.2912, + "loss": 0.2518, "step": 137 }, { "epoch": 0.04, "learning_rate": 1.9743946562760926e-05, - "loss": 0.197, + "loss": 0.141, "step": 138 }, { "epoch": 0.04, "learning_rate": 1.9742091103070788e-05, - "loss": 0.138, + "loss": 0.0831, "step": 139 }, { "epoch": 0.04, "learning_rate": 1.974023564338065e-05, - "loss": 0.3418, + "loss": 0.0234, "step": 140 }, { "epoch": 0.04, "learning_rate": 1.9738380183690512e-05, - "loss": 0.2883, + "loss": 0.0825, "step": 141 }, { "epoch": 0.04, "learning_rate": 1.9736524724000374e-05, - "loss": 0.1477, + "loss": 0.2079, "step": 142 }, { "epoch": 0.04, "learning_rate": 1.9734669264310236e-05, - "loss": 0.1883, + "loss": 0.0238, "step": 143 }, { "epoch": 0.04, "learning_rate": 1.9732813804620097e-05, - "loss": 0.1863, + "loss": 0.0783, "step": 144 }, { "epoch": 0.04, "learning_rate": 1.973095834492996e-05, - "loss": 0.1911, + "loss": 0.1422, "step": 145 }, { "epoch": 0.04, "learning_rate": 1.9729102885239818e-05, - "loss": 0.0675, + "loss": 0.084, "step": 146 }, { "epoch": 0.04, "learning_rate": 1.9727247425549683e-05, - "loss": 0.1831, + "loss": 0.199, "step": 147 }, { "epoch": 0.04, "learning_rate": 1.9725391965859545e-05, - "loss": 0.2313, + "loss": 0.1505, "step": 148 }, { "epoch": 0.04, "learning_rate": 1.9723536506169403e-05, - "loss": 0.0977, + "loss": 0.0249, "step": 149 }, { "epoch": 0.04, "learning_rate": 1.9721681046479265e-05, - "loss": 0.096, + "loss": 0.2037, "step": 150 }, { "epoch": 0.04, "learning_rate": 1.971982558678913e-05, - "loss": 0.0917, + "loss": 0.0863, "step": 151 }, { "epoch": 0.04, "learning_rate": 1.9717970127098992e-05, - "loss": 0.3411, + "loss": 0.0809, "step": 152 }, { "epoch": 0.04, "learning_rate": 1.971611466740885e-05, - "loss": 0.245, + "loss": 0.0252, "step": 153 }, { "epoch": 0.04, "learning_rate": 1.9714259207718713e-05, - "loss": 0.1386, + "loss": 0.1415, "step": 154 }, { "epoch": 0.04, "learning_rate": 1.9712403748028575e-05, - "loss": 0.3496, + "loss": 0.0238, "step": 155 }, { "epoch": 0.04, "learning_rate": 1.9710548288338437e-05, - "loss": 0.1908, + "loss": 0.1374, "step": 156 }, { "epoch": 0.04, "learning_rate": 1.97086928286483e-05, - "loss": 0.0387, + "loss": 0.1432, "step": 157 }, { "epoch": 0.04, "learning_rate": 1.970683736895816e-05, - "loss": 0.1928, + "loss": 0.2, "step": 158 }, { "epoch": 0.04, "learning_rate": 1.9704981909268022e-05, - "loss": 0.1866, + "loss": 0.0249, "step": 159 }, { "epoch": 0.04, "learning_rate": 1.9703126449577884e-05, - "loss": 0.1404, + "loss": 0.259, "step": 160 }, { "epoch": 0.04, "learning_rate": 1.9701270989887746e-05, - "loss": 0.1443, + "loss": 0.1946, "step": 161 }, { "epoch": 0.05, "learning_rate": 1.9699415530197608e-05, - "loss": 0.1382, + "loss": 0.1394, "step": 162 }, { "epoch": 0.05, "learning_rate": 1.969756007050747e-05, - "loss": 0.1425, + "loss": 0.0303, "step": 163 }, { "epoch": 0.05, "learning_rate": 1.969570461081733e-05, - "loss": 0.0933, + "loss": 0.0285, "step": 164 }, { "epoch": 0.05, "learning_rate": 1.9693849151127193e-05, - "loss": 0.0923, + "loss": 0.1392, "step": 165 }, { "epoch": 0.05, "learning_rate": 1.9691993691437055e-05, - "loss": 0.1375, + "loss": 0.0851, "step": 166 }, { "epoch": 0.05, "learning_rate": 1.9690138231746917e-05, - "loss": 0.1387, + "loss": 0.1407, "step": 167 }, { "epoch": 0.05, "learning_rate": 1.968828277205678e-05, - "loss": 0.1396, + "loss": 0.0275, "step": 168 }, { "epoch": 0.05, "learning_rate": 1.968642731236664e-05, - "loss": 0.1879, + "loss": 0.1409, "step": 169 }, { "epoch": 0.05, "learning_rate": 1.9684571852676503e-05, - "loss": 0.2436, + "loss": 0.0238, "step": 170 }, { "epoch": 0.05, "learning_rate": 1.9682716392986365e-05, - "loss": 0.3052, + "loss": 0.1473, "step": 171 }, { "epoch": 0.05, "learning_rate": 1.9680860933296226e-05, - "loss": 0.1949, + "loss": 0.2058, "step": 172 }, { "epoch": 0.05, "learning_rate": 1.9679005473606088e-05, - "loss": 0.1376, + "loss": 0.0235, "step": 173 }, { "epoch": 0.05, "learning_rate": 1.967715001391595e-05, - "loss": 0.3447, + "loss": 0.0222, "step": 174 }, { "epoch": 0.05, "learning_rate": 1.967529455422581e-05, - "loss": 0.0924, + "loss": 0.1317, "step": 175 }, { "epoch": 0.05, "learning_rate": 1.9673439094535674e-05, - "loss": 0.1402, + "loss": 0.2069, "step": 176 }, { "epoch": 0.05, "learning_rate": 1.9671583634845536e-05, - "loss": 0.0389, + "loss": 0.0828, "step": 177 }, { "epoch": 0.05, "learning_rate": 1.9669728175155394e-05, - "loss": 0.3385, + "loss": 0.0805, "step": 178 }, { "epoch": 0.05, "learning_rate": 1.9667872715465256e-05, - "loss": 0.1414, + "loss": 0.1415, "step": 179 }, { "epoch": 0.05, "learning_rate": 1.966601725577512e-05, - "loss": 0.0916, + "loss": 0.2018, "step": 180 }, { "epoch": 0.05, "learning_rate": 1.9664161796084983e-05, - "loss": 0.0944, + "loss": 0.1986, "step": 181 }, { "epoch": 0.05, "learning_rate": 1.9662306336394842e-05, - "loss": 0.0949, + "loss": 0.0815, "step": 182 }, { "epoch": 0.05, "learning_rate": 1.9660450876704704e-05, - "loss": 0.0426, + "loss": 0.0814, "step": 183 }, { "epoch": 0.05, "learning_rate": 1.965859541701457e-05, - "loss": 0.1382, + "loss": 0.0827, "step": 184 }, { "epoch": 0.05, "learning_rate": 1.9656739957324427e-05, - "loss": 0.3478, + "loss": 0.2488, "step": 185 }, { "epoch": 0.05, "learning_rate": 1.965488449763429e-05, - "loss": 0.1406, + "loss": 0.0832, "step": 186 }, { "epoch": 0.05, "learning_rate": 1.965302903794415e-05, - "loss": 0.1976, + "loss": 0.1401, "step": 187 }, { "epoch": 0.05, "learning_rate": 1.9651173578254016e-05, - "loss": 0.1892, + "loss": 0.0858, "step": 188 }, { "epoch": 0.05, "learning_rate": 1.9649318118563875e-05, - "loss": 0.1388, + "loss": 0.3002, "step": 189 }, { "epoch": 0.05, "learning_rate": 1.9647462658873737e-05, - "loss": 0.2969, + "loss": 0.0343, "step": 190 }, { "epoch": 0.05, "learning_rate": 1.96456071991836e-05, - "loss": 0.1421, + "loss": 0.0964, "step": 191 }, { "epoch": 0.05, "learning_rate": 1.964375173949346e-05, - "loss": 0.0872, + "loss": 0.2369, "step": 192 }, { "epoch": 0.05, "learning_rate": 1.9641896279803322e-05, - "loss": 0.2473, + "loss": 0.2933, "step": 193 }, { "epoch": 0.05, "learning_rate": 1.9640040820113184e-05, - "loss": 0.2842, + "loss": 0.191, "step": 194 }, { "epoch": 0.05, "learning_rate": 1.9638185360423046e-05, - "loss": 0.145, + "loss": 0.0925, "step": 195 }, { "epoch": 0.05, "learning_rate": 1.9636329900732908e-05, - "loss": 0.1424, + "loss": 0.0966, "step": 196 }, { "epoch": 0.05, "learning_rate": 1.963447444104277e-05, - "loss": 0.0926, + "loss": 0.2799, "step": 197 }, { "epoch": 0.06, "learning_rate": 1.963261898135263e-05, - "loss": 0.0899, + "loss": 0.232, "step": 198 }, { "epoch": 0.06, "learning_rate": 1.9630763521662494e-05, - "loss": 0.0908, + "loss": 0.0541, "step": 199 }, { "epoch": 0.06, "learning_rate": 1.9628908061972355e-05, - "loss": 0.1411, + "loss": 0.1863, "step": 200 }, { "epoch": 0.06, "learning_rate": 1.9627052602282217e-05, - "loss": 0.1395, + "loss": 0.0928, "step": 201 }, { "epoch": 0.06, "learning_rate": 1.962519714259208e-05, - "loss": 0.139, + "loss": 0.0903, "step": 202 }, { "epoch": 0.06, "learning_rate": 1.962334168290194e-05, - "loss": 0.0878, + "loss": 0.1384, "step": 203 }, { "epoch": 0.06, "learning_rate": 1.9621486223211803e-05, - "loss": 0.2413, + "loss": 0.244, "step": 204 }, { "epoch": 0.06, "learning_rate": 1.9619630763521665e-05, - "loss": 0.1858, + "loss": 0.0879, "step": 205 }, { "epoch": 0.06, "learning_rate": 1.9617775303831527e-05, - "loss": 0.2483, + "loss": 0.0877, "step": 206 }, { "epoch": 0.06, "learning_rate": 1.9615919844141385e-05, - "loss": 0.2944, + "loss": 0.1991, "step": 207 }, { "epoch": 0.06, "learning_rate": 1.961406438445125e-05, - "loss": 0.0846, + "loss": 0.1917, "step": 208 }, { "epoch": 0.06, "learning_rate": 1.9612208924761112e-05, - "loss": 0.138, + "loss": 0.1384, "step": 209 }, { "epoch": 0.06, "learning_rate": 1.9610353465070974e-05, - "loss": 0.2424, + "loss": 0.1389, "step": 210 }, { "epoch": 0.06, "learning_rate": 1.9608498005380833e-05, - "loss": 0.1401, + "loss": 0.0275, "step": 211 }, { "epoch": 0.06, "learning_rate": 1.9606642545690698e-05, - "loss": 0.1352, + "loss": 0.2047, "step": 212 }, { "epoch": 0.06, "learning_rate": 1.960478708600056e-05, - "loss": 0.1398, + "loss": 0.1391, "step": 213 }, { "epoch": 0.06, "learning_rate": 1.9602931626310418e-05, - "loss": 0.1415, + "loss": 0.0856, "step": 214 }, { "epoch": 0.06, "learning_rate": 1.960107616662028e-05, - "loss": 0.2496, + "loss": 0.2533, "step": 215 }, { "epoch": 0.06, "learning_rate": 1.9599220706930145e-05, - "loss": 0.1401, + "loss": 0.0301, "step": 216 }, { "epoch": 0.06, "learning_rate": 1.9597365247240007e-05, - "loss": 0.1362, + "loss": 0.1445, "step": 217 }, { "epoch": 0.06, "learning_rate": 1.9595509787549866e-05, - "loss": 0.0882, + "loss": 0.031, "step": 218 }, { "epoch": 0.06, "learning_rate": 1.9593654327859728e-05, - "loss": 0.039, + "loss": 0.2514, "step": 219 }, { "epoch": 0.06, "learning_rate": 1.9591798868169593e-05, - "loss": 0.1373, + "loss": 0.0895, "step": 220 }, { "epoch": 0.06, "learning_rate": 1.958994340847945e-05, - "loss": 0.243, + "loss": 0.1403, "step": 221 }, { "epoch": 0.06, "learning_rate": 1.9588087948789313e-05, - "loss": 0.1943, + "loss": 0.1405, "step": 222 }, { "epoch": 0.06, "learning_rate": 1.9586232489099175e-05, - "loss": 0.134, + "loss": 0.1963, "step": 223 }, { "epoch": 0.06, "learning_rate": 1.9584377029409037e-05, - "loss": 0.0366, + "loss": 0.0823, "step": 224 }, { "epoch": 0.06, "learning_rate": 1.95825215697189e-05, - "loss": 0.3406, + "loss": 0.0343, "step": 225 }, { "epoch": 0.06, "learning_rate": 1.958066611002876e-05, - "loss": 0.144, + "loss": 0.0863, "step": 226 }, { "epoch": 0.06, "learning_rate": 1.9578810650338623e-05, - "loss": 0.0371, + "loss": 0.0283, "step": 227 }, { "epoch": 0.06, "learning_rate": 1.9576955190648484e-05, - "loss": 0.0365, + "loss": 0.1324, "step": 228 }, { "epoch": 0.06, "learning_rate": 1.9575099730958346e-05, - "loss": 0.2976, + "loss": 0.0829, "step": 229 }, { "epoch": 0.06, "learning_rate": 1.9573244271268208e-05, - "loss": 0.1938, + "loss": 0.0837, "step": 230 }, { "epoch": 0.06, "learning_rate": 1.957138881157807e-05, - "loss": 0.191, + "loss": 0.1367, "step": 231 }, { "epoch": 0.06, "learning_rate": 1.9569533351887932e-05, - "loss": 0.0866, + "loss": 0.0194, "step": 232 }, { "epoch": 0.06, "learning_rate": 1.9567677892197794e-05, - "loss": 0.0853, + "loss": 0.0202, "step": 233 }, { "epoch": 0.07, "learning_rate": 1.9565822432507656e-05, - "loss": 0.3506, + "loss": 0.1479, "step": 234 }, { "epoch": 0.07, "learning_rate": 1.9563966972817518e-05, - "loss": 0.0912, + "loss": 0.1475, "step": 235 }, { "epoch": 0.07, "learning_rate": 1.956211151312738e-05, - "loss": 0.0365, + "loss": 0.0812, "step": 236 }, { "epoch": 0.07, "learning_rate": 1.956025605343724e-05, - "loss": 0.1414, + "loss": 0.2642, "step": 237 }, { "epoch": 0.07, "learning_rate": 1.9558400593747103e-05, - "loss": 0.1358, + "loss": 0.0828, "step": 238 }, { "epoch": 0.07, "learning_rate": 1.9556545134056965e-05, - "loss": 0.2391, + "loss": 0.1329, "step": 239 }, { "epoch": 0.07, "learning_rate": 1.9554689674366827e-05, - "loss": 0.0348, + "loss": 0.0255, "step": 240 }, { "epoch": 0.07, "learning_rate": 1.955283421467669e-05, - "loss": 0.1853, + "loss": 0.0828, "step": 241 }, { "epoch": 0.07, "learning_rate": 1.955097875498655e-05, - "loss": 0.1939, + "loss": 0.136, "step": 242 }, { "epoch": 0.07, "learning_rate": 1.954912329529641e-05, - "loss": 0.1386, + "loss": 0.1932, "step": 243 }, { "epoch": 0.07, "learning_rate": 1.9547267835606274e-05, - "loss": 0.1906, + "loss": 0.084, "step": 244 }, { "epoch": 0.07, "learning_rate": 1.9545412375916136e-05, - "loss": 0.1334, + "loss": 0.1871, "step": 245 }, { "epoch": 0.07, "learning_rate": 1.9543556916225995e-05, - "loss": 0.142, + "loss": 0.1388, "step": 246 }, { "epoch": 0.07, "learning_rate": 1.9541701456535857e-05, - "loss": 0.0338, + "loss": 0.2472, "step": 247 }, { "epoch": 0.07, "learning_rate": 1.9539845996845722e-05, - "loss": 0.1874, + "loss": 0.0967, "step": 248 }, { "epoch": 0.07, "learning_rate": 1.9537990537155584e-05, - "loss": 0.1414, + "loss": 0.0963, "step": 249 }, { "epoch": 0.07, "learning_rate": 1.9536135077465442e-05, - "loss": 0.0851, + "loss": 0.0408, "step": 250 }, { "epoch": 0.07, "learning_rate": 1.9534279617775304e-05, - "loss": 0.3014, + "loss": 0.0311, "step": 251 }, { "epoch": 0.07, "learning_rate": 1.953242415808517e-05, - "loss": 0.1395, + "loss": 0.1374, "step": 252 }, { "epoch": 0.07, "learning_rate": 1.9530568698395028e-05, - "loss": 0.0832, + "loss": 0.1451, "step": 253 }, { "epoch": 0.07, "learning_rate": 1.952871323870489e-05, - "loss": 0.0859, + "loss": 0.0808, "step": 254 }, { "epoch": 0.07, "learning_rate": 1.952685777901475e-05, - "loss": 0.0871, + "loss": 0.081, "step": 255 }, { "epoch": 0.07, "learning_rate": 1.9525002319324617e-05, - "loss": 0.0886, + "loss": 0.0178, "step": 256 }, { "epoch": 0.07, "learning_rate": 1.9523146859634475e-05, - "loss": 0.0854, + "loss": 0.1997, "step": 257 }, { "epoch": 0.07, "learning_rate": 1.9521291399944337e-05, - "loss": 0.0857, + "loss": 0.2098, "step": 258 }, { "epoch": 0.07, "learning_rate": 1.95194359402542e-05, - "loss": 0.1967, + "loss": 0.1428, "step": 259 }, { "epoch": 0.07, "learning_rate": 1.951758048056406e-05, - "loss": 0.0276, + "loss": 0.1441, "step": 260 }, { "epoch": 0.07, "learning_rate": 1.9515725020873923e-05, - "loss": 0.0816, + "loss": 0.1428, "step": 261 }, { "epoch": 0.07, "learning_rate": 1.9513869561183785e-05, - "loss": 0.1972, + "loss": 0.0807, "step": 262 }, { "epoch": 0.07, "learning_rate": 1.9512014101493646e-05, - "loss": 0.3668, + "loss": 0.1968, "step": 263 }, { "epoch": 0.07, "learning_rate": 1.951015864180351e-05, - "loss": 0.2423, + "loss": 0.0835, "step": 264 }, { "epoch": 0.07, "learning_rate": 1.950830318211337e-05, - "loss": 0.1929, + "loss": 0.0257, "step": 265 }, { "epoch": 0.07, "learning_rate": 1.9506447722423232e-05, - "loss": 0.3961, + "loss": 0.1355, "step": 266 }, { "epoch": 0.07, "learning_rate": 1.9504592262733094e-05, - "loss": 0.3523, + "loss": 0.1359, "step": 267 }, { "epoch": 0.07, "learning_rate": 1.9502736803042956e-05, - "loss": 0.3257, + "loss": 0.0257, "step": 268 }, { "epoch": 0.07, "learning_rate": 1.9500881343352818e-05, - "loss": 0.2013, + "loss": 0.0819, "step": 269 }, { "epoch": 0.08, "learning_rate": 1.949902588366268e-05, - "loss": 0.1229, + "loss": 0.0819, "step": 270 }, { "epoch": 0.08, "learning_rate": 1.949717042397254e-05, - "loss": 0.1518, + "loss": 0.0259, "step": 271 }, { "epoch": 0.08, "learning_rate": 1.9495314964282403e-05, - "loss": 0.0525, + "loss": 0.0837, "step": 272 }, { "epoch": 0.08, "learning_rate": 1.9493459504592265e-05, - "loss": 0.1402, + "loss": 0.0228, "step": 273 }, { @@ -1654,22129 +1654,22129 @@ { "epoch": 0.08, "learning_rate": 1.9489748585211986e-05, - "loss": 0.1405, + "loss": 0.3861, "step": 275 }, { "epoch": 0.08, "learning_rate": 1.948789312552185e-05, - "loss": 0.0397, + "loss": 0.0208, "step": 276 }, { "epoch": 0.08, "learning_rate": 1.9486037665831713e-05, - "loss": 0.1415, + "loss": 0.0793, "step": 277 }, { "epoch": 0.08, "learning_rate": 1.9484182206141575e-05, - "loss": 0.1968, + "loss": 0.2038, "step": 278 }, { "epoch": 0.08, "learning_rate": 1.9482326746451433e-05, - "loss": 0.3479, + "loss": 0.0774, "step": 279 }, { "epoch": 0.08, "learning_rate": 1.9480471286761298e-05, - "loss": 0.3021, + "loss": 0.086, "step": 280 }, { "epoch": 0.08, "learning_rate": 1.947861582707116e-05, - "loss": 0.092, + "loss": 0.204, "step": 281 }, { "epoch": 0.08, "learning_rate": 1.947676036738102e-05, - "loss": 0.1447, + "loss": 0.1422, "step": 282 }, { "epoch": 0.08, "learning_rate": 1.947490490769088e-05, - "loss": 0.0894, + "loss": 0.0796, "step": 283 }, { "epoch": 0.08, "learning_rate": 1.9473049448000746e-05, - "loss": 0.041, + "loss": 0.0774, "step": 284 }, { "epoch": 0.08, "learning_rate": 1.9471193988310608e-05, - "loss": 0.09, + "loss": 0.1394, "step": 285 }, { "epoch": 0.08, "learning_rate": 1.9469338528620466e-05, - "loss": 0.1388, + "loss": 0.083, "step": 286 }, { "epoch": 0.08, "learning_rate": 1.9467483068930328e-05, - "loss": 0.2469, + "loss": 0.1345, "step": 287 }, { "epoch": 0.08, "learning_rate": 1.9465627609240193e-05, - "loss": 0.1395, + "loss": 0.1391, "step": 288 }, { "epoch": 0.08, "learning_rate": 1.9463772149550052e-05, - "loss": 0.1912, + "loss": 0.0817, "step": 289 }, { "epoch": 0.08, "learning_rate": 1.9461916689859914e-05, - "loss": 0.1399, + "loss": 0.2609, "step": 290 }, { "epoch": 0.08, "learning_rate": 1.9460061230169775e-05, - "loss": 0.1385, + "loss": 0.0792, "step": 291 }, { "epoch": 0.08, "learning_rate": 1.9458205770479637e-05, - "loss": 0.0866, + "loss": 0.0853, "step": 292 }, { "epoch": 0.08, "learning_rate": 1.94563503107895e-05, - "loss": 0.0873, + "loss": 0.3025, "step": 293 }, { "epoch": 0.08, "learning_rate": 1.945449485109936e-05, - "loss": 0.1398, + "loss": 0.1356, "step": 294 }, { "epoch": 0.08, "learning_rate": 1.9452639391409223e-05, - "loss": 0.2439, + "loss": 0.1357, "step": 295 }, { "epoch": 0.08, "learning_rate": 1.9450783931719085e-05, - "loss": 0.1932, + "loss": 0.0336, "step": 296 }, { "epoch": 0.08, "learning_rate": 1.9448928472028947e-05, - "loss": 0.2422, + "loss": 0.2373, "step": 297 }, { "epoch": 0.08, "learning_rate": 1.944707301233881e-05, - "loss": 0.193, + "loss": 0.1382, "step": 298 }, { "epoch": 0.08, "learning_rate": 1.944521755264867e-05, - "loss": 0.0856, + "loss": 0.1421, "step": 299 }, { "epoch": 0.08, "learning_rate": 1.9443362092958532e-05, - "loss": 0.1951, + "loss": 0.0885, "step": 300 }, { "epoch": 0.08, "learning_rate": 1.9441506633268394e-05, - "loss": 0.1401, + "loss": 0.1873, "step": 301 }, { "epoch": 0.08, "learning_rate": 1.9439651173578256e-05, - "loss": 0.2402, + "loss": 0.0856, "step": 302 }, { "epoch": 0.08, "learning_rate": 1.9437795713888118e-05, - "loss": 0.29, + "loss": 0.1361, "step": 303 }, { "epoch": 0.08, "learning_rate": 1.9435940254197976e-05, - "loss": 0.1952, + "loss": 0.1388, "step": 304 }, { "epoch": 0.08, "learning_rate": 1.943408479450784e-05, - "loss": 0.0528, + "loss": 0.2469, "step": 305 }, { "epoch": 0.09, "learning_rate": 1.9432229334817704e-05, - "loss": 0.3256, + "loss": 0.0384, "step": 306 }, { "epoch": 0.09, "learning_rate": 1.9430373875127565e-05, - "loss": 0.1917, + "loss": 0.0886, "step": 307 }, { "epoch": 0.09, "learning_rate": 1.9428518415437424e-05, - "loss": 0.0602, + "loss": 0.194, "step": 308 }, { "epoch": 0.09, "learning_rate": 1.942666295574729e-05, - "loss": 0.2792, + "loss": 0.1404, "step": 309 }, { "epoch": 0.09, "learning_rate": 1.942480749605715e-05, - "loss": 0.1015, + "loss": 0.0335, "step": 310 }, { "epoch": 0.09, "learning_rate": 1.942295203636701e-05, - "loss": 0.1907, + "loss": 0.1412, "step": 311 }, { "epoch": 0.09, "learning_rate": 1.942109657667687e-05, - "loss": 0.051, + "loss": 0.1349, "step": 312 }, { "epoch": 0.09, "learning_rate": 1.9419241116986737e-05, - "loss": 0.1881, + "loss": 0.3122, "step": 313 }, { "epoch": 0.09, "learning_rate": 1.94173856572966e-05, - "loss": 0.287, + "loss": 0.2437, "step": 314 }, { "epoch": 0.09, "learning_rate": 1.9415530197606457e-05, - "loss": 0.088, + "loss": 0.0296, "step": 315 }, { "epoch": 0.09, "learning_rate": 1.941367473791632e-05, - "loss": 0.1828, + "loss": 0.1932, "step": 316 }, { "epoch": 0.09, "learning_rate": 1.9411819278226184e-05, - "loss": 0.0385, + "loss": 0.0832, "step": 317 }, { "epoch": 0.09, "learning_rate": 1.9409963818536043e-05, - "loss": 0.0871, + "loss": 0.1905, "step": 318 }, { "epoch": 0.09, "learning_rate": 1.9408108358845904e-05, - "loss": 0.1365, + "loss": 0.0845, "step": 319 }, { "epoch": 0.09, "learning_rate": 1.9406252899155766e-05, - "loss": 0.1934, + "loss": 0.0825, "step": 320 }, { "epoch": 0.09, "learning_rate": 1.9404397439465628e-05, - "loss": 0.0341, + "loss": 0.1395, "step": 321 }, { "epoch": 0.09, "learning_rate": 1.940254197977549e-05, - "loss": 0.2469, + "loss": 0.0323, "step": 322 }, { "epoch": 0.09, "learning_rate": 1.9400686520085352e-05, - "loss": 0.1381, + "loss": 0.3576, "step": 323 }, { "epoch": 0.09, "learning_rate": 1.9398831060395214e-05, - "loss": 0.188, + "loss": 0.1855, "step": 324 }, { "epoch": 0.09, "learning_rate": 1.9396975600705076e-05, - "loss": 0.4072, + "loss": 0.1359, "step": 325 }, { "epoch": 0.09, "learning_rate": 1.9395120141014938e-05, - "loss": 0.1911, + "loss": 0.0295, "step": 326 }, { "epoch": 0.09, "learning_rate": 1.93932646813248e-05, - "loss": 0.134, + "loss": 0.0875, "step": 327 }, { "epoch": 0.09, "learning_rate": 1.939140922163466e-05, - "loss": 0.1402, + "loss": 0.0307, "step": 328 }, { "epoch": 0.09, "learning_rate": 1.9389553761944523e-05, - "loss": 0.0329, + "loss": 0.1959, "step": 329 }, { "epoch": 0.09, "learning_rate": 1.9387698302254385e-05, - "loss": 0.1342, + "loss": 0.0283, "step": 330 }, { "epoch": 0.09, "learning_rate": 1.9385842842564247e-05, - "loss": 0.2953, + "loss": 0.1392, "step": 331 }, { "epoch": 0.09, "learning_rate": 1.938398738287411e-05, - "loss": 0.2454, + "loss": 0.084, "step": 332 }, { "epoch": 0.09, "learning_rate": 1.938213192318397e-05, - "loss": 0.2457, + "loss": 0.196, "step": 333 }, { "epoch": 0.09, "learning_rate": 1.9380276463493833e-05, - "loss": 0.0879, + "loss": 0.0838, "step": 334 }, { "epoch": 0.09, "learning_rate": 1.9378421003803694e-05, - "loss": 0.1413, + "loss": 0.0244, "step": 335 }, { "epoch": 0.09, "learning_rate": 1.9376565544113556e-05, - "loss": 0.1834, + "loss": 0.1398, "step": 336 }, { "epoch": 0.09, "learning_rate": 1.9374710084423418e-05, - "loss": 0.2467, + "loss": 0.0804, "step": 337 }, { "epoch": 0.09, "learning_rate": 1.937285462473328e-05, - "loss": 0.1418, + "loss": 0.081, "step": 338 }, { "epoch": 0.09, "learning_rate": 1.9370999165043142e-05, - "loss": 0.0913, + "loss": 0.1331, "step": 339 }, { "epoch": 0.09, "learning_rate": 1.9369143705353e-05, - "loss": 0.1504, + "loss": 0.0856, "step": 340 }, { "epoch": 0.09, "learning_rate": 1.9367288245662866e-05, - "loss": 0.2328, + "loss": 0.2035, "step": 341 }, { "epoch": 0.1, "learning_rate": 1.9365432785972727e-05, - "loss": 0.2432, + "loss": 0.0209, "step": 342 }, { "epoch": 0.1, "learning_rate": 1.936357732628259e-05, - "loss": 0.096, + "loss": 0.08, "step": 343 }, { "epoch": 0.1, "learning_rate": 1.9361721866592448e-05, - "loss": 0.1449, + "loss": 0.0772, "step": 344 }, { "epoch": 0.1, "learning_rate": 1.9359866406902313e-05, - "loss": 0.3872, + "loss": 0.1405, "step": 345 }, { "epoch": 0.1, "learning_rate": 1.9358010947212175e-05, - "loss": 0.0894, + "loss": 0.1388, "step": 346 }, { "epoch": 0.1, "learning_rate": 1.9356155487522033e-05, - "loss": 0.1439, + "loss": 0.1427, "step": 347 }, { "epoch": 0.1, "learning_rate": 1.9354300027831895e-05, - "loss": 0.1417, + "loss": 0.147, "step": 348 }, { "epoch": 0.1, "learning_rate": 1.935244456814176e-05, - "loss": 0.0974, + "loss": 0.0826, "step": 349 }, { "epoch": 0.1, "learning_rate": 1.935058910845162e-05, - "loss": 0.1408, + "loss": 0.196, "step": 350 }, { "epoch": 0.1, "learning_rate": 1.934873364876148e-05, - "loss": 0.091, + "loss": 0.0233, "step": 351 }, { "epoch": 0.1, "learning_rate": 1.9346878189071343e-05, - "loss": 0.091, + "loss": 0.1418, "step": 352 }, { "epoch": 0.1, "learning_rate": 1.9345022729381208e-05, - "loss": 0.186, + "loss": 0.1372, "step": 353 }, { "epoch": 0.1, "learning_rate": 1.9343167269691067e-05, - "loss": 0.0882, + "loss": 0.205, "step": 354 }, { "epoch": 0.1, "learning_rate": 1.934131181000093e-05, - "loss": 0.1396, + "loss": 0.0833, "step": 355 }, { "epoch": 0.1, "learning_rate": 1.933945635031079e-05, - "loss": 0.1378, + "loss": 0.1365, "step": 356 }, { "epoch": 0.1, "learning_rate": 1.9337600890620652e-05, - "loss": 0.1391, + "loss": 0.2585, "step": 357 }, { "epoch": 0.1, "learning_rate": 1.9335745430930514e-05, - "loss": 0.1942, + "loss": 0.1371, "step": 358 }, { "epoch": 0.1, "learning_rate": 1.9333889971240376e-05, - "loss": 0.138, + "loss": 0.0329, "step": 359 }, { "epoch": 0.1, "learning_rate": 1.9332034511550238e-05, - "loss": 0.1388, + "loss": 0.0855, "step": 360 }, { "epoch": 0.1, "learning_rate": 1.93301790518601e-05, - "loss": 0.0872, + "loss": 0.0914, "step": 361 }, { "epoch": 0.1, "learning_rate": 1.932832359216996e-05, - "loss": 0.0314, + "loss": 0.19, "step": 362 }, { "epoch": 0.1, "learning_rate": 1.9326468132479823e-05, - "loss": 0.0866, + "loss": 0.1398, "step": 363 }, { "epoch": 0.1, "learning_rate": 1.9324612672789685e-05, - "loss": 0.2481, + "loss": 0.0865, "step": 364 }, { "epoch": 0.1, "learning_rate": 1.9322757213099547e-05, - "loss": 0.1924, + "loss": 0.0889, "step": 365 }, { "epoch": 0.1, "learning_rate": 1.932090175340941e-05, - "loss": 0.1986, + "loss": 0.0877, "step": 366 }, { "epoch": 0.1, "learning_rate": 1.931904629371927e-05, - "loss": 0.1896, + "loss": 0.0865, "step": 367 }, { "epoch": 0.1, "learning_rate": 1.9317190834029133e-05, - "loss": 0.029, + "loss": 0.0287, "step": 368 }, { "epoch": 0.1, "learning_rate": 1.9315335374338995e-05, - "loss": 0.0817, + "loss": 0.0864, "step": 369 }, { "epoch": 0.1, "learning_rate": 1.9313479914648856e-05, - "loss": 0.4144, + "loss": 0.1989, "step": 370 }, { "epoch": 0.1, "learning_rate": 1.931162445495872e-05, - "loss": 0.1952, + "loss": 0.0235, "step": 371 }, { "epoch": 0.1, "learning_rate": 1.9309768995268577e-05, - "loss": 0.0833, + "loss": 0.0776, "step": 372 }, { "epoch": 0.1, "learning_rate": 1.9307913535578442e-05, - "loss": 0.1367, + "loss": 0.2019, "step": 373 }, { "epoch": 0.1, "learning_rate": 1.9306058075888304e-05, - "loss": 0.0886, + "loss": 0.146, "step": 374 }, { "epoch": 0.1, "learning_rate": 1.9304202616198166e-05, - "loss": 0.1398, + "loss": 0.141, "step": 375 }, { "epoch": 0.1, "learning_rate": 1.9302347156508024e-05, - "loss": 0.1363, + "loss": 0.0835, "step": 376 }, { "epoch": 0.1, "learning_rate": 1.930049169681789e-05, - "loss": 0.0857, + "loss": 0.2034, "step": 377 }, { "epoch": 0.11, "learning_rate": 1.929863623712775e-05, - "loss": 0.3012, + "loss": 0.0816, "step": 378 }, { "epoch": 0.11, "learning_rate": 1.929678077743761e-05, - "loss": 0.1963, + "loss": 0.0215, "step": 379 }, { "epoch": 0.11, "learning_rate": 1.9294925317747472e-05, - "loss": 0.1928, + "loss": 0.0196, "step": 380 }, { "epoch": 0.11, "learning_rate": 1.9293069858057337e-05, - "loss": 0.083, + "loss": 0.0197, "step": 381 }, { "epoch": 0.11, "learning_rate": 1.92912143983672e-05, - "loss": 0.1919, + "loss": 0.0191, "step": 382 }, { "epoch": 0.11, "learning_rate": 1.9289358938677057e-05, - "loss": 0.1903, + "loss": 0.0803, "step": 383 }, { "epoch": 0.11, "learning_rate": 1.928750347898692e-05, - "loss": 0.2431, + "loss": 0.1371, "step": 384 }, { "epoch": 0.11, "learning_rate": 1.9285648019296785e-05, - "loss": 0.2363, + "loss": 0.1443, "step": 385 }, { "epoch": 0.11, "learning_rate": 1.9283792559606643e-05, - "loss": 0.0892, + "loss": 0.0777, "step": 386 }, { "epoch": 0.11, "learning_rate": 1.9281937099916505e-05, - "loss": 0.2401, + "loss": 0.0801, "step": 387 }, { "epoch": 0.11, "learning_rate": 1.9280081640226367e-05, - "loss": 0.1873, + "loss": 0.2054, "step": 388 }, { "epoch": 0.11, "learning_rate": 1.927822618053623e-05, - "loss": 0.1894, + "loss": 0.2127, "step": 389 }, { "epoch": 0.11, "learning_rate": 1.927637072084609e-05, - "loss": 0.2956, + "loss": 0.207, "step": 390 }, { "epoch": 0.11, "learning_rate": 1.9274515261155952e-05, - "loss": 0.2844, + "loss": 0.0776, "step": 391 }, { "epoch": 0.11, "learning_rate": 1.9272659801465814e-05, - "loss": 0.0517, + "loss": 0.1472, "step": 392 }, { "epoch": 0.11, "learning_rate": 1.9270804341775676e-05, - "loss": 0.2418, + "loss": 0.0793, "step": 393 }, { "epoch": 0.11, "learning_rate": 1.9268948882085538e-05, - "loss": 0.1433, + "loss": 0.2568, "step": 394 }, { "epoch": 0.11, "learning_rate": 1.92670934223954e-05, - "loss": 0.1002, + "loss": 0.2034, "step": 395 }, { "epoch": 0.11, "learning_rate": 1.9265237962705262e-05, - "loss": 0.054, + "loss": 0.029, "step": 396 }, { "epoch": 0.11, "learning_rate": 1.9263382503015124e-05, - "loss": 0.277, + "loss": 0.1347, "step": 397 }, { "epoch": 0.11, "learning_rate": 1.9261527043324985e-05, - "loss": 0.2301, + "loss": 0.2497, "step": 398 }, { "epoch": 0.11, "learning_rate": 1.9259671583634847e-05, - "loss": 0.2873, + "loss": 0.0887, "step": 399 }, { "epoch": 0.11, "learning_rate": 1.925781612394471e-05, - "loss": 0.1898, + "loss": 0.0892, "step": 400 }, { "epoch": 0.11, "learning_rate": 1.925596066425457e-05, - "loss": 0.2371, + "loss": 0.0893, "step": 401 }, { "epoch": 0.11, "learning_rate": 1.9254105204564433e-05, - "loss": 0.1912, + "loss": 0.1396, "step": 402 }, { "epoch": 0.11, "learning_rate": 1.9252249744874295e-05, - "loss": 0.1461, + "loss": 0.193, "step": 403 }, { "epoch": 0.11, "learning_rate": 1.9250394285184157e-05, - "loss": 0.1888, + "loss": 0.1393, "step": 404 }, { "epoch": 0.11, "learning_rate": 1.924853882549402e-05, - "loss": 0.1899, + "loss": 0.2031, "step": 405 }, { "epoch": 0.11, "learning_rate": 1.924668336580388e-05, - "loss": 0.1404, + "loss": 0.0374, "step": 406 }, { "epoch": 0.11, "learning_rate": 1.9244827906113742e-05, - "loss": 0.1874, + "loss": 0.2397, "step": 407 }, { "epoch": 0.11, "learning_rate": 1.92429724464236e-05, - "loss": 0.0967, + "loss": 0.0364, "step": 408 }, { "epoch": 0.11, "learning_rate": 1.9241116986733466e-05, - "loss": 0.1883, + "loss": 0.193, "step": 409 }, { "epoch": 0.11, "learning_rate": 1.9239261527043328e-05, - "loss": 0.1445, + "loss": 0.0868, "step": 410 }, { "epoch": 0.11, "learning_rate": 1.923740606735319e-05, - "loss": 0.0936, + "loss": 0.1367, "step": 411 }, { "epoch": 0.11, "learning_rate": 1.9235550607663048e-05, - "loss": 0.2842, + "loss": 0.1419, "step": 412 }, { "epoch": 0.11, "learning_rate": 1.9233695147972914e-05, - "loss": 0.1389, + "loss": 0.0887, "step": 413 }, { "epoch": 0.12, "learning_rate": 1.9231839688282775e-05, - "loss": 0.1387, + "loss": 0.1436, "step": 414 }, { "epoch": 0.12, "learning_rate": 1.9229984228592634e-05, - "loss": 0.1861, + "loss": 0.1434, "step": 415 }, { "epoch": 0.12, "learning_rate": 1.9228128768902496e-05, - "loss": 0.0415, + "loss": 0.0291, "step": 416 }, { "epoch": 0.12, "learning_rate": 1.922627330921236e-05, - "loss": 0.242, + "loss": 0.0829, "step": 417 }, { "epoch": 0.12, "learning_rate": 1.922441784952222e-05, - "loss": 0.0882, + "loss": 0.084, "step": 418 }, { "epoch": 0.12, "learning_rate": 1.922256238983208e-05, - "loss": 0.1347, + "loss": 0.19, "step": 419 }, { "epoch": 0.12, "learning_rate": 1.9220706930141943e-05, - "loss": 0.0364, + "loss": 0.1374, "step": 420 }, { "epoch": 0.12, "learning_rate": 1.921885147045181e-05, - "loss": 0.0871, + "loss": 0.0827, "step": 421 }, { "epoch": 0.12, "learning_rate": 1.9216996010761667e-05, - "loss": 0.0327, + "loss": 0.0246, "step": 422 }, { "epoch": 0.12, "learning_rate": 1.921514055107153e-05, - "loss": 0.0861, + "loss": 0.0221, "step": 423 }, { "epoch": 0.12, "learning_rate": 1.921328509138139e-05, - "loss": 0.246, + "loss": 0.0857, "step": 424 }, { "epoch": 0.12, "learning_rate": 1.9211429631691253e-05, - "loss": 0.1958, + "loss": 0.0211, "step": 425 }, { "epoch": 0.12, "learning_rate": 1.9209574172001114e-05, - "loss": 0.1416, + "loss": 0.0773, "step": 426 }, { "epoch": 0.12, "learning_rate": 1.9207718712310976e-05, - "loss": 0.0849, + "loss": 0.0181, "step": 427 }, { "epoch": 0.12, "learning_rate": 1.9205863252620838e-05, - "loss": 0.0863, + "loss": 0.1437, "step": 428 }, { "epoch": 0.12, "learning_rate": 1.92040077929307e-05, - "loss": 0.1416, + "loss": 0.0172, "step": 429 }, { "epoch": 0.12, "learning_rate": 1.9202152333240562e-05, - "loss": 0.2511, + "loss": 0.1503, "step": 430 }, { "epoch": 0.12, "learning_rate": 1.9200296873550424e-05, - "loss": 0.3139, + "loss": 0.0158, "step": 431 }, { "epoch": 0.12, "learning_rate": 1.9198441413860286e-05, - "loss": 0.1917, + "loss": 0.0806, "step": 432 }, { "epoch": 0.12, "learning_rate": 1.9196585954170148e-05, - "loss": 0.1973, + "loss": 0.082, "step": 433 }, { "epoch": 0.12, "learning_rate": 1.919473049448001e-05, - "loss": 0.2492, + "loss": 0.1462, "step": 434 }, { "epoch": 0.12, "learning_rate": 1.919287503478987e-05, - "loss": 0.1365, + "loss": 0.2086, "step": 435 }, { "epoch": 0.12, "learning_rate": 1.9191019575099733e-05, - "loss": 0.2427, + "loss": 0.1496, "step": 436 }, { "epoch": 0.12, "learning_rate": 1.9189164115409595e-05, - "loss": 0.2454, + "loss": 0.2181, "step": 437 }, { "epoch": 0.12, "learning_rate": 1.9187308655719457e-05, - "loss": 0.247, + "loss": 0.2772, "step": 438 }, { "epoch": 0.12, "learning_rate": 1.918545319602932e-05, - "loss": 0.2445, + "loss": 0.08, "step": 439 }, { "epoch": 0.12, "learning_rate": 1.918359773633918e-05, - "loss": 0.2366, + "loss": 0.0756, "step": 440 }, { "epoch": 0.12, "learning_rate": 1.9181742276649043e-05, - "loss": 0.1432, + "loss": 0.0828, "step": 441 }, { "epoch": 0.12, "learning_rate": 1.9179886816958904e-05, - "loss": 0.3311, + "loss": 0.0187, "step": 442 }, { "epoch": 0.12, "learning_rate": 1.9178031357268766e-05, - "loss": 0.0451, + "loss": 0.0187, "step": 443 }, { "epoch": 0.12, "learning_rate": 1.9176175897578625e-05, - "loss": 0.1433, + "loss": 0.0828, "step": 444 }, { "epoch": 0.12, "learning_rate": 1.917432043788849e-05, - "loss": 0.0985, + "loss": 0.1412, "step": 445 }, { "epoch": 0.12, "learning_rate": 1.9172464978198352e-05, - "loss": 0.1443, + "loss": 0.149, "step": 446 }, { "epoch": 0.12, "learning_rate": 1.917060951850821e-05, - "loss": 0.0958, + "loss": 0.1994, "step": 447 }, { "epoch": 0.12, "learning_rate": 1.9168754058818072e-05, - "loss": 0.1876, + "loss": 0.2563, "step": 448 }, { "epoch": 0.12, "learning_rate": 1.9166898599127937e-05, - "loss": 0.1866, + "loss": 0.1463, "step": 449 }, { "epoch": 0.13, "learning_rate": 1.91650431394378e-05, - "loss": 0.1443, + "loss": 0.2538, "step": 450 }, { "epoch": 0.13, "learning_rate": 1.9163187679747658e-05, - "loss": 0.0983, + "loss": 0.1389, "step": 451 }, { "epoch": 0.13, "learning_rate": 1.916133222005752e-05, - "loss": 0.1423, + "loss": 0.3577, "step": 452 }, { "epoch": 0.13, "learning_rate": 1.9159476760367385e-05, - "loss": 0.1425, + "loss": 0.1918, "step": 453 }, { "epoch": 0.13, "learning_rate": 1.9157621300677243e-05, - "loss": 0.0936, + "loss": 0.1401, "step": 454 }, { "epoch": 0.13, "learning_rate": 1.9155765840987105e-05, - "loss": 0.1955, + "loss": 0.1887, "step": 455 }, { "epoch": 0.13, "learning_rate": 1.9153910381296967e-05, - "loss": 0.041, + "loss": 0.1904, "step": 456 }, { "epoch": 0.13, "learning_rate": 1.915205492160683e-05, - "loss": 0.1926, + "loss": 0.1476, "step": 457 }, { "epoch": 0.13, "learning_rate": 1.915019946191669e-05, - "loss": 0.2412, + "loss": 0.2793, "step": 458 }, { "epoch": 0.13, "learning_rate": 1.9148344002226553e-05, - "loss": 0.1935, + "loss": 0.1477, "step": 459 }, { "epoch": 0.13, "learning_rate": 1.9146488542536415e-05, - "loss": 0.1902, + "loss": 0.0625, "step": 460 }, { "epoch": 0.13, "learning_rate": 1.9144633082846277e-05, - "loss": 0.1383, + "loss": 0.147, "step": 461 }, { "epoch": 0.13, "learning_rate": 1.914277762315614e-05, - "loss": 0.0881, + "loss": 0.1399, "step": 462 }, { "epoch": 0.13, "learning_rate": 1.9140922163466e-05, - "loss": 0.1937, + "loss": 0.1817, "step": 463 }, { "epoch": 0.13, "learning_rate": 1.9139066703775862e-05, - "loss": 0.1937, + "loss": 0.1813, "step": 464 }, { "epoch": 0.13, "learning_rate": 1.9137211244085724e-05, - "loss": 0.0353, + "loss": 0.0936, "step": 465 }, { "epoch": 0.13, "learning_rate": 1.9135355784395586e-05, - "loss": 0.1906, + "loss": 0.0861, "step": 466 }, { "epoch": 0.13, "learning_rate": 1.9133500324705448e-05, - "loss": 0.1459, + "loss": 0.0857, "step": 467 }, { "epoch": 0.13, "learning_rate": 1.913164486501531e-05, - "loss": 0.085, + "loss": 0.0336, "step": 468 }, { "epoch": 0.13, "learning_rate": 1.912978940532517e-05, - "loss": 0.1424, + "loss": 0.1398, "step": 469 }, { "epoch": 0.13, "learning_rate": 1.9127933945635033e-05, - "loss": 0.1934, + "loss": 0.0247, "step": 470 }, { "epoch": 0.13, "learning_rate": 1.9126078485944895e-05, - "loss": 0.2446, + "loss": 0.1389, "step": 471 }, { "epoch": 0.13, "learning_rate": 1.9124223026254757e-05, - "loss": 0.1325, + "loss": 0.1395, "step": 472 }, { "epoch": 0.13, "learning_rate": 1.9122367566564616e-05, - "loss": 0.0843, + "loss": 0.3881, "step": 473 }, { "epoch": 0.13, "learning_rate": 1.912051210687448e-05, - "loss": 0.1952, + "loss": 0.2012, "step": 474 }, { "epoch": 0.13, "learning_rate": 1.9118656647184343e-05, - "loss": 0.1921, + "loss": 0.3235, "step": 475 }, { "epoch": 0.13, "learning_rate": 1.91168011874942e-05, - "loss": 0.0866, + "loss": 0.0831, "step": 476 }, { "epoch": 0.13, "learning_rate": 1.9114945727804063e-05, - "loss": 0.1427, + "loss": 0.1402, "step": 477 }, { "epoch": 0.13, "learning_rate": 1.911309026811393e-05, - "loss": 0.1986, + "loss": 0.0272, "step": 478 }, { "epoch": 0.13, "learning_rate": 1.911123480842379e-05, - "loss": 0.1362, + "loss": 0.0817, "step": 479 }, { "epoch": 0.13, "learning_rate": 1.910937934873365e-05, - "loss": 0.2977, + "loss": 0.2552, "step": 480 }, { "epoch": 0.13, "learning_rate": 1.910752388904351e-05, - "loss": 0.034, + "loss": 0.0867, "step": 481 }, { "epoch": 0.13, "learning_rate": 1.9105668429353376e-05, - "loss": 0.2403, + "loss": 0.0815, "step": 482 }, { "epoch": 0.13, "learning_rate": 1.9103812969663234e-05, - "loss": 0.1939, + "loss": 0.1887, "step": 483 }, { "epoch": 0.13, "learning_rate": 1.9101957509973096e-05, - "loss": 0.0876, + "loss": 0.087, "step": 484 }, { "epoch": 0.13, "learning_rate": 1.9100102050282958e-05, - "loss": 0.239, + "loss": 0.2499, "step": 485 }, { "epoch": 0.14, "learning_rate": 1.909824659059282e-05, - "loss": 0.3498, + "loss": 0.2489, "step": 486 }, { "epoch": 0.14, "learning_rate": 1.9096391130902682e-05, - "loss": 0.2911, + "loss": 0.0907, "step": 487 }, { "epoch": 0.14, "learning_rate": 1.9094535671212544e-05, - "loss": 0.1922, + "loss": 0.0381, "step": 488 }, { "epoch": 0.14, "learning_rate": 1.9092680211522406e-05, - "loss": 0.042, + "loss": 0.1401, "step": 489 }, { "epoch": 0.14, "learning_rate": 1.9090824751832267e-05, - "loss": 0.1435, + "loss": 0.0858, "step": 490 }, { "epoch": 0.14, "learning_rate": 1.908896929214213e-05, - "loss": 0.1925, + "loss": 0.1414, "step": 491 }, { "epoch": 0.14, "learning_rate": 1.908711383245199e-05, - "loss": 0.1946, + "loss": 0.1358, "step": 492 }, { "epoch": 0.14, "learning_rate": 1.9085258372761853e-05, - "loss": 0.0968, + "loss": 0.0862, "step": 493 }, { "epoch": 0.14, "learning_rate": 1.9083402913071715e-05, - "loss": 0.1868, + "loss": 0.0308, "step": 494 }, { "epoch": 0.14, "learning_rate": 1.9081547453381577e-05, - "loss": 0.2683, + "loss": 0.2465, "step": 495 }, { "epoch": 0.14, "learning_rate": 1.907969199369144e-05, - "loss": 0.0469, + "loss": 0.0288, "step": 496 }, { "epoch": 0.14, "learning_rate": 1.90778365340013e-05, - "loss": 0.194, + "loss": 0.0836, "step": 497 }, { "epoch": 0.14, "learning_rate": 1.9075981074311162e-05, - "loss": 0.1454, + "loss": 0.0844, "step": 498 }, { "epoch": 0.14, "learning_rate": 1.9074125614621024e-05, - "loss": 0.1919, + "loss": 0.0805, "step": 499 }, { "epoch": 0.14, "learning_rate": 1.9072270154930886e-05, - "loss": 0.1911, + "loss": 0.0809, "step": 500 }, { "epoch": 0.14, "learning_rate": 1.9070414695240748e-05, - "loss": 0.0943, + "loss": 0.0218, "step": 501 }, { "epoch": 0.14, "learning_rate": 1.906855923555061e-05, - "loss": 0.1926, + "loss": 0.1386, "step": 502 }, { "epoch": 0.14, "learning_rate": 1.9066703775860472e-05, - "loss": 0.1877, + "loss": 0.1465, "step": 503 }, { "epoch": 0.14, "learning_rate": 1.9064848316170334e-05, - "loss": 0.0916, + "loss": 0.2663, "step": 504 }, { "epoch": 0.14, "learning_rate": 1.9062992856480192e-05, - "loss": 0.1861, + "loss": 0.08, "step": 505 }, { "epoch": 0.14, "learning_rate": 1.9061137396790057e-05, - "loss": 0.1874, + "loss": 0.1399, "step": 506 }, { "epoch": 0.14, "learning_rate": 1.905928193709992e-05, - "loss": 0.0437, + "loss": 0.1423, "step": 507 }, { "epoch": 0.14, "learning_rate": 1.905742647740978e-05, - "loss": 0.2888, + "loss": 0.1365, "step": 508 }, { "epoch": 0.14, "learning_rate": 1.905557101771964e-05, - "loss": 0.1903, + "loss": 0.0852, "step": 509 }, { "epoch": 0.14, "learning_rate": 1.9053715558029505e-05, - "loss": 0.2934, + "loss": 0.261, "step": 510 }, { "epoch": 0.14, "learning_rate": 1.9051860098339367e-05, - "loss": 0.1397, + "loss": 0.1426, "step": 511 }, { "epoch": 0.14, "learning_rate": 1.9050004638649225e-05, - "loss": 0.2432, + "loss": 0.1384, "step": 512 }, { "epoch": 0.14, "learning_rate": 1.9048149178959087e-05, - "loss": 0.2878, + "loss": 0.026, "step": 513 }, { "epoch": 0.14, "learning_rate": 1.9046293719268952e-05, - "loss": 0.2813, + "loss": 0.3005, "step": 514 }, { "epoch": 0.14, "learning_rate": 1.904443825957881e-05, - "loss": 0.2424, + "loss": 0.1949, "step": 515 }, { "epoch": 0.14, "learning_rate": 1.9042582799888673e-05, - "loss": 0.1912, + "loss": 0.0864, "step": 516 }, { "epoch": 0.14, "learning_rate": 1.9040727340198535e-05, - "loss": 0.0512, + "loss": 0.1391, "step": 517 }, { "epoch": 0.14, "learning_rate": 1.90388718805084e-05, - "loss": 0.2811, + "loss": 0.0867, "step": 518 }, { "epoch": 0.14, "learning_rate": 1.9037016420818258e-05, - "loss": 0.05, + "loss": 0.0376, "step": 519 }, { "epoch": 0.14, "learning_rate": 1.903516096112812e-05, - "loss": 0.138, + "loss": 0.1387, "step": 520 }, { "epoch": 0.15, "learning_rate": 1.9033305501437982e-05, - "loss": 0.1485, + "loss": 0.1397, "step": 521 }, { "epoch": 0.15, "learning_rate": 1.9031450041747844e-05, - "loss": 0.1406, + "loss": 0.0883, "step": 522 }, { "epoch": 0.15, "learning_rate": 1.9029594582057706e-05, - "loss": 0.2298, + "loss": 0.137, "step": 523 }, { "epoch": 0.15, "learning_rate": 1.9027739122367568e-05, - "loss": 0.1478, + "loss": 0.0871, "step": 524 }, { "epoch": 0.15, "learning_rate": 1.902588366267743e-05, - "loss": 0.1414, + "loss": 0.1385, "step": 525 }, { "epoch": 0.15, "learning_rate": 1.902402820298729e-05, - "loss": 0.0929, + "loss": 0.1343, "step": 526 }, { "epoch": 0.15, "learning_rate": 1.9022172743297153e-05, - "loss": 0.2376, + "loss": 0.1377, "step": 527 }, { "epoch": 0.15, "learning_rate": 1.9020317283607015e-05, - "loss": 0.095, + "loss": 0.1378, "step": 528 }, { "epoch": 0.15, "learning_rate": 1.9018461823916877e-05, - "loss": 0.1909, + "loss": 0.0276, "step": 529 }, { "epoch": 0.15, "learning_rate": 1.901660636422674e-05, - "loss": 0.1383, + "loss": 0.0263, "step": 530 }, { "epoch": 0.15, "learning_rate": 1.90147509045366e-05, - "loss": 0.137, + "loss": 0.1359, "step": 531 }, { "epoch": 0.15, "learning_rate": 1.9012895444846463e-05, - "loss": 0.0931, + "loss": 0.259, "step": 532 }, { "epoch": 0.15, "learning_rate": 1.9011039985156324e-05, - "loss": 0.1443, + "loss": 0.1389, "step": 533 }, { "epoch": 0.15, "learning_rate": 1.9009184525466186e-05, - "loss": 0.1885, + "loss": 0.0814, "step": 534 }, { "epoch": 0.15, "learning_rate": 1.9007329065776048e-05, - "loss": 0.2411, + "loss": 0.0826, "step": 535 }, { "epoch": 0.15, "learning_rate": 1.900547360608591e-05, - "loss": 0.1363, + "loss": 0.1948, "step": 536 }, { "epoch": 0.15, "learning_rate": 1.9003618146395772e-05, - "loss": 0.1374, + "loss": 0.0237, "step": 537 }, { "epoch": 0.15, "learning_rate": 1.9001762686705634e-05, - "loss": 0.0894, + "loss": 0.0247, "step": 538 }, { "epoch": 0.15, "learning_rate": 1.8999907227015496e-05, - "loss": 0.2485, + "loss": 0.2007, "step": 539 }, { "epoch": 0.15, "learning_rate": 1.8998051767325358e-05, - "loss": 0.2366, + "loss": 0.1418, "step": 540 }, { "epoch": 0.15, "learning_rate": 1.8996196307635216e-05, - "loss": 0.0362, + "loss": 0.2, "step": 541 }, { "epoch": 0.15, "learning_rate": 1.899434084794508e-05, - "loss": 0.1389, + "loss": 0.1979, "step": 542 }, { "epoch": 0.15, "learning_rate": 1.8992485388254943e-05, - "loss": 0.1387, + "loss": 0.0262, "step": 543 }, { "epoch": 0.15, "learning_rate": 1.89906299285648e-05, - "loss": 0.0839, + "loss": 0.2027, "step": 544 }, { "epoch": 0.15, "learning_rate": 1.8988774468874663e-05, - "loss": 0.0841, + "loss": 0.0814, "step": 545 }, { "epoch": 0.15, "learning_rate": 1.898691900918453e-05, - "loss": 0.0884, + "loss": 0.0272, "step": 546 }, { "epoch": 0.15, "learning_rate": 1.898506354949439e-05, - "loss": 0.0869, + "loss": 0.1384, "step": 547 }, { "epoch": 0.15, "learning_rate": 1.898320808980425e-05, - "loss": 0.1883, + "loss": 0.0827, "step": 548 }, { "epoch": 0.15, "learning_rate": 1.898135263011411e-05, - "loss": 0.1873, + "loss": 0.1365, "step": 549 }, { "epoch": 0.15, "learning_rate": 1.8979497170423976e-05, - "loss": 0.1883, + "loss": 0.1392, "step": 550 }, { "epoch": 0.15, "learning_rate": 1.8977641710733835e-05, - "loss": 0.2459, + "loss": 0.256, "step": 551 }, { "epoch": 0.15, "learning_rate": 1.8975786251043697e-05, - "loss": 0.08, + "loss": 0.1384, "step": 552 }, { "epoch": 0.15, "learning_rate": 1.897393079135356e-05, - "loss": 0.1895, + "loss": 0.0277, "step": 553 }, { "epoch": 0.15, "learning_rate": 1.8972075331663424e-05, - "loss": 0.0824, + "loss": 0.1425, "step": 554 }, { "epoch": 0.15, "learning_rate": 1.8970219871973282e-05, - "loss": 0.1402, + "loss": 0.1977, "step": 555 }, { "epoch": 0.15, "learning_rate": 1.8968364412283144e-05, - "loss": 0.0874, + "loss": 0.0292, "step": 556 }, { "epoch": 0.16, "learning_rate": 1.8966508952593006e-05, - "loss": 0.2468, + "loss": 0.2451, "step": 557 }, { "epoch": 0.16, "learning_rate": 1.8964653492902868e-05, - "loss": 0.085, + "loss": 0.1404, "step": 558 }, { "epoch": 0.16, "learning_rate": 1.896279803321273e-05, - "loss": 0.1996, + "loss": 0.0292, "step": 559 }, { "epoch": 0.16, "learning_rate": 1.896094257352259e-05, - "loss": 0.2485, + "loss": 0.2398, "step": 560 }, { "epoch": 0.16, "learning_rate": 1.8959087113832453e-05, - "loss": 0.0838, + "loss": 0.0825, "step": 561 }, { "epoch": 0.16, "learning_rate": 1.8957231654142315e-05, - "loss": 0.3482, + "loss": 0.03, "step": 562 }, { "epoch": 0.16, "learning_rate": 1.8955376194452177e-05, - "loss": 0.1365, + "loss": 0.085, "step": 563 }, { "epoch": 0.16, "learning_rate": 1.895352073476204e-05, - "loss": 0.2969, + "loss": 0.0826, "step": 564 }, { "epoch": 0.16, "learning_rate": 1.89516652750719e-05, - "loss": 0.1347, + "loss": 0.1971, "step": 565 }, { "epoch": 0.16, "learning_rate": 1.8949809815381763e-05, - "loss": 0.0851, + "loss": 0.0841, "step": 566 }, { "epoch": 0.16, "learning_rate": 1.8947954355691625e-05, - "loss": 0.1892, + "loss": 0.083, "step": 567 }, { "epoch": 0.16, "learning_rate": 1.8946098896001487e-05, - "loss": 0.0845, + "loss": 0.0254, "step": 568 }, { "epoch": 0.16, "learning_rate": 1.894424343631135e-05, - "loss": 0.0895, + "loss": 0.0806, "step": 569 }, { "epoch": 0.16, "learning_rate": 1.894238797662121e-05, - "loss": 0.0855, + "loss": 0.0804, "step": 570 }, { "epoch": 0.16, "learning_rate": 1.8940532516931072e-05, - "loss": 0.1932, + "loss": 0.2042, "step": 571 }, { "epoch": 0.16, "learning_rate": 1.8938677057240934e-05, - "loss": 0.2414, + "loss": 0.0798, "step": 572 }, { "epoch": 0.16, "learning_rate": 1.8936821597550792e-05, - "loss": 0.2423, + "loss": 0.0192, "step": 573 }, { "epoch": 0.16, "learning_rate": 1.8934966137860658e-05, - "loss": 0.1898, + "loss": 0.2004, "step": 574 }, { "epoch": 0.16, "learning_rate": 1.893311067817052e-05, - "loss": 0.1406, + "loss": 0.2693, "step": 575 }, { "epoch": 0.16, "learning_rate": 1.893125521848038e-05, - "loss": 0.1454, + "loss": 0.1449, "step": 576 }, { "epoch": 0.16, "learning_rate": 1.892939975879024e-05, - "loss": 0.1882, + "loss": 0.0813, "step": 577 }, { "epoch": 0.16, "learning_rate": 1.8927544299100105e-05, - "loss": 0.0923, + "loss": 0.0223, "step": 578 }, { "epoch": 0.16, "learning_rate": 1.8925688839409967e-05, - "loss": 0.0929, + "loss": 0.2012, "step": 579 }, { "epoch": 0.16, "learning_rate": 1.8923833379719826e-05, - "loss": 0.1428, + "loss": 0.0795, "step": 580 }, { "epoch": 0.16, "learning_rate": 1.8921977920029687e-05, - "loss": 0.3895, + "loss": 0.1391, "step": 581 }, { "epoch": 0.16, "learning_rate": 1.8920122460339553e-05, - "loss": 0.0912, + "loss": 0.1409, "step": 582 }, { "epoch": 0.16, "learning_rate": 1.8918267000649415e-05, - "loss": 0.0925, + "loss": 0.2498, "step": 583 }, { "epoch": 0.16, "learning_rate": 1.8916411540959273e-05, - "loss": 0.0914, + "loss": 0.0856, "step": 584 }, { "epoch": 0.16, "learning_rate": 1.8914556081269135e-05, - "loss": 0.0926, + "loss": 0.194, "step": 585 }, { "epoch": 0.16, "learning_rate": 1.8912700621579e-05, - "loss": 0.1866, + "loss": 0.0325, "step": 586 }, { "epoch": 0.16, "learning_rate": 1.891084516188886e-05, - "loss": 0.0869, + "loss": 0.0853, "step": 587 }, { "epoch": 0.16, "learning_rate": 1.890898970219872e-05, - "loss": 0.1408, + "loss": 0.0331, "step": 588 }, { "epoch": 0.16, "learning_rate": 1.8907134242508582e-05, - "loss": 0.1398, + "loss": 0.1858, "step": 589 }, { "epoch": 0.16, "learning_rate": 1.8905278782818444e-05, - "loss": 0.1369, + "loss": 0.0814, "step": 590 }, { "epoch": 0.16, "learning_rate": 1.8903423323128306e-05, - "loss": 0.136, + "loss": 0.0297, "step": 591 }, { "epoch": 0.16, "learning_rate": 1.8901567863438168e-05, - "loss": 0.0862, + "loss": 0.0276, "step": 592 }, { "epoch": 0.17, "learning_rate": 1.889971240374803e-05, - "loss": 0.2443, + "loss": 0.1453, "step": 593 }, { "epoch": 0.17, "learning_rate": 1.8897856944057892e-05, - "loss": 0.2474, + "loss": 0.0789, "step": 594 }, { "epoch": 0.17, "learning_rate": 1.8896001484367754e-05, - "loss": 0.0324, + "loss": 0.1393, "step": 595 }, { "epoch": 0.17, "learning_rate": 1.8894146024677616e-05, - "loss": 0.2429, + "loss": 0.0808, "step": 596 }, { "epoch": 0.17, "learning_rate": 1.8892290564987477e-05, - "loss": 0.0875, + "loss": 0.0845, "step": 597 }, { "epoch": 0.17, "learning_rate": 1.889043510529734e-05, - "loss": 0.1416, + "loss": 0.0814, "step": 598 }, { "epoch": 0.17, "learning_rate": 1.88885796456072e-05, - "loss": 0.1871, + "loss": 0.1403, "step": 599 }, { "epoch": 0.17, "learning_rate": 1.8886724185917063e-05, - "loss": 0.2495, + "loss": 0.1432, "step": 600 }, { "epoch": 0.17, "learning_rate": 1.8884868726226925e-05, - "loss": 0.1917, + "loss": 0.0198, "step": 601 }, { "epoch": 0.17, "learning_rate": 1.8883013266536787e-05, - "loss": 0.1424, + "loss": 0.0198, "step": 602 }, { "epoch": 0.17, "learning_rate": 1.888115780684665e-05, - "loss": 0.0346, + "loss": 0.2102, "step": 603 }, { "epoch": 0.17, "learning_rate": 1.887930234715651e-05, - "loss": 0.0839, + "loss": 0.0193, "step": 604 }, { "epoch": 0.17, "learning_rate": 1.8877446887466372e-05, - "loss": 0.1931, + "loss": 0.0803, "step": 605 }, { "epoch": 0.17, "learning_rate": 1.8875591427776234e-05, - "loss": 0.0875, + "loss": 0.0181, "step": 606 }, { "epoch": 0.17, "learning_rate": 1.8873735968086096e-05, - "loss": 0.0863, + "loss": 0.1419, "step": 607 }, { "epoch": 0.17, "learning_rate": 1.8871880508395958e-05, - "loss": 0.1763, + "loss": 0.0768, "step": 608 }, { "epoch": 0.17, "learning_rate": 1.8870025048705816e-05, - "loss": 0.1408, + "loss": 0.2057, "step": 609 }, { "epoch": 0.17, "learning_rate": 1.886816958901568e-05, - "loss": 0.1382, + "loss": 0.2009, "step": 610 }, { "epoch": 0.17, "learning_rate": 1.8866314129325544e-05, - "loss": 0.1361, + "loss": 0.1325, "step": 611 }, { "epoch": 0.17, "learning_rate": 1.8864458669635402e-05, - "loss": 0.2503, + "loss": 0.1417, "step": 612 }, { "epoch": 0.17, "learning_rate": 1.8862603209945264e-05, - "loss": 0.1973, + "loss": 0.0783, "step": 613 }, { "epoch": 0.17, "learning_rate": 1.8860747750255126e-05, - "loss": 0.2489, + "loss": 0.0223, "step": 614 }, { "epoch": 0.17, "learning_rate": 1.885889229056499e-05, - "loss": 0.0336, + "loss": 0.0819, "step": 615 }, { "epoch": 0.17, "learning_rate": 1.885703683087485e-05, - "loss": 0.3505, + "loss": 0.0816, "step": 616 }, { "epoch": 0.17, "learning_rate": 1.885518137118471e-05, - "loss": 0.1392, + "loss": 0.1424, "step": 617 }, { "epoch": 0.17, "learning_rate": 1.8853325911494573e-05, - "loss": 0.1397, + "loss": 0.0835, "step": 618 }, { "epoch": 0.17, "learning_rate": 1.8851470451804435e-05, - "loss": 0.1379, + "loss": 0.0788, "step": 619 }, { "epoch": 0.17, "learning_rate": 1.8849614992114297e-05, - "loss": 0.1439, + "loss": 0.1939, "step": 620 }, { "epoch": 0.17, "learning_rate": 1.884775953242416e-05, - "loss": 0.1388, + "loss": 0.143, "step": 621 }, { "epoch": 0.17, "learning_rate": 1.884590407273402e-05, - "loss": 0.1465, + "loss": 0.1397, "step": 622 }, { "epoch": 0.17, "learning_rate": 1.8844048613043883e-05, - "loss": 0.0404, + "loss": 0.1427, "step": 623 }, { "epoch": 0.17, "learning_rate": 1.8842193153353744e-05, - "loss": 0.1895, + "loss": 0.0794, "step": 624 }, { "epoch": 0.17, "learning_rate": 1.8840337693663606e-05, - "loss": 0.245, + "loss": 0.0262, "step": 625 }, { "epoch": 0.17, "learning_rate": 1.8838482233973468e-05, - "loss": 0.0392, + "loss": 0.1336, "step": 626 }, { "epoch": 0.17, "learning_rate": 1.883662677428333e-05, - "loss": 0.1939, + "loss": 0.1968, "step": 627 }, { "epoch": 0.17, "learning_rate": 1.8834771314593192e-05, - "loss": 0.2942, + "loss": 0.086, "step": 628 }, { "epoch": 0.18, "learning_rate": 1.8832915854903054e-05, - "loss": 0.0906, + "loss": 0.144, "step": 629 }, { "epoch": 0.18, "learning_rate": 1.8831060395212916e-05, - "loss": 0.1967, + "loss": 0.1914, "step": 630 }, { "epoch": 0.18, "learning_rate": 1.8829204935522778e-05, - "loss": 0.1432, + "loss": 0.0839, "step": 631 }, { "epoch": 0.18, "learning_rate": 1.882734947583264e-05, - "loss": 0.1455, + "loss": 0.0293, "step": 632 }, { "epoch": 0.18, "learning_rate": 1.88254940161425e-05, - "loss": 0.1409, + "loss": 0.0274, "step": 633 }, { "epoch": 0.18, "learning_rate": 1.8823638556452363e-05, - "loss": 0.0854, + "loss": 0.0861, "step": 634 }, { "epoch": 0.18, "learning_rate": 1.8821783096762225e-05, - "loss": 0.1416, + "loss": 0.1993, "step": 635 }, { "epoch": 0.18, "learning_rate": 1.8819927637072087e-05, - "loss": 0.0892, + "loss": 0.139, "step": 636 }, { "epoch": 0.18, "learning_rate": 1.881807217738195e-05, - "loss": 0.0366, + "loss": 0.0239, "step": 637 }, { "epoch": 0.18, "learning_rate": 1.8816216717691807e-05, - "loss": 0.0328, + "loss": 0.086, "step": 638 }, { "epoch": 0.18, "learning_rate": 1.8814361258001673e-05, - "loss": 0.0833, + "loss": 0.2093, "step": 639 }, { "epoch": 0.18, "learning_rate": 1.8812505798311534e-05, - "loss": 0.1433, + "loss": 0.1366, "step": 640 }, { "epoch": 0.18, "learning_rate": 1.8810650338621393e-05, - "loss": 0.1939, + "loss": 0.1368, "step": 641 }, { "epoch": 0.18, "learning_rate": 1.8808794878931255e-05, - "loss": 0.1443, + "loss": 0.08, "step": 642 }, { "epoch": 0.18, "learning_rate": 1.880693941924112e-05, - "loss": 0.2485, + "loss": 0.1412, "step": 643 }, { "epoch": 0.18, "learning_rate": 1.8805083959550982e-05, - "loss": 0.2017, + "loss": 0.0822, "step": 644 }, { "epoch": 0.18, "learning_rate": 1.880322849986084e-05, - "loss": 0.3584, + "loss": 0.0784, "step": 645 }, { "epoch": 0.18, "learning_rate": 1.8801373040170702e-05, - "loss": 0.0806, + "loss": 0.1446, "step": 646 }, { "epoch": 0.18, "learning_rate": 1.8799517580480568e-05, - "loss": 0.1373, + "loss": 0.0808, "step": 647 }, { "epoch": 0.18, "learning_rate": 1.8797662120790426e-05, - "loss": 0.0848, + "loss": 0.1406, "step": 648 }, { "epoch": 0.18, "learning_rate": 1.8795806661100288e-05, - "loss": 0.4113, + "loss": 0.0791, "step": 649 }, { "epoch": 0.18, "learning_rate": 1.879395120141015e-05, - "loss": 0.1426, + "loss": 0.0838, "step": 650 }, { "epoch": 0.18, "learning_rate": 1.8792095741720015e-05, - "loss": 0.0879, + "loss": 0.1981, "step": 651 }, { "epoch": 0.18, "learning_rate": 1.8790240282029873e-05, - "loss": 0.142, + "loss": 0.0814, "step": 652 }, { "epoch": 0.18, "learning_rate": 1.8788384822339735e-05, - "loss": 0.0343, + "loss": 0.0824, "step": 653 }, { "epoch": 0.18, "learning_rate": 1.8786529362649597e-05, - "loss": 0.1895, + "loss": 0.14, "step": 654 }, { "epoch": 0.18, "learning_rate": 1.878467390295946e-05, - "loss": 0.1398, + "loss": 0.0231, "step": 655 }, { "epoch": 0.18, "learning_rate": 1.878281844326932e-05, - "loss": 0.194, + "loss": 0.0233, "step": 656 }, { "epoch": 0.18, "learning_rate": 1.8780962983579183e-05, - "loss": 0.0892, + "loss": 0.1392, "step": 657 }, { "epoch": 0.18, "learning_rate": 1.8779107523889045e-05, - "loss": 0.136, + "loss": 0.0187, "step": 658 }, { "epoch": 0.18, "learning_rate": 1.8777252064198907e-05, - "loss": 0.1929, + "loss": 0.1393, "step": 659 }, { "epoch": 0.18, "learning_rate": 1.877539660450877e-05, - "loss": 0.1401, + "loss": 0.0832, "step": 660 }, { "epoch": 0.18, "learning_rate": 1.877354114481863e-05, - "loss": 0.1914, + "loss": 0.0783, "step": 661 }, { "epoch": 0.18, "learning_rate": 1.8771685685128492e-05, - "loss": 0.1414, + "loss": 0.08, "step": 662 }, { "epoch": 0.18, "learning_rate": 1.8769830225438354e-05, - "loss": 0.1396, + "loss": 0.2705, "step": 663 }, { "epoch": 0.18, "learning_rate": 1.8767974765748216e-05, - "loss": 0.09, + "loss": 0.2067, "step": 664 }, { "epoch": 0.19, "learning_rate": 1.8766119306058078e-05, - "loss": 0.086, + "loss": 0.084, "step": 665 }, { "epoch": 0.19, "learning_rate": 1.876426384636794e-05, - "loss": 0.1386, + "loss": 0.0787, "step": 666 }, { "epoch": 0.19, "learning_rate": 1.87624083866778e-05, - "loss": 0.1911, + "loss": 0.0858, "step": 667 }, { "epoch": 0.19, "learning_rate": 1.8760552926987663e-05, - "loss": 0.2376, + "loss": 0.1871, "step": 668 }, { "epoch": 0.19, "learning_rate": 1.8758697467297525e-05, - "loss": 0.187, + "loss": 0.035, "step": 669 }, { "epoch": 0.19, "learning_rate": 1.8756842007607384e-05, - "loss": 0.1416, + "loss": 0.0838, "step": 670 }, { "epoch": 0.19, "learning_rate": 1.875498654791725e-05, - "loss": 0.3411, + "loss": 0.0855, "step": 671 }, { "epoch": 0.19, "learning_rate": 1.875313108822711e-05, - "loss": 0.0852, + "loss": 0.1364, "step": 672 }, { "epoch": 0.19, "learning_rate": 1.8751275628536973e-05, - "loss": 0.1876, + "loss": 0.1967, "step": 673 }, { "epoch": 0.19, "learning_rate": 1.874942016884683e-05, - "loss": 0.1857, + "loss": 0.0295, "step": 674 }, { "epoch": 0.19, "learning_rate": 1.8747564709156697e-05, - "loss": 0.1382, + "loss": 0.1383, "step": 675 }, { "epoch": 0.19, "learning_rate": 1.874570924946656e-05, - "loss": 0.1897, + "loss": 0.0876, "step": 676 }, { "epoch": 0.19, "learning_rate": 1.8743853789776417e-05, - "loss": 0.1436, + "loss": 0.0267, "step": 677 }, { "epoch": 0.19, "learning_rate": 1.874199833008628e-05, - "loss": 0.1436, + "loss": 0.0814, "step": 678 }, { "epoch": 0.19, "learning_rate": 1.8740142870396144e-05, - "loss": 0.1483, + "loss": 0.0815, "step": 679 }, { "epoch": 0.19, "learning_rate": 1.8738287410706006e-05, - "loss": 0.0463, + "loss": 0.2013, "step": 680 }, { "epoch": 0.19, "learning_rate": 1.8736431951015864e-05, - "loss": 0.2409, + "loss": 0.0828, "step": 681 }, { "epoch": 0.19, "learning_rate": 1.8734576491325726e-05, - "loss": 0.1879, + "loss": 0.0828, "step": 682 }, { "epoch": 0.19, "learning_rate": 1.873272103163559e-05, - "loss": 0.0922, + "loss": 0.1432, "step": 683 }, { "epoch": 0.19, "learning_rate": 1.873086557194545e-05, - "loss": 0.2384, + "loss": 0.0212, "step": 684 }, { "epoch": 0.19, "learning_rate": 1.8729010112255312e-05, - "loss": 0.0361, + "loss": 0.2667, "step": 685 }, { "epoch": 0.19, "learning_rate": 1.8727154652565174e-05, - "loss": 0.25, + "loss": 0.0204, "step": 686 }, { "epoch": 0.19, "learning_rate": 1.8725299192875036e-05, - "loss": 0.0845, + "loss": 0.2642, "step": 687 }, { "epoch": 0.19, "learning_rate": 1.8723443733184897e-05, - "loss": 0.1385, + "loss": 0.0834, "step": 688 }, { "epoch": 0.19, "learning_rate": 1.872158827349476e-05, - "loss": 0.1349, + "loss": 0.0827, "step": 689 }, { "epoch": 0.19, "learning_rate": 1.871973281380462e-05, - "loss": 0.1401, + "loss": 0.1416, "step": 690 }, { "epoch": 0.19, "learning_rate": 1.8717877354114483e-05, - "loss": 0.1384, + "loss": 0.0823, "step": 691 }, { "epoch": 0.19, "learning_rate": 1.8716021894424345e-05, - "loss": 0.0873, + "loss": 0.1378, "step": 692 }, { "epoch": 0.19, "learning_rate": 1.8714166434734207e-05, - "loss": 0.1911, + "loss": 0.0869, "step": 693 }, { "epoch": 0.19, "learning_rate": 1.871231097504407e-05, - "loss": 0.1955, + "loss": 0.1383, "step": 694 }, { "epoch": 0.19, "learning_rate": 1.871045551535393e-05, - "loss": 0.137, + "loss": 0.0332, "step": 695 }, { "epoch": 0.19, "learning_rate": 1.8708600055663792e-05, - "loss": 0.1353, + "loss": 0.0826, "step": 696 }, { "epoch": 0.19, "learning_rate": 1.8706744595973654e-05, - "loss": 0.1982, + "loss": 0.1411, "step": 697 }, { "epoch": 0.19, "learning_rate": 1.8704889136283516e-05, - "loss": 0.1922, + "loss": 0.1404, "step": 698 }, { "epoch": 0.19, "learning_rate": 1.8703033676593378e-05, - "loss": 0.1961, + "loss": 0.0219, "step": 699 }, { "epoch": 0.19, "learning_rate": 1.870117821690324e-05, - "loss": 0.1886, + "loss": 0.1467, "step": 700 }, { "epoch": 0.2, "learning_rate": 1.8699322757213102e-05, - "loss": 0.3536, + "loss": 0.0208, "step": 701 }, { "epoch": 0.2, "learning_rate": 1.8697467297522964e-05, - "loss": 0.0868, + "loss": 0.0199, "step": 702 }, { "epoch": 0.2, "learning_rate": 1.8695611837832825e-05, - "loss": 0.1397, + "loss": 0.3242, "step": 703 }, { "epoch": 0.2, "learning_rate": 1.8693756378142687e-05, - "loss": 0.1369, + "loss": 0.0196, "step": 704 }, { "epoch": 0.2, "learning_rate": 1.869190091845255e-05, - "loss": 0.0894, + "loss": 0.081, "step": 705 }, { "epoch": 0.2, "learning_rate": 1.8690045458762408e-05, - "loss": 0.2964, + "loss": 0.1493, "step": 706 }, { "epoch": 0.2, "learning_rate": 1.8688189999072273e-05, - "loss": 0.0872, + "loss": 0.0766, "step": 707 }, { "epoch": 0.2, "learning_rate": 1.8686334539382135e-05, - "loss": 0.1473, + "loss": 0.0839, "step": 708 }, { "epoch": 0.2, "learning_rate": 1.8684479079691997e-05, - "loss": 0.0406, + "loss": 0.0811, "step": 709 }, { "epoch": 0.2, "learning_rate": 1.8682623620001855e-05, - "loss": 0.293, + "loss": 0.2627, "step": 710 }, { "epoch": 0.2, "learning_rate": 1.868076816031172e-05, - "loss": 0.1408, + "loss": 0.3265, "step": 711 }, { "epoch": 0.2, "learning_rate": 1.8678912700621582e-05, - "loss": 0.2396, + "loss": 0.0812, "step": 712 }, { "epoch": 0.2, "learning_rate": 1.867705724093144e-05, - "loss": 0.1943, + "loss": 0.2646, "step": 713 }, { "epoch": 0.2, "learning_rate": 1.8675201781241303e-05, - "loss": 0.1915, + "loss": 0.1397, "step": 714 }, { "epoch": 0.2, "learning_rate": 1.8673346321551168e-05, - "loss": 0.1465, + "loss": 0.1947, "step": 715 }, { "epoch": 0.2, "learning_rate": 1.8671490861861026e-05, - "loss": 0.0945, + "loss": 0.1951, "step": 716 }, { "epoch": 0.2, "learning_rate": 1.8669635402170888e-05, - "loss": 0.432, + "loss": 0.0284, "step": 717 }, { "epoch": 0.2, "learning_rate": 1.866777994248075e-05, - "loss": 0.0898, + "loss": 0.0293, "step": 718 }, { "epoch": 0.2, "learning_rate": 1.8665924482790615e-05, - "loss": 0.0428, + "loss": 0.086, "step": 719 }, { "epoch": 0.2, "learning_rate": 1.8664069023100474e-05, - "loss": 0.1935, + "loss": 0.2515, "step": 720 }, { "epoch": 0.2, "learning_rate": 1.8662213563410336e-05, - "loss": 0.1906, + "loss": 0.0814, "step": 721 }, { "epoch": 0.2, "learning_rate": 1.8660358103720198e-05, - "loss": 0.0911, + "loss": 0.1879, "step": 722 }, { "epoch": 0.2, "learning_rate": 1.865850264403006e-05, - "loss": 0.0427, + "loss": 0.1409, "step": 723 }, { "epoch": 0.2, "learning_rate": 1.865664718433992e-05, - "loss": 0.0394, + "loss": 0.1314, "step": 724 }, { "epoch": 0.2, "learning_rate": 1.8654791724649783e-05, - "loss": 0.1369, + "loss": 0.1425, "step": 725 }, { "epoch": 0.2, "learning_rate": 1.8652936264959645e-05, - "loss": 0.089, + "loss": 0.1905, "step": 726 }, { "epoch": 0.2, "learning_rate": 1.8651080805269507e-05, - "loss": 0.2476, + "loss": 0.1371, "step": 727 }, { "epoch": 0.2, "learning_rate": 1.864922534557937e-05, - "loss": 0.1865, + "loss": 0.1382, "step": 728 }, { "epoch": 0.2, "learning_rate": 1.864736988588923e-05, - "loss": 0.0903, + "loss": 0.182, "step": 729 }, { "epoch": 0.2, "learning_rate": 1.8645514426199093e-05, - "loss": 0.0314, + "loss": 0.1915, "step": 730 }, { "epoch": 0.2, "learning_rate": 1.8643658966508954e-05, - "loss": 0.0309, + "loss": 0.1418, "step": 731 }, { "epoch": 0.2, "learning_rate": 1.8641803506818816e-05, - "loss": 0.1417, + "loss": 0.186, "step": 732 }, { "epoch": 0.2, "learning_rate": 1.8639948047128678e-05, - "loss": 0.0878, + "loss": 0.0428, "step": 733 }, { "epoch": 0.2, "learning_rate": 1.863809258743854e-05, - "loss": 0.0856, + "loss": 0.0897, "step": 734 }, { "epoch": 0.2, "learning_rate": 1.8636237127748402e-05, - "loss": 0.138, + "loss": 0.0415, "step": 735 }, { "epoch": 0.2, "learning_rate": 1.8634381668058264e-05, - "loss": 0.1359, + "loss": 0.1873, "step": 736 }, { "epoch": 0.21, "learning_rate": 1.8632526208368126e-05, - "loss": 0.199, + "loss": 0.1444, "step": 737 }, { "epoch": 0.21, "learning_rate": 1.8630670748677984e-05, - "loss": 0.2567, + "loss": 0.0342, "step": 738 }, { "epoch": 0.21, "learning_rate": 1.862881528898785e-05, - "loss": 0.26, + "loss": 0.0302, "step": 739 }, { "epoch": 0.21, "learning_rate": 1.862695982929771e-05, - "loss": 0.2569, + "loss": 0.0843, "step": 740 }, { "epoch": 0.21, "learning_rate": 1.8625104369607573e-05, - "loss": 0.1975, + "loss": 0.0254, "step": 741 }, { "epoch": 0.21, "learning_rate": 1.862324890991743e-05, - "loss": 0.0266, + "loss": 0.1382, "step": 742 }, { "epoch": 0.21, "learning_rate": 1.8621393450227297e-05, - "loss": 0.0785, + "loss": 0.2597, "step": 743 }, { "epoch": 0.21, "learning_rate": 1.861953799053716e-05, - "loss": 0.2531, + "loss": 0.3214, "step": 744 }, { "epoch": 0.21, "learning_rate": 1.8617682530847017e-05, - "loss": 0.2506, + "loss": 0.1972, "step": 745 }, { "epoch": 0.21, "learning_rate": 1.861582707115688e-05, - "loss": 0.1417, + "loss": 0.0208, "step": 746 }, { "epoch": 0.21, "learning_rate": 1.8613971611466744e-05, - "loss": 0.0304, + "loss": 0.197, "step": 747 }, { "epoch": 0.21, "learning_rate": 1.8612116151776606e-05, - "loss": 0.303, + "loss": 0.2009, "step": 748 }, { "epoch": 0.21, "learning_rate": 1.8610260692086465e-05, - "loss": 0.1356, + "loss": 0.1421, "step": 749 }, { "epoch": 0.21, "learning_rate": 1.8608405232396327e-05, - "loss": 0.0819, + "loss": 0.078, "step": 750 }, { "epoch": 0.21, "learning_rate": 1.8606549772706192e-05, - "loss": 0.2461, + "loss": 0.0817, "step": 751 }, { "epoch": 0.21, "learning_rate": 1.860469431301605e-05, - "loss": 0.0344, + "loss": 0.3113, "step": 752 }, { "epoch": 0.21, "learning_rate": 1.8602838853325912e-05, - "loss": 0.1405, + "loss": 0.1415, "step": 753 }, { "epoch": 0.21, "learning_rate": 1.8600983393635774e-05, - "loss": 0.1382, + "loss": 0.2554, "step": 754 }, { "epoch": 0.21, "learning_rate": 1.8599127933945636e-05, - "loss": 0.1903, + "loss": 0.0295, "step": 755 }, { "epoch": 0.21, "learning_rate": 1.8597272474255498e-05, - "loss": 0.2482, + "loss": 0.0872, "step": 756 }, { "epoch": 0.21, "learning_rate": 1.859541701456536e-05, - "loss": 0.1392, + "loss": 0.2471, "step": 757 }, { "epoch": 0.21, "learning_rate": 1.859356155487522e-05, - "loss": 0.2406, + "loss": 0.242, "step": 758 }, { "epoch": 0.21, "learning_rate": 1.8591706095185083e-05, - "loss": 0.0889, + "loss": 0.0894, "step": 759 }, { "epoch": 0.21, "learning_rate": 1.8589850635494945e-05, - "loss": 0.2868, + "loss": 0.0909, "step": 760 }, { "epoch": 0.21, "learning_rate": 1.8587995175804807e-05, - "loss": 0.0921, + "loss": 0.1355, "step": 761 }, { "epoch": 0.21, "learning_rate": 1.858613971611467e-05, - "loss": 0.0409, + "loss": 0.1941, "step": 762 }, { "epoch": 0.21, "learning_rate": 1.858428425642453e-05, - "loss": 0.1915, + "loss": 0.091, "step": 763 }, { "epoch": 0.21, "learning_rate": 1.8582428796734393e-05, - "loss": 0.2436, + "loss": 0.0904, "step": 764 }, { "epoch": 0.21, "learning_rate": 1.8580573337044255e-05, - "loss": 0.141, + "loss": 0.0822, "step": 765 }, { "epoch": 0.21, "learning_rate": 1.8578717877354117e-05, - "loss": 0.2465, + "loss": 0.0404, "step": 766 }, { "epoch": 0.21, "learning_rate": 1.8576862417663975e-05, - "loss": 0.2915, + "loss": 0.089, "step": 767 }, { "epoch": 0.21, "learning_rate": 1.857500695797384e-05, - "loss": 0.0451, + "loss": 0.1391, "step": 768 }, { "epoch": 0.21, "learning_rate": 1.8573151498283702e-05, - "loss": 0.1407, + "loss": 0.0333, "step": 769 }, { "epoch": 0.21, "learning_rate": 1.8571296038593564e-05, - "loss": 0.0924, + "loss": 0.139, "step": 770 }, { "epoch": 0.21, "learning_rate": 1.8569440578903423e-05, - "loss": 0.146, + "loss": 0.0824, "step": 771 }, { "epoch": 0.21, "learning_rate": 1.8567585119213288e-05, - "loss": 0.2357, + "loss": 0.0821, "step": 772 }, { "epoch": 0.22, "learning_rate": 1.856572965952315e-05, - "loss": 0.0432, + "loss": 0.0784, "step": 773 }, { "epoch": 0.22, "learning_rate": 1.8563874199833008e-05, - "loss": 0.0405, + "loss": 0.0804, "step": 774 }, { "epoch": 0.22, "learning_rate": 1.856201874014287e-05, - "loss": 0.1433, + "loss": 0.1405, "step": 775 }, { "epoch": 0.22, "learning_rate": 1.8560163280452735e-05, - "loss": 0.1868, + "loss": 0.198, "step": 776 }, { "epoch": 0.22, "learning_rate": 1.8558307820762597e-05, - "loss": 0.0381, + "loss": 0.0192, "step": 777 }, { "epoch": 0.22, "learning_rate": 1.8556452361072456e-05, - "loss": 0.1903, + "loss": 0.0836, "step": 778 }, { "epoch": 0.22, "learning_rate": 1.8554596901382317e-05, - "loss": 0.0856, + "loss": 0.081, "step": 779 }, { "epoch": 0.22, "learning_rate": 1.8552741441692183e-05, - "loss": 0.0862, + "loss": 0.0162, "step": 780 }, { "epoch": 0.22, "learning_rate": 1.855088598200204e-05, - "loss": 0.0893, + "loss": 0.1432, "step": 781 }, { "epoch": 0.22, "learning_rate": 1.8549030522311903e-05, - "loss": 0.1415, + "loss": 0.1492, "step": 782 }, { "epoch": 0.22, "learning_rate": 1.8547175062621765e-05, - "loss": 0.1946, + "loss": 0.1469, "step": 783 }, { "epoch": 0.22, "learning_rate": 1.8545319602931627e-05, - "loss": 0.1444, + "loss": 0.0841, "step": 784 }, { "epoch": 0.22, "learning_rate": 1.854346414324149e-05, - "loss": 0.189, + "loss": 0.016, "step": 785 }, { "epoch": 0.22, "learning_rate": 1.854160868355135e-05, - "loss": 0.1425, + "loss": 0.1436, "step": 786 }, { "epoch": 0.22, "learning_rate": 1.8539753223861212e-05, - "loss": 0.2529, + "loss": 0.078, "step": 787 }, { "epoch": 0.22, "learning_rate": 1.8537897764171074e-05, - "loss": 0.0835, + "loss": 0.2758, "step": 788 }, { "epoch": 0.22, "learning_rate": 1.8536042304480936e-05, - "loss": 0.1891, + "loss": 0.079, "step": 789 }, { "epoch": 0.22, "learning_rate": 1.8534186844790798e-05, - "loss": 0.1903, + "loss": 0.325, "step": 790 }, { "epoch": 0.22, "learning_rate": 1.853233138510066e-05, - "loss": 0.2476, + "loss": 0.0209, "step": 791 }, { "epoch": 0.22, "learning_rate": 1.8530475925410522e-05, - "loss": 0.2527, + "loss": 0.1376, "step": 792 }, { "epoch": 0.22, "learning_rate": 1.8528620465720384e-05, - "loss": 0.3016, + "loss": 0.1378, "step": 793 }, { "epoch": 0.22, "learning_rate": 1.8526765006030246e-05, - "loss": 0.0331, + "loss": 0.1911, "step": 794 }, { "epoch": 0.22, "learning_rate": 1.8524909546340107e-05, - "loss": 0.1358, + "loss": 0.0811, "step": 795 }, { "epoch": 0.22, "learning_rate": 1.852305408664997e-05, - "loss": 0.1878, + "loss": 0.0807, "step": 796 }, { "epoch": 0.22, "learning_rate": 1.852119862695983e-05, - "loss": 0.1373, + "loss": 0.1365, "step": 797 }, { "epoch": 0.22, "learning_rate": 1.8519343167269693e-05, - "loss": 0.1362, + "loss": 0.0828, "step": 798 }, { "epoch": 0.22, "learning_rate": 1.8517487707579555e-05, - "loss": 0.1388, + "loss": 0.1358, "step": 799 }, { "epoch": 0.22, "learning_rate": 1.8515632247889417e-05, - "loss": 0.3926, + "loss": 0.0821, "step": 800 }, { "epoch": 0.22, "learning_rate": 1.851377678819928e-05, - "loss": 0.0892, + "loss": 0.1919, "step": 801 }, { "epoch": 0.22, "learning_rate": 1.851192132850914e-05, - "loss": 0.038, + "loss": 0.0308, "step": 802 }, { "epoch": 0.22, "learning_rate": 1.8510065868819e-05, - "loss": 0.1363, + "loss": 0.1338, "step": 803 }, { "epoch": 0.22, "learning_rate": 1.8508210409128864e-05, - "loss": 0.0868, + "loss": 0.2985, "step": 804 }, { "epoch": 0.22, "learning_rate": 1.8506354949438726e-05, - "loss": 0.1916, + "loss": 0.1393, "step": 805 }, { "epoch": 0.22, "learning_rate": 1.8504499489748588e-05, - "loss": 0.0409, + "loss": 0.1392, "step": 806 }, { "epoch": 0.22, "learning_rate": 1.8502644030058446e-05, - "loss": 0.0897, + "loss": 0.0843, "step": 807 }, { "epoch": 0.22, "learning_rate": 1.8500788570368312e-05, - "loss": 0.1381, + "loss": 0.1421, "step": 808 }, { "epoch": 0.23, "learning_rate": 1.8498933110678174e-05, - "loss": 0.1902, + "loss": 0.1428, "step": 809 }, { "epoch": 0.23, "learning_rate": 1.8497077650988032e-05, - "loss": 0.14, + "loss": 0.0882, "step": 810 }, { "epoch": 0.23, "learning_rate": 1.8495222191297894e-05, - "loss": 0.1945, + "loss": 0.1366, "step": 811 }, { "epoch": 0.23, "learning_rate": 1.849336673160776e-05, - "loss": 0.1384, + "loss": 0.0328, "step": 812 }, { "epoch": 0.23, "learning_rate": 1.8491511271917618e-05, - "loss": 0.1394, + "loss": 0.0859, "step": 813 }, { "epoch": 0.23, "learning_rate": 1.848965581222748e-05, - "loss": 0.1906, + "loss": 0.0825, "step": 814 }, { "epoch": 0.23, "learning_rate": 1.848780035253734e-05, - "loss": 0.0356, + "loss": 0.084, "step": 815 }, { "epoch": 0.23, "learning_rate": 1.8485944892847207e-05, - "loss": 0.0866, + "loss": 0.1358, "step": 816 }, { "epoch": 0.23, "learning_rate": 1.8484089433157065e-05, - "loss": 0.142, + "loss": 0.1411, "step": 817 }, { "epoch": 0.23, "learning_rate": 1.8482233973466927e-05, - "loss": 0.1389, + "loss": 0.0248, "step": 818 }, { "epoch": 0.23, "learning_rate": 1.848037851377679e-05, - "loss": 0.1972, + "loss": 0.0252, "step": 819 }, { "epoch": 0.23, "learning_rate": 1.847852305408665e-05, - "loss": 0.2426, + "loss": 0.1963, "step": 820 }, { "epoch": 0.23, "learning_rate": 1.8476667594396513e-05, - "loss": 0.1371, + "loss": 0.1912, "step": 821 }, { "epoch": 0.23, "learning_rate": 1.8474812134706375e-05, - "loss": 0.1386, + "loss": 0.1442, "step": 822 }, { "epoch": 0.23, "learning_rate": 1.8472956675016236e-05, - "loss": 0.2433, + "loss": 0.1422, "step": 823 }, { "epoch": 0.23, "learning_rate": 1.8471101215326098e-05, - "loss": 0.0328, + "loss": 0.0821, "step": 824 }, { "epoch": 0.23, "learning_rate": 1.846924575563596e-05, - "loss": 0.1913, + "loss": 0.0204, "step": 825 }, { "epoch": 0.23, "learning_rate": 1.8467390295945822e-05, - "loss": 0.2465, + "loss": 0.079, "step": 826 }, { "epoch": 0.23, "learning_rate": 1.8465534836255684e-05, - "loss": 0.0853, + "loss": 0.0199, "step": 827 }, { "epoch": 0.23, "learning_rate": 1.8463679376565546e-05, - "loss": 0.0879, + "loss": 0.1435, "step": 828 }, { "epoch": 0.23, "learning_rate": 1.8461823916875408e-05, - "loss": 0.2489, + "loss": 0.0173, "step": 829 }, { "epoch": 0.23, "learning_rate": 1.845996845718527e-05, - "loss": 0.2957, + "loss": 0.2703, "step": 830 }, { "epoch": 0.23, "learning_rate": 1.845811299749513e-05, - "loss": 0.1924, + "loss": 0.2066, "step": 831 }, { "epoch": 0.23, "learning_rate": 1.8456257537804993e-05, - "loss": 0.0833, + "loss": 0.0189, "step": 832 }, { "epoch": 0.23, "learning_rate": 1.8454402078114855e-05, - "loss": 0.3483, + "loss": 0.1413, "step": 833 }, { "epoch": 0.23, "learning_rate": 1.8452546618424717e-05, - "loss": 0.1378, + "loss": 0.0187, "step": 834 }, { "epoch": 0.23, "learning_rate": 1.845069115873458e-05, - "loss": 0.2427, + "loss": 0.1334, "step": 835 }, { "epoch": 0.23, "learning_rate": 1.844883569904444e-05, - "loss": 0.2362, + "loss": 0.0207, "step": 836 }, { "epoch": 0.23, "learning_rate": 1.8446980239354303e-05, - "loss": 0.0423, + "loss": 0.08, "step": 837 }, { "epoch": 0.23, "learning_rate": 1.8445124779664164e-05, - "loss": 0.0948, + "loss": 0.1339, "step": 838 }, { "epoch": 0.23, "learning_rate": 1.8443269319974023e-05, - "loss": 0.0909, + "loss": 0.0751, "step": 839 }, { "epoch": 0.23, "learning_rate": 1.8441413860283888e-05, - "loss": 0.489, + "loss": 0.2674, "step": 840 }, { "epoch": 0.23, "learning_rate": 1.843955840059375e-05, - "loss": 0.1409, + "loss": 0.1931, "step": 841 }, { "epoch": 0.23, "learning_rate": 1.843770294090361e-05, - "loss": 0.1425, + "loss": 0.0827, "step": 842 }, { "epoch": 0.23, "learning_rate": 1.843584748121347e-05, - "loss": 0.2924, + "loss": 0.024, "step": 843 }, { "epoch": 0.23, "learning_rate": 1.8433992021523336e-05, - "loss": 0.2415, + "loss": 0.0815, "step": 844 }, { "epoch": 0.24, "learning_rate": 1.8432136561833198e-05, - "loss": 0.1375, + "loss": 0.082, "step": 845 }, { "epoch": 0.24, "learning_rate": 1.8430281102143056e-05, - "loss": 0.2825, + "loss": 0.0808, "step": 846 }, { "epoch": 0.24, "learning_rate": 1.8428425642452918e-05, - "loss": 0.1938, + "loss": 0.1374, "step": 847 }, { "epoch": 0.24, "learning_rate": 1.8426570182762783e-05, - "loss": 0.0965, + "loss": 0.0248, "step": 848 }, { "epoch": 0.24, "learning_rate": 1.842471472307264e-05, - "loss": 0.1933, + "loss": 0.2411, "step": 849 }, { "epoch": 0.24, "learning_rate": 1.8422859263382504e-05, - "loss": 0.1385, + "loss": 0.2028, "step": 850 }, { "epoch": 0.24, "learning_rate": 1.8421003803692365e-05, - "loss": 0.2861, + "loss": 0.137, "step": 851 }, { "epoch": 0.24, "learning_rate": 1.8419148344002227e-05, - "loss": 0.0494, + "loss": 0.1371, "step": 852 }, { "epoch": 0.24, "learning_rate": 1.841729288431209e-05, - "loss": 0.2347, + "loss": 0.0292, "step": 853 }, { "epoch": 0.24, "learning_rate": 1.841543742462195e-05, - "loss": 0.14, + "loss": 0.0285, "step": 854 }, { "epoch": 0.24, "learning_rate": 1.8413581964931813e-05, - "loss": 0.3759, + "loss": 0.2391, "step": 855 }, { "epoch": 0.24, "learning_rate": 1.8411726505241675e-05, - "loss": 0.096, + "loss": 0.4086, "step": 856 }, { "epoch": 0.24, "learning_rate": 1.8409871045551537e-05, - "loss": 0.1878, + "loss": 0.0794, "step": 857 }, { "epoch": 0.24, "learning_rate": 1.84080155858614e-05, - "loss": 0.1433, + "loss": 0.0305, "step": 858 }, { "epoch": 0.24, "learning_rate": 1.840616012617126e-05, - "loss": 0.1443, + "loss": 0.0357, "step": 859 }, { "epoch": 0.24, "learning_rate": 1.8404304666481122e-05, - "loss": 0.0944, + "loss": 0.1862, "step": 860 }, { "epoch": 0.24, "learning_rate": 1.8402449206790984e-05, - "loss": 0.1437, + "loss": 0.1371, "step": 861 }, { "epoch": 0.24, "learning_rate": 1.8400593747100846e-05, - "loss": 0.1432, + "loss": 0.0875, "step": 862 }, { "epoch": 0.24, "learning_rate": 1.8398738287410708e-05, - "loss": 0.1936, + "loss": 0.0836, "step": 863 }, { "epoch": 0.24, "learning_rate": 1.839688282772057e-05, - "loss": 0.0923, + "loss": 0.0789, "step": 864 }, { "epoch": 0.24, "learning_rate": 1.839502736803043e-05, - "loss": 0.1886, + "loss": 0.2, "step": 865 }, { "epoch": 0.24, "learning_rate": 1.8393171908340293e-05, - "loss": 0.1934, + "loss": 0.0284, "step": 866 }, { "epoch": 0.24, "learning_rate": 1.8391316448650155e-05, - "loss": 0.1908, + "loss": 0.0822, "step": 867 }, { "epoch": 0.24, "learning_rate": 1.8389460988960017e-05, - "loss": 0.1917, + "loss": 0.1857, "step": 868 }, { "epoch": 0.24, "learning_rate": 1.838760552926988e-05, - "loss": 0.1447, + "loss": 0.0805, "step": 869 }, { "epoch": 0.24, "learning_rate": 1.838575006957974e-05, - "loss": 0.1428, + "loss": 0.1942, "step": 870 }, { "epoch": 0.24, "learning_rate": 1.83838946098896e-05, - "loss": 0.3384, + "loss": 0.0806, "step": 871 }, { "epoch": 0.24, "learning_rate": 1.8382039150199465e-05, - "loss": 0.1922, + "loss": 0.1359, "step": 872 }, { "epoch": 0.24, "learning_rate": 1.8380183690509327e-05, - "loss": 0.1947, + "loss": 0.1381, "step": 873 }, { "epoch": 0.24, "learning_rate": 1.837832823081919e-05, - "loss": 0.0913, + "loss": 0.0852, "step": 874 }, { "epoch": 0.24, "learning_rate": 1.8376472771129047e-05, - "loss": 0.088, + "loss": 0.0799, "step": 875 }, { "epoch": 0.24, "learning_rate": 1.8374617311438912e-05, - "loss": 0.1428, + "loss": 0.1909, "step": 876 }, { "epoch": 0.24, "learning_rate": 1.8372761851748774e-05, - "loss": 0.2466, + "loss": 0.0801, "step": 877 }, { "epoch": 0.24, "learning_rate": 1.8370906392058633e-05, - "loss": 0.2431, + "loss": 0.0776, "step": 878 }, { "epoch": 0.24, "learning_rate": 1.8369050932368494e-05, - "loss": 0.1949, + "loss": 0.1338, "step": 879 }, { "epoch": 0.24, "learning_rate": 1.836719547267836e-05, - "loss": 0.091, + "loss": 0.0798, "step": 880 }, { "epoch": 0.25, "learning_rate": 1.8365340012988218e-05, - "loss": 0.1399, + "loss": 0.1352, "step": 881 }, { "epoch": 0.25, "learning_rate": 1.836348455329808e-05, - "loss": 0.1432, + "loss": 0.073, "step": 882 }, { "epoch": 0.25, "learning_rate": 1.8361629093607942e-05, - "loss": 0.0402, + "loss": 0.1309, "step": 883 }, { "epoch": 0.25, "learning_rate": 1.8359773633917807e-05, - "loss": 0.192, + "loss": 0.1902, "step": 884 }, { "epoch": 0.25, "learning_rate": 1.8357918174227666e-05, - "loss": 0.1901, + "loss": 0.0218, "step": 885 }, { "epoch": 0.25, "learning_rate": 1.8356062714537527e-05, - "loss": 0.1403, + "loss": 0.1394, "step": 886 }, { "epoch": 0.25, "learning_rate": 1.835420725484739e-05, - "loss": 0.1434, + "loss": 0.0795, "step": 887 }, { "epoch": 0.25, "learning_rate": 1.835235179515725e-05, - "loss": 0.1412, + "loss": 0.0188, "step": 888 }, { "epoch": 0.25, "learning_rate": 1.8350496335467113e-05, - "loss": 0.1369, + "loss": 0.076, "step": 889 }, { "epoch": 0.25, "learning_rate": 1.8348640875776975e-05, - "loss": 0.1896, + "loss": 0.1501, "step": 890 }, { "epoch": 0.25, "learning_rate": 1.8346785416086837e-05, - "loss": 0.2451, + "loss": 0.0783, "step": 891 }, { "epoch": 0.25, "learning_rate": 1.83449299563967e-05, - "loss": 0.138, + "loss": 0.1339, "step": 892 }, { "epoch": 0.25, "learning_rate": 1.834307449670656e-05, - "loss": 0.1896, + "loss": 0.0788, "step": 893 }, { "epoch": 0.25, "learning_rate": 1.8341219037016422e-05, - "loss": 0.1409, + "loss": 0.0779, "step": 894 }, { "epoch": 0.25, "learning_rate": 1.8339363577326284e-05, - "loss": 0.0376, + "loss": 0.1405, "step": 895 }, { "epoch": 0.25, "learning_rate": 1.8337508117636146e-05, - "loss": 0.2419, + "loss": 0.08, "step": 896 }, { "epoch": 0.25, "learning_rate": 1.8335652657946008e-05, - "loss": 0.1412, + "loss": 0.0796, "step": 897 }, { "epoch": 0.25, "learning_rate": 1.833379719825587e-05, - "loss": 0.1925, + "loss": 0.1926, "step": 898 }, { "epoch": 0.25, "learning_rate": 1.8331941738565732e-05, - "loss": 0.1394, + "loss": 0.1844, "step": 899 }, { "epoch": 0.25, "learning_rate": 1.8330086278875594e-05, - "loss": 0.1398, + "loss": 0.0841, "step": 900 }, { "epoch": 0.25, "learning_rate": 1.8328230819185456e-05, - "loss": 0.1395, + "loss": 0.0277, "step": 901 }, { "epoch": 0.25, "learning_rate": 1.8326375359495317e-05, - "loss": 0.1925, + "loss": 0.0791, "step": 902 }, { "epoch": 0.25, "learning_rate": 1.832451989980518e-05, - "loss": 0.2827, + "loss": 0.1955, "step": 903 }, { "epoch": 0.25, "learning_rate": 1.832266444011504e-05, - "loss": 0.2443, + "loss": 0.1369, "step": 904 }, { "epoch": 0.25, "learning_rate": 1.8320808980424903e-05, - "loss": 0.0887, + "loss": 0.1381, "step": 905 }, { "epoch": 0.25, "learning_rate": 1.8318953520734765e-05, - "loss": 0.1946, + "loss": 0.03, "step": 906 }, { "epoch": 0.25, "learning_rate": 1.8317098061044623e-05, - "loss": 0.2436, + "loss": 0.1846, "step": 907 }, { "epoch": 0.25, "learning_rate": 1.831524260135449e-05, - "loss": 0.0916, + "loss": 0.0851, "step": 908 }, { "epoch": 0.25, "learning_rate": 1.831338714166435e-05, - "loss": 0.0885, + "loss": 0.1351, "step": 909 }, { "epoch": 0.25, "learning_rate": 1.831153168197421e-05, - "loss": 0.2862, + "loss": 0.0285, "step": 910 }, { "epoch": 0.25, "learning_rate": 1.830967622228407e-05, - "loss": 0.1871, + "loss": 0.2467, "step": 911 }, { "epoch": 0.25, "learning_rate": 1.8307820762593933e-05, - "loss": 0.137, + "loss": 0.1333, "step": 912 }, { "epoch": 0.25, "learning_rate": 1.8305965302903798e-05, - "loss": 0.2377, + "loss": 0.0818, "step": 913 }, { "epoch": 0.25, "learning_rate": 1.8304109843213656e-05, - "loss": 0.0431, + "loss": 0.1341, "step": 914 }, { "epoch": 0.25, "learning_rate": 1.830225438352352e-05, - "loss": 0.0432, + "loss": 0.0869, "step": 915 }, { "epoch": 0.25, "learning_rate": 1.830039892383338e-05, - "loss": 0.2377, + "loss": 0.0252, "step": 916 }, { "epoch": 0.26, "learning_rate": 1.8298543464143242e-05, - "loss": 0.0914, + "loss": 0.205, "step": 917 }, { "epoch": 0.26, "learning_rate": 1.8296688004453104e-05, - "loss": 0.0884, + "loss": 0.0822, "step": 918 }, { "epoch": 0.26, "learning_rate": 1.8294832544762966e-05, - "loss": 0.2419, + "loss": 0.1833, "step": 919 }, { "epoch": 0.26, "learning_rate": 1.8292977085072828e-05, - "loss": 0.1946, + "loss": 0.1899, "step": 920 }, { "epoch": 0.26, "learning_rate": 1.829112162538269e-05, - "loss": 0.0896, + "loss": 0.1942, "step": 921 }, { "epoch": 0.26, "learning_rate": 1.828926616569255e-05, - "loss": 0.2902, + "loss": 0.0319, "step": 922 }, { "epoch": 0.26, "learning_rate": 1.8287410706002413e-05, - "loss": 0.2425, + "loss": 0.0334, "step": 923 }, { "epoch": 0.26, "learning_rate": 1.8285555246312275e-05, - "loss": 0.2425, + "loss": 0.081, "step": 924 }, { "epoch": 0.26, "learning_rate": 1.8283699786622137e-05, - "loss": 0.1819, + "loss": 0.0302, "step": 925 }, { "epoch": 0.26, "learning_rate": 1.8281844326932e-05, - "loss": 0.1408, + "loss": 0.2353, "step": 926 }, { "epoch": 0.26, "learning_rate": 1.827998886724186e-05, - "loss": 0.1401, + "loss": 0.0255, "step": 927 }, { "epoch": 0.26, "learning_rate": 1.8278133407551723e-05, - "loss": 0.1358, + "loss": 0.0779, "step": 928 }, { "epoch": 0.26, "learning_rate": 1.8276277947861585e-05, - "loss": 0.1915, + "loss": 0.1893, "step": 929 }, { "epoch": 0.26, "learning_rate": 1.8274422488171446e-05, - "loss": 0.0416, + "loss": 0.0822, "step": 930 }, { "epoch": 0.26, "learning_rate": 1.8272567028481308e-05, - "loss": 0.1444, + "loss": 0.1322, "step": 931 }, { "epoch": 0.26, "learning_rate": 1.827071156879117e-05, - "loss": 0.1403, + "loss": 0.0746, "step": 932 }, { "epoch": 0.26, "learning_rate": 1.8268856109101032e-05, - "loss": 0.2426, + "loss": 0.1905, "step": 933 }, { "epoch": 0.26, "learning_rate": 1.8267000649410894e-05, - "loss": 0.142, + "loss": 0.192, "step": 934 }, { "epoch": 0.26, "learning_rate": 1.8265145189720756e-05, - "loss": 0.2423, + "loss": 0.023, "step": 935 }, { "epoch": 0.26, "learning_rate": 1.8263289730030614e-05, - "loss": 0.142, + "loss": 0.1333, "step": 936 }, { "epoch": 0.26, "learning_rate": 1.826143427034048e-05, - "loss": 0.2408, + "loss": 0.0796, "step": 937 }, { "epoch": 0.26, "learning_rate": 1.825957881065034e-05, - "loss": 0.1389, + "loss": 0.023, "step": 938 }, { "epoch": 0.26, "learning_rate": 1.82577233509602e-05, - "loss": 0.1324, + "loss": 0.1857, "step": 939 }, { "epoch": 0.26, "learning_rate": 1.8255867891270062e-05, - "loss": 0.0878, + "loss": 0.0206, "step": 940 }, { "epoch": 0.26, "learning_rate": 1.8254012431579927e-05, - "loss": 0.1356, + "loss": 0.1932, "step": 941 }, { "epoch": 0.26, "learning_rate": 1.825215697188979e-05, - "loss": 0.0403, + "loss": 0.0724, "step": 942 }, { "epoch": 0.26, "learning_rate": 1.8250301512199647e-05, - "loss": 0.1413, + "loss": 0.0783, "step": 943 }, { "epoch": 0.26, "learning_rate": 1.824844605250951e-05, - "loss": 0.2335, + "loss": 0.1351, "step": 944 }, { "epoch": 0.26, "learning_rate": 1.8246590592819374e-05, - "loss": 0.1869, + "loss": 0.0802, "step": 945 }, { "epoch": 0.26, "learning_rate": 1.8244735133129233e-05, - "loss": 0.0885, + "loss": 0.2364, "step": 946 }, { "epoch": 0.26, "learning_rate": 1.8242879673439095e-05, - "loss": 0.1915, + "loss": 0.0846, "step": 947 }, { "epoch": 0.26, "learning_rate": 1.8241024213748957e-05, - "loss": 0.1364, + "loss": 0.0847, "step": 948 }, { "epoch": 0.26, "learning_rate": 1.8239168754058822e-05, - "loss": 0.1402, + "loss": 0.0845, "step": 949 }, { "epoch": 0.26, "learning_rate": 1.823731329436868e-05, - "loss": 0.1951, + "loss": 0.0783, "step": 950 }, { "epoch": 0.26, "learning_rate": 1.8235457834678542e-05, - "loss": 0.3039, + "loss": 0.0815, "step": 951 }, { "epoch": 0.26, "learning_rate": 1.8233602374988404e-05, - "loss": 0.188, + "loss": 0.0805, "step": 952 }, { "epoch": 0.27, "learning_rate": 1.8231746915298266e-05, - "loss": 0.0365, + "loss": 0.0873, "step": 953 }, { "epoch": 0.27, "learning_rate": 1.8229891455608128e-05, - "loss": 0.194, + "loss": 0.252, "step": 954 }, { "epoch": 0.27, "learning_rate": 1.822803599591799e-05, - "loss": 0.3407, + "loss": 0.247, "step": 955 }, { "epoch": 0.27, "learning_rate": 1.822618053622785e-05, - "loss": 0.09, + "loss": 0.025, "step": 956 }, { "epoch": 0.27, "learning_rate": 1.8224325076537714e-05, - "loss": 0.1911, + "loss": 0.0842, "step": 957 }, { "epoch": 0.27, "learning_rate": 1.8222469616847575e-05, - "loss": 0.3917, + "loss": 0.1347, "step": 958 }, { "epoch": 0.27, "learning_rate": 1.8220614157157437e-05, - "loss": 0.1908, + "loss": 0.0277, "step": 959 }, { "epoch": 0.27, "learning_rate": 1.82187586974673e-05, - "loss": 0.0416, + "loss": 0.191, "step": 960 }, { "epoch": 0.27, "learning_rate": 1.821690323777716e-05, - "loss": 0.1943, + "loss": 0.0789, "step": 961 }, { "epoch": 0.27, "learning_rate": 1.8215047778087023e-05, - "loss": 0.2426, + "loss": 0.13, "step": 962 }, { "epoch": 0.27, "learning_rate": 1.8213192318396885e-05, - "loss": 0.0875, + "loss": 0.2965, "step": 963 }, { "epoch": 0.27, "learning_rate": 1.8211336858706747e-05, - "loss": 0.1412, + "loss": 0.1324, "step": 964 }, { "epoch": 0.27, "learning_rate": 1.820948139901661e-05, - "loss": 0.2407, + "loss": 0.029, "step": 965 }, { "epoch": 0.27, "learning_rate": 1.820762593932647e-05, - "loss": 0.0431, + "loss": 0.196, "step": 966 }, { "epoch": 0.27, "learning_rate": 1.8205770479636332e-05, - "loss": 0.1934, + "loss": 0.1401, "step": 967 }, { "epoch": 0.27, "learning_rate": 1.820391501994619e-05, - "loss": 0.0962, + "loss": 0.0872, "step": 968 }, { "epoch": 0.27, "learning_rate": 1.8202059560256056e-05, - "loss": 0.0919, + "loss": 0.2414, "step": 969 }, { "epoch": 0.27, "learning_rate": 1.8200204100565918e-05, - "loss": 0.1399, + "loss": 0.0874, "step": 970 }, { "epoch": 0.27, "learning_rate": 1.819834864087578e-05, - "loss": 0.0938, + "loss": 0.0879, "step": 971 }, { "epoch": 0.27, "learning_rate": 1.8196493181185638e-05, - "loss": 0.0895, + "loss": 0.1824, "step": 972 }, { "epoch": 0.27, "learning_rate": 1.8194637721495503e-05, - "loss": 0.0856, + "loss": 0.0821, "step": 973 }, { "epoch": 0.27, "learning_rate": 1.8192782261805365e-05, - "loss": 0.0881, + "loss": 0.0802, "step": 974 }, { "epoch": 0.27, "learning_rate": 1.8190926802115224e-05, - "loss": 0.0846, + "loss": 0.0802, "step": 975 }, { "epoch": 0.27, "learning_rate": 1.8189071342425086e-05, - "loss": 0.1936, + "loss": 0.1313, "step": 976 }, { "epoch": 0.27, "learning_rate": 1.818721588273495e-05, - "loss": 0.2447, + "loss": 0.0789, "step": 977 }, { "epoch": 0.27, "learning_rate": 1.818536042304481e-05, - "loss": 0.0864, + "loss": 0.028, "step": 978 }, { "epoch": 0.27, "learning_rate": 1.818350496335467e-05, - "loss": 0.303, + "loss": 0.0802, "step": 979 }, { "epoch": 0.27, "learning_rate": 1.8181649503664533e-05, - "loss": 0.089, + "loss": 0.0856, "step": 980 }, { "epoch": 0.27, "learning_rate": 1.81797940439744e-05, - "loss": 0.0837, + "loss": 0.0869, "step": 981 }, { "epoch": 0.27, "learning_rate": 1.8177938584284257e-05, - "loss": 0.0874, + "loss": 0.1436, "step": 982 }, { "epoch": 0.27, "learning_rate": 1.817608312459412e-05, - "loss": 0.3066, + "loss": 0.0143, "step": 983 }, { "epoch": 0.27, "learning_rate": 1.817422766490398e-05, - "loss": 0.3098, + "loss": 0.1402, "step": 984 }, { "epoch": 0.27, "learning_rate": 1.8172372205213843e-05, - "loss": 0.1375, + "loss": 0.0791, "step": 985 }, { "epoch": 0.27, "learning_rate": 1.8170516745523704e-05, - "loss": 0.2958, + "loss": 0.2636, "step": 986 }, { "epoch": 0.27, "learning_rate": 1.8168661285833566e-05, - "loss": 0.0318, + "loss": 0.1251, "step": 987 }, { "epoch": 0.27, "learning_rate": 1.8166805826143428e-05, - "loss": 0.0863, + "loss": 0.1362, "step": 988 }, { "epoch": 0.28, "learning_rate": 1.816495036645329e-05, - "loss": 0.2467, + "loss": 0.2968, "step": 989 }, { "epoch": 0.28, "learning_rate": 1.8163094906763152e-05, - "loss": 0.0896, + "loss": 0.1884, "step": 990 }, { "epoch": 0.28, "learning_rate": 1.8161239447073014e-05, - "loss": 0.1404, + "loss": 0.1915, "step": 991 }, { "epoch": 0.28, "learning_rate": 1.8159383987382876e-05, - "loss": 0.1921, + "loss": 0.1416, "step": 992 }, { "epoch": 0.28, "learning_rate": 1.8157528527692737e-05, - "loss": 0.091, + "loss": 0.0928, "step": 993 }, { "epoch": 0.28, "learning_rate": 1.81556730680026e-05, - "loss": 0.0881, + "loss": 0.1411, "step": 994 }, { "epoch": 0.28, "learning_rate": 1.815381760831246e-05, - "loss": 0.0865, + "loss": 0.1421, "step": 995 }, { "epoch": 0.28, "learning_rate": 1.8151962148622323e-05, - "loss": 0.2485, + "loss": 0.1875, "step": 996 }, { "epoch": 0.28, "learning_rate": 1.8150106688932185e-05, - "loss": 0.0864, + "loss": 0.1871, "step": 997 }, { "epoch": 0.28, "learning_rate": 1.8148251229242047e-05, - "loss": 0.0374, + "loss": 0.1363, "step": 998 }, { "epoch": 0.28, "learning_rate": 1.814639576955191e-05, - "loss": 0.2388, + "loss": 0.0888, "step": 999 }, { "epoch": 0.28, "learning_rate": 1.814454030986177e-05, - "loss": 0.0863, + "loss": 0.2375, "step": 1000 }, { "epoch": 0.28, "learning_rate": 1.8142684850171632e-05, - "loss": 0.0853, + "loss": 0.093, "step": 1001 }, { "epoch": 0.28, "learning_rate": 1.8140829390481494e-05, - "loss": 0.085, + "loss": 0.1856, "step": 1002 }, { "epoch": 0.28, "learning_rate": 1.8138973930791356e-05, - "loss": 0.1392, + "loss": 0.1369, "step": 1003 }, { "epoch": 0.28, "learning_rate": 1.8137118471101215e-05, - "loss": 0.1388, + "loss": 0.0889, "step": 1004 }, { "epoch": 0.28, "learning_rate": 1.813526301141108e-05, - "loss": 0.0328, + "loss": 0.1901, "step": 1005 }, { "epoch": 0.28, "learning_rate": 1.8133407551720942e-05, - "loss": 0.1372, + "loss": 0.0841, "step": 1006 }, { "epoch": 0.28, "learning_rate": 1.81315520920308e-05, - "loss": 0.0312, + "loss": 0.138, "step": 1007 }, { "epoch": 0.28, "learning_rate": 1.8129696632340662e-05, - "loss": 0.2515, + "loss": 0.1322, "step": 1008 }, { "epoch": 0.28, "learning_rate": 1.8127841172650527e-05, - "loss": 0.2436, + "loss": 0.0276, "step": 1009 }, { "epoch": 0.28, "learning_rate": 1.812598571296039e-05, - "loss": 0.0839, + "loss": 0.2401, "step": 1010 }, { "epoch": 0.28, "learning_rate": 1.8124130253270248e-05, - "loss": 0.0846, + "loss": 0.1968, "step": 1011 }, { "epoch": 0.28, "learning_rate": 1.812227479358011e-05, - "loss": 0.1957, + "loss": 0.1423, "step": 1012 }, { "epoch": 0.28, "learning_rate": 1.8120419333889975e-05, - "loss": 0.3043, + "loss": 0.024, "step": 1013 }, { "epoch": 0.28, "learning_rate": 1.8118563874199833e-05, - "loss": 0.0857, + "loss": 0.0769, "step": 1014 }, { "epoch": 0.28, "learning_rate": 1.8116708414509695e-05, - "loss": 0.1886, + "loss": 0.0226, "step": 1015 }, { "epoch": 0.28, "learning_rate": 1.8114852954819557e-05, - "loss": 0.03, + "loss": 0.0871, "step": 1016 }, { "epoch": 0.28, "learning_rate": 1.8112997495129422e-05, - "loss": 0.0825, + "loss": 0.0725, "step": 1017 }, { "epoch": 0.28, "learning_rate": 1.811114203543928e-05, - "loss": 0.0303, + "loss": 0.2588, "step": 1018 }, { "epoch": 0.28, "learning_rate": 1.8109286575749143e-05, - "loss": 0.3571, + "loss": 0.1431, "step": 1019 }, { "epoch": 0.28, "learning_rate": 1.8107431116059005e-05, - "loss": 0.0855, + "loss": 0.0756, "step": 1020 }, { "epoch": 0.28, "learning_rate": 1.8105575656368866e-05, - "loss": 0.192, + "loss": 0.0774, "step": 1021 }, { "epoch": 0.28, "learning_rate": 1.810372019667873e-05, - "loss": 0.0834, + "loss": 0.1329, "step": 1022 }, { "epoch": 0.28, "learning_rate": 1.810186473698859e-05, - "loss": 0.0845, + "loss": 0.0773, "step": 1023 }, { "epoch": 0.28, "learning_rate": 1.8100009277298452e-05, - "loss": 0.1951, + "loss": 0.0789, "step": 1024 }, { "epoch": 0.29, "learning_rate": 1.8098153817608314e-05, - "loss": 0.1997, + "loss": 0.2447, "step": 1025 }, { "epoch": 0.29, "learning_rate": 1.8096298357918176e-05, - "loss": 0.1412, + "loss": 0.0771, "step": 1026 }, { "epoch": 0.29, "learning_rate": 1.8094442898228038e-05, - "loss": 0.0329, + "loss": 0.0798, "step": 1027 }, { "epoch": 0.29, "learning_rate": 1.80925874385379e-05, - "loss": 0.1397, + "loss": 0.0811, "step": 1028 }, { "epoch": 0.29, "learning_rate": 1.809073197884776e-05, - "loss": 0.0316, + "loss": 0.0799, "step": 1029 }, { "epoch": 0.29, "learning_rate": 1.8088876519157623e-05, - "loss": 0.1924, + "loss": 0.2917, "step": 1030 }, { "epoch": 0.29, "learning_rate": 1.8087021059467485e-05, - "loss": 0.086, + "loss": 0.2344, "step": 1031 }, { "epoch": 0.29, "learning_rate": 1.8085165599777347e-05, - "loss": 0.1966, + "loss": 0.1851, "step": 1032 }, { "epoch": 0.29, "learning_rate": 1.808331014008721e-05, - "loss": 0.2476, + "loss": 0.1379, "step": 1033 }, { "epoch": 0.29, "learning_rate": 1.808145468039707e-05, - "loss": 0.1362, + "loss": 0.0459, "step": 1034 }, { "epoch": 0.29, "learning_rate": 1.8079599220706933e-05, - "loss": 0.0317, + "loss": 0.1415, "step": 1035 }, { "epoch": 0.29, "learning_rate": 1.807774376101679e-05, - "loss": 0.1414, + "loss": 0.0455, "step": 1036 }, { "epoch": 0.29, "learning_rate": 1.8075888301326656e-05, - "loss": 0.2475, + "loss": 0.1806, "step": 1037 }, { "epoch": 0.29, "learning_rate": 1.8074032841636518e-05, - "loss": 0.1918, + "loss": 0.1811, "step": 1038 }, { "epoch": 0.29, "learning_rate": 1.807217738194638e-05, - "loss": 0.1954, + "loss": 0.0839, "step": 1039 }, { "epoch": 0.29, "learning_rate": 1.807032192225624e-05, - "loss": 0.1376, + "loss": 0.0843, "step": 1040 }, { "epoch": 0.29, "learning_rate": 1.8068466462566104e-05, - "loss": 0.0852, + "loss": 0.1312, "step": 1041 }, { "epoch": 0.29, "learning_rate": 1.8066611002875966e-05, - "loss": 0.1947, + "loss": 0.0254, "step": 1042 }, { "epoch": 0.29, "learning_rate": 1.8064755543185824e-05, - "loss": 0.1914, + "loss": 0.0226, "step": 1043 }, { "epoch": 0.29, "learning_rate": 1.8062900083495686e-05, - "loss": 0.0886, + "loss": 0.2051, "step": 1044 }, { "epoch": 0.29, "learning_rate": 1.806104462380555e-05, - "loss": 0.1885, + "loss": 0.3291, "step": 1045 }, { "epoch": 0.29, "learning_rate": 1.8059189164115413e-05, - "loss": 0.1915, + "loss": 0.0195, "step": 1046 }, { "epoch": 0.29, "learning_rate": 1.8057333704425272e-05, - "loss": 0.1426, + "loss": 0.0201, "step": 1047 }, { "epoch": 0.29, "learning_rate": 1.8055478244735134e-05, - "loss": 0.0876, + "loss": 0.0724, "step": 1048 }, { "epoch": 0.29, "learning_rate": 1.8053622785045e-05, - "loss": 0.1903, + "loss": 0.1373, "step": 1049 }, { "epoch": 0.29, "learning_rate": 1.8051767325354857e-05, - "loss": 0.2436, + "loss": 0.0763, "step": 1050 }, { "epoch": 0.29, "learning_rate": 1.804991186566472e-05, - "loss": 0.1894, + "loss": 0.0182, "step": 1051 }, { "epoch": 0.29, "learning_rate": 1.804805640597458e-05, - "loss": 0.0889, + "loss": 0.0821, "step": 1052 }, { "epoch": 0.29, "learning_rate": 1.8046200946284443e-05, - "loss": 0.2373, + "loss": 0.0183, "step": 1053 }, { "epoch": 0.29, "learning_rate": 1.8044345486594305e-05, - "loss": 0.1332, + "loss": 0.0824, "step": 1054 }, { "epoch": 0.29, "learning_rate": 1.8042490026904167e-05, - "loss": 0.2386, + "loss": 0.1419, "step": 1055 }, { "epoch": 0.29, "learning_rate": 1.804063456721403e-05, - "loss": 0.0977, + "loss": 0.195, "step": 1056 }, { "epoch": 0.29, "learning_rate": 1.803877910752389e-05, - "loss": 0.1784, + "loss": 0.0785, "step": 1057 }, { "epoch": 0.29, "learning_rate": 1.8036923647833752e-05, - "loss": 0.192, + "loss": 0.1343, "step": 1058 }, { "epoch": 0.29, "learning_rate": 1.8035068188143614e-05, - "loss": 0.1904, + "loss": 0.0201, "step": 1059 }, { "epoch": 0.3, "learning_rate": 1.8033212728453476e-05, - "loss": 0.1881, + "loss": 0.0214, "step": 1060 }, { "epoch": 0.3, "learning_rate": 1.8031357268763338e-05, - "loss": 0.239, + "loss": 0.2466, "step": 1061 }, { "epoch": 0.3, "learning_rate": 1.80295018090732e-05, - "loss": 0.1464, + "loss": 0.1396, "step": 1062 }, { "epoch": 0.3, "learning_rate": 1.802764634938306e-05, - "loss": 0.1941, + "loss": 0.1286, "step": 1063 }, { "epoch": 0.3, "learning_rate": 1.8025790889692924e-05, - "loss": 0.4342, + "loss": 0.0825, "step": 1064 }, { "epoch": 0.3, "learning_rate": 1.8023935430002782e-05, - "loss": 0.1427, + "loss": 0.0794, "step": 1065 }, { "epoch": 0.3, "learning_rate": 1.8022079970312647e-05, - "loss": 0.1815, + "loss": 0.0783, "step": 1066 }, { "epoch": 0.3, "learning_rate": 1.802022451062251e-05, - "loss": 0.1411, + "loss": 0.2462, "step": 1067 }, { "epoch": 0.3, "learning_rate": 1.801836905093237e-05, - "loss": 0.1, + "loss": 0.0255, "step": 1068 }, { "epoch": 0.3, "learning_rate": 1.801651359124223e-05, - "loss": 0.0994, + "loss": 0.0798, "step": 1069 }, { "epoch": 0.3, "learning_rate": 1.8014658131552095e-05, - "loss": 0.0972, + "loss": 0.1897, "step": 1070 }, { "epoch": 0.3, "learning_rate": 1.8012802671861957e-05, - "loss": 0.1933, + "loss": 0.1887, "step": 1071 }, { "epoch": 0.3, "learning_rate": 1.8010947212171815e-05, - "loss": 0.1417, + "loss": 0.0287, "step": 1072 }, { "epoch": 0.3, "learning_rate": 1.8009091752481677e-05, - "loss": 0.0918, + "loss": 0.076, "step": 1073 }, { "epoch": 0.3, "learning_rate": 1.8007236292791542e-05, - "loss": 0.1411, + "loss": 0.1313, "step": 1074 }, { "epoch": 0.3, "learning_rate": 1.8005380833101404e-05, - "loss": 0.0859, + "loss": 0.0768, "step": 1075 }, { "epoch": 0.3, "learning_rate": 1.8003525373411263e-05, - "loss": 0.1933, + "loss": 0.0775, "step": 1076 }, { "epoch": 0.3, "learning_rate": 1.8001669913721124e-05, - "loss": 0.0843, + "loss": 0.0265, "step": 1077 }, { "epoch": 0.3, "learning_rate": 1.799981445403099e-05, - "loss": 0.2553, + "loss": 0.0263, "step": 1078 }, { "epoch": 0.3, "learning_rate": 1.7997958994340848e-05, - "loss": 0.1376, + "loss": 0.0752, "step": 1079 }, { "epoch": 0.3, "learning_rate": 1.799610353465071e-05, - "loss": 0.0849, + "loss": 0.0216, "step": 1080 }, { "epoch": 0.3, "learning_rate": 1.7994248074960572e-05, - "loss": 0.0837, + "loss": 0.0757, "step": 1081 }, { "epoch": 0.3, "learning_rate": 1.7992392615270434e-05, - "loss": 0.1959, + "loss": 0.1431, "step": 1082 }, { "epoch": 0.3, "learning_rate": 1.7990537155580296e-05, - "loss": 0.0856, + "loss": 0.1442, "step": 1083 }, { "epoch": 0.3, "learning_rate": 1.7988681695890158e-05, - "loss": 0.2466, + "loss": 0.0149, "step": 1084 }, { "epoch": 0.3, "learning_rate": 1.798682623620002e-05, - "loss": 0.1382, + "loss": 0.1995, "step": 1085 }, { "epoch": 0.3, "learning_rate": 1.798497077650988e-05, - "loss": 0.1938, + "loss": 0.2007, "step": 1086 }, { "epoch": 0.3, "learning_rate": 1.7983115316819743e-05, - "loss": 0.1949, + "loss": 0.075, "step": 1087 }, { "epoch": 0.3, "learning_rate": 1.7981259857129605e-05, - "loss": 0.1923, + "loss": 0.3128, "step": 1088 }, { "epoch": 0.3, "learning_rate": 1.7979404397439467e-05, - "loss": 0.1413, + "loss": 0.0208, "step": 1089 }, { "epoch": 0.3, "learning_rate": 1.797754893774933e-05, - "loss": 0.1346, + "loss": 0.025, "step": 1090 }, { "epoch": 0.3, "learning_rate": 1.797569347805919e-05, - "loss": 0.1353, + "loss": 0.1931, "step": 1091 }, { "epoch": 0.3, "learning_rate": 1.7973838018369052e-05, - "loss": 0.1912, + "loss": 0.0767, "step": 1092 }, { "epoch": 0.3, "learning_rate": 1.7971982558678914e-05, - "loss": 0.0857, + "loss": 0.1354, "step": 1093 }, { "epoch": 0.3, "learning_rate": 1.7970127098988776e-05, - "loss": 0.0318, + "loss": 0.1311, "step": 1094 }, { "epoch": 0.3, "learning_rate": 1.7968271639298638e-05, - "loss": 0.1332, + "loss": 0.2417, "step": 1095 }, { "epoch": 0.31, "learning_rate": 1.79664161796085e-05, - "loss": 0.1391, + "loss": 0.0805, "step": 1096 }, { "epoch": 0.31, "learning_rate": 1.7964560719918362e-05, - "loss": 0.1962, + "loss": 0.0318, "step": 1097 }, { "epoch": 0.31, "learning_rate": 1.7962705260228224e-05, - "loss": 0.1872, + "loss": 0.2928, "step": 1098 }, { "epoch": 0.31, "learning_rate": 1.7960849800538086e-05, - "loss": 0.1385, + "loss": 0.1371, "step": 1099 }, { "epoch": 0.31, "learning_rate": 1.7958994340847947e-05, - "loss": 0.1407, + "loss": 0.0854, "step": 1100 }, { "epoch": 0.31, "learning_rate": 1.7957138881157806e-05, - "loss": 0.0878, + "loss": 0.1361, "step": 1101 }, { "epoch": 0.31, "learning_rate": 1.795528342146767e-05, - "loss": 0.1909, + "loss": 0.1384, "step": 1102 }, { "epoch": 0.31, "learning_rate": 1.7953427961777533e-05, - "loss": 0.1344, + "loss": 0.035, "step": 1103 }, { "epoch": 0.31, "learning_rate": 1.795157250208739e-05, - "loss": 0.0331, + "loss": 0.0366, "step": 1104 }, { "epoch": 0.31, "learning_rate": 1.7949717042397253e-05, - "loss": 0.0885, + "loss": 0.1888, "step": 1105 }, { "epoch": 0.31, "learning_rate": 1.794786158270712e-05, - "loss": 0.1917, + "loss": 0.0856, "step": 1106 }, { "epoch": 0.31, "learning_rate": 1.794600612301698e-05, - "loss": 0.0842, + "loss": 0.0303, "step": 1107 }, { "epoch": 0.31, "learning_rate": 1.794415066332684e-05, - "loss": 0.1915, + "loss": 0.0833, "step": 1108 }, { "epoch": 0.31, "learning_rate": 1.79422952036367e-05, - "loss": 0.1923, + "loss": 0.0823, "step": 1109 }, { "epoch": 0.31, "learning_rate": 1.7940439743946566e-05, - "loss": 0.0308, + "loss": 0.1354, "step": 1110 }, { "epoch": 0.31, "learning_rate": 1.7938584284256425e-05, - "loss": 0.2495, + "loss": 0.0754, "step": 1111 }, { "epoch": 0.31, "learning_rate": 1.7936728824566287e-05, - "loss": 0.0333, + "loss": 0.0781, "step": 1112 }, { "epoch": 0.31, "learning_rate": 1.793487336487615e-05, - "loss": 0.0853, + "loss": 0.0205, "step": 1113 }, { "epoch": 0.31, "learning_rate": 1.7933017905186014e-05, - "loss": 0.1961, + "loss": 0.2015, "step": 1114 }, { "epoch": 0.31, "learning_rate": 1.7931162445495872e-05, - "loss": 0.1387, + "loss": 0.0763, "step": 1115 }, { "epoch": 0.31, "learning_rate": 1.7929306985805734e-05, - "loss": 0.1383, + "loss": 0.1926, "step": 1116 }, { "epoch": 0.31, "learning_rate": 1.7927451526115596e-05, - "loss": 0.3598, + "loss": 0.0176, "step": 1117 }, { "epoch": 0.31, "learning_rate": 1.7925596066425458e-05, - "loss": 0.2522, + "loss": 0.1266, "step": 1118 }, { "epoch": 0.31, "learning_rate": 1.792374060673532e-05, - "loss": 0.0897, + "loss": 0.0768, "step": 1119 }, { "epoch": 0.31, "learning_rate": 1.792188514704518e-05, - "loss": 0.0823, + "loss": 0.1383, "step": 1120 }, { "epoch": 0.31, "learning_rate": 1.7920029687355043e-05, - "loss": 0.1441, + "loss": 0.083, "step": 1121 }, { "epoch": 0.31, "learning_rate": 1.7918174227664905e-05, - "loss": 0.1366, + "loss": 0.1289, "step": 1122 }, { "epoch": 0.31, "learning_rate": 1.7916318767974767e-05, - "loss": 0.192, + "loss": 0.0192, "step": 1123 }, { "epoch": 0.31, "learning_rate": 1.791446330828463e-05, - "loss": 0.3465, + "loss": 0.2501, "step": 1124 }, { "epoch": 0.31, "learning_rate": 1.791260784859449e-05, - "loss": 0.0869, + "loss": 0.0823, "step": 1125 }, { "epoch": 0.31, "learning_rate": 1.7910752388904353e-05, - "loss": 0.0882, + "loss": 0.305, "step": 1126 }, { "epoch": 0.31, "learning_rate": 1.7908896929214215e-05, - "loss": 0.1413, + "loss": 0.0766, "step": 1127 }, { "epoch": 0.31, "learning_rate": 1.7907041469524076e-05, - "loss": 0.2525, + "loss": 0.083, "step": 1128 }, { "epoch": 0.31, "learning_rate": 1.790518600983394e-05, - "loss": 0.1894, + "loss": 0.0788, "step": 1129 }, { "epoch": 0.31, "learning_rate": 1.79033305501438e-05, - "loss": 0.1894, + "loss": 0.0797, "step": 1130 }, { "epoch": 0.31, "learning_rate": 1.7901475090453662e-05, - "loss": 0.2415, + "loss": 0.0301, "step": 1131 }, { "epoch": 0.32, "learning_rate": 1.7899619630763524e-05, - "loss": 0.246, + "loss": 0.1369, "step": 1132 }, { "epoch": 0.32, "learning_rate": 1.7897764171073382e-05, - "loss": 0.0945, + "loss": 0.0252, "step": 1133 }, { "epoch": 0.32, "learning_rate": 1.7895908711383248e-05, - "loss": 0.1424, + "loss": 0.0784, "step": 1134 }, { "epoch": 0.32, "learning_rate": 1.789405325169311e-05, - "loss": 0.1378, + "loss": 0.0795, "step": 1135 }, { "epoch": 0.32, "learning_rate": 1.789219779200297e-05, - "loss": 0.2406, + "loss": 0.1384, "step": 1136 }, { "epoch": 0.32, "learning_rate": 1.789034233231283e-05, - "loss": 0.1426, + "loss": 0.0736, "step": 1137 }, { "epoch": 0.32, "learning_rate": 1.7888486872622695e-05, - "loss": 0.1918, + "loss": 0.0208, "step": 1138 }, { "epoch": 0.32, "learning_rate": 1.7886631412932557e-05, - "loss": 0.093, + "loss": 0.0795, "step": 1139 }, { "epoch": 0.32, "learning_rate": 1.7884775953242416e-05, - "loss": 0.1408, + "loss": 0.1336, "step": 1140 }, { "epoch": 0.32, "learning_rate": 1.7882920493552277e-05, - "loss": 0.0926, + "loss": 0.1389, "step": 1141 }, { "epoch": 0.32, "learning_rate": 1.7881065033862143e-05, - "loss": 0.1384, + "loss": 0.1344, "step": 1142 }, { "epoch": 0.32, "learning_rate": 1.7879209574172005e-05, - "loss": 0.3924, + "loss": 0.0763, "step": 1143 }, { "epoch": 0.32, "learning_rate": 1.7877354114481863e-05, - "loss": 0.0951, + "loss": 0.0162, "step": 1144 }, { "epoch": 0.32, "learning_rate": 1.7875498654791725e-05, - "loss": 0.0957, + "loss": 0.3099, "step": 1145 }, { "epoch": 0.32, "learning_rate": 1.787364319510159e-05, - "loss": 0.0922, + "loss": 0.2566, "step": 1146 }, { "epoch": 0.32, "learning_rate": 1.787178773541145e-05, - "loss": 0.1394, + "loss": 0.1436, "step": 1147 }, { "epoch": 0.32, "learning_rate": 1.786993227572131e-05, - "loss": 0.0377, + "loss": 0.1364, "step": 1148 }, { "epoch": 0.32, "learning_rate": 1.7868076816031172e-05, - "loss": 0.0884, + "loss": 0.0796, "step": 1149 }, { "epoch": 0.32, "learning_rate": 1.7866221356341034e-05, - "loss": 0.0369, + "loss": 0.0341, "step": 1150 }, { "epoch": 0.32, "learning_rate": 1.7864365896650896e-05, - "loss": 0.1904, + "loss": 0.0307, "step": 1151 }, { "epoch": 0.32, "learning_rate": 1.7862510436960758e-05, - "loss": 0.1424, + "loss": 0.1322, "step": 1152 }, { "epoch": 0.32, "learning_rate": 1.786065497727062e-05, - "loss": 0.1984, + "loss": 0.0346, "step": 1153 }, { "epoch": 0.32, "learning_rate": 1.785879951758048e-05, - "loss": 0.0844, + "loss": 0.0794, "step": 1154 }, { "epoch": 0.32, "learning_rate": 1.7856944057890344e-05, - "loss": 0.1362, + "loss": 0.1318, "step": 1155 }, { "epoch": 0.32, "learning_rate": 1.7855088598200205e-05, - "loss": 0.0859, + "loss": 0.0823, "step": 1156 }, { "epoch": 0.32, "learning_rate": 1.7853233138510067e-05, - "loss": 0.1866, + "loss": 0.1343, "step": 1157 }, { "epoch": 0.32, "learning_rate": 1.785137767881993e-05, - "loss": 0.0799, + "loss": 0.0245, "step": 1158 }, { "epoch": 0.32, "learning_rate": 1.784952221912979e-05, - "loss": 0.1368, + "loss": 0.1255, "step": 1159 }, { "epoch": 0.32, "learning_rate": 1.7847666759439653e-05, - "loss": 0.3598, + "loss": 0.024, "step": 1160 }, { "epoch": 0.32, "learning_rate": 1.7845811299749515e-05, - "loss": 0.1962, + "loss": 0.0748, "step": 1161 }, { "epoch": 0.32, "learning_rate": 1.7843955840059377e-05, - "loss": 0.2437, + "loss": 0.185, "step": 1162 }, { "epoch": 0.32, "learning_rate": 1.784210038036924e-05, - "loss": 0.0281, + "loss": 0.1965, "step": 1163 }, { "epoch": 0.32, "learning_rate": 1.78402449206791e-05, - "loss": 0.1956, + "loss": 0.1365, "step": 1164 }, { "epoch": 0.32, "learning_rate": 1.7838389460988962e-05, - "loss": 0.0848, + "loss": 0.1884, "step": 1165 }, { "epoch": 0.32, "learning_rate": 1.7836534001298824e-05, - "loss": 0.1884, + "loss": 0.136, "step": 1166 }, { "epoch": 0.32, "learning_rate": 1.7834678541608686e-05, - "loss": 0.2498, + "loss": 0.1887, "step": 1167 }, { "epoch": 0.33, "learning_rate": 1.7832823081918548e-05, - "loss": 0.0319, + "loss": 0.0781, "step": 1168 }, { "epoch": 0.33, "learning_rate": 1.7830967622228406e-05, - "loss": 0.0309, + "loss": 0.0236, "step": 1169 }, { "epoch": 0.33, "learning_rate": 1.782911216253827e-05, - "loss": 0.0309, + "loss": 0.1377, "step": 1170 }, { "epoch": 0.33, "learning_rate": 1.7827256702848133e-05, - "loss": 0.1973, + "loss": 0.0793, "step": 1171 }, { "epoch": 0.33, "learning_rate": 1.7825401243157995e-05, - "loss": 0.1915, + "loss": 0.1307, "step": 1172 }, { "epoch": 0.33, "learning_rate": 1.7823545783467854e-05, - "loss": 0.0864, + "loss": 0.1847, "step": 1173 }, { "epoch": 0.33, "learning_rate": 1.782169032377772e-05, - "loss": 0.1354, + "loss": 0.0771, "step": 1174 }, { "epoch": 0.33, "learning_rate": 1.781983486408758e-05, - "loss": 0.1382, + "loss": 0.0284, "step": 1175 }, { "epoch": 0.33, "learning_rate": 1.781797940439744e-05, - "loss": 0.0859, + "loss": 0.0787, "step": 1176 }, { "epoch": 0.33, "learning_rate": 1.78161239447073e-05, - "loss": 0.25, + "loss": 0.0788, "step": 1177 }, { "epoch": 0.33, "learning_rate": 1.7814268485017167e-05, - "loss": 0.1371, + "loss": 0.084, "step": 1178 }, { "epoch": 0.33, "learning_rate": 1.7812413025327025e-05, - "loss": 0.4597, + "loss": 0.1904, "step": 1179 }, { "epoch": 0.33, "learning_rate": 1.7810557565636887e-05, - "loss": 0.1415, + "loss": 0.1326, "step": 1180 }, { "epoch": 0.33, "learning_rate": 1.780870210594675e-05, - "loss": 0.2475, + "loss": 0.0826, "step": 1181 }, { "epoch": 0.33, "learning_rate": 1.7806846646256614e-05, - "loss": 0.196, + "loss": 0.0293, "step": 1182 }, { "epoch": 0.33, "learning_rate": 1.7804991186566473e-05, - "loss": 0.1379, + "loss": 0.0246, "step": 1183 }, { "epoch": 0.33, "learning_rate": 1.7803135726876334e-05, - "loss": 0.1941, + "loss": 0.1342, "step": 1184 }, { "epoch": 0.33, "learning_rate": 1.7801280267186196e-05, - "loss": 0.0893, + "loss": 0.0236, "step": 1185 }, { "epoch": 0.33, "learning_rate": 1.7799424807496058e-05, - "loss": 0.2392, + "loss": 0.2385, "step": 1186 }, { "epoch": 0.33, "learning_rate": 1.779756934780592e-05, - "loss": 0.0889, + "loss": 0.1929, "step": 1187 }, { "epoch": 0.33, "learning_rate": 1.7795713888115782e-05, - "loss": 0.1412, + "loss": 0.0199, "step": 1188 }, { "epoch": 0.33, "learning_rate": 1.7793858428425644e-05, - "loss": 0.1939, + "loss": 0.0242, "step": 1189 }, { "epoch": 0.33, "learning_rate": 1.7792002968735506e-05, - "loss": 0.0381, + "loss": 0.0763, "step": 1190 }, { "epoch": 0.33, "learning_rate": 1.7790147509045368e-05, - "loss": 0.1414, + "loss": 0.0814, "step": 1191 }, { "epoch": 0.33, "learning_rate": 1.778829204935523e-05, - "loss": 0.1374, + "loss": 0.0193, "step": 1192 }, { "epoch": 0.33, "learning_rate": 1.778643658966509e-05, - "loss": 0.2437, + "loss": 0.1356, "step": 1193 }, { "epoch": 0.33, "learning_rate": 1.7784581129974953e-05, - "loss": 0.2377, + "loss": 0.1417, "step": 1194 }, { "epoch": 0.33, "learning_rate": 1.7782725670284815e-05, - "loss": 0.1368, + "loss": 0.0807, "step": 1195 }, { "epoch": 0.33, "learning_rate": 1.7780870210594677e-05, - "loss": 0.1906, + "loss": 0.0177, "step": 1196 }, { "epoch": 0.33, "learning_rate": 1.777901475090454e-05, - "loss": 0.0892, + "loss": 0.1327, "step": 1197 }, { "epoch": 0.33, "learning_rate": 1.77771592912144e-05, - "loss": 0.0888, + "loss": 0.1407, "step": 1198 }, { "epoch": 0.33, "learning_rate": 1.7775303831524262e-05, - "loss": 0.0905, + "loss": 0.0782, "step": 1199 }, { "epoch": 0.33, "learning_rate": 1.7773448371834124e-05, - "loss": 0.1359, + "loss": 0.203, "step": 1200 }, { "epoch": 0.33, "learning_rate": 1.7771592912143983e-05, - "loss": 0.2408, + "loss": 0.1365, "step": 1201 }, { "epoch": 0.33, "learning_rate": 1.7769737452453848e-05, - "loss": 0.2908, + "loss": 0.198, "step": 1202 }, { "epoch": 0.33, "learning_rate": 1.776788199276371e-05, - "loss": 0.1901, + "loss": 0.0221, "step": 1203 }, { "epoch": 0.34, "learning_rate": 1.7766026533073572e-05, - "loss": 0.1933, + "loss": 0.1846, "step": 1204 }, { "epoch": 0.34, "learning_rate": 1.776417107338343e-05, - "loss": 0.2366, + "loss": 0.0194, "step": 1205 }, { "epoch": 0.34, "learning_rate": 1.7762315613693296e-05, - "loss": 0.1909, + "loss": 0.1855, "step": 1206 }, { "epoch": 0.34, "learning_rate": 1.7760460154003157e-05, - "loss": 0.0888, + "loss": 0.0846, "step": 1207 }, { "epoch": 0.34, "learning_rate": 1.7758604694313016e-05, - "loss": 0.0925, + "loss": 0.1343, "step": 1208 }, { "epoch": 0.34, "learning_rate": 1.7756749234622878e-05, - "loss": 0.2416, + "loss": 0.1363, "step": 1209 }, { "epoch": 0.34, "learning_rate": 1.7754893774932743e-05, - "loss": 0.244, + "loss": 0.0834, "step": 1210 }, { "epoch": 0.34, "learning_rate": 1.7753038315242605e-05, - "loss": 0.2316, + "loss": 0.1339, "step": 1211 }, { "epoch": 0.34, "learning_rate": 1.7751182855552463e-05, - "loss": 0.0942, + "loss": 0.1794, "step": 1212 }, { "epoch": 0.34, "learning_rate": 1.7749327395862325e-05, - "loss": 0.0439, + "loss": 0.0831, "step": 1213 }, { "epoch": 0.34, "learning_rate": 1.774747193617219e-05, - "loss": 0.1403, + "loss": 0.1807, "step": 1214 }, { "epoch": 0.34, "learning_rate": 1.774561647648205e-05, - "loss": 0.1982, + "loss": 0.1384, "step": 1215 }, { "epoch": 0.34, "learning_rate": 1.774376101679191e-05, - "loss": 0.0941, + "loss": 0.1785, "step": 1216 }, { "epoch": 0.34, "learning_rate": 1.7741905557101773e-05, - "loss": 0.0422, + "loss": 0.0878, "step": 1217 }, { "epoch": 0.34, "learning_rate": 1.7740050097411635e-05, - "loss": 0.09, + "loss": 0.1309, "step": 1218 }, { "epoch": 0.34, "learning_rate": 1.7738194637721497e-05, - "loss": 0.1421, + "loss": 0.1308, "step": 1219 }, { "epoch": 0.34, "learning_rate": 1.773633917803136e-05, - "loss": 0.1911, + "loss": 0.0397, "step": 1220 }, { "epoch": 0.34, "learning_rate": 1.773448371834122e-05, - "loss": 0.1426, + "loss": 0.1245, "step": 1221 }, { "epoch": 0.34, "learning_rate": 1.7732628258651082e-05, - "loss": 0.1366, + "loss": 0.0855, "step": 1222 }, { "epoch": 0.34, "learning_rate": 1.7730772798960944e-05, - "loss": 0.1417, + "loss": 0.1902, "step": 1223 }, { "epoch": 0.34, "learning_rate": 1.7728917339270806e-05, - "loss": 0.1887, + "loss": 0.1353, "step": 1224 }, { "epoch": 0.34, "learning_rate": 1.7727061879580668e-05, - "loss": 0.1932, + "loss": 0.0213, "step": 1225 }, { "epoch": 0.34, "learning_rate": 1.772520641989053e-05, - "loss": 0.1937, + "loss": 0.1908, "step": 1226 }, { "epoch": 0.34, "learning_rate": 1.772335096020039e-05, - "loss": 0.2463, + "loss": 0.0711, "step": 1227 }, { "epoch": 0.34, "learning_rate": 1.7721495500510253e-05, - "loss": 0.1356, + "loss": 0.1959, "step": 1228 }, { "epoch": 0.34, "learning_rate": 1.7719640040820115e-05, - "loss": 0.1827, + "loss": 0.0788, "step": 1229 }, { "epoch": 0.34, "learning_rate": 1.7717784581129974e-05, - "loss": 0.0874, + "loss": 0.0169, "step": 1230 }, { "epoch": 0.34, "learning_rate": 1.771592912143984e-05, - "loss": 0.091, + "loss": 0.0719, "step": 1231 }, { "epoch": 0.34, "learning_rate": 1.77140736617497e-05, - "loss": 0.3011, + "loss": 0.257, "step": 1232 }, { "epoch": 0.34, "learning_rate": 1.7712218202059563e-05, - "loss": 0.0845, + "loss": 0.1941, "step": 1233 }, { "epoch": 0.34, "learning_rate": 1.771036274236942e-05, - "loss": 0.139, + "loss": 0.1381, "step": 1234 }, { "epoch": 0.34, "learning_rate": 1.7708507282679286e-05, - "loss": 0.2438, + "loss": 0.0799, "step": 1235 }, { "epoch": 0.34, "learning_rate": 1.7706651822989148e-05, - "loss": 0.088, + "loss": 0.1282, "step": 1236 }, { "epoch": 0.34, "learning_rate": 1.7704796363299007e-05, - "loss": 0.1418, + "loss": 0.1374, "step": 1237 }, { "epoch": 0.34, "learning_rate": 1.770294090360887e-05, - "loss": 0.1445, + "loss": 0.1448, "step": 1238 }, { "epoch": 0.34, "learning_rate": 1.7701085443918734e-05, - "loss": 0.2966, + "loss": 0.1273, "step": 1239 }, { "epoch": 0.35, "learning_rate": 1.7699229984228596e-05, - "loss": 0.0358, + "loss": 0.136, "step": 1240 }, { "epoch": 0.35, "learning_rate": 1.7697374524538454e-05, - "loss": 0.1924, + "loss": 0.1249, "step": 1241 }, { "epoch": 0.35, "learning_rate": 1.7695519064848316e-05, - "loss": 0.138, + "loss": 0.0971, "step": 1242 }, { "epoch": 0.35, "learning_rate": 1.769366360515818e-05, - "loss": 0.1975, + "loss": 0.0813, "step": 1243 }, { "epoch": 0.35, "learning_rate": 1.769180814546804e-05, - "loss": 0.036, + "loss": 0.0855, "step": 1244 }, { "epoch": 0.35, "learning_rate": 1.7689952685777902e-05, - "loss": 0.291, + "loss": 0.0787, "step": 1245 }, { "epoch": 0.35, "learning_rate": 1.7688097226087764e-05, - "loss": 0.2417, + "loss": 0.0317, "step": 1246 }, { "epoch": 0.35, "learning_rate": 1.7686241766397625e-05, - "loss": 0.1437, + "loss": 0.0325, "step": 1247 }, { "epoch": 0.35, "learning_rate": 1.7684386306707487e-05, - "loss": 0.1926, + "loss": 0.1297, "step": 1248 }, { "epoch": 0.35, "learning_rate": 1.768253084701735e-05, - "loss": 0.0913, + "loss": 0.135, "step": 1249 }, { "epoch": 0.35, "learning_rate": 1.768067538732721e-05, - "loss": 0.1424, + "loss": 0.1939, "step": 1250 }, { "epoch": 0.35, "learning_rate": 1.7678819927637073e-05, - "loss": 0.1395, + "loss": 0.1853, "step": 1251 }, { "epoch": 0.35, "learning_rate": 1.7676964467946935e-05, - "loss": 0.0384, + "loss": 0.1401, "step": 1252 }, { "epoch": 0.35, "learning_rate": 1.7675109008256797e-05, - "loss": 0.138, + "loss": 0.0787, "step": 1253 }, { "epoch": 0.35, "learning_rate": 1.767325354856666e-05, - "loss": 0.0894, + "loss": 0.1363, "step": 1254 }, { "epoch": 0.35, "learning_rate": 1.767139808887652e-05, - "loss": 0.2882, + "loss": 0.1344, "step": 1255 }, { "epoch": 0.35, "learning_rate": 1.7669542629186382e-05, - "loss": 0.1913, + "loss": 0.1911, "step": 1256 }, { "epoch": 0.35, "learning_rate": 1.7667687169496244e-05, - "loss": 0.1377, + "loss": 0.0219, "step": 1257 }, { "epoch": 0.35, "learning_rate": 1.7665831709806106e-05, - "loss": 0.0852, + "loss": 0.0783, "step": 1258 }, { "epoch": 0.35, "learning_rate": 1.7663976250115968e-05, - "loss": 0.0852, + "loss": 0.0757, "step": 1259 }, { "epoch": 0.35, "learning_rate": 1.766212079042583e-05, - "loss": 0.1409, + "loss": 0.2566, "step": 1260 }, { "epoch": 0.35, "learning_rate": 1.766026533073569e-05, - "loss": 0.0861, + "loss": 0.1289, "step": 1261 }, { "epoch": 0.35, "learning_rate": 1.7658409871045554e-05, - "loss": 0.139, + "loss": 0.0219, "step": 1262 }, { "epoch": 0.35, "learning_rate": 1.7656554411355415e-05, - "loss": 0.2448, + "loss": 0.0781, "step": 1263 }, { "epoch": 0.35, "learning_rate": 1.7654698951665277e-05, - "loss": 0.145, + "loss": 0.1361, "step": 1264 }, { "epoch": 0.35, "learning_rate": 1.765284349197514e-05, - "loss": 0.0835, + "loss": 0.082, "step": 1265 }, { "epoch": 0.35, "learning_rate": 1.7650988032284998e-05, - "loss": 0.1918, + "loss": 0.0787, "step": 1266 }, { "epoch": 0.35, "learning_rate": 1.7649132572594863e-05, - "loss": 0.2483, + "loss": 0.021, "step": 1267 }, { "epoch": 0.35, "learning_rate": 1.7647277112904725e-05, - "loss": 0.2427, + "loss": 0.0204, "step": 1268 }, { "epoch": 0.35, "learning_rate": 1.7645421653214587e-05, - "loss": 0.1874, + "loss": 0.0709, "step": 1269 }, { "epoch": 0.35, "learning_rate": 1.7643566193524445e-05, - "loss": 0.2972, + "loss": 0.014, "step": 1270 }, { "epoch": 0.35, "learning_rate": 1.764171073383431e-05, - "loss": 0.0878, + "loss": 0.1358, "step": 1271 }, { "epoch": 0.35, "learning_rate": 1.7639855274144172e-05, - "loss": 0.1952, + "loss": 0.007, "step": 1272 }, { "epoch": 0.35, "learning_rate": 1.763799981445403e-05, - "loss": 0.1928, + "loss": 0.0062, "step": 1273 }, { "epoch": 0.35, "learning_rate": 1.7636144354763893e-05, - "loss": 0.1378, + "loss": 0.0044, "step": 1274 }, { "epoch": 0.35, "learning_rate": 1.7634288895073758e-05, - "loss": 0.1387, + "loss": 0.0949, "step": 1275 }, { "epoch": 0.36, "learning_rate": 1.7632433435383616e-05, - "loss": 0.0404, + "loss": 0.3541, "step": 1276 }, { "epoch": 0.36, "learning_rate": 1.7630577975693478e-05, - "loss": 0.138, + "loss": 0.1746, "step": 1277 }, { "epoch": 0.36, "learning_rate": 1.762872251600334e-05, - "loss": 0.0389, + "loss": 0.0923, "step": 1278 }, { "epoch": 0.36, "learning_rate": 1.7626867056313205e-05, - "loss": 0.2483, + "loss": 0.0055, "step": 1279 }, { "epoch": 0.36, "learning_rate": 1.7625011596623064e-05, - "loss": 0.1414, + "loss": 0.0858, "step": 1280 }, { "epoch": 0.36, "learning_rate": 1.7623156136932926e-05, - "loss": 0.0899, + "loss": 0.2299, "step": 1281 }, { "epoch": 0.36, "learning_rate": 1.7621300677242788e-05, - "loss": 0.1863, + "loss": 0.1442, "step": 1282 }, { "epoch": 0.36, "learning_rate": 1.761944521755265e-05, - "loss": 0.0385, + "loss": 0.1403, "step": 1283 }, { "epoch": 0.36, "learning_rate": 1.761758975786251e-05, - "loss": 0.0893, + "loss": 0.1013, "step": 1284 }, { "epoch": 0.36, "learning_rate": 1.7615734298172373e-05, - "loss": 0.3027, + "loss": 0.0908, "step": 1285 }, { "epoch": 0.36, "learning_rate": 1.7613878838482235e-05, - "loss": 0.1412, + "loss": 0.1436, "step": 1286 }, { "epoch": 0.36, "learning_rate": 1.7612023378792097e-05, - "loss": 0.1911, + "loss": 0.0812, "step": 1287 }, { "epoch": 0.36, "learning_rate": 1.761016791910196e-05, - "loss": 0.2449, + "loss": 0.0168, "step": 1288 }, { "epoch": 0.36, "learning_rate": 1.760831245941182e-05, - "loss": 0.1437, + "loss": 0.0812, "step": 1289 }, { "epoch": 0.36, "learning_rate": 1.7606456999721683e-05, - "loss": 0.0849, + "loss": 0.1375, "step": 1290 }, { "epoch": 0.36, "learning_rate": 1.7604601540031544e-05, - "loss": 0.2348, + "loss": 0.0138, "step": 1291 }, { "epoch": 0.36, "learning_rate": 1.7602746080341406e-05, - "loss": 0.1393, + "loss": 0.3942, "step": 1292 }, { "epoch": 0.36, "learning_rate": 1.7600890620651268e-05, - "loss": 0.0888, + "loss": 0.0807, "step": 1293 }, { "epoch": 0.36, "learning_rate": 1.759903516096113e-05, - "loss": 0.1413, + "loss": 0.0809, "step": 1294 }, { "epoch": 0.36, "learning_rate": 1.7597179701270992e-05, - "loss": 0.0867, + "loss": 0.0834, "step": 1295 }, { "epoch": 0.36, "learning_rate": 1.7595324241580854e-05, - "loss": 0.142, + "loss": 0.219, "step": 1296 }, { "epoch": 0.36, "learning_rate": 1.7593468781890716e-05, - "loss": 0.1422, + "loss": 0.0803, "step": 1297 }, { "epoch": 0.36, "learning_rate": 1.7591613322200578e-05, - "loss": 0.1891, + "loss": 0.1471, "step": 1298 }, { "epoch": 0.36, "learning_rate": 1.758975786251044e-05, - "loss": 0.0335, + "loss": 0.0868, "step": 1299 }, { "epoch": 0.36, "learning_rate": 1.75879024028203e-05, - "loss": 0.145, + "loss": 0.0821, "step": 1300 }, { "epoch": 0.36, "learning_rate": 1.7586046943130163e-05, - "loss": 0.0337, + "loss": 0.0259, "step": 1301 }, { "epoch": 0.36, "learning_rate": 1.758419148344002e-05, - "loss": 0.1346, + "loss": 0.0804, "step": 1302 }, { "epoch": 0.36, "learning_rate": 1.7582336023749887e-05, - "loss": 0.2978, + "loss": 0.023, "step": 1303 }, { "epoch": 0.36, "learning_rate": 1.758048056405975e-05, - "loss": 0.3469, + "loss": 0.0794, "step": 1304 }, { "epoch": 0.36, "learning_rate": 1.7578625104369607e-05, - "loss": 0.2396, + "loss": 0.0169, "step": 1305 }, { "epoch": 0.36, "learning_rate": 1.757676964467947e-05, - "loss": 0.2974, + "loss": 0.0849, "step": 1306 }, { "epoch": 0.36, "learning_rate": 1.7574914184989334e-05, - "loss": 0.1425, + "loss": 0.2749, "step": 1307 }, { "epoch": 0.36, "learning_rate": 1.7573058725299196e-05, - "loss": 0.1403, + "loss": 0.1547, "step": 1308 }, { "epoch": 0.36, "learning_rate": 1.7571203265609055e-05, - "loss": 0.1421, + "loss": 0.1431, "step": 1309 }, { "epoch": 0.36, "learning_rate": 1.7569347805918917e-05, - "loss": 0.1402, + "loss": 0.1431, "step": 1310 }, { "epoch": 0.36, "learning_rate": 1.7567492346228782e-05, - "loss": 0.1364, + "loss": 0.0845, "step": 1311 }, { "epoch": 0.37, "learning_rate": 1.756563688653864e-05, - "loss": 0.0379, + "loss": 0.0208, "step": 1312 }, { "epoch": 0.37, "learning_rate": 1.7563781426848502e-05, - "loss": 0.0862, + "loss": 0.2112, "step": 1313 }, { "epoch": 0.37, "learning_rate": 1.7561925967158364e-05, - "loss": 0.0896, + "loss": 0.0234, "step": 1314 }, { "epoch": 0.37, "learning_rate": 1.756007050746823e-05, - "loss": 0.1974, + "loss": 0.079, "step": 1315 }, { "epoch": 0.37, "learning_rate": 1.7558215047778088e-05, - "loss": 0.1891, + "loss": 0.0228, "step": 1316 }, { "epoch": 0.37, "learning_rate": 1.755635958808795e-05, - "loss": 0.0869, + "loss": 0.0211, "step": 1317 }, { "epoch": 0.37, "learning_rate": 1.755450412839781e-05, - "loss": 0.0883, + "loss": 0.0816, "step": 1318 }, { "epoch": 0.37, "learning_rate": 1.7552648668707673e-05, - "loss": 0.2375, + "loss": 0.0185, "step": 1319 }, { "epoch": 0.37, "learning_rate": 1.7550793209017535e-05, - "loss": 0.1388, + "loss": 0.0814, "step": 1320 }, { "epoch": 0.37, "learning_rate": 1.7548937749327397e-05, - "loss": 0.1391, + "loss": 0.2773, "step": 1321 }, { "epoch": 0.37, "learning_rate": 1.754708228963726e-05, - "loss": 0.0366, + "loss": 0.0775, "step": 1322 }, { "epoch": 0.37, "learning_rate": 1.754522682994712e-05, - "loss": 0.1925, + "loss": 0.0143, "step": 1323 }, { "epoch": 0.37, "learning_rate": 1.7543371370256983e-05, - "loss": 0.291, + "loss": 0.2197, "step": 1324 }, { "epoch": 0.37, "learning_rate": 1.7541515910566845e-05, - "loss": 0.346, + "loss": 0.3345, "step": 1325 }, { "epoch": 0.37, "learning_rate": 1.7539660450876706e-05, - "loss": 0.1427, + "loss": 0.1504, "step": 1326 }, { "epoch": 0.37, "learning_rate": 1.753780499118657e-05, - "loss": 0.0377, + "loss": 0.018, "step": 1327 }, { "epoch": 0.37, "learning_rate": 1.753594953149643e-05, - "loss": 0.0858, + "loss": 0.0777, "step": 1328 }, { "epoch": 0.37, "learning_rate": 1.7534094071806292e-05, - "loss": 0.0903, + "loss": 0.1307, "step": 1329 }, { "epoch": 0.37, "learning_rate": 1.7532238612116154e-05, - "loss": 0.0861, + "loss": 0.1412, "step": 1330 }, { "epoch": 0.37, "learning_rate": 1.7530383152426016e-05, - "loss": 0.09, + "loss": 0.1896, "step": 1331 }, { "epoch": 0.37, "learning_rate": 1.7528527692735878e-05, - "loss": 0.0847, + "loss": 0.0797, "step": 1332 }, { "epoch": 0.37, "learning_rate": 1.752667223304574e-05, - "loss": 0.1362, + "loss": 0.082, "step": 1333 }, { "epoch": 0.37, "learning_rate": 1.7524816773355598e-05, - "loss": 0.2971, + "loss": 0.0837, "step": 1334 }, { "epoch": 0.37, "learning_rate": 1.7522961313665463e-05, - "loss": 0.2508, + "loss": 0.077, "step": 1335 }, { "epoch": 0.37, "learning_rate": 1.7521105853975325e-05, - "loss": 0.0874, + "loss": 0.1364, "step": 1336 }, { "epoch": 0.37, "learning_rate": 1.7519250394285187e-05, - "loss": 0.0833, + "loss": 0.1915, "step": 1337 }, { "epoch": 0.37, "learning_rate": 1.7517394934595046e-05, - "loss": 0.0862, + "loss": 0.1907, "step": 1338 }, { "epoch": 0.37, "learning_rate": 1.751553947490491e-05, - "loss": 0.1907, + "loss": 0.2903, "step": 1339 }, { "epoch": 0.37, "learning_rate": 1.7513684015214773e-05, - "loss": 0.1913, + "loss": 0.0854, "step": 1340 }, { "epoch": 0.37, "learning_rate": 1.751182855552463e-05, - "loss": 0.2512, + "loss": 0.1792, "step": 1341 }, { "epoch": 0.37, "learning_rate": 1.7509973095834493e-05, - "loss": 0.1433, + "loss": 0.0832, "step": 1342 }, { "epoch": 0.37, "learning_rate": 1.7508117636144358e-05, - "loss": 0.2425, + "loss": 0.1809, "step": 1343 }, { "epoch": 0.37, "learning_rate": 1.7506262176454217e-05, - "loss": 0.2391, + "loss": 0.1443, "step": 1344 }, { "epoch": 0.37, "learning_rate": 1.750440671676408e-05, - "loss": 0.2453, + "loss": 0.2293, "step": 1345 }, { "epoch": 0.37, "learning_rate": 1.750255125707394e-05, - "loss": 0.1338, + "loss": 0.0405, "step": 1346 }, { "epoch": 0.37, "learning_rate": 1.7500695797383806e-05, - "loss": 0.1915, + "loss": 0.0828, "step": 1347 }, { "epoch": 0.38, "learning_rate": 1.7498840337693664e-05, - "loss": 0.1356, + "loss": 0.1307, "step": 1348 }, { "epoch": 0.38, "learning_rate": 1.7496984878003526e-05, - "loss": 0.1905, + "loss": 0.3146, "step": 1349 }, { "epoch": 0.38, "learning_rate": 1.7495129418313388e-05, - "loss": 0.1406, + "loss": 0.0837, "step": 1350 }, { "epoch": 0.38, "learning_rate": 1.749327395862325e-05, - "loss": 0.1444, + "loss": 0.1343, "step": 1351 }, { "epoch": 0.38, "learning_rate": 1.7491418498933112e-05, - "loss": 0.2922, + "loss": 0.0353, "step": 1352 }, { "epoch": 0.38, "learning_rate": 1.7489563039242974e-05, - "loss": 0.0423, + "loss": 0.1322, "step": 1353 }, { "epoch": 0.38, "learning_rate": 1.7487707579552835e-05, - "loss": 0.3366, + "loss": 0.0793, "step": 1354 }, { "epoch": 0.38, "learning_rate": 1.7485852119862697e-05, - "loss": 0.3291, + "loss": 0.1999, "step": 1355 }, { "epoch": 0.38, "learning_rate": 1.748399666017256e-05, - "loss": 0.0931, + "loss": 0.0194, "step": 1356 }, { "epoch": 0.38, "learning_rate": 1.748214120048242e-05, - "loss": 0.142, + "loss": 0.1446, "step": 1357 }, { "epoch": 0.38, "learning_rate": 1.7480285740792283e-05, - "loss": 0.0958, + "loss": 0.207, "step": 1358 }, { "epoch": 0.38, "learning_rate": 1.7478430281102145e-05, - "loss": 0.1909, + "loss": 0.1922, "step": 1359 }, { "epoch": 0.38, "learning_rate": 1.7476574821412007e-05, - "loss": 0.3269, + "loss": 0.2349, "step": 1360 }, { "epoch": 0.38, "learning_rate": 1.747471936172187e-05, - "loss": 0.1394, + "loss": 0.0853, "step": 1361 }, { "epoch": 0.38, "learning_rate": 1.747286390203173e-05, - "loss": 0.1397, + "loss": 0.0828, "step": 1362 }, { "epoch": 0.38, "learning_rate": 1.7471008442341592e-05, - "loss": 0.1888, + "loss": 0.2363, "step": 1363 }, { "epoch": 0.38, "learning_rate": 1.7469152982651454e-05, - "loss": 0.2318, + "loss": 0.0918, "step": 1364 }, { "epoch": 0.38, "learning_rate": 1.7467297522961316e-05, - "loss": 0.2003, + "loss": 0.2261, "step": 1365 }, { "epoch": 0.38, "learning_rate": 1.7465442063271178e-05, - "loss": 0.1403, + "loss": 0.1706, "step": 1366 }, { "epoch": 0.38, "learning_rate": 1.7463586603581036e-05, - "loss": 0.0986, + "loss": 0.173, "step": 1367 }, { "epoch": 0.38, "learning_rate": 1.74617311438909e-05, - "loss": 0.2398, + "loss": 0.0538, "step": 1368 }, { "epoch": 0.38, "learning_rate": 1.7459875684200764e-05, - "loss": 0.1971, + "loss": 0.052, "step": 1369 }, { "epoch": 0.38, "learning_rate": 1.7458020224510622e-05, - "loss": 0.1455, + "loss": 0.1409, "step": 1370 }, { "epoch": 0.38, "learning_rate": 1.7456164764820484e-05, - "loss": 0.1903, + "loss": 0.0422, "step": 1371 }, { "epoch": 0.38, "learning_rate": 1.745430930513035e-05, - "loss": 0.1931, + "loss": 0.0874, "step": 1372 }, { "epoch": 0.38, "learning_rate": 1.7452453845440208e-05, - "loss": 0.1423, + "loss": 0.1252, "step": 1373 }, { "epoch": 0.38, "learning_rate": 1.745059838575007e-05, - "loss": 0.0957, + "loss": 0.2918, "step": 1374 }, { "epoch": 0.38, "learning_rate": 1.744874292605993e-05, - "loss": 0.0949, + "loss": 0.0748, "step": 1375 }, { "epoch": 0.38, "learning_rate": 1.7446887466369797e-05, - "loss": 0.0921, + "loss": 0.1348, "step": 1376 }, { "epoch": 0.38, "learning_rate": 1.7445032006679655e-05, - "loss": 0.1944, + "loss": 0.0199, "step": 1377 }, { "epoch": 0.38, "learning_rate": 1.7443176546989517e-05, - "loss": 0.1357, + "loss": 0.073, "step": 1378 }, { "epoch": 0.38, "learning_rate": 1.744132108729938e-05, - "loss": 0.2351, + "loss": 0.1887, "step": 1379 }, { "epoch": 0.38, "learning_rate": 1.743946562760924e-05, - "loss": 0.193, + "loss": 0.184, "step": 1380 }, { "epoch": 0.38, "learning_rate": 1.7437610167919103e-05, - "loss": 0.1371, + "loss": 0.0173, "step": 1381 }, { "epoch": 0.38, "learning_rate": 1.7435754708228964e-05, - "loss": 0.2388, + "loss": 0.0164, "step": 1382 }, { "epoch": 0.38, "learning_rate": 1.7433899248538826e-05, - "loss": 0.1395, + "loss": 0.0714, "step": 1383 }, { "epoch": 0.39, "learning_rate": 1.7432043788848688e-05, - "loss": 0.0901, + "loss": 0.1329, "step": 1384 }, { "epoch": 0.39, "learning_rate": 1.743018832915855e-05, - "loss": 0.1435, + "loss": 0.1284, "step": 1385 }, { "epoch": 0.39, "learning_rate": 1.7428332869468412e-05, - "loss": 0.2861, + "loss": 0.0204, "step": 1386 }, { "epoch": 0.39, "learning_rate": 1.7426477409778274e-05, - "loss": 0.0852, + "loss": 0.0203, "step": 1387 }, { "epoch": 0.39, "learning_rate": 1.7424621950088136e-05, - "loss": 0.1935, + "loss": 0.087, "step": 1388 }, { "epoch": 0.39, "learning_rate": 1.7422766490397998e-05, - "loss": 0.0949, + "loss": 0.0234, "step": 1389 }, { "epoch": 0.39, "learning_rate": 1.742091103070786e-05, - "loss": 0.1377, + "loss": 0.1254, "step": 1390 }, { "epoch": 0.39, "learning_rate": 1.741905557101772e-05, - "loss": 0.0909, + "loss": 0.0268, "step": 1391 }, { "epoch": 0.39, "learning_rate": 1.7417200111327583e-05, - "loss": 0.0374, + "loss": 0.0697, "step": 1392 }, { "epoch": 0.39, "learning_rate": 1.7415344651637445e-05, - "loss": 0.1906, + "loss": 0.0173, "step": 1393 }, { "epoch": 0.39, "learning_rate": 1.7413489191947307e-05, - "loss": 0.0367, + "loss": 0.0101, "step": 1394 }, { "epoch": 0.39, "learning_rate": 1.741163373225717e-05, - "loss": 0.2408, + "loss": 0.0833, "step": 1395 }, { "epoch": 0.39, "learning_rate": 1.740977827256703e-05, - "loss": 0.1398, + "loss": 0.2453, "step": 1396 }, { "epoch": 0.39, "learning_rate": 1.7407922812876893e-05, - "loss": 0.2465, + "loss": 0.0092, "step": 1397 }, { "epoch": 0.39, "learning_rate": 1.7406067353186754e-05, - "loss": 0.1921, + "loss": 0.0095, "step": 1398 }, { "epoch": 0.39, "learning_rate": 1.7404211893496613e-05, - "loss": 0.1398, + "loss": 0.3441, "step": 1399 }, { "epoch": 0.39, "learning_rate": 1.7402356433806478e-05, - "loss": 0.1945, + "loss": 0.0123, "step": 1400 }, { "epoch": 0.39, "learning_rate": 1.740050097411634e-05, - "loss": 0.1416, + "loss": 0.015, "step": 1401 }, { "epoch": 0.39, "learning_rate": 1.73986455144262e-05, - "loss": 0.143, + "loss": 0.0762, "step": 1402 }, { "epoch": 0.39, "learning_rate": 1.739679005473606e-05, - "loss": 0.1406, + "loss": 0.0202, "step": 1403 }, { "epoch": 0.39, "learning_rate": 1.7394934595045926e-05, - "loss": 0.2462, + "loss": 0.0744, "step": 1404 }, { "epoch": 0.39, "learning_rate": 1.7393079135355787e-05, - "loss": 0.1953, + "loss": 0.1331, "step": 1405 }, { "epoch": 0.39, "learning_rate": 1.7391223675665646e-05, - "loss": 0.2399, + "loss": 0.1316, "step": 1406 }, { "epoch": 0.39, "learning_rate": 1.7389368215975508e-05, - "loss": 0.093, + "loss": 0.0792, "step": 1407 }, { "epoch": 0.39, "learning_rate": 1.7387512756285373e-05, - "loss": 0.3519, + "loss": 0.1291, "step": 1408 }, { "epoch": 0.39, "learning_rate": 1.738565729659523e-05, - "loss": 0.187, + "loss": 0.1261, "step": 1409 }, { "epoch": 0.39, "learning_rate": 1.7383801836905093e-05, - "loss": 0.1873, + "loss": 0.0221, "step": 1410 }, { "epoch": 0.39, "learning_rate": 1.7381946377214955e-05, - "loss": 0.1934, + "loss": 0.1223, "step": 1411 }, { "epoch": 0.39, "learning_rate": 1.738009091752482e-05, - "loss": 0.0899, + "loss": 0.0749, "step": 1412 }, { "epoch": 0.39, "learning_rate": 1.737823545783468e-05, - "loss": 0.185, + "loss": 0.0774, "step": 1413 }, { "epoch": 0.39, "learning_rate": 1.737637999814454e-05, - "loss": 0.0913, + "loss": 0.1833, "step": 1414 }, { "epoch": 0.39, "learning_rate": 1.7374524538454403e-05, - "loss": 0.2422, + "loss": 0.1217, "step": 1415 }, { "epoch": 0.39, "learning_rate": 1.7372669078764265e-05, - "loss": 0.0941, + "loss": 0.0214, "step": 1416 }, { "epoch": 0.39, "learning_rate": 1.7370813619074127e-05, - "loss": 0.0947, + "loss": 0.0693, "step": 1417 }, { "epoch": 0.39, "learning_rate": 1.736895815938399e-05, - "loss": 0.2761, + "loss": 0.1769, "step": 1418 }, { "epoch": 0.39, "learning_rate": 1.736710269969385e-05, - "loss": 0.0922, + "loss": 0.0846, "step": 1419 }, { "epoch": 0.4, "learning_rate": 1.7365247240003712e-05, - "loss": 0.0914, + "loss": 0.1838, "step": 1420 }, { "epoch": 0.4, "learning_rate": 1.7363391780313574e-05, - "loss": 0.0911, + "loss": 0.0222, "step": 1421 }, { "epoch": 0.4, "learning_rate": 1.7361536320623436e-05, - "loss": 0.0909, + "loss": 0.0886, "step": 1422 }, { "epoch": 0.4, "learning_rate": 1.7359680860933298e-05, - "loss": 0.1404, + "loss": 0.076, "step": 1423 }, { "epoch": 0.4, "learning_rate": 1.735782540124316e-05, - "loss": 0.1415, + "loss": 0.1536, "step": 1424 }, { "epoch": 0.4, "learning_rate": 1.735596994155302e-05, - "loss": 0.136, + "loss": 0.1267, "step": 1425 }, { "epoch": 0.4, "learning_rate": 1.7354114481862883e-05, - "loss": 0.1944, + "loss": 0.0811, "step": 1426 }, { "epoch": 0.4, "learning_rate": 1.7352259022172745e-05, - "loss": 0.0368, + "loss": 0.1245, "step": 1427 }, { "epoch": 0.4, "learning_rate": 1.7350403562482607e-05, - "loss": 0.2437, + "loss": 0.0875, "step": 1428 }, { "epoch": 0.4, "learning_rate": 1.734854810279247e-05, - "loss": 0.1408, + "loss": 0.1743, "step": 1429 }, { "epoch": 0.4, "learning_rate": 1.734669264310233e-05, - "loss": 0.2954, + "loss": 0.0321, "step": 1430 }, { "epoch": 0.4, "learning_rate": 1.734483718341219e-05, - "loss": 0.087, + "loss": 0.2321, "step": 1431 }, { "epoch": 0.4, "learning_rate": 1.7342981723722055e-05, - "loss": 0.1367, + "loss": 0.0706, "step": 1432 }, { "epoch": 0.4, "learning_rate": 1.7341126264031916e-05, - "loss": 0.0337, + "loss": 0.0668, "step": 1433 }, { "epoch": 0.4, "learning_rate": 1.733927080434178e-05, - "loss": 0.0877, + "loss": 0.0172, "step": 1434 }, { "epoch": 0.4, "learning_rate": 1.7337415344651637e-05, - "loss": 0.1371, + "loss": 0.0145, "step": 1435 }, { "epoch": 0.4, "learning_rate": 1.7335559884961502e-05, - "loss": 0.1941, + "loss": 0.0786, "step": 1436 }, { "epoch": 0.4, "learning_rate": 1.7333704425271364e-05, - "loss": 0.1946, + "loss": 0.0848, "step": 1437 }, { "epoch": 0.4, "learning_rate": 1.7331848965581222e-05, - "loss": 0.1409, + "loss": 0.0699, "step": 1438 }, { "epoch": 0.4, "learning_rate": 1.7329993505891084e-05, - "loss": 0.0329, + "loss": 0.0061, "step": 1439 }, { "epoch": 0.4, "learning_rate": 1.732813804620095e-05, - "loss": 0.3009, + "loss": 0.1993, "step": 1440 }, { "epoch": 0.4, "learning_rate": 1.7326282586510808e-05, - "loss": 0.2975, + "loss": 0.2292, "step": 1441 }, { "epoch": 0.4, "learning_rate": 1.732442712682067e-05, - "loss": 0.1403, + "loss": 0.0887, "step": 1442 }, { "epoch": 0.4, "learning_rate": 1.7322571667130532e-05, - "loss": 0.0846, + "loss": 0.1362, "step": 1443 }, { "epoch": 0.4, "learning_rate": 1.7320716207440397e-05, - "loss": 0.3023, + "loss": 0.0697, "step": 1444 }, { "epoch": 0.4, "learning_rate": 1.7318860747750256e-05, - "loss": 0.0335, + "loss": 0.2356, "step": 1445 }, { "epoch": 0.4, "learning_rate": 1.7317005288060117e-05, - "loss": 0.0906, + "loss": 0.114, "step": 1446 }, { "epoch": 0.4, "learning_rate": 1.731514982836998e-05, - "loss": 0.2429, + "loss": 0.1259, "step": 1447 }, { "epoch": 0.4, "learning_rate": 1.731329436867984e-05, - "loss": 0.0846, + "loss": 0.0949, "step": 1448 }, { "epoch": 0.4, "learning_rate": 1.7311438908989703e-05, - "loss": 0.1381, + "loss": 0.0545, "step": 1449 }, { "epoch": 0.4, "learning_rate": 1.7309583449299565e-05, - "loss": 0.087, + "loss": 0.2963, "step": 1450 }, { "epoch": 0.4, "learning_rate": 1.7307727989609427e-05, - "loss": 0.1925, + "loss": 0.1314, "step": 1451 }, { "epoch": 0.4, "learning_rate": 1.730587252991929e-05, - "loss": 0.0875, + "loss": 0.1744, "step": 1452 }, { "epoch": 0.4, "learning_rate": 1.730401707022915e-05, - "loss": 0.0863, + "loss": 0.1194, "step": 1453 }, { "epoch": 0.4, "learning_rate": 1.7302161610539012e-05, - "loss": 0.2425, + "loss": 0.1795, "step": 1454 }, { "epoch": 0.4, "learning_rate": 1.7300306150848874e-05, - "loss": 0.2433, + "loss": 0.0777, "step": 1455 }, { "epoch": 0.41, "learning_rate": 1.7298450691158736e-05, - "loss": 0.089, + "loss": 0.0327, "step": 1456 }, { "epoch": 0.41, "learning_rate": 1.7296595231468598e-05, - "loss": 0.2384, + "loss": 0.0281, "step": 1457 }, { "epoch": 0.41, "learning_rate": 1.729473977177846e-05, - "loss": 0.196, + "loss": 0.0616, "step": 1458 }, { "epoch": 0.41, "learning_rate": 1.7292884312088322e-05, - "loss": 0.0893, + "loss": 0.0855, "step": 1459 }, { "epoch": 0.41, "learning_rate": 1.7291028852398184e-05, - "loss": 0.1391, + "loss": 0.1366, "step": 1460 }, { "epoch": 0.41, "learning_rate": 1.7289173392708045e-05, - "loss": 0.1852, + "loss": 0.1376, "step": 1461 }, { "epoch": 0.41, "learning_rate": 1.7287317933017907e-05, - "loss": 0.0885, + "loss": 0.2657, "step": 1462 }, { "epoch": 0.41, "learning_rate": 1.728546247332777e-05, - "loss": 0.2477, + "loss": 0.0736, "step": 1463 }, { "epoch": 0.41, "learning_rate": 1.728360701363763e-05, - "loss": 0.2981, + "loss": 0.1378, "step": 1464 }, { "epoch": 0.41, "learning_rate": 1.7281751553947493e-05, - "loss": 0.2456, + "loss": 0.1323, "step": 1465 }, { "epoch": 0.41, "learning_rate": 1.7279896094257355e-05, - "loss": 0.1384, + "loss": 0.0178, "step": 1466 }, { "epoch": 0.41, "learning_rate": 1.7278040634567213e-05, - "loss": 0.1905, + "loss": 0.0773, "step": 1467 }, { "epoch": 0.41, "learning_rate": 1.727618517487708e-05, - "loss": 0.0903, + "loss": 0.0774, "step": 1468 }, { "epoch": 0.41, "learning_rate": 1.727432971518694e-05, - "loss": 0.1367, + "loss": 0.0801, "step": 1469 }, { "epoch": 0.41, "learning_rate": 1.72724742554968e-05, - "loss": 0.141, + "loss": 0.2603, "step": 1470 }, { "epoch": 0.41, "learning_rate": 1.727061879580666e-05, - "loss": 0.0943, + "loss": 0.1333, "step": 1471 }, { "epoch": 0.41, "learning_rate": 1.7268763336116526e-05, - "loss": 0.0901, + "loss": 0.1205, "step": 1472 }, { "epoch": 0.41, "learning_rate": 1.7266907876426388e-05, - "loss": 0.0873, + "loss": 0.1268, "step": 1473 }, { "epoch": 0.41, "learning_rate": 1.7265052416736246e-05, - "loss": 0.1439, + "loss": 0.0778, "step": 1474 }, { "epoch": 0.41, "learning_rate": 1.7263196957046108e-05, - "loss": 0.1351, + "loss": 0.2231, "step": 1475 }, { "epoch": 0.41, "learning_rate": 1.7261341497355974e-05, - "loss": 0.2409, + "loss": 0.0806, "step": 1476 }, { "epoch": 0.41, "learning_rate": 1.7259486037665832e-05, - "loss": 0.0896, + "loss": 0.1292, "step": 1477 }, { "epoch": 0.41, "learning_rate": 1.7257630577975694e-05, - "loss": 0.0894, + "loss": 0.0859, "step": 1478 }, { "epoch": 0.41, "learning_rate": 1.7255775118285556e-05, - "loss": 0.1406, + "loss": 0.0793, "step": 1479 }, { "epoch": 0.41, "learning_rate": 1.725391965859542e-05, - "loss": 0.0895, + "loss": 0.0729, "step": 1480 }, { "epoch": 0.41, "learning_rate": 1.725206419890528e-05, - "loss": 0.1376, + "loss": 0.1153, "step": 1481 }, { "epoch": 0.41, "learning_rate": 1.725020873921514e-05, - "loss": 0.0866, + "loss": 0.0553, "step": 1482 }, { "epoch": 0.41, "learning_rate": 1.7248353279525003e-05, - "loss": 0.0859, + "loss": 0.1723, "step": 1483 }, { "epoch": 0.41, "learning_rate": 1.7246497819834865e-05, - "loss": 0.084, + "loss": 0.0706, "step": 1484 }, { "epoch": 0.41, "learning_rate": 1.7244642360144727e-05, - "loss": 0.1892, + "loss": 0.0155, "step": 1485 }, { "epoch": 0.41, "learning_rate": 1.724278690045459e-05, - "loss": 0.2398, + "loss": 0.0844, "step": 1486 }, { "epoch": 0.41, "learning_rate": 1.724093144076445e-05, - "loss": 0.0314, + "loss": 0.1219, "step": 1487 }, { "epoch": 0.41, "learning_rate": 1.7239075981074313e-05, - "loss": 0.1941, + "loss": 0.0733, "step": 1488 }, { "epoch": 0.41, "learning_rate": 1.7237220521384174e-05, - "loss": 0.1957, + "loss": 0.0455, "step": 1489 }, { "epoch": 0.41, "learning_rate": 1.7235365061694036e-05, - "loss": 0.1389, + "loss": 0.1913, "step": 1490 }, { "epoch": 0.41, "learning_rate": 1.7233509602003898e-05, - "loss": 0.1438, + "loss": 0.0699, "step": 1491 }, { "epoch": 0.42, "learning_rate": 1.723165414231376e-05, - "loss": 0.0301, + "loss": 0.134, "step": 1492 }, { "epoch": 0.42, "learning_rate": 1.7229798682623622e-05, - "loss": 0.2462, + "loss": 0.0994, "step": 1493 }, { "epoch": 0.42, "learning_rate": 1.7227943222933484e-05, - "loss": 0.1419, + "loss": 0.0218, "step": 1494 }, { "epoch": 0.42, "learning_rate": 1.7226087763243346e-05, - "loss": 0.1944, + "loss": 0.2037, "step": 1495 }, { "epoch": 0.42, "learning_rate": 1.7224232303553208e-05, - "loss": 0.3989, + "loss": 0.0516, "step": 1496 }, { "epoch": 0.42, "learning_rate": 1.722237684386307e-05, - "loss": 0.0851, + "loss": 0.1364, "step": 1497 }, { "epoch": 0.42, "learning_rate": 1.722052138417293e-05, - "loss": 0.1363, + "loss": 0.1383, "step": 1498 }, { "epoch": 0.42, "learning_rate": 1.721866592448279e-05, - "loss": 0.1909, + "loss": 0.0805, "step": 1499 }, { "epoch": 0.42, "learning_rate": 1.7216810464792655e-05, - "loss": 0.1383, + "loss": 0.2012, "step": 1500 }, { "epoch": 0.42, "learning_rate": 1.7214955005102517e-05, - "loss": 0.1937, + "loss": 0.1463, "step": 1501 }, { "epoch": 0.42, "learning_rate": 1.721309954541238e-05, - "loss": 0.1864, + "loss": 0.131, "step": 1502 }, { "epoch": 0.42, "learning_rate": 1.7211244085722237e-05, - "loss": 0.0344, + "loss": 0.117, "step": 1503 }, { "epoch": 0.42, "learning_rate": 1.7209388626032103e-05, - "loss": 0.1398, + "loss": 0.1262, "step": 1504 }, { "epoch": 0.42, "learning_rate": 1.7207533166341964e-05, - "loss": 0.2405, + "loss": 0.1563, "step": 1505 }, { "epoch": 0.42, "learning_rate": 1.7205677706651823e-05, - "loss": 0.0848, + "loss": 0.1598, "step": 1506 }, { "epoch": 0.42, "learning_rate": 1.7203822246961685e-05, - "loss": 0.2412, + "loss": 0.0828, "step": 1507 }, { "epoch": 0.42, "learning_rate": 1.720196678727155e-05, - "loss": 0.0863, + "loss": 0.1923, "step": 1508 }, { "epoch": 0.42, "learning_rate": 1.7200111327581412e-05, - "loss": 0.0849, + "loss": 0.0329, "step": 1509 }, { "epoch": 0.42, "learning_rate": 1.719825586789127e-05, - "loss": 0.1378, + "loss": 0.1912, "step": 1510 }, { "epoch": 0.42, "learning_rate": 1.7196400408201132e-05, - "loss": 0.2958, + "loss": 0.159, "step": 1511 }, { "epoch": 0.42, "learning_rate": 1.7194544948510997e-05, - "loss": 0.1378, + "loss": 0.0306, "step": 1512 }, { "epoch": 0.42, "learning_rate": 1.7192689488820856e-05, - "loss": 0.1883, + "loss": 0.167, "step": 1513 }, { "epoch": 0.42, "learning_rate": 1.7190834029130718e-05, - "loss": 0.0364, + "loss": 0.1249, "step": 1514 }, { "epoch": 0.42, "learning_rate": 1.718897856944058e-05, - "loss": 0.1947, + "loss": 0.1389, "step": 1515 }, { "epoch": 0.42, "learning_rate": 1.718712310975044e-05, - "loss": 0.1946, + "loss": 0.1479, "step": 1516 }, { "epoch": 0.42, "learning_rate": 1.7185267650060303e-05, - "loss": 0.1943, + "loss": 0.1585, "step": 1517 }, { "epoch": 0.42, "learning_rate": 1.7183412190370165e-05, - "loss": 0.0854, + "loss": 0.0872, "step": 1518 }, { "epoch": 0.42, "learning_rate": 1.7181556730680027e-05, - "loss": 0.1921, + "loss": 0.0929, "step": 1519 }, { "epoch": 0.42, "learning_rate": 1.717970127098989e-05, - "loss": 0.2367, + "loss": 0.1739, "step": 1520 }, { "epoch": 0.42, "learning_rate": 1.717784581129975e-05, - "loss": 0.2429, + "loss": 0.1759, "step": 1521 }, { "epoch": 0.42, "learning_rate": 1.7175990351609613e-05, - "loss": 0.1411, + "loss": 0.1127, "step": 1522 }, { "epoch": 0.42, "learning_rate": 1.7174134891919475e-05, - "loss": 0.1823, + "loss": 0.1195, "step": 1523 }, { "epoch": 0.42, "learning_rate": 1.7172279432229337e-05, - "loss": 0.1449, + "loss": 0.1188, "step": 1524 }, { "epoch": 0.42, "learning_rate": 1.71704239725392e-05, - "loss": 0.1884, + "loss": 0.0309, "step": 1525 }, { "epoch": 0.42, "learning_rate": 1.716856851284906e-05, - "loss": 0.2344, + "loss": 0.235, "step": 1526 }, { "epoch": 0.42, "learning_rate": 1.7166713053158922e-05, - "loss": 0.1405, + "loss": 0.113, "step": 1527 }, { "epoch": 0.43, "learning_rate": 1.716485759346878e-05, - "loss": 0.2387, + "loss": 0.1676, "step": 1528 }, { "epoch": 0.43, "learning_rate": 1.7163002133778646e-05, - "loss": 0.1436, + "loss": 0.1119, "step": 1529 }, { "epoch": 0.43, "learning_rate": 1.7161146674088508e-05, - "loss": 0.1884, + "loss": 0.0349, "step": 1530 }, { "epoch": 0.43, "learning_rate": 1.715929121439837e-05, - "loss": 0.1441, + "loss": 0.0272, "step": 1531 }, { "epoch": 0.43, "learning_rate": 1.7157435754708228e-05, - "loss": 0.1381, + "loss": 0.0782, "step": 1532 }, { "epoch": 0.43, "learning_rate": 1.7155580295018093e-05, - "loss": 0.0455, + "loss": 0.2023, "step": 1533 }, { "epoch": 0.43, "learning_rate": 1.7153724835327955e-05, - "loss": 0.1422, + "loss": 0.0869, "step": 1534 }, { "epoch": 0.43, "learning_rate": 1.7151869375637814e-05, - "loss": 0.3359, + "loss": 0.1046, "step": 1535 }, { "epoch": 0.43, "learning_rate": 1.7150013915947676e-05, - "loss": 0.0928, + "loss": 0.0745, "step": 1536 }, { "epoch": 0.43, "learning_rate": 1.714815845625754e-05, - "loss": 0.1893, + "loss": 0.0751, "step": 1537 }, { "epoch": 0.43, "learning_rate": 1.7146302996567403e-05, - "loss": 0.0453, + "loss": 0.0227, "step": 1538 }, { "epoch": 0.43, "learning_rate": 1.714444753687726e-05, - "loss": 0.1807, + "loss": 0.1125, "step": 1539 }, { "epoch": 0.43, "learning_rate": 1.7142592077187123e-05, - "loss": 0.0887, + "loss": 0.1745, "step": 1540 }, { "epoch": 0.43, "learning_rate": 1.714073661749699e-05, - "loss": 0.3379, + "loss": 0.0794, "step": 1541 }, { "epoch": 0.43, "learning_rate": 1.7138881157806847e-05, - "loss": 0.091, + "loss": 0.2564, "step": 1542 }, { "epoch": 0.43, "learning_rate": 1.713702569811671e-05, - "loss": 0.0876, + "loss": 0.1573, "step": 1543 }, { "epoch": 0.43, "learning_rate": 1.713517023842657e-05, - "loss": 0.1409, + "loss": 0.0712, "step": 1544 }, { "epoch": 0.43, "learning_rate": 1.7133314778736432e-05, - "loss": 0.1376, + "loss": 0.0429, "step": 1545 }, { "epoch": 0.43, "learning_rate": 1.7131459319046294e-05, - "loss": 0.14, + "loss": 0.1135, "step": 1546 }, { "epoch": 0.43, "learning_rate": 1.7129603859356156e-05, - "loss": 0.0376, + "loss": 0.1189, "step": 1547 }, { "epoch": 0.43, "learning_rate": 1.7127748399666018e-05, - "loss": 0.1883, + "loss": 0.1197, "step": 1548 }, { "epoch": 0.43, "learning_rate": 1.712589293997588e-05, - "loss": 0.1949, + "loss": 0.0715, "step": 1549 }, { "epoch": 0.43, "learning_rate": 1.7124037480285742e-05, - "loss": 0.0905, + "loss": 0.0674, "step": 1550 }, { "epoch": 0.43, "learning_rate": 1.7122182020595604e-05, - "loss": 0.0875, + "loss": 0.1339, "step": 1551 }, { "epoch": 0.43, "learning_rate": 1.7120326560905466e-05, - "loss": 0.086, + "loss": 0.0765, "step": 1552 }, { "epoch": 0.43, "learning_rate": 1.7118471101215327e-05, - "loss": 0.084, + "loss": 0.0725, "step": 1553 }, { "epoch": 0.43, "learning_rate": 1.711661564152519e-05, - "loss": 0.0845, + "loss": 0.0495, "step": 1554 }, { "epoch": 0.43, "learning_rate": 1.711476018183505e-05, - "loss": 0.0314, + "loss": 0.073, "step": 1555 }, { "epoch": 0.43, "learning_rate": 1.7112904722144913e-05, - "loss": 0.0831, + "loss": 0.1325, "step": 1556 }, { "epoch": 0.43, "learning_rate": 1.7111049262454775e-05, - "loss": 0.1405, + "loss": 0.0871, "step": 1557 }, { "epoch": 0.43, "learning_rate": 1.7109193802764637e-05, - "loss": 0.1958, + "loss": 0.1689, "step": 1558 }, { "epoch": 0.43, "learning_rate": 1.71073383430745e-05, - "loss": 0.1923, + "loss": 0.1134, "step": 1559 }, { "epoch": 0.43, "learning_rate": 1.710548288338436e-05, - "loss": 0.1912, + "loss": 0.1758, "step": 1560 }, { "epoch": 0.43, "learning_rate": 1.7103627423694222e-05, - "loss": 0.1965, + "loss": 0.0909, "step": 1561 }, { "epoch": 0.43, "learning_rate": 1.7101771964004084e-05, - "loss": 0.1992, + "loss": 0.125, "step": 1562 }, { "epoch": 0.44, "learning_rate": 1.7099916504313946e-05, - "loss": 0.0834, + "loss": 0.1615, "step": 1563 }, { "epoch": 0.44, "learning_rate": 1.7098061044623805e-05, - "loss": 0.1376, + "loss": 0.1198, "step": 1564 }, { "epoch": 0.44, "learning_rate": 1.709620558493367e-05, - "loss": 0.2546, + "loss": 0.0617, "step": 1565 }, { "epoch": 0.44, "learning_rate": 1.7094350125243532e-05, - "loss": 0.0874, + "loss": 0.1771, "step": 1566 }, { "epoch": 0.44, "learning_rate": 1.709249466555339e-05, - "loss": 0.0284, + "loss": 0.0412, "step": 1567 }, { "epoch": 0.44, "learning_rate": 1.7090639205863252e-05, - "loss": 0.1944, + "loss": 0.071, "step": 1568 }, { "epoch": 0.44, "learning_rate": 1.7088783746173117e-05, - "loss": 0.0289, + "loss": 0.1765, "step": 1569 }, { "epoch": 0.44, "learning_rate": 1.708692828648298e-05, - "loss": 0.087, + "loss": 0.0147, "step": 1570 }, { "epoch": 0.44, "learning_rate": 1.7085072826792838e-05, - "loss": 0.2054, + "loss": 0.0637, "step": 1571 }, { "epoch": 0.44, "learning_rate": 1.70832173671027e-05, - "loss": 0.1331, + "loss": 0.0249, "step": 1572 }, { "epoch": 0.44, "learning_rate": 1.7081361907412565e-05, - "loss": 0.1371, + "loss": 0.1072, "step": 1573 }, { "epoch": 0.44, "learning_rate": 1.7079506447722423e-05, - "loss": 0.14, + "loss": 0.1208, "step": 1574 }, { "epoch": 0.44, "learning_rate": 1.7077650988032285e-05, - "loss": 0.0275, + "loss": 0.0666, "step": 1575 }, { "epoch": 0.44, "learning_rate": 1.7075795528342147e-05, - "loss": 0.193, + "loss": 0.1367, "step": 1576 }, { "epoch": 0.44, "learning_rate": 1.7073940068652012e-05, - "loss": 0.1418, + "loss": 0.0524, "step": 1577 }, { "epoch": 0.44, "learning_rate": 1.707208460896187e-05, - "loss": 0.1996, + "loss": 0.1217, "step": 1578 }, { "epoch": 0.44, "learning_rate": 1.7070229149271733e-05, - "loss": 0.1933, + "loss": 0.0773, "step": 1579 }, { "epoch": 0.44, "learning_rate": 1.7068373689581595e-05, - "loss": 0.0286, + "loss": 0.0629, "step": 1580 }, { "epoch": 0.44, "learning_rate": 1.7066518229891456e-05, - "loss": 0.1443, + "loss": 0.0165, "step": 1581 }, { "epoch": 0.44, "learning_rate": 1.7064662770201318e-05, - "loss": 0.0872, + "loss": 0.0121, "step": 1582 }, { "epoch": 0.44, "learning_rate": 1.706280731051118e-05, - "loss": 0.0298, + "loss": 0.0754, "step": 1583 }, { "epoch": 0.44, "learning_rate": 1.7060951850821042e-05, - "loss": 0.0283, + "loss": 0.0653, "step": 1584 }, { "epoch": 0.44, "learning_rate": 1.7059096391130904e-05, - "loss": 0.1441, + "loss": 0.1402, "step": 1585 }, { "epoch": 0.44, "learning_rate": 1.7057240931440766e-05, - "loss": 0.0271, + "loss": 0.0672, "step": 1586 }, { "epoch": 0.44, "learning_rate": 1.7055385471750628e-05, - "loss": 0.0834, + "loss": 0.0108, "step": 1587 }, { "epoch": 0.44, "learning_rate": 1.705353001206049e-05, - "loss": 0.0873, + "loss": 0.1814, "step": 1588 }, { "epoch": 0.44, "learning_rate": 1.705167455237035e-05, - "loss": 0.2566, + "loss": 0.0195, "step": 1589 }, { "epoch": 0.44, "learning_rate": 1.7049819092680213e-05, - "loss": 0.1923, + "loss": 0.1718, "step": 1590 }, { "epoch": 0.44, "learning_rate": 1.7047963632990075e-05, - "loss": 0.309, + "loss": 0.3808, "step": 1591 }, { "epoch": 0.44, "learning_rate": 1.7046108173299937e-05, - "loss": 0.1382, + "loss": 0.2505, "step": 1592 }, { "epoch": 0.44, "learning_rate": 1.70442527136098e-05, - "loss": 0.0857, + "loss": 0.0744, "step": 1593 }, { "epoch": 0.44, "learning_rate": 1.704239725391966e-05, - "loss": 0.2504, + "loss": 0.2108, "step": 1594 }, { "epoch": 0.44, "learning_rate": 1.7040541794229523e-05, - "loss": 0.14, + "loss": 0.1198, "step": 1595 }, { "epoch": 0.44, "learning_rate": 1.703868633453938e-05, - "loss": 0.1937, + "loss": 0.184, "step": 1596 }, { "epoch": 0.44, "learning_rate": 1.7036830874849246e-05, - "loss": 0.1933, + "loss": 0.2781, "step": 1597 }, { "epoch": 0.44, "learning_rate": 1.7034975415159108e-05, - "loss": 0.1889, + "loss": 0.1045, "step": 1598 }, { "epoch": 0.45, "learning_rate": 1.703311995546897e-05, - "loss": 0.1372, + "loss": 0.1279, "step": 1599 }, { "epoch": 0.45, "learning_rate": 1.703126449577883e-05, - "loss": 0.1951, + "loss": 0.1205, "step": 1600 }, { "epoch": 0.45, "learning_rate": 1.7029409036088694e-05, - "loss": 0.0854, + "loss": 0.0831, "step": 1601 }, { "epoch": 0.45, "learning_rate": 1.7027553576398556e-05, - "loss": 0.0874, + "loss": 0.1533, "step": 1602 }, { "epoch": 0.45, "learning_rate": 1.7025698116708414e-05, - "loss": 0.0325, + "loss": 0.1031, "step": 1603 }, { "epoch": 0.45, "learning_rate": 1.7023842657018276e-05, - "loss": 0.0858, + "loss": 0.0614, "step": 1604 }, { "epoch": 0.45, "learning_rate": 1.702198719732814e-05, - "loss": 0.4039, + "loss": 0.2293, "step": 1605 }, { "epoch": 0.45, "learning_rate": 1.7020131737638003e-05, - "loss": 0.0815, + "loss": 0.2071, "step": 1606 }, { "epoch": 0.45, "learning_rate": 1.701827627794786e-05, - "loss": 0.1896, + "loss": 0.112, "step": 1607 }, { "epoch": 0.45, "learning_rate": 1.7016420818257723e-05, - "loss": 0.0852, + "loss": 0.0735, "step": 1608 }, { "epoch": 0.45, "learning_rate": 1.701456535856759e-05, - "loss": 0.1407, + "loss": 0.0133, "step": 1609 }, { "epoch": 0.45, "learning_rate": 1.7012709898877447e-05, - "loss": 0.3039, + "loss": 0.099, "step": 1610 }, { "epoch": 0.45, "learning_rate": 1.701085443918731e-05, - "loss": 0.139, + "loss": 0.0747, "step": 1611 }, { "epoch": 0.45, "learning_rate": 1.700899897949717e-05, - "loss": 0.0338, + "loss": 0.1002, "step": 1612 }, { "epoch": 0.45, "learning_rate": 1.7007143519807033e-05, - "loss": 0.301, + "loss": 0.0894, "step": 1613 }, { "epoch": 0.45, "learning_rate": 1.7005288060116895e-05, - "loss": 0.1954, + "loss": 0.0462, "step": 1614 }, { "epoch": 0.45, "learning_rate": 1.7003432600426757e-05, - "loss": 0.1416, + "loss": 0.0935, "step": 1615 }, { "epoch": 0.45, "learning_rate": 1.700157714073662e-05, - "loss": 0.1365, + "loss": 0.0788, "step": 1616 }, { "epoch": 0.45, "learning_rate": 1.699972168104648e-05, - "loss": 0.3534, + "loss": 0.2256, "step": 1617 }, { "epoch": 0.45, "learning_rate": 1.6997866221356342e-05, - "loss": 0.2426, + "loss": 0.0765, "step": 1618 }, { "epoch": 0.45, "learning_rate": 1.6996010761666204e-05, - "loss": 0.1368, + "loss": 0.0263, "step": 1619 }, { "epoch": 0.45, "learning_rate": 1.6994155301976066e-05, - "loss": 0.1896, + "loss": 0.0798, "step": 1620 }, { "epoch": 0.45, "learning_rate": 1.6992299842285928e-05, - "loss": 0.0417, + "loss": 0.0812, "step": 1621 }, { "epoch": 0.45, "learning_rate": 1.699044438259579e-05, - "loss": 0.2386, + "loss": 0.0727, "step": 1622 }, { "epoch": 0.45, "learning_rate": 1.698858892290565e-05, - "loss": 0.1892, + "loss": 0.0212, "step": 1623 }, { "epoch": 0.45, "learning_rate": 1.6986733463215513e-05, - "loss": 0.1924, + "loss": 0.0187, "step": 1624 }, { "epoch": 0.45, "learning_rate": 1.6984878003525375e-05, - "loss": 0.2422, + "loss": 0.1963, "step": 1625 }, { "epoch": 0.45, "learning_rate": 1.6983022543835237e-05, - "loss": 0.0908, + "loss": 0.1267, "step": 1626 }, { "epoch": 0.45, "learning_rate": 1.69811670841451e-05, - "loss": 0.1411, + "loss": 0.1701, "step": 1627 }, { "epoch": 0.45, "learning_rate": 1.697931162445496e-05, - "loss": 0.1904, + "loss": 0.0583, "step": 1628 }, { "epoch": 0.45, "learning_rate": 1.6977456164764823e-05, - "loss": 0.1821, + "loss": 0.0158, "step": 1629 }, { "epoch": 0.45, "learning_rate": 1.6975600705074685e-05, - "loss": 0.1412, + "loss": 0.0148, "step": 1630 }, { "epoch": 0.45, "learning_rate": 1.6973745245384547e-05, - "loss": 0.0951, + "loss": 0.0708, "step": 1631 }, { "epoch": 0.45, "learning_rate": 1.6971889785694405e-05, - "loss": 0.1874, + "loss": 0.2903, "step": 1632 }, { "epoch": 0.45, "learning_rate": 1.697003432600427e-05, - "loss": 0.1947, + "loss": 0.2659, "step": 1633 }, { "epoch": 0.45, "learning_rate": 1.6968178866314132e-05, - "loss": 0.1835, + "loss": 0.0656, "step": 1634 }, { "epoch": 0.46, "learning_rate": 1.6966323406623994e-05, - "loss": 0.1861, + "loss": 0.1154, "step": 1635 }, { "epoch": 0.46, "learning_rate": 1.6964467946933852e-05, - "loss": 0.2832, + "loss": 0.0684, "step": 1636 }, { "epoch": 0.46, "learning_rate": 1.6962612487243718e-05, - "loss": 0.2415, + "loss": 0.0301, "step": 1637 }, { "epoch": 0.46, "learning_rate": 1.696075702755358e-05, - "loss": 0.3823, + "loss": 0.0816, "step": 1638 }, { "epoch": 0.46, "learning_rate": 1.6958901567863438e-05, - "loss": 0.0945, + "loss": 0.0589, "step": 1639 }, { "epoch": 0.46, "learning_rate": 1.69570461081733e-05, - "loss": 0.1926, + "loss": 0.2022, "step": 1640 }, { "epoch": 0.46, "learning_rate": 1.6955190648483165e-05, - "loss": 0.0941, + "loss": 0.0825, "step": 1641 }, { "epoch": 0.46, "learning_rate": 1.6953335188793024e-05, - "loss": 0.1922, + "loss": 0.1332, "step": 1642 }, { "epoch": 0.46, "learning_rate": 1.6951479729102886e-05, - "loss": 0.1015, + "loss": 0.0202, "step": 1643 }, { "epoch": 0.46, "learning_rate": 1.6949624269412747e-05, - "loss": 0.1913, + "loss": 0.111, "step": 1644 }, { "epoch": 0.46, "learning_rate": 1.6947768809722613e-05, - "loss": 0.1903, + "loss": 0.0684, "step": 1645 }, { "epoch": 0.46, "learning_rate": 1.694591335003247e-05, - "loss": 0.0504, + "loss": 0.1361, "step": 1646 }, { "epoch": 0.46, "learning_rate": 1.6944057890342333e-05, - "loss": 0.0982, + "loss": 0.1126, "step": 1647 }, { "epoch": 0.46, "learning_rate": 1.6942202430652195e-05, - "loss": 0.1385, + "loss": 0.0569, "step": 1648 }, { "epoch": 0.46, "learning_rate": 1.6940346970962057e-05, - "loss": 0.2418, + "loss": 0.2469, "step": 1649 }, { "epoch": 0.46, "learning_rate": 1.693849151127192e-05, - "loss": 0.1398, + "loss": 0.1106, "step": 1650 }, { "epoch": 0.46, "learning_rate": 1.693663605158178e-05, - "loss": 0.1368, + "loss": 0.1354, "step": 1651 }, { "epoch": 0.46, "learning_rate": 1.6934780591891642e-05, - "loss": 0.1366, + "loss": 0.0744, "step": 1652 }, { "epoch": 0.46, "learning_rate": 1.6932925132201504e-05, - "loss": 0.2905, + "loss": 0.0433, "step": 1653 }, { "epoch": 0.46, "learning_rate": 1.6931069672511366e-05, - "loss": 0.0904, + "loss": 0.0433, "step": 1654 }, { "epoch": 0.46, "learning_rate": 1.6929214212821228e-05, - "loss": 0.1446, + "loss": 0.0465, "step": 1655 }, { "epoch": 0.46, "learning_rate": 1.692735875313109e-05, - "loss": 0.0381, + "loss": 0.0621, "step": 1656 }, { "epoch": 0.46, "learning_rate": 1.6925503293440952e-05, - "loss": 0.2966, + "loss": 0.247, "step": 1657 }, { "epoch": 0.46, "learning_rate": 1.6923647833750814e-05, - "loss": 0.2928, + "loss": 0.1106, "step": 1658 }, { "epoch": 0.46, "learning_rate": 1.6921792374060676e-05, - "loss": 0.1899, + "loss": 0.05, "step": 1659 }, { "epoch": 0.46, "learning_rate": 1.6919936914370537e-05, - "loss": 0.0873, + "loss": 0.0611, "step": 1660 }, { "epoch": 0.46, "learning_rate": 1.69180814546804e-05, - "loss": 0.2425, + "loss": 0.0714, "step": 1661 }, { "epoch": 0.46, "learning_rate": 1.691622599499026e-05, - "loss": 0.0869, + "loss": 0.1492, "step": 1662 }, { "epoch": 0.46, "learning_rate": 1.6914370535300123e-05, - "loss": 0.0892, + "loss": 0.0218, "step": 1663 }, { "epoch": 0.46, "learning_rate": 1.6912515075609985e-05, - "loss": 0.0367, + "loss": 0.0508, "step": 1664 }, { "epoch": 0.46, "learning_rate": 1.6910659615919847e-05, - "loss": 0.0853, + "loss": 0.0866, "step": 1665 }, { "epoch": 0.46, "learning_rate": 1.690880415622971e-05, - "loss": 0.3008, + "loss": 0.1415, "step": 1666 }, { "epoch": 0.46, "learning_rate": 1.690694869653957e-05, - "loss": 0.2509, + "loss": 0.0483, "step": 1667 }, { "epoch": 0.46, "learning_rate": 1.690509323684943e-05, - "loss": 0.1397, + "loss": 0.154, "step": 1668 }, { "epoch": 0.46, "learning_rate": 1.6903237777159294e-05, - "loss": 0.1911, + "loss": 0.1313, "step": 1669 }, { "epoch": 0.46, "learning_rate": 1.6901382317469156e-05, - "loss": 0.0875, + "loss": 0.0711, "step": 1670 }, { "epoch": 0.47, "learning_rate": 1.6899526857779015e-05, - "loss": 0.1443, + "loss": 0.015, "step": 1671 }, { "epoch": 0.47, "learning_rate": 1.6897671398088876e-05, - "loss": 0.0877, + "loss": 0.1101, "step": 1672 }, { "epoch": 0.47, "learning_rate": 1.689581593839874e-05, - "loss": 0.1913, + "loss": 0.1635, "step": 1673 }, { "epoch": 0.47, "learning_rate": 1.6893960478708604e-05, - "loss": 0.0855, + "loss": 0.1139, "step": 1674 }, { "epoch": 0.47, "learning_rate": 1.6892105019018462e-05, - "loss": 0.1417, + "loss": 0.0731, "step": 1675 }, { "epoch": 0.47, "learning_rate": 1.6890249559328324e-05, - "loss": 0.2468, + "loss": 0.0348, "step": 1676 }, { "epoch": 0.47, "learning_rate": 1.6888394099638186e-05, - "loss": 0.1897, + "loss": 0.0779, "step": 1677 }, { "epoch": 0.47, "learning_rate": 1.6886538639948048e-05, - "loss": 0.0882, + "loss": 0.0833, "step": 1678 }, { "epoch": 0.47, "learning_rate": 1.688468318025791e-05, - "loss": 0.1918, + "loss": 0.155, "step": 1679 }, { "epoch": 0.47, "learning_rate": 1.688282772056777e-05, - "loss": 0.144, + "loss": 0.0747, "step": 1680 }, { "epoch": 0.47, "learning_rate": 1.6880972260877633e-05, - "loss": 0.1358, + "loss": 0.0734, "step": 1681 }, { "epoch": 0.47, "learning_rate": 1.6879116801187495e-05, - "loss": 0.2925, + "loss": 0.0682, "step": 1682 }, { "epoch": 0.47, "learning_rate": 1.6877261341497357e-05, - "loss": 0.0864, + "loss": 0.2141, "step": 1683 }, { "epoch": 0.47, "learning_rate": 1.687540588180722e-05, - "loss": 0.1415, + "loss": 0.0171, "step": 1684 }, { "epoch": 0.47, "learning_rate": 1.687355042211708e-05, - "loss": 0.19, + "loss": 0.019, "step": 1685 }, { "epoch": 0.47, "learning_rate": 1.6871694962426943e-05, - "loss": 0.1864, + "loss": 0.1167, "step": 1686 }, { "epoch": 0.47, "learning_rate": 1.6869839502736804e-05, - "loss": 0.1946, + "loss": 0.117, "step": 1687 }, { "epoch": 0.47, "learning_rate": 1.6867984043046666e-05, - "loss": 0.3883, + "loss": 0.0132, "step": 1688 }, { "epoch": 0.47, "learning_rate": 1.6866128583356528e-05, - "loss": 0.1887, + "loss": 0.0102, "step": 1689 }, { "epoch": 0.47, "learning_rate": 1.686427312366639e-05, - "loss": 0.2401, + "loss": 0.1586, "step": 1690 }, { "epoch": 0.47, "learning_rate": 1.6862417663976252e-05, - "loss": 0.1402, + "loss": 0.2661, "step": 1691 }, { "epoch": 0.47, "learning_rate": 1.6860562204286114e-05, - "loss": 0.1863, + "loss": 0.1161, "step": 1692 }, { "epoch": 0.47, "learning_rate": 1.6858706744595972e-05, - "loss": 0.1884, + "loss": 0.1455, "step": 1693 }, { "epoch": 0.47, "learning_rate": 1.6856851284905838e-05, - "loss": 0.1908, + "loss": 0.1248, "step": 1694 }, { "epoch": 0.47, "learning_rate": 1.68549958252157e-05, - "loss": 0.1442, + "loss": 0.1414, "step": 1695 }, { "epoch": 0.47, "learning_rate": 1.685314036552556e-05, - "loss": 0.0475, + "loss": 0.0821, "step": 1696 }, { "epoch": 0.47, "learning_rate": 1.685128490583542e-05, - "loss": 0.2772, + "loss": 0.0359, "step": 1697 }, { "epoch": 0.47, "learning_rate": 1.6849429446145285e-05, - "loss": 0.1942, + "loss": 0.0916, "step": 1698 }, { "epoch": 0.47, "learning_rate": 1.6847573986455147e-05, - "loss": 0.0479, + "loss": 0.0462, "step": 1699 }, { "epoch": 0.47, "learning_rate": 1.6845718526765005e-05, - "loss": 0.0939, + "loss": 0.135, "step": 1700 }, { "epoch": 0.47, "learning_rate": 1.6843863067074867e-05, - "loss": 0.0945, + "loss": 0.0381, "step": 1701 }, { "epoch": 0.47, "learning_rate": 1.6842007607384733e-05, - "loss": 0.0952, + "loss": 0.0253, "step": 1702 }, { "epoch": 0.47, "learning_rate": 1.6840152147694594e-05, - "loss": 0.0922, + "loss": 0.1866, "step": 1703 }, { "epoch": 0.47, "learning_rate": 1.6838296688004453e-05, - "loss": 0.093, + "loss": 0.0665, "step": 1704 }, { "epoch": 0.47, "learning_rate": 1.6836441228314315e-05, - "loss": 0.2952, + "loss": 0.0862, "step": 1705 }, { "epoch": 0.47, "learning_rate": 1.683458576862418e-05, - "loss": 0.0382, + "loss": 0.0148, "step": 1706 }, { "epoch": 0.48, "learning_rate": 1.683273030893404e-05, - "loss": 0.1387, + "loss": 0.207, "step": 1707 }, { "epoch": 0.48, "learning_rate": 1.68308748492439e-05, - "loss": 0.1937, + "loss": 0.0945, "step": 1708 }, { "epoch": 0.48, "learning_rate": 1.6829019389553762e-05, - "loss": 0.0853, + "loss": 0.0581, "step": 1709 }, { "epoch": 0.48, "learning_rate": 1.6827163929863624e-05, - "loss": 0.0875, + "loss": 0.0146, "step": 1710 }, { "epoch": 0.48, "learning_rate": 1.6825308470173486e-05, - "loss": 0.0899, + "loss": 0.1995, "step": 1711 }, { "epoch": 0.48, "learning_rate": 1.6823453010483348e-05, - "loss": 0.1381, + "loss": 0.007, "step": 1712 }, { "epoch": 0.48, "learning_rate": 1.682159755079321e-05, - "loss": 0.0334, + "loss": 0.2264, "step": 1713 }, { "epoch": 0.48, "learning_rate": 1.681974209110307e-05, - "loss": 0.1916, + "loss": 0.1293, "step": 1714 }, { "epoch": 0.48, "learning_rate": 1.6817886631412933e-05, - "loss": 0.1402, + "loss": 0.0969, "step": 1715 }, { "epoch": 0.48, "learning_rate": 1.6816031171722795e-05, - "loss": 0.198, + "loss": 0.107, "step": 1716 }, { "epoch": 0.48, "learning_rate": 1.6814175712032657e-05, - "loss": 0.1428, + "loss": 0.0824, "step": 1717 }, { "epoch": 0.48, "learning_rate": 1.681232025234252e-05, - "loss": 0.194, + "loss": 0.1261, "step": 1718 }, { "epoch": 0.48, "learning_rate": 1.681046479265238e-05, - "loss": 0.0872, + "loss": 0.1132, "step": 1719 }, { "epoch": 0.48, "learning_rate": 1.6808609332962243e-05, - "loss": 0.085, + "loss": 0.177, "step": 1720 }, { "epoch": 0.48, "learning_rate": 1.6806753873272105e-05, - "loss": 0.1905, + "loss": 0.1548, "step": 1721 }, { "epoch": 0.48, "learning_rate": 1.6804898413581967e-05, - "loss": 0.2007, + "loss": 0.11, "step": 1722 }, { "epoch": 0.48, "learning_rate": 1.680304295389183e-05, - "loss": 0.0832, + "loss": 0.1957, "step": 1723 }, { "epoch": 0.48, "learning_rate": 1.680118749420169e-05, - "loss": 0.1365, + "loss": 0.0684, "step": 1724 }, { "epoch": 0.48, "learning_rate": 1.6799332034511552e-05, - "loss": 0.1951, + "loss": 0.1621, "step": 1725 }, { "epoch": 0.48, "learning_rate": 1.6797476574821414e-05, - "loss": 0.1886, + "loss": 0.0582, "step": 1726 }, { "epoch": 0.48, "learning_rate": 1.6795621115131276e-05, - "loss": 0.1893, + "loss": 0.0646, "step": 1727 }, { "epoch": 0.48, "learning_rate": 1.6793765655441138e-05, - "loss": 0.1399, + "loss": 0.0533, "step": 1728 }, { "epoch": 0.48, "learning_rate": 1.6791910195750996e-05, - "loss": 0.3004, + "loss": 0.0684, "step": 1729 }, { "epoch": 0.48, "learning_rate": 1.679005473606086e-05, - "loss": 0.1407, + "loss": 0.2079, "step": 1730 }, { "epoch": 0.48, "learning_rate": 1.6788199276370723e-05, - "loss": 0.137, + "loss": 0.0726, "step": 1731 }, { "epoch": 0.48, "learning_rate": 1.6786343816680585e-05, - "loss": 0.2515, + "loss": 0.1183, "step": 1732 }, { "epoch": 0.48, "learning_rate": 1.6784488356990444e-05, - "loss": 0.1333, + "loss": 0.1027, "step": 1733 }, { "epoch": 0.48, "learning_rate": 1.678263289730031e-05, - "loss": 0.1402, + "loss": 0.0175, "step": 1734 }, { "epoch": 0.48, "learning_rate": 1.678077743761017e-05, - "loss": 0.1364, + "loss": 0.0676, "step": 1735 }, { "epoch": 0.48, "learning_rate": 1.677892197792003e-05, - "loss": 0.0354, + "loss": 0.0355, "step": 1736 }, { "epoch": 0.48, "learning_rate": 1.677706651822989e-05, - "loss": 0.2419, + "loss": 0.0723, "step": 1737 }, { "epoch": 0.48, "learning_rate": 1.6775211058539757e-05, - "loss": 0.1369, + "loss": 0.1243, "step": 1738 }, { "epoch": 0.48, "learning_rate": 1.6773355598849615e-05, - "loss": 0.1411, + "loss": 0.1034, "step": 1739 }, { "epoch": 0.48, "learning_rate": 1.6771500139159477e-05, - "loss": 0.1358, + "loss": 0.1442, "step": 1740 }, { "epoch": 0.48, "learning_rate": 1.676964467946934e-05, - "loss": 0.182, + "loss": 0.0149, "step": 1741 }, { "epoch": 0.48, "learning_rate": 1.6767789219779204e-05, - "loss": 0.089, + "loss": 0.0755, "step": 1742 }, { "epoch": 0.49, "learning_rate": 1.6765933760089062e-05, - "loss": 0.2385, + "loss": 0.165, "step": 1743 }, { "epoch": 0.49, "learning_rate": 1.6764078300398924e-05, - "loss": 0.0896, + "loss": 0.1027, "step": 1744 }, { "epoch": 0.49, "learning_rate": 1.6762222840708786e-05, - "loss": 0.0393, + "loss": 0.0668, "step": 1745 }, { "epoch": 0.49, "learning_rate": 1.6760367381018648e-05, - "loss": 0.1993, + "loss": 0.1458, "step": 1746 }, { "epoch": 0.49, "learning_rate": 1.675851192132851e-05, - "loss": 0.2361, + "loss": 0.0607, "step": 1747 }, { "epoch": 0.49, "learning_rate": 1.6756656461638372e-05, - "loss": 0.085, + "loss": 0.0199, "step": 1748 }, { "epoch": 0.49, "learning_rate": 1.6754801001948234e-05, - "loss": 0.1373, + "loss": 0.0981, "step": 1749 }, { "epoch": 0.49, "learning_rate": 1.6752945542258096e-05, - "loss": 0.0924, + "loss": 0.0303, "step": 1750 }, { "epoch": 0.49, "learning_rate": 1.6751090082567957e-05, - "loss": 0.087, + "loss": 0.0658, "step": 1751 }, { "epoch": 0.49, "learning_rate": 1.674923462287782e-05, - "loss": 0.0378, + "loss": 0.2779, "step": 1752 }, { "epoch": 0.49, "learning_rate": 1.674737916318768e-05, - "loss": 0.346, + "loss": 0.0544, "step": 1753 }, { "epoch": 0.49, "learning_rate": 1.6745523703497543e-05, - "loss": 0.0901, + "loss": 0.268, "step": 1754 }, { "epoch": 0.49, "learning_rate": 1.6743668243807405e-05, - "loss": 0.2484, + "loss": 0.1315, "step": 1755 }, { "epoch": 0.49, "learning_rate": 1.6741812784117267e-05, - "loss": 0.1864, + "loss": 0.2288, "step": 1756 }, { "epoch": 0.49, "learning_rate": 1.673995732442713e-05, - "loss": 0.0872, + "loss": 0.0593, "step": 1757 }, { "epoch": 0.49, "learning_rate": 1.673810186473699e-05, - "loss": 0.1369, + "loss": 0.1033, "step": 1758 }, { "epoch": 0.49, "learning_rate": 1.6736246405046852e-05, - "loss": 0.2418, + "loss": 0.1682, "step": 1759 }, { "epoch": 0.49, "learning_rate": 1.6734390945356714e-05, - "loss": 0.1385, + "loss": 0.0373, "step": 1760 }, { "epoch": 0.49, "learning_rate": 1.6732535485666576e-05, - "loss": 0.1881, + "loss": 0.1092, "step": 1761 }, { "epoch": 0.49, "learning_rate": 1.6730680025976438e-05, - "loss": 0.0878, + "loss": 0.0525, "step": 1762 }, { "epoch": 0.49, "learning_rate": 1.67288245662863e-05, - "loss": 0.2991, + "loss": 0.0598, "step": 1763 }, { "epoch": 0.49, "learning_rate": 1.6726969106596162e-05, - "loss": 0.2919, + "loss": 0.1529, "step": 1764 }, { "epoch": 0.49, "learning_rate": 1.672511364690602e-05, - "loss": 0.0861, + "loss": 0.1159, "step": 1765 }, { "epoch": 0.49, "learning_rate": 1.6723258187215885e-05, - "loss": 0.089, + "loss": 0.0274, "step": 1766 }, { "epoch": 0.49, "learning_rate": 1.6721402727525747e-05, - "loss": 0.1411, + "loss": 0.0228, "step": 1767 }, { "epoch": 0.49, "learning_rate": 1.6719547267835606e-05, - "loss": 0.088, + "loss": 0.2266, "step": 1768 }, { "epoch": 0.49, "learning_rate": 1.6717691808145468e-05, - "loss": 0.0879, + "loss": 0.0168, "step": 1769 }, { "epoch": 0.49, "learning_rate": 1.6715836348455333e-05, - "loss": 0.141, + "loss": 0.075, "step": 1770 }, { "epoch": 0.49, "learning_rate": 1.6713980888765195e-05, - "loss": 0.2372, + "loss": 0.0412, "step": 1771 }, { "epoch": 0.49, "learning_rate": 1.6712125429075053e-05, - "loss": 0.1352, + "loss": 0.0112, "step": 1772 }, { "epoch": 0.49, "learning_rate": 1.6710269969384915e-05, - "loss": 0.1387, + "loss": 0.1155, "step": 1773 }, { "epoch": 0.49, "learning_rate": 1.670841450969478e-05, - "loss": 0.1386, + "loss": 0.0879, "step": 1774 }, { "epoch": 0.49, "learning_rate": 1.670655905000464e-05, - "loss": 0.0356, + "loss": 0.1112, "step": 1775 }, { "epoch": 0.49, "learning_rate": 1.67047035903145e-05, - "loss": 0.086, + "loss": 0.0804, "step": 1776 }, { "epoch": 0.49, "learning_rate": 1.6702848130624363e-05, - "loss": 0.1937, + "loss": 0.0101, "step": 1777 }, { "epoch": 0.49, "learning_rate": 1.6700992670934228e-05, - "loss": 0.292, + "loss": 0.0653, "step": 1778 }, { "epoch": 0.5, "learning_rate": 1.6699137211244086e-05, - "loss": 0.0888, + "loss": 0.0169, "step": 1779 }, { "epoch": 0.5, "learning_rate": 1.6697281751553948e-05, - "loss": 0.0863, + "loss": 0.0092, "step": 1780 }, { "epoch": 0.5, "learning_rate": 1.669542629186381e-05, - "loss": 0.1371, + "loss": 0.0102, "step": 1781 }, { "epoch": 0.5, "learning_rate": 1.6693570832173672e-05, - "loss": 0.1918, + "loss": 0.1782, "step": 1782 }, { "epoch": 0.5, "learning_rate": 1.6691715372483534e-05, - "loss": 0.0859, + "loss": 0.073, "step": 1783 }, { "epoch": 0.5, "learning_rate": 1.6689859912793396e-05, - "loss": 0.0858, + "loss": 0.107, "step": 1784 }, { "epoch": 0.5, "learning_rate": 1.6688004453103258e-05, - "loss": 0.1954, + "loss": 0.1425, "step": 1785 }, { "epoch": 0.5, "learning_rate": 1.668614899341312e-05, - "loss": 0.2434, + "loss": 0.0868, "step": 1786 }, { "epoch": 0.5, "learning_rate": 1.668429353372298e-05, - "loss": 0.0803, + "loss": 0.1127, "step": 1787 }, { "epoch": 0.5, "learning_rate": 1.6682438074032843e-05, - "loss": 0.4061, + "loss": 0.1075, "step": 1788 }, { "epoch": 0.5, "learning_rate": 1.6680582614342705e-05, - "loss": 0.3033, + "loss": 0.0091, "step": 1789 }, { "epoch": 0.5, "learning_rate": 1.6678727154652567e-05, - "loss": 0.0338, + "loss": 0.0135, "step": 1790 }, { "epoch": 0.5, "learning_rate": 1.667687169496243e-05, - "loss": 0.09, + "loss": 0.0063, "step": 1791 }, { "epoch": 0.5, "learning_rate": 1.667501623527229e-05, - "loss": 0.0863, + "loss": 0.0069, "step": 1792 }, { "epoch": 0.5, "learning_rate": 1.6673160775582153e-05, - "loss": 0.1941, + "loss": 0.0762, "step": 1793 }, { "epoch": 0.5, "learning_rate": 1.6671305315892014e-05, - "loss": 0.1938, + "loss": 0.3017, "step": 1794 }, { "epoch": 0.5, "learning_rate": 1.6669449856201876e-05, - "loss": 0.2966, + "loss": 0.0841, "step": 1795 }, { "epoch": 0.5, "learning_rate": 1.6667594396511738e-05, - "loss": 0.1928, + "loss": 0.071, "step": 1796 }, { "epoch": 0.5, "learning_rate": 1.6665738936821597e-05, - "loss": 0.089, + "loss": 0.1114, "step": 1797 }, { "epoch": 0.5, "learning_rate": 1.6663883477131462e-05, - "loss": 0.1899, + "loss": 0.2178, "step": 1798 }, { "epoch": 0.5, "learning_rate": 1.6662028017441324e-05, - "loss": 0.141, + "loss": 0.2855, "step": 1799 }, { "epoch": 0.5, "learning_rate": 1.6660172557751186e-05, - "loss": 0.1949, + "loss": 0.2318, "step": 1800 }, { "epoch": 0.5, "learning_rate": 1.6658317098061044e-05, - "loss": 0.2398, + "loss": 0.1224, "step": 1801 }, { "epoch": 0.5, "learning_rate": 1.665646163837091e-05, - "loss": 0.2938, + "loss": 0.0831, "step": 1802 }, { "epoch": 0.5, "learning_rate": 1.665460617868077e-05, - "loss": 0.0882, + "loss": 0.1417, "step": 1803 }, { "epoch": 0.5, "learning_rate": 1.665275071899063e-05, - "loss": 0.1377, + "loss": 0.0829, "step": 1804 }, { "epoch": 0.5, "learning_rate": 1.665089525930049e-05, - "loss": 0.1377, + "loss": 0.0875, "step": 1805 }, { "epoch": 0.5, "learning_rate": 1.6649039799610357e-05, - "loss": 0.1415, + "loss": 0.1726, "step": 1806 }, { "epoch": 0.5, "learning_rate": 1.6647184339920215e-05, - "loss": 0.1945, + "loss": 0.1008, "step": 1807 }, { "epoch": 0.5, "learning_rate": 1.6645328880230077e-05, - "loss": 0.0939, + "loss": 0.0605, "step": 1808 }, { "epoch": 0.5, "learning_rate": 1.664347342053994e-05, - "loss": 0.1391, + "loss": 0.0715, "step": 1809 }, { "epoch": 0.5, "learning_rate": 1.6641617960849804e-05, - "loss": 0.0953, + "loss": 0.0304, "step": 1810 }, { "epoch": 0.5, "learning_rate": 1.6639762501159663e-05, - "loss": 0.1359, + "loss": 0.0545, "step": 1811 }, { "epoch": 0.5, "learning_rate": 1.6637907041469525e-05, - "loss": 0.0424, + "loss": 0.0242, "step": 1812 }, { "epoch": 0.5, "learning_rate": 1.6636051581779387e-05, - "loss": 0.2397, + "loss": 0.1041, "step": 1813 }, { "epoch": 0.5, "learning_rate": 1.663419612208925e-05, - "loss": 0.1427, + "loss": 0.0644, "step": 1814 }, { "epoch": 0.51, "learning_rate": 1.663234066239911e-05, - "loss": 0.0401, + "loss": 0.0782, "step": 1815 }, { "epoch": 0.51, "learning_rate": 1.6630485202708972e-05, - "loss": 0.1886, + "loss": 0.1113, "step": 1816 }, { "epoch": 0.51, "learning_rate": 1.6628629743018834e-05, - "loss": 0.2424, + "loss": 0.0356, "step": 1817 }, { "epoch": 0.51, "learning_rate": 1.6626774283328696e-05, - "loss": 0.2467, + "loss": 0.0669, "step": 1818 }, { "epoch": 0.51, "learning_rate": 1.6624918823638558e-05, - "loss": 0.089, + "loss": 0.0105, "step": 1819 }, { "epoch": 0.51, "learning_rate": 1.662306336394842e-05, - "loss": 0.3515, + "loss": 0.1163, "step": 1820 }, { "epoch": 0.51, "learning_rate": 1.662120790425828e-05, - "loss": 0.1383, + "loss": 0.2124, "step": 1821 }, { "epoch": 0.51, "learning_rate": 1.6619352444568143e-05, - "loss": 0.1874, + "loss": 0.0647, "step": 1822 }, { "epoch": 0.51, "learning_rate": 1.6617496984878005e-05, - "loss": 0.0902, + "loss": 0.0722, "step": 1823 }, { "epoch": 0.51, "learning_rate": 1.6615641525187867e-05, - "loss": 0.0371, + "loss": 0.1297, "step": 1824 }, { "epoch": 0.51, "learning_rate": 1.661378606549773e-05, - "loss": 0.196, + "loss": 0.0701, "step": 1825 }, { "epoch": 0.51, "learning_rate": 1.6611930605807588e-05, - "loss": 0.2428, + "loss": 0.1005, "step": 1826 }, { "epoch": 0.51, "learning_rate": 1.6610075146117453e-05, - "loss": 0.14, + "loss": 0.142, "step": 1827 }, { "epoch": 0.51, "learning_rate": 1.6608219686427315e-05, - "loss": 0.2367, + "loss": 0.0149, "step": 1828 }, { "epoch": 0.51, "learning_rate": 1.6606364226737177e-05, - "loss": 0.0376, + "loss": 0.0575, "step": 1829 }, { "epoch": 0.51, "learning_rate": 1.6604508767047035e-05, - "loss": 0.09, + "loss": 0.0192, "step": 1830 }, { "epoch": 0.51, "learning_rate": 1.66026533073569e-05, - "loss": 0.1357, + "loss": 0.1531, "step": 1831 }, { "epoch": 0.51, "learning_rate": 1.6600797847666762e-05, - "loss": 0.1417, + "loss": 0.1139, "step": 1832 }, { "epoch": 0.51, "learning_rate": 1.659894238797662e-05, - "loss": 0.0915, + "loss": 0.0622, "step": 1833 }, { "epoch": 0.51, "learning_rate": 1.6597086928286483e-05, - "loss": 0.0844, + "loss": 0.125, "step": 1834 }, { "epoch": 0.51, "learning_rate": 1.6595231468596348e-05, - "loss": 0.2486, + "loss": 0.1962, "step": 1835 }, { "epoch": 0.51, "learning_rate": 1.6593376008906206e-05, - "loss": 0.1423, + "loss": 0.0695, "step": 1836 }, { "epoch": 0.51, "learning_rate": 1.6591520549216068e-05, - "loss": 0.0884, + "loss": 0.1276, "step": 1837 }, { "epoch": 0.51, "learning_rate": 1.658966508952593e-05, - "loss": 0.087, + "loss": 0.2234, "step": 1838 }, { "epoch": 0.51, "learning_rate": 1.6587809629835795e-05, - "loss": 0.1393, + "loss": 0.081, "step": 1839 }, { "epoch": 0.51, "learning_rate": 1.6585954170145654e-05, - "loss": 0.086, + "loss": 0.1628, "step": 1840 }, { "epoch": 0.51, "learning_rate": 1.6584098710455516e-05, - "loss": 0.0341, + "loss": 0.0946, "step": 1841 }, { "epoch": 0.51, "learning_rate": 1.6582243250765377e-05, - "loss": 0.0323, + "loss": 0.0752, "step": 1842 }, { "epoch": 0.51, "learning_rate": 1.658038779107524e-05, - "loss": 0.1891, + "loss": 0.1024, "step": 1843 }, { "epoch": 0.51, "learning_rate": 1.65785323313851e-05, - "loss": 0.0313, + "loss": 0.0255, "step": 1844 }, { "epoch": 0.51, "learning_rate": 1.6576676871694963e-05, - "loss": 0.1365, + "loss": 0.1836, "step": 1845 }, { "epoch": 0.51, "learning_rate": 1.6574821412004825e-05, - "loss": 0.3657, + "loss": 0.1347, "step": 1846 }, { "epoch": 0.51, "learning_rate": 1.6572965952314687e-05, - "loss": 0.1428, + "loss": 0.1579, "step": 1847 }, { "epoch": 0.51, "learning_rate": 1.657111049262455e-05, - "loss": 0.1902, + "loss": 0.096, "step": 1848 }, { "epoch": 0.51, "learning_rate": 1.656925503293441e-05, - "loss": 0.1972, + "loss": 0.1352, "step": 1849 }, { "epoch": 0.51, "learning_rate": 1.6567399573244272e-05, - "loss": 0.2521, + "loss": 0.0703, "step": 1850 }, { "epoch": 0.52, "learning_rate": 1.6565544113554134e-05, - "loss": 0.0815, + "loss": 0.1906, "step": 1851 }, { "epoch": 0.52, "learning_rate": 1.6563688653863996e-05, - "loss": 0.1939, + "loss": 0.0695, "step": 1852 }, { "epoch": 0.52, "learning_rate": 1.6561833194173858e-05, - "loss": 0.1367, + "loss": 0.0805, "step": 1853 }, { "epoch": 0.52, "learning_rate": 1.655997773448372e-05, - "loss": 0.0305, + "loss": 0.1162, "step": 1854 }, { "epoch": 0.52, "learning_rate": 1.6558122274793582e-05, - "loss": 0.2512, + "loss": 0.0608, "step": 1855 }, { "epoch": 0.52, "learning_rate": 1.6556266815103444e-05, - "loss": 0.142, + "loss": 0.0642, "step": 1856 }, { "epoch": 0.52, "learning_rate": 1.6554411355413306e-05, - "loss": 0.1392, + "loss": 0.1236, "step": 1857 }, { "epoch": 0.52, "learning_rate": 1.6552555895723167e-05, - "loss": 0.1983, + "loss": 0.0182, "step": 1858 }, { "epoch": 0.52, "learning_rate": 1.655070043603303e-05, - "loss": 0.2473, + "loss": 0.0152, "step": 1859 }, { "epoch": 0.52, "learning_rate": 1.654884497634289e-05, - "loss": 0.1987, + "loss": 0.0559, "step": 1860 }, { "epoch": 0.52, "learning_rate": 1.6546989516652753e-05, - "loss": 0.1913, + "loss": 0.2346, "step": 1861 }, { "epoch": 0.52, "learning_rate": 1.654513405696261e-05, - "loss": 0.2411, + "loss": 0.014, "step": 1862 }, { "epoch": 0.52, "learning_rate": 1.6543278597272477e-05, - "loss": 0.0362, + "loss": 0.01, "step": 1863 }, { "epoch": 0.52, "learning_rate": 1.654142313758234e-05, - "loss": 0.2885, + "loss": 0.0145, "step": 1864 }, { "epoch": 0.52, "learning_rate": 1.6539567677892197e-05, - "loss": 0.0921, + "loss": 0.1064, "step": 1865 }, { "epoch": 0.52, "learning_rate": 1.653771221820206e-05, - "loss": 0.2461, + "loss": 0.133, "step": 1866 }, { "epoch": 0.52, "learning_rate": 1.6535856758511924e-05, - "loss": 0.1386, + "loss": 0.1115, "step": 1867 }, { "epoch": 0.52, "learning_rate": 1.6534001298821786e-05, - "loss": 0.1424, + "loss": 0.158, "step": 1868 }, { "epoch": 0.52, "learning_rate": 1.6532145839131645e-05, - "loss": 0.2419, + "loss": 0.0163, "step": 1869 }, { "epoch": 0.52, "learning_rate": 1.6530290379441506e-05, - "loss": 0.0924, + "loss": 0.1929, "step": 1870 }, { "epoch": 0.52, "learning_rate": 1.6528434919751372e-05, - "loss": 0.0408, + "loss": 0.1034, "step": 1871 }, { "epoch": 0.52, "learning_rate": 1.652657946006123e-05, - "loss": 0.1915, + "loss": 0.1489, "step": 1872 }, { "epoch": 0.52, "learning_rate": 1.6524724000371092e-05, - "loss": 0.1921, + "loss": 0.1938, "step": 1873 }, { "epoch": 0.52, "learning_rate": 1.6522868540680954e-05, - "loss": 0.0915, + "loss": 0.1606, "step": 1874 }, { "epoch": 0.52, "learning_rate": 1.652101308099082e-05, - "loss": 0.1437, + "loss": 0.1342, "step": 1875 }, { "epoch": 0.52, "learning_rate": 1.6519157621300678e-05, - "loss": 0.1405, + "loss": 0.116, "step": 1876 }, { "epoch": 0.52, "learning_rate": 1.651730216161054e-05, - "loss": 0.2385, + "loss": 0.0771, "step": 1877 }, { "epoch": 0.52, "learning_rate": 1.65154467019204e-05, - "loss": 0.1404, + "loss": 0.0419, "step": 1878 }, { "epoch": 0.52, "learning_rate": 1.6513591242230263e-05, - "loss": 0.0886, + "loss": 0.0726, "step": 1879 }, { "epoch": 0.52, "learning_rate": 1.6511735782540125e-05, - "loss": 0.2392, + "loss": 0.0656, "step": 1880 }, { "epoch": 0.52, "learning_rate": 1.6509880322849987e-05, - "loss": 0.0399, + "loss": 0.0332, "step": 1881 }, { "epoch": 0.52, "learning_rate": 1.650802486315985e-05, - "loss": 0.1919, + "loss": 0.125, "step": 1882 }, { "epoch": 0.52, "learning_rate": 1.650616940346971e-05, - "loss": 0.1419, + "loss": 0.0911, "step": 1883 }, { "epoch": 0.52, "learning_rate": 1.6504313943779573e-05, - "loss": 0.0901, + "loss": 0.0229, "step": 1884 }, { "epoch": 0.52, "learning_rate": 1.6502458484089435e-05, - "loss": 0.1442, + "loss": 0.099, "step": 1885 }, { "epoch": 0.52, "learning_rate": 1.6500603024399296e-05, - "loss": 0.1375, + "loss": 0.1226, "step": 1886 }, { "epoch": 0.53, "learning_rate": 1.6498747564709158e-05, - "loss": 0.1386, + "loss": 0.0239, "step": 1887 }, { "epoch": 0.53, "learning_rate": 1.649689210501902e-05, - "loss": 0.0362, + "loss": 0.0631, "step": 1888 }, { "epoch": 0.53, "learning_rate": 1.6495036645328882e-05, - "loss": 0.2427, + "loss": 0.0148, "step": 1889 }, { "epoch": 0.53, "learning_rate": 1.6493181185638744e-05, - "loss": 0.191, + "loss": 0.0758, "step": 1890 }, { "epoch": 0.53, "learning_rate": 1.6491325725948606e-05, - "loss": 0.188, + "loss": 0.1576, "step": 1891 }, { "epoch": 0.53, "learning_rate": 1.6489470266258468e-05, - "loss": 0.3521, + "loss": 0.0112, "step": 1892 }, { "epoch": 0.53, "learning_rate": 1.648761480656833e-05, - "loss": 0.0843, + "loss": 0.0142, "step": 1893 }, { "epoch": 0.53, "learning_rate": 1.6485759346878188e-05, - "loss": 0.2379, + "loss": 0.0111, "step": 1894 }, { "epoch": 0.53, "learning_rate": 1.6483903887188053e-05, - "loss": 0.1956, + "loss": 0.2496, "step": 1895 }, { "epoch": 0.53, "learning_rate": 1.6482048427497915e-05, - "loss": 0.0901, + "loss": 0.2374, "step": 1896 }, { "epoch": 0.53, "learning_rate": 1.6480192967807777e-05, - "loss": 0.2381, + "loss": 0.2373, "step": 1897 }, { "epoch": 0.53, "learning_rate": 1.6478337508117635e-05, - "loss": 0.0906, + "loss": 0.1693, "step": 1898 }, { "epoch": 0.53, "learning_rate": 1.64764820484275e-05, - "loss": 0.1858, + "loss": 0.0519, "step": 1899 }, { "epoch": 0.53, "learning_rate": 1.6474626588737363e-05, - "loss": 0.142, + "loss": 0.1931, "step": 1900 }, { "epoch": 0.53, "learning_rate": 1.647277112904722e-05, - "loss": 0.1373, + "loss": 0.0174, "step": 1901 }, { "epoch": 0.53, "learning_rate": 1.6470915669357083e-05, - "loss": 0.2341, + "loss": 0.0243, "step": 1902 }, { "epoch": 0.53, "learning_rate": 1.6469060209666948e-05, - "loss": 0.1368, + "loss": 0.0311, "step": 1903 }, { "epoch": 0.53, "learning_rate": 1.646720474997681e-05, - "loss": 0.2967, + "loss": 0.0675, "step": 1904 }, { "epoch": 0.53, "learning_rate": 1.646534929028667e-05, - "loss": 0.1428, + "loss": 0.0403, "step": 1905 }, { "epoch": 0.53, "learning_rate": 1.646349383059653e-05, - "loss": 0.2365, + "loss": 0.0795, "step": 1906 }, { "epoch": 0.53, "learning_rate": 1.6461638370906396e-05, - "loss": 0.1398, + "loss": 0.151, "step": 1907 }, { "epoch": 0.53, "learning_rate": 1.6459782911216254e-05, - "loss": 0.1889, + "loss": 0.0221, "step": 1908 }, { "epoch": 0.53, "learning_rate": 1.6457927451526116e-05, - "loss": 0.0944, + "loss": 0.0697, "step": 1909 }, { "epoch": 0.53, "learning_rate": 1.6456071991835978e-05, - "loss": 0.2402, + "loss": 0.173, "step": 1910 }, { "epoch": 0.53, "learning_rate": 1.645421653214584e-05, - "loss": 0.1832, + "loss": 0.0931, "step": 1911 }, { "epoch": 0.53, "learning_rate": 1.64523610724557e-05, - "loss": 0.2323, + "loss": 0.1144, "step": 1912 }, { "epoch": 0.53, "learning_rate": 1.6450505612765564e-05, - "loss": 0.3294, + "loss": 0.0443, "step": 1913 }, { "epoch": 0.53, "learning_rate": 1.6448650153075425e-05, - "loss": 0.1423, + "loss": 0.057, "step": 1914 }, { "epoch": 0.53, "learning_rate": 1.6446794693385287e-05, - "loss": 0.1444, + "loss": 0.0724, "step": 1915 }, { "epoch": 0.53, "learning_rate": 1.644493923369515e-05, - "loss": 0.1412, + "loss": 0.022, "step": 1916 }, { "epoch": 0.53, "learning_rate": 1.644308377400501e-05, - "loss": 0.1894, + "loss": 0.1721, "step": 1917 }, { "epoch": 0.53, "learning_rate": 1.6441228314314873e-05, - "loss": 0.1899, + "loss": 0.1688, "step": 1918 }, { "epoch": 0.53, "learning_rate": 1.6439372854624735e-05, - "loss": 0.1881, + "loss": 0.0709, "step": 1919 }, { "epoch": 0.53, "learning_rate": 1.6437517394934597e-05, - "loss": 0.0511, + "loss": 0.1394, "step": 1920 }, { "epoch": 0.53, "learning_rate": 1.643566193524446e-05, - "loss": 0.0953, + "loss": 0.0196, "step": 1921 }, { "epoch": 0.53, "learning_rate": 1.643380647555432e-05, - "loss": 0.0928, + "loss": 0.0524, "step": 1922 }, { "epoch": 0.54, "learning_rate": 1.6431951015864182e-05, - "loss": 0.1949, + "loss": 0.1382, "step": 1923 }, { "epoch": 0.54, "learning_rate": 1.6430095556174044e-05, - "loss": 0.192, + "loss": 0.0224, "step": 1924 }, { "epoch": 0.54, "learning_rate": 1.6428240096483906e-05, - "loss": 0.0406, + "loss": 0.0494, "step": 1925 }, { "epoch": 0.54, "learning_rate": 1.6426384636793768e-05, - "loss": 0.0372, + "loss": 0.0577, "step": 1926 }, { "epoch": 0.54, "learning_rate": 1.642452917710363e-05, - "loss": 0.0371, + "loss": 0.0534, "step": 1927 }, { "epoch": 0.54, "learning_rate": 1.642267371741349e-05, - "loss": 0.2431, + "loss": 0.0705, "step": 1928 }, { "epoch": 0.54, "learning_rate": 1.6420818257723353e-05, - "loss": 0.138, + "loss": 0.0767, "step": 1929 }, { "epoch": 0.54, "learning_rate": 1.6418962798033212e-05, - "loss": 0.1934, + "loss": 0.0911, "step": 1930 }, { "epoch": 0.54, "learning_rate": 1.6417107338343077e-05, - "loss": 0.032, + "loss": 0.099, "step": 1931 }, { "epoch": 0.54, "learning_rate": 1.641525187865294e-05, - "loss": 0.0315, + "loss": 0.1389, "step": 1932 }, { "epoch": 0.54, "learning_rate": 1.6413396418962798e-05, - "loss": 0.1942, + "loss": 0.0701, "step": 1933 }, { "epoch": 0.54, "learning_rate": 1.641154095927266e-05, - "loss": 0.1371, + "loss": 0.0117, "step": 1934 }, { "epoch": 0.54, "learning_rate": 1.6409685499582525e-05, - "loss": 0.0846, + "loss": 0.1866, "step": 1935 }, { "epoch": 0.54, "learning_rate": 1.6407830039892387e-05, - "loss": 0.0827, + "loss": 0.0723, "step": 1936 }, { "epoch": 0.54, "learning_rate": 1.6405974580202245e-05, - "loss": 0.2501, + "loss": 0.0362, "step": 1937 }, { "epoch": 0.54, "learning_rate": 1.6404119120512107e-05, - "loss": 0.1433, + "loss": 0.0185, "step": 1938 }, { "epoch": 0.54, "learning_rate": 1.6402263660821972e-05, - "loss": 0.3102, + "loss": 0.0201, "step": 1939 }, { "epoch": 0.54, "learning_rate": 1.640040820113183e-05, - "loss": 0.0833, + "loss": 0.0123, "step": 1940 }, { "epoch": 0.54, "learning_rate": 1.6398552741441693e-05, - "loss": 0.0318, + "loss": 0.0999, "step": 1941 }, { "epoch": 0.54, "learning_rate": 1.6396697281751554e-05, - "loss": 0.1921, + "loss": 0.1107, "step": 1942 }, { "epoch": 0.54, "learning_rate": 1.639484182206142e-05, - "loss": 0.1438, + "loss": 0.0085, "step": 1943 }, { "epoch": 0.54, "learning_rate": 1.6392986362371278e-05, - "loss": 0.1351, + "loss": 0.0155, "step": 1944 }, { "epoch": 0.54, "learning_rate": 1.639113090268114e-05, - "loss": 0.1383, + "loss": 0.0881, "step": 1945 }, { "epoch": 0.54, "learning_rate": 1.6389275442991002e-05, - "loss": 0.1957, + "loss": 0.1401, "step": 1946 }, { "epoch": 0.54, "learning_rate": 1.6387419983300864e-05, - "loss": 0.1418, + "loss": 0.0617, "step": 1947 }, { "epoch": 0.54, "learning_rate": 1.6385564523610726e-05, - "loss": 0.3585, + "loss": 0.1124, "step": 1948 }, { "epoch": 0.54, "learning_rate": 1.6383709063920587e-05, - "loss": 0.1859, + "loss": 0.1, "step": 1949 }, { "epoch": 0.54, "learning_rate": 1.638185360423045e-05, - "loss": 0.2956, + "loss": 0.0163, "step": 1950 }, { "epoch": 0.54, "learning_rate": 1.637999814454031e-05, - "loss": 0.232, + "loss": 0.0196, "step": 1951 }, { "epoch": 0.54, "learning_rate": 1.6378142684850173e-05, - "loss": 0.3169, + "loss": 0.0233, "step": 1952 }, { "epoch": 0.54, "learning_rate": 1.6376287225160035e-05, - "loss": 0.1824, + "loss": 0.0288, "step": 1953 }, { "epoch": 0.54, "learning_rate": 1.6374431765469897e-05, - "loss": 0.0723, + "loss": 0.0239, "step": 1954 }, { "epoch": 0.54, "learning_rate": 1.637257630577976e-05, - "loss": 0.1019, + "loss": 0.1575, "step": 1955 }, { "epoch": 0.54, "learning_rate": 1.637072084608962e-05, - "loss": 0.099, + "loss": 0.0694, "step": 1956 }, { "epoch": 0.54, "learning_rate": 1.6368865386399482e-05, - "loss": 0.2467, + "loss": 0.0693, "step": 1957 }, { "epoch": 0.54, "learning_rate": 1.6367009926709344e-05, - "loss": 0.2394, + "loss": 0.0277, "step": 1958 }, { "epoch": 0.55, "learning_rate": 1.6365154467019206e-05, - "loss": 0.1915, + "loss": 0.1489, "step": 1959 }, { "epoch": 0.55, "learning_rate": 1.6363299007329068e-05, - "loss": 0.0896, + "loss": 0.0676, "step": 1960 }, { "epoch": 0.55, "learning_rate": 1.636144354763893e-05, - "loss": 0.0373, + "loss": 0.011, "step": 1961 }, { "epoch": 0.55, "learning_rate": 1.635958808794879e-05, - "loss": 0.1949, + "loss": 0.1761, "step": 1962 }, { "epoch": 0.55, "learning_rate": 1.6357732628258654e-05, - "loss": 0.0357, + "loss": 0.045, "step": 1963 }, { "epoch": 0.55, "learning_rate": 1.6355877168568516e-05, - "loss": 0.2482, + "loss": 0.058, "step": 1964 }, { "epoch": 0.55, "learning_rate": 1.6354021708878377e-05, - "loss": 0.2405, + "loss": 0.288, "step": 1965 }, { "epoch": 0.55, "learning_rate": 1.6352166249188236e-05, - "loss": 0.0872, + "loss": 0.0496, "step": 1966 }, { "epoch": 0.55, "learning_rate": 1.63503107894981e-05, - "loss": 0.1945, + "loss": 0.0963, "step": 1967 }, { "epoch": 0.55, "learning_rate": 1.6348455329807963e-05, - "loss": 0.0869, + "loss": 0.0242, "step": 1968 }, { "epoch": 0.55, "learning_rate": 1.634659987011782e-05, - "loss": 0.0871, + "loss": 0.1156, "step": 1969 }, { "epoch": 0.55, "learning_rate": 1.6344744410427683e-05, - "loss": 0.0884, + "loss": 0.1606, "step": 1970 }, { "epoch": 0.55, "learning_rate": 1.634288895073755e-05, - "loss": 0.0857, + "loss": 0.0899, "step": 1971 }, { "epoch": 0.55, "learning_rate": 1.634103349104741e-05, - "loss": 0.2491, + "loss": 0.1293, "step": 1972 }, { "epoch": 0.55, "learning_rate": 1.633917803135727e-05, - "loss": 0.1378, + "loss": 0.0191, "step": 1973 }, { "epoch": 0.55, "learning_rate": 1.633732257166713e-05, - "loss": 0.1367, + "loss": 0.1094, "step": 1974 }, { "epoch": 0.55, "learning_rate": 1.6335467111976993e-05, - "loss": 0.1364, + "loss": 0.1693, "step": 1975 }, { "epoch": 0.55, "learning_rate": 1.6333611652286855e-05, - "loss": 0.1952, + "loss": 0.0712, "step": 1976 }, { "epoch": 0.55, "learning_rate": 1.6331756192596716e-05, - "loss": 0.1445, + "loss": 0.1687, "step": 1977 }, { "epoch": 0.55, "learning_rate": 1.632990073290658e-05, - "loss": 0.0322, + "loss": 0.0197, "step": 1978 }, { "epoch": 0.55, "learning_rate": 1.632804527321644e-05, - "loss": 0.0315, + "loss": 0.0763, "step": 1979 }, { "epoch": 0.55, "learning_rate": 1.6326189813526302e-05, - "loss": 0.2496, + "loss": 0.1703, "step": 1980 }, { "epoch": 0.55, "learning_rate": 1.6324334353836164e-05, - "loss": 0.251, + "loss": 0.0168, "step": 1981 }, { "epoch": 0.55, "learning_rate": 1.6322478894146026e-05, - "loss": 0.1904, + "loss": 0.0644, "step": 1982 }, { "epoch": 0.55, "learning_rate": 1.6320623434455888e-05, - "loss": 0.1381, + "loss": 0.0778, "step": 1983 }, { "epoch": 0.55, "learning_rate": 1.631876797476575e-05, - "loss": 0.1467, + "loss": 0.1428, "step": 1984 }, { "epoch": 0.55, "learning_rate": 1.631691251507561e-05, - "loss": 0.1404, + "loss": 0.1069, "step": 1985 }, { "epoch": 0.55, "learning_rate": 1.6315057055385473e-05, - "loss": 0.1415, + "loss": 0.0229, "step": 1986 }, { "epoch": 0.55, "learning_rate": 1.6313201595695335e-05, - "loss": 0.2445, + "loss": 0.0221, "step": 1987 }, { "epoch": 0.55, "learning_rate": 1.6311346136005197e-05, - "loss": 0.1406, + "loss": 0.1269, "step": 1988 }, { "epoch": 0.55, "learning_rate": 1.630949067631506e-05, - "loss": 0.1909, + "loss": 0.0158, "step": 1989 }, { "epoch": 0.55, "learning_rate": 1.630763521662492e-05, - "loss": 0.1394, + "loss": 0.0227, "step": 1990 }, { "epoch": 0.55, "learning_rate": 1.630577975693478e-05, - "loss": 0.1387, + "loss": 0.1002, "step": 1991 }, { "epoch": 0.55, "learning_rate": 1.6303924297244645e-05, - "loss": 0.142, + "loss": 0.2297, "step": 1992 }, { "epoch": 0.55, "learning_rate": 1.6302068837554506e-05, - "loss": 0.1361, + "loss": 0.1048, "step": 1993 }, { "epoch": 0.55, "learning_rate": 1.6300213377864368e-05, - "loss": 0.1919, + "loss": 0.0187, "step": 1994 }, { "epoch": 0.56, "learning_rate": 1.6298357918174227e-05, - "loss": 0.1854, + "loss": 0.0185, "step": 1995 }, { "epoch": 0.56, "learning_rate": 1.6296502458484092e-05, - "loss": 0.0881, + "loss": 0.0133, "step": 1996 }, { "epoch": 0.56, "learning_rate": 1.6294646998793954e-05, - "loss": 0.1397, + "loss": 0.0653, "step": 1997 }, { "epoch": 0.56, "learning_rate": 1.6292791539103812e-05, - "loss": 0.1401, + "loss": 0.2524, "step": 1998 }, { "epoch": 0.56, "learning_rate": 1.6290936079413674e-05, - "loss": 0.2401, + "loss": 0.0598, "step": 1999 }, { "epoch": 0.56, "learning_rate": 1.628908061972354e-05, - "loss": 0.2408, + "loss": 0.0909, "step": 2000 }, { "epoch": 0.56, "learning_rate": 1.62872251600334e-05, - "loss": 0.2498, + "loss": 0.0689, "step": 2001 }, { "epoch": 0.56, "learning_rate": 1.628536970034326e-05, - "loss": 0.1944, + "loss": 0.0572, "step": 2002 }, { "epoch": 0.56, "learning_rate": 1.6283514240653122e-05, - "loss": 0.0902, + "loss": 0.1912, "step": 2003 }, { "epoch": 0.56, "learning_rate": 1.6281658780962987e-05, - "loss": 0.29, + "loss": 0.0146, "step": 2004 }, { "epoch": 0.56, "learning_rate": 1.6279803321272845e-05, - "loss": 0.0941, + "loss": 0.0572, "step": 2005 }, { "epoch": 0.56, "learning_rate": 1.6277947861582707e-05, - "loss": 0.0969, + "loss": 0.0154, "step": 2006 }, { "epoch": 0.56, "learning_rate": 1.627609240189257e-05, - "loss": 0.14, + "loss": 0.0228, "step": 2007 }, { "epoch": 0.56, "learning_rate": 1.627423694220243e-05, - "loss": 0.0902, + "loss": 0.0921, "step": 2008 }, { "epoch": 0.56, "learning_rate": 1.6272381482512293e-05, - "loss": 0.2363, + "loss": 0.121, "step": 2009 }, { "epoch": 0.56, "learning_rate": 1.6270526022822155e-05, - "loss": 0.2003, + "loss": 0.0227, "step": 2010 }, { "epoch": 0.56, "learning_rate": 1.6268670563132017e-05, - "loss": 0.2354, + "loss": 0.1578, "step": 2011 }, { "epoch": 0.56, "learning_rate": 1.626681510344188e-05, - "loss": 0.1418, + "loss": 0.2239, "step": 2012 }, { "epoch": 0.56, "learning_rate": 1.626495964375174e-05, - "loss": 0.0931, + "loss": 0.1686, "step": 2013 }, { "epoch": 0.56, "learning_rate": 1.6263104184061602e-05, - "loss": 0.236, + "loss": 0.1176, "step": 2014 }, { "epoch": 0.56, "learning_rate": 1.6261248724371464e-05, - "loss": 0.1395, + "loss": 0.1143, "step": 2015 }, { "epoch": 0.56, "learning_rate": 1.6259393264681326e-05, - "loss": 0.0394, + "loss": 0.0266, "step": 2016 }, { "epoch": 0.56, "learning_rate": 1.6257537804991188e-05, - "loss": 0.1937, + "loss": 0.0685, "step": 2017 }, { "epoch": 0.56, "learning_rate": 1.625568234530105e-05, - "loss": 0.2377, + "loss": 0.0311, "step": 2018 }, { "epoch": 0.56, "learning_rate": 1.625382688561091e-05, - "loss": 0.0881, + "loss": 0.0289, "step": 2019 }, { "epoch": 0.56, "learning_rate": 1.6251971425920774e-05, - "loss": 0.0867, + "loss": 0.0285, "step": 2020 }, { "epoch": 0.56, "learning_rate": 1.6250115966230635e-05, - "loss": 0.0894, + "loss": 0.1561, "step": 2021 }, { "epoch": 0.56, "learning_rate": 1.6248260506540497e-05, - "loss": 0.1974, + "loss": 0.0407, "step": 2022 }, { "epoch": 0.56, "learning_rate": 1.624640504685036e-05, - "loss": 0.2409, + "loss": 0.0866, "step": 2023 }, { "epoch": 0.56, "learning_rate": 1.624454958716022e-05, - "loss": 0.1877, + "loss": 0.0284, "step": 2024 }, { "epoch": 0.56, "learning_rate": 1.6242694127470083e-05, - "loss": 0.2912, + "loss": 0.0798, "step": 2025 }, { "epoch": 0.56, "learning_rate": 1.6240838667779945e-05, - "loss": 0.2909, + "loss": 0.0712, "step": 2026 }, { "epoch": 0.56, "learning_rate": 1.6238983208089803e-05, - "loss": 0.0858, + "loss": 0.0709, "step": 2027 }, { "epoch": 0.56, "learning_rate": 1.623712774839967e-05, - "loss": 0.1876, + "loss": 0.1935, "step": 2028 }, { "epoch": 0.56, "learning_rate": 1.623527228870953e-05, - "loss": 0.0926, + "loss": 0.0844, "step": 2029 }, { "epoch": 0.56, "learning_rate": 1.6233416829019392e-05, - "loss": 0.1884, + "loss": 0.0473, "step": 2030 }, { "epoch": 0.57, "learning_rate": 1.623156136932925e-05, - "loss": 0.0429, + "loss": 0.0931, "step": 2031 }, { "epoch": 0.57, "learning_rate": 1.6229705909639116e-05, - "loss": 0.2399, + "loss": 0.0147, "step": 2032 }, { "epoch": 0.57, "learning_rate": 1.6227850449948978e-05, - "loss": 0.1403, + "loss": 0.108, "step": 2033 }, { "epoch": 0.57, "learning_rate": 1.6225994990258836e-05, - "loss": 0.1355, + "loss": 0.0657, "step": 2034 }, { "epoch": 0.57, "learning_rate": 1.6224139530568698e-05, - "loss": 0.3462, + "loss": 0.09, "step": 2035 }, { "epoch": 0.57, "learning_rate": 1.6222284070878563e-05, - "loss": 0.295, + "loss": 0.0144, "step": 2036 }, { "epoch": 0.57, "learning_rate": 1.6220428611188422e-05, - "loss": 0.19, + "loss": 0.071, "step": 2037 }, { "epoch": 0.57, "learning_rate": 1.6218573151498284e-05, - "loss": 0.0466, + "loss": 0.0071, "step": 2038 }, { "epoch": 0.57, "learning_rate": 1.6216717691808146e-05, - "loss": 0.137, + "loss": 0.1877, "step": 2039 }, { "epoch": 0.57, "learning_rate": 1.621486223211801e-05, - "loss": 0.1413, + "loss": 0.1643, "step": 2040 }, { "epoch": 0.57, "learning_rate": 1.621300677242787e-05, - "loss": 0.1453, + "loss": 0.0118, "step": 2041 }, { "epoch": 0.57, "learning_rate": 1.621115131273773e-05, - "loss": 0.1409, + "loss": 0.178, "step": 2042 }, { "epoch": 0.57, "learning_rate": 1.6209295853047593e-05, - "loss": 0.1879, + "loss": 0.0698, "step": 2043 }, { "epoch": 0.57, "learning_rate": 1.6207440393357455e-05, - "loss": 0.1414, + "loss": 0.0181, "step": 2044 }, { "epoch": 0.57, "learning_rate": 1.6205584933667317e-05, - "loss": 0.1882, + "loss": 0.1237, "step": 2045 }, { "epoch": 0.57, "learning_rate": 1.620372947397718e-05, - "loss": 0.1407, + "loss": 0.2057, "step": 2046 }, { "epoch": 0.57, "learning_rate": 1.620187401428704e-05, - "loss": 0.1406, + "loss": 0.1094, "step": 2047 }, { "epoch": 0.57, "learning_rate": 1.6200018554596903e-05, - "loss": 0.042, + "loss": 0.0182, "step": 2048 }, { "epoch": 0.57, "learning_rate": 1.6198163094906764e-05, - "loss": 0.0859, + "loss": 0.1177, "step": 2049 }, { "epoch": 0.57, "learning_rate": 1.6196307635216626e-05, - "loss": 0.1438, + "loss": 0.0152, "step": 2050 }, { "epoch": 0.57, "learning_rate": 1.6194452175526488e-05, - "loss": 0.239, + "loss": 0.1136, "step": 2051 }, { "epoch": 0.57, "learning_rate": 1.619259671583635e-05, - "loss": 0.3521, + "loss": 0.1504, "step": 2052 }, { "epoch": 0.57, "learning_rate": 1.6190741256146212e-05, - "loss": 0.1917, + "loss": 0.0277, "step": 2053 }, { "epoch": 0.57, "learning_rate": 1.6188885796456074e-05, - "loss": 0.1387, + "loss": 0.0961, "step": 2054 }, { "epoch": 0.57, "learning_rate": 1.6187030336765936e-05, - "loss": 0.0367, + "loss": 0.033, "step": 2055 }, { "epoch": 0.57, "learning_rate": 1.6185174877075797e-05, - "loss": 0.0853, + "loss": 0.13, "step": 2056 }, { "epoch": 0.57, "learning_rate": 1.618331941738566e-05, - "loss": 0.2452, + "loss": 0.0201, "step": 2057 }, { "epoch": 0.57, "learning_rate": 1.618146395769552e-05, - "loss": 0.0881, + "loss": 0.0681, "step": 2058 }, { "epoch": 0.57, "learning_rate": 1.617960849800538e-05, - "loss": 0.1882, + "loss": 0.0243, "step": 2059 }, { "epoch": 0.57, "learning_rate": 1.6177753038315245e-05, - "loss": 0.0346, + "loss": 0.0326, "step": 2060 }, { "epoch": 0.57, "learning_rate": 1.6175897578625107e-05, - "loss": 0.1367, + "loss": 0.0644, "step": 2061 }, { "epoch": 0.57, "learning_rate": 1.617404211893497e-05, - "loss": 0.1392, + "loss": 0.1141, "step": 2062 }, { "epoch": 0.57, "learning_rate": 1.6172186659244827e-05, - "loss": 0.1389, + "loss": 0.1011, "step": 2063 }, { "epoch": 0.57, "learning_rate": 1.6170331199554692e-05, - "loss": 0.1363, + "loss": 0.1682, "step": 2064 }, { "epoch": 0.57, "learning_rate": 1.6168475739864554e-05, - "loss": 0.1364, + "loss": 0.2075, "step": 2065 }, { "epoch": 0.58, "learning_rate": 1.6166620280174413e-05, - "loss": 0.2992, + "loss": 0.0582, "step": 2066 }, { "epoch": 0.58, "learning_rate": 1.6164764820484275e-05, - "loss": 0.1857, + "loss": 0.1175, "step": 2067 }, { "epoch": 0.58, "learning_rate": 1.616290936079414e-05, - "loss": 0.142, + "loss": 0.0705, "step": 2068 }, { "epoch": 0.58, "learning_rate": 1.6161053901104002e-05, - "loss": 0.0881, + "loss": 0.0979, "step": 2069 }, { "epoch": 0.58, "learning_rate": 1.615919844141386e-05, - "loss": 0.1409, + "loss": 0.1659, "step": 2070 }, { "epoch": 0.58, "learning_rate": 1.6157342981723722e-05, - "loss": 0.3526, + "loss": 0.0136, "step": 2071 }, { "epoch": 0.58, "learning_rate": 1.6155487522033587e-05, - "loss": 0.1927, + "loss": 0.1259, "step": 2072 }, { "epoch": 0.58, "learning_rate": 1.6153632062343446e-05, - "loss": 0.1382, + "loss": 0.1576, "step": 2073 }, { "epoch": 0.58, "learning_rate": 1.6151776602653308e-05, - "loss": 0.0884, + "loss": 0.1764, "step": 2074 }, { "epoch": 0.58, "learning_rate": 1.614992114296317e-05, - "loss": 0.4034, + "loss": 0.0726, "step": 2075 }, { "epoch": 0.58, "learning_rate": 1.614806568327303e-05, - "loss": 0.138, + "loss": 0.0639, "step": 2076 }, { "epoch": 0.58, "learning_rate": 1.6146210223582893e-05, - "loss": 0.1884, + "loss": 0.1608, "step": 2077 }, { "epoch": 0.58, "learning_rate": 1.6144354763892755e-05, - "loss": 0.1892, + "loss": 0.0665, "step": 2078 }, { "epoch": 0.58, "learning_rate": 1.6142499304202617e-05, - "loss": 0.0928, + "loss": 0.2021, "step": 2079 }, { "epoch": 0.58, "learning_rate": 1.614064384451248e-05, - "loss": 0.1919, + "loss": 0.1827, "step": 2080 }, { "epoch": 0.58, "learning_rate": 1.613878838482234e-05, - "loss": 0.2352, + "loss": 0.1079, "step": 2081 }, { "epoch": 0.58, "learning_rate": 1.6136932925132203e-05, - "loss": 0.1409, + "loss": 0.0952, "step": 2082 }, { "epoch": 0.58, "learning_rate": 1.6135077465442065e-05, - "loss": 0.2866, + "loss": 0.0388, "step": 2083 }, { "epoch": 0.58, "learning_rate": 1.6133222005751926e-05, - "loss": 0.3325, + "loss": 0.0949, "step": 2084 }, { "epoch": 0.58, "learning_rate": 1.613136654606179e-05, - "loss": 0.1943, + "loss": 0.1057, "step": 2085 }, { "epoch": 0.58, "learning_rate": 1.612951108637165e-05, - "loss": 0.1446, + "loss": 0.06, "step": 2086 }, { "epoch": 0.58, "learning_rate": 1.6127655626681512e-05, - "loss": 0.0556, + "loss": 0.0538, "step": 2087 }, { "epoch": 0.58, "learning_rate": 1.6125800166991374e-05, - "loss": 0.0975, + "loss": 0.04, "step": 2088 }, { "epoch": 0.58, "learning_rate": 1.6123944707301236e-05, - "loss": 0.1415, + "loss": 0.0301, "step": 2089 }, { "epoch": 0.58, "learning_rate": 1.6122089247611098e-05, - "loss": 0.0484, + "loss": 0.0617, "step": 2090 }, { "epoch": 0.58, "learning_rate": 1.612023378792096e-05, - "loss": 0.1374, + "loss": 0.0395, "step": 2091 }, { "epoch": 0.58, "learning_rate": 1.611837832823082e-05, - "loss": 0.1408, + "loss": 0.0102, "step": 2092 }, { "epoch": 0.58, "learning_rate": 1.6116522868540683e-05, - "loss": 0.0953, + "loss": 0.0999, "step": 2093 }, { "epoch": 0.58, "learning_rate": 1.6114667408850545e-05, - "loss": 0.1924, + "loss": 0.008, "step": 2094 }, { "epoch": 0.58, "learning_rate": 1.6112811949160404e-05, - "loss": 0.1354, + "loss": 0.1136, "step": 2095 }, { "epoch": 0.58, "learning_rate": 1.611095648947027e-05, - "loss": 0.1382, + "loss": 0.1856, "step": 2096 }, { "epoch": 0.58, "learning_rate": 1.610910102978013e-05, - "loss": 0.0372, + "loss": 0.0663, "step": 2097 }, { "epoch": 0.58, "learning_rate": 1.6107245570089993e-05, - "loss": 0.1363, + "loss": 0.1236, "step": 2098 }, { "epoch": 0.58, "learning_rate": 1.610539011039985e-05, - "loss": 0.3457, + "loss": 0.0564, "step": 2099 }, { "epoch": 0.58, "learning_rate": 1.6103534650709716e-05, - "loss": 0.0849, + "loss": 0.2329, "step": 2100 }, { "epoch": 0.58, "learning_rate": 1.6101679191019578e-05, - "loss": 0.0895, + "loss": 0.0124, "step": 2101 }, { "epoch": 0.59, "learning_rate": 1.6099823731329437e-05, - "loss": 0.296, + "loss": 0.0693, "step": 2102 }, { "epoch": 0.59, "learning_rate": 1.60979682716393e-05, - "loss": 0.1935, + "loss": 0.1356, "step": 2103 }, { "epoch": 0.59, "learning_rate": 1.6096112811949164e-05, - "loss": 0.0871, + "loss": 0.1272, "step": 2104 }, { "epoch": 0.59, "learning_rate": 1.6094257352259022e-05, - "loss": 0.249, + "loss": 0.0943, "step": 2105 }, { "epoch": 0.59, "learning_rate": 1.6092401892568884e-05, - "loss": 0.305, + "loss": 0.1199, "step": 2106 }, { "epoch": 0.59, "learning_rate": 1.6090546432878746e-05, - "loss": 0.0867, + "loss": 0.0286, "step": 2107 }, { "epoch": 0.59, "learning_rate": 1.608869097318861e-05, - "loss": 0.2402, + "loss": 0.1624, "step": 2108 }, { "epoch": 0.59, "learning_rate": 1.608683551349847e-05, - "loss": 0.141, + "loss": 0.1644, "step": 2109 }, { "epoch": 0.59, "learning_rate": 1.6084980053808332e-05, - "loss": 0.0893, + "loss": 0.0942, "step": 2110 }, { "epoch": 0.59, "learning_rate": 1.6083124594118194e-05, - "loss": 0.1391, + "loss": 0.0888, "step": 2111 }, { "epoch": 0.59, "learning_rate": 1.6081269134428055e-05, - "loss": 0.1862, + "loss": 0.1175, "step": 2112 }, { "epoch": 0.59, "learning_rate": 1.6079413674737917e-05, - "loss": 0.0891, + "loss": 0.1741, "step": 2113 }, { "epoch": 0.59, "learning_rate": 1.607755821504778e-05, - "loss": 0.1409, + "loss": 0.144, "step": 2114 }, { "epoch": 0.59, "learning_rate": 1.607570275535764e-05, - "loss": 0.0358, + "loss": 0.0649, "step": 2115 }, { "epoch": 0.59, "learning_rate": 1.6073847295667503e-05, - "loss": 0.0909, + "loss": 0.0506, "step": 2116 }, { "epoch": 0.59, "learning_rate": 1.6071991835977365e-05, - "loss": 0.0878, + "loss": 0.0903, "step": 2117 }, { "epoch": 0.59, "learning_rate": 1.6070136376287227e-05, - "loss": 0.1933, + "loss": 0.1056, "step": 2118 }, { "epoch": 0.59, "learning_rate": 1.606828091659709e-05, - "loss": 0.1941, + "loss": 0.1003, "step": 2119 }, { "epoch": 0.59, "learning_rate": 1.606642545690695e-05, - "loss": 0.1366, + "loss": 0.0643, "step": 2120 }, { "epoch": 0.59, "learning_rate": 1.6064569997216812e-05, - "loss": 0.2468, + "loss": 0.1163, "step": 2121 }, { "epoch": 0.59, "learning_rate": 1.6062714537526674e-05, - "loss": 0.2463, + "loss": 0.1079, "step": 2122 }, { "epoch": 0.59, "learning_rate": 1.6060859077836536e-05, - "loss": 0.085, + "loss": 0.1647, "step": 2123 }, { "epoch": 0.59, "learning_rate": 1.6059003618146398e-05, - "loss": 0.2438, + "loss": 0.1843, "step": 2124 }, { "epoch": 0.59, "learning_rate": 1.605714815845626e-05, - "loss": 0.1419, + "loss": 0.0236, "step": 2125 }, { "epoch": 0.59, "learning_rate": 1.605529269876612e-05, - "loss": 0.1855, + "loss": 0.0302, "step": 2126 }, { "epoch": 0.59, "learning_rate": 1.6053437239075984e-05, - "loss": 0.1382, + "loss": 0.0959, "step": 2127 }, { "epoch": 0.59, "learning_rate": 1.6051581779385842e-05, - "loss": 0.1403, + "loss": 0.1944, "step": 2128 }, { "epoch": 0.59, "learning_rate": 1.6049726319695707e-05, - "loss": 0.2413, + "loss": 0.2036, "step": 2129 }, { "epoch": 0.59, "learning_rate": 1.604787086000557e-05, - "loss": 0.1378, + "loss": 0.1184, "step": 2130 }, { "epoch": 0.59, "learning_rate": 1.6046015400315428e-05, - "loss": 0.0872, + "loss": 0.0219, "step": 2131 }, { "epoch": 0.59, "learning_rate": 1.604415994062529e-05, - "loss": 0.1397, + "loss": 0.033, "step": 2132 }, { "epoch": 0.59, "learning_rate": 1.6042304480935155e-05, - "loss": 0.2409, + "loss": 0.1018, "step": 2133 }, { "epoch": 0.59, "learning_rate": 1.6040449021245013e-05, - "loss": 0.196, + "loss": 0.0386, "step": 2134 }, { "epoch": 0.59, "learning_rate": 1.6038593561554875e-05, - "loss": 0.0859, + "loss": 0.1556, "step": 2135 }, { "epoch": 0.59, "learning_rate": 1.6036738101864737e-05, - "loss": 0.0903, + "loss": 0.0233, "step": 2136 }, { "epoch": 0.59, "learning_rate": 1.6034882642174602e-05, - "loss": 0.2447, + "loss": 0.0732, "step": 2137 }, { "epoch": 0.6, "learning_rate": 1.603302718248446e-05, - "loss": 0.0906, + "loss": 0.027, "step": 2138 }, { "epoch": 0.6, "learning_rate": 1.6031171722794323e-05, - "loss": 0.1423, + "loss": 0.0221, "step": 2139 }, { "epoch": 0.6, "learning_rate": 1.6029316263104184e-05, - "loss": 0.0922, + "loss": 0.0188, "step": 2140 }, { "epoch": 0.6, "learning_rate": 1.6027460803414046e-05, - "loss": 0.0893, + "loss": 0.0182, "step": 2141 }, { "epoch": 0.6, "learning_rate": 1.6025605343723908e-05, - "loss": 0.242, + "loss": 0.1365, "step": 2142 }, { "epoch": 0.6, "learning_rate": 1.602374988403377e-05, - "loss": 0.189, + "loss": 0.0683, "step": 2143 }, { "epoch": 0.6, "learning_rate": 1.6021894424343632e-05, - "loss": 0.0885, + "loss": 0.0131, "step": 2144 }, { "epoch": 0.6, "learning_rate": 1.6020038964653494e-05, - "loss": 0.1395, + "loss": 0.0725, "step": 2145 }, { "epoch": 0.6, "learning_rate": 1.6018183504963356e-05, - "loss": 0.1391, + "loss": 0.0107, "step": 2146 }, { "epoch": 0.6, "learning_rate": 1.6016328045273218e-05, - "loss": 0.1375, + "loss": 0.0553, "step": 2147 }, { "epoch": 0.6, "learning_rate": 1.601447258558308e-05, - "loss": 0.1411, + "loss": 0.1076, "step": 2148 }, { "epoch": 0.6, "learning_rate": 1.601261712589294e-05, - "loss": 0.3924, + "loss": 0.0774, "step": 2149 }, { "epoch": 0.6, "learning_rate": 1.6010761666202803e-05, - "loss": 0.1404, + "loss": 0.0597, "step": 2150 }, { "epoch": 0.6, "learning_rate": 1.6008906206512665e-05, - "loss": 0.1899, + "loss": 0.1732, "step": 2151 }, { "epoch": 0.6, "learning_rate": 1.6007050746822527e-05, - "loss": 0.1425, + "loss": 0.0207, "step": 2152 }, { "epoch": 0.6, "learning_rate": 1.600519528713239e-05, - "loss": 0.2409, + "loss": 0.1226, "step": 2153 }, { "epoch": 0.6, "learning_rate": 1.600333982744225e-05, - "loss": 0.0886, + "loss": 0.0097, "step": 2154 }, { "epoch": 0.6, "learning_rate": 1.6001484367752112e-05, - "loss": 0.0377, + "loss": 0.1114, "step": 2155 }, { "epoch": 0.6, "learning_rate": 1.599962890806197e-05, - "loss": 0.1884, + "loss": 0.0809, "step": 2156 }, { "epoch": 0.6, "learning_rate": 1.5997773448371836e-05, - "loss": 0.1911, + "loss": 0.2268, "step": 2157 }, { "epoch": 0.6, "learning_rate": 1.5995917988681698e-05, - "loss": 0.1362, + "loss": 0.0147, "step": 2158 }, { "epoch": 0.6, "learning_rate": 1.599406252899156e-05, - "loss": 0.1882, + "loss": 0.0187, "step": 2159 }, { "epoch": 0.6, "learning_rate": 1.599220706930142e-05, - "loss": 0.0856, + "loss": 0.1957, "step": 2160 }, { "epoch": 0.6, "learning_rate": 1.5990351609611284e-05, - "loss": 0.192, + "loss": 0.1376, "step": 2161 }, { "epoch": 0.6, "learning_rate": 1.5988496149921146e-05, - "loss": 0.1907, + "loss": 0.0224, "step": 2162 }, { "epoch": 0.6, "learning_rate": 1.5986640690231004e-05, - "loss": 0.0898, + "loss": 0.062, "step": 2163 }, { "epoch": 0.6, "learning_rate": 1.5984785230540866e-05, - "loss": 0.1417, + "loss": 0.1228, "step": 2164 }, { "epoch": 0.6, "learning_rate": 1.598292977085073e-05, - "loss": 0.144, + "loss": 0.102, "step": 2165 }, { "epoch": 0.6, "learning_rate": 1.5981074311160593e-05, - "loss": 0.2965, + "loss": 0.0708, "step": 2166 }, { "epoch": 0.6, "learning_rate": 1.597921885147045e-05, - "loss": 0.1942, + "loss": 0.0255, "step": 2167 }, { "epoch": 0.6, "learning_rate": 1.5977363391780313e-05, - "loss": 0.1929, + "loss": 0.1094, "step": 2168 }, { "epoch": 0.6, "learning_rate": 1.597550793209018e-05, - "loss": 0.1887, + "loss": 0.0245, "step": 2169 }, { "epoch": 0.6, "learning_rate": 1.5973652472400037e-05, - "loss": 0.234, + "loss": 0.0566, "step": 2170 }, { "epoch": 0.6, "learning_rate": 1.59717970127099e-05, - "loss": 0.1919, + "loss": 0.0765, "step": 2171 }, { "epoch": 0.6, "learning_rate": 1.596994155301976e-05, - "loss": 0.1405, + "loss": 0.1036, "step": 2172 }, { "epoch": 0.6, "learning_rate": 1.5968086093329623e-05, - "loss": 0.1403, + "loss": 0.2447, "step": 2173 }, { "epoch": 0.61, "learning_rate": 1.5966230633639485e-05, - "loss": 0.1425, + "loss": 0.0803, "step": 2174 }, { "epoch": 0.61, "learning_rate": 1.5964375173949347e-05, - "loss": 0.0948, + "loss": 0.0654, "step": 2175 }, { "epoch": 0.61, "learning_rate": 1.596251971425921e-05, - "loss": 0.2401, + "loss": 0.1233, "step": 2176 }, { "epoch": 0.61, "learning_rate": 1.596066425456907e-05, - "loss": 0.1393, + "loss": 0.0731, "step": 2177 }, { "epoch": 0.61, "learning_rate": 1.5958808794878932e-05, - "loss": 0.2406, + "loss": 0.0169, "step": 2178 }, { "epoch": 0.61, "learning_rate": 1.5956953335188794e-05, - "loss": 0.1915, + "loss": 0.0993, "step": 2179 }, { "epoch": 0.61, "learning_rate": 1.5955097875498656e-05, - "loss": 0.1916, + "loss": 0.1234, "step": 2180 }, { "epoch": 0.61, "learning_rate": 1.5953242415808518e-05, - "loss": 0.2405, + "loss": 0.1528, "step": 2181 }, { "epoch": 0.61, "learning_rate": 1.595138695611838e-05, - "loss": 0.1412, + "loss": 0.1676, "step": 2182 }, { "epoch": 0.61, "learning_rate": 1.594953149642824e-05, - "loss": 0.1421, + "loss": 0.1143, "step": 2183 }, { "epoch": 0.61, "learning_rate": 1.5947676036738103e-05, - "loss": 0.0928, + "loss": 0.071, "step": 2184 }, { "epoch": 0.61, "learning_rate": 1.5945820577047965e-05, - "loss": 0.1442, + "loss": 0.1136, "step": 2185 }, { "epoch": 0.61, "learning_rate": 1.5943965117357827e-05, - "loss": 0.146, + "loss": 0.1015, "step": 2186 }, { "epoch": 0.61, "learning_rate": 1.594210965766769e-05, - "loss": 0.187, + "loss": 0.1137, "step": 2187 }, { "epoch": 0.61, "learning_rate": 1.594025419797755e-05, - "loss": 0.291, + "loss": 0.0686, "step": 2188 }, { "epoch": 0.61, "learning_rate": 1.5938398738287413e-05, - "loss": 0.087, + "loss": 0.043, "step": 2189 }, { "epoch": 0.61, "learning_rate": 1.5936543278597275e-05, - "loss": 0.1441, + "loss": 0.0206, "step": 2190 }, { "epoch": 0.61, "learning_rate": 1.5934687818907136e-05, - "loss": 0.2454, + "loss": 0.0761, "step": 2191 }, { "epoch": 0.61, "learning_rate": 1.5932832359216995e-05, - "loss": 0.1396, + "loss": 0.1757, "step": 2192 }, { "epoch": 0.61, "learning_rate": 1.593097689952686e-05, - "loss": 0.0414, + "loss": 0.0679, "step": 2193 }, { "epoch": 0.61, "learning_rate": 1.5929121439836722e-05, - "loss": 0.0411, + "loss": 0.0587, "step": 2194 }, { "epoch": 0.61, "learning_rate": 1.5927265980146584e-05, - "loss": 0.0888, + "loss": 0.0145, "step": 2195 }, { "epoch": 0.61, "learning_rate": 1.5925410520456442e-05, - "loss": 0.0889, + "loss": 0.0824, "step": 2196 }, { "epoch": 0.61, "learning_rate": 1.5923555060766308e-05, - "loss": 0.34, + "loss": 0.1091, "step": 2197 }, { "epoch": 0.61, "learning_rate": 1.592169960107617e-05, - "loss": 0.143, + "loss": 0.1418, "step": 2198 }, { "epoch": 0.61, "learning_rate": 1.5919844141386028e-05, - "loss": 0.1943, + "loss": 0.0229, "step": 2199 }, { "epoch": 0.61, "learning_rate": 1.591798868169589e-05, - "loss": 0.3493, + "loss": 0.0589, "step": 2200 }, { "epoch": 0.61, "learning_rate": 1.5916133222005755e-05, - "loss": 0.1327, + "loss": 0.0216, "step": 2201 }, { "epoch": 0.61, "learning_rate": 1.5914277762315614e-05, - "loss": 0.1392, + "loss": 0.058, "step": 2202 }, { "epoch": 0.61, "learning_rate": 1.5912422302625476e-05, - "loss": 0.1419, + "loss": 0.0935, "step": 2203 }, { "epoch": 0.61, "learning_rate": 1.5910566842935337e-05, - "loss": 0.0899, + "loss": 0.0121, "step": 2204 }, { "epoch": 0.61, "learning_rate": 1.5908711383245203e-05, - "loss": 0.0912, + "loss": 0.0515, "step": 2205 }, { "epoch": 0.61, "learning_rate": 1.590685592355506e-05, - "loss": 0.1359, + "loss": 0.0501, "step": 2206 }, { "epoch": 0.61, "learning_rate": 1.5905000463864923e-05, - "loss": 0.1908, + "loss": 0.0719, "step": 2207 }, { "epoch": 0.61, "learning_rate": 1.5903145004174785e-05, - "loss": 0.1396, + "loss": 0.0999, "step": 2208 }, { "epoch": 0.61, "learning_rate": 1.5901289544484647e-05, - "loss": 0.1841, + "loss": 0.2282, "step": 2209 }, { "epoch": 0.62, "learning_rate": 1.589943408479451e-05, - "loss": 0.1377, + "loss": 0.0433, "step": 2210 }, { "epoch": 0.62, "learning_rate": 1.589757862510437e-05, - "loss": 0.2491, + "loss": 0.0872, "step": 2211 }, { "epoch": 0.62, "learning_rate": 1.5895723165414232e-05, - "loss": 0.2404, + "loss": 0.1042, "step": 2212 }, { "epoch": 0.62, "learning_rate": 1.5893867705724094e-05, - "loss": 0.1411, + "loss": 0.0192, "step": 2213 }, { "epoch": 0.62, "learning_rate": 1.5892012246033956e-05, - "loss": 0.0365, + "loss": 0.063, "step": 2214 }, { "epoch": 0.62, "learning_rate": 1.5890156786343818e-05, - "loss": 0.0873, + "loss": 0.1896, "step": 2215 }, { "epoch": 0.62, "learning_rate": 1.588830132665368e-05, - "loss": 0.3002, + "loss": 0.0279, "step": 2216 }, { "epoch": 0.62, "learning_rate": 1.588644586696354e-05, - "loss": 0.0863, + "loss": 0.1139, "step": 2217 }, { "epoch": 0.62, "learning_rate": 1.5884590407273404e-05, - "loss": 0.0865, + "loss": 0.0974, "step": 2218 }, { "epoch": 0.62, "learning_rate": 1.5882734947583265e-05, - "loss": 0.2421, + "loss": 0.0602, "step": 2219 }, { "epoch": 0.62, "learning_rate": 1.5880879487893127e-05, - "loss": 0.0899, + "loss": 0.0707, "step": 2220 }, { "epoch": 0.62, "learning_rate": 1.587902402820299e-05, - "loss": 0.1827, + "loss": 0.0319, "step": 2221 }, { "epoch": 0.62, "learning_rate": 1.587716856851285e-05, - "loss": 0.1363, + "loss": 0.218, "step": 2222 }, { "epoch": 0.62, "learning_rate": 1.5875313108822713e-05, - "loss": 0.1905, + "loss": 0.2005, "step": 2223 }, { "epoch": 0.62, "learning_rate": 1.5873457649132575e-05, - "loss": 0.0367, + "loss": 0.1027, "step": 2224 }, { "epoch": 0.62, "learning_rate": 1.5871602189442437e-05, - "loss": 0.1959, + "loss": 0.1373, "step": 2225 }, { "epoch": 0.62, "learning_rate": 1.58697467297523e-05, - "loss": 0.0881, + "loss": 0.042, "step": 2226 }, { "epoch": 0.62, "learning_rate": 1.586789127006216e-05, - "loss": 0.0892, + "loss": 0.0698, "step": 2227 }, { "epoch": 0.62, "learning_rate": 1.586603581037202e-05, - "loss": 0.1401, + "loss": 0.1313, "step": 2228 }, { "epoch": 0.62, "learning_rate": 1.5864180350681884e-05, - "loss": 0.3402, + "loss": 0.0615, "step": 2229 }, { "epoch": 0.62, "learning_rate": 1.5862324890991746e-05, - "loss": 0.1368, + "loss": 0.2223, "step": 2230 }, { "epoch": 0.62, "learning_rate": 1.5860469431301604e-05, - "loss": 0.3478, + "loss": 0.0534, "step": 2231 }, { "epoch": 0.62, "learning_rate": 1.5858613971611466e-05, - "loss": 0.1967, + "loss": 0.0239, "step": 2232 }, { "epoch": 0.62, "learning_rate": 1.585675851192133e-05, - "loss": 0.1403, + "loss": 0.0235, "step": 2233 }, { "epoch": 0.62, "learning_rate": 1.5854903052231193e-05, - "loss": 0.2468, + "loss": 0.0666, "step": 2234 }, { "epoch": 0.62, "learning_rate": 1.5853047592541052e-05, - "loss": 0.0371, + "loss": 0.1583, "step": 2235 }, { "epoch": 0.62, "learning_rate": 1.5851192132850914e-05, - "loss": 0.1867, + "loss": 0.1181, "step": 2236 }, { "epoch": 0.62, "learning_rate": 1.584933667316078e-05, - "loss": 0.2416, + "loss": 0.2333, "step": 2237 }, { "epoch": 0.62, "learning_rate": 1.5847481213470638e-05, - "loss": 0.1393, + "loss": 0.069, "step": 2238 }, { "epoch": 0.62, "learning_rate": 1.58456257537805e-05, - "loss": 0.1403, + "loss": 0.0672, "step": 2239 }, { "epoch": 0.62, "learning_rate": 1.584377029409036e-05, - "loss": 0.0913, + "loss": 0.028, "step": 2240 }, { "epoch": 0.62, "learning_rate": 1.5841914834400227e-05, - "loss": 0.1935, + "loss": 0.0876, "step": 2241 }, { "epoch": 0.62, "learning_rate": 1.5840059374710085e-05, - "loss": 0.1385, + "loss": 0.0652, "step": 2242 }, { "epoch": 0.62, "learning_rate": 1.5838203915019947e-05, - "loss": 0.1398, + "loss": 0.0323, "step": 2243 }, { "epoch": 0.62, "learning_rate": 1.583634845532981e-05, - "loss": 0.1399, + "loss": 0.0703, "step": 2244 }, { "epoch": 0.62, "learning_rate": 1.583449299563967e-05, - "loss": 0.1372, + "loss": 0.0276, "step": 2245 }, { "epoch": 0.63, "learning_rate": 1.5832637535949533e-05, - "loss": 0.0911, + "loss": 0.119, "step": 2246 }, { "epoch": 0.63, "learning_rate": 1.5830782076259394e-05, - "loss": 0.241, + "loss": 0.2008, "step": 2247 }, { "epoch": 0.63, "learning_rate": 1.5828926616569256e-05, - "loss": 0.0894, + "loss": 0.1013, "step": 2248 }, { "epoch": 0.63, "learning_rate": 1.5827071156879118e-05, - "loss": 0.1429, + "loss": 0.0626, "step": 2249 }, { "epoch": 0.63, "learning_rate": 1.582521569718898e-05, - "loss": 0.1985, + "loss": 0.0542, "step": 2250 }, { "epoch": 0.63, "learning_rate": 1.5823360237498842e-05, - "loss": 0.0389, + "loss": 0.0931, "step": 2251 }, { "epoch": 0.63, "learning_rate": 1.5821504777808704e-05, - "loss": 0.1941, + "loss": 0.0153, "step": 2252 }, { "epoch": 0.63, "learning_rate": 1.5819649318118566e-05, - "loss": 0.1935, + "loss": 0.0526, "step": 2253 }, { "epoch": 0.63, "learning_rate": 1.5817793858428428e-05, - "loss": 0.091, + "loss": 0.1136, "step": 2254 }, { "epoch": 0.63, "learning_rate": 1.581593839873829e-05, - "loss": 0.4449, + "loss": 0.1512, "step": 2255 }, { "epoch": 0.63, "learning_rate": 1.581408293904815e-05, - "loss": 0.0369, + "loss": 0.1501, "step": 2256 }, { "epoch": 0.63, "learning_rate": 1.5812227479358013e-05, - "loss": 0.1925, + "loss": 0.1075, "step": 2257 }, { "epoch": 0.63, "learning_rate": 1.5810372019667875e-05, - "loss": 0.3396, + "loss": 0.0416, "step": 2258 }, { "epoch": 0.63, "learning_rate": 1.5808516559977737e-05, - "loss": 0.0886, + "loss": 0.1297, "step": 2259 }, { "epoch": 0.63, "learning_rate": 1.5806661100287595e-05, - "loss": 0.1929, + "loss": 0.0208, "step": 2260 }, { "epoch": 0.63, "learning_rate": 1.580480564059746e-05, - "loss": 0.4031, + "loss": 0.1083, "step": 2261 }, { "epoch": 0.63, "learning_rate": 1.5802950180907322e-05, - "loss": 0.0893, + "loss": 0.0331, "step": 2262 }, { "epoch": 0.63, "learning_rate": 1.5801094721217184e-05, - "loss": 0.1396, + "loss": 0.0884, "step": 2263 }, { "epoch": 0.63, "learning_rate": 1.5799239261527043e-05, - "loss": 0.0894, + "loss": 0.0702, "step": 2264 }, { "epoch": 0.63, "learning_rate": 1.5797383801836908e-05, - "loss": 0.1419, + "loss": 0.0625, "step": 2265 }, { "epoch": 0.63, "learning_rate": 1.579552834214677e-05, - "loss": 0.3489, + "loss": 0.1416, "step": 2266 }, { "epoch": 0.63, "learning_rate": 1.579367288245663e-05, - "loss": 0.041, + "loss": 0.0311, "step": 2267 }, { "epoch": 0.63, "learning_rate": 1.579181742276649e-05, - "loss": 0.145, + "loss": 0.1409, "step": 2268 }, { "epoch": 0.63, "learning_rate": 1.5789961963076356e-05, - "loss": 0.0394, + "loss": 0.0655, "step": 2269 }, { "epoch": 0.63, "learning_rate": 1.5788106503386217e-05, - "loss": 0.0397, + "loss": 0.075, "step": 2270 }, { "epoch": 0.63, "learning_rate": 1.5786251043696076e-05, - "loss": 0.294, + "loss": 0.0834, "step": 2271 }, { "epoch": 0.63, "learning_rate": 1.5784395584005938e-05, - "loss": 0.1928, + "loss": 0.029, "step": 2272 }, { "epoch": 0.63, "learning_rate": 1.5782540124315803e-05, - "loss": 0.3389, + "loss": 0.1039, "step": 2273 }, { "epoch": 0.63, "learning_rate": 1.578068466462566e-05, - "loss": 0.2417, + "loss": 0.116, "step": 2274 }, { "epoch": 0.63, "learning_rate": 1.5778829204935523e-05, - "loss": 0.1423, + "loss": 0.0965, "step": 2275 }, { "epoch": 0.63, "learning_rate": 1.5776973745245385e-05, - "loss": 0.1998, + "loss": 0.1349, "step": 2276 }, { "epoch": 0.63, "learning_rate": 1.5775118285555247e-05, - "loss": 0.1418, + "loss": 0.1126, "step": 2277 }, { "epoch": 0.63, "learning_rate": 1.577326282586511e-05, - "loss": 0.0905, + "loss": 0.2243, "step": 2278 }, { "epoch": 0.63, "learning_rate": 1.577140736617497e-05, - "loss": 0.2344, + "loss": 0.0993, "step": 2279 }, { "epoch": 0.63, "learning_rate": 1.5769551906484833e-05, - "loss": 0.0419, + "loss": 0.0316, "step": 2280 }, { "epoch": 0.63, "learning_rate": 1.5767696446794695e-05, - "loss": 0.0938, + "loss": 0.1027, "step": 2281 }, { "epoch": 0.64, "learning_rate": 1.5765840987104557e-05, - "loss": 0.2416, + "loss": 0.0916, "step": 2282 }, { "epoch": 0.64, "learning_rate": 1.576398552741442e-05, - "loss": 0.0912, + "loss": 0.064, "step": 2283 }, { "epoch": 0.64, "learning_rate": 1.576213006772428e-05, - "loss": 0.1829, + "loss": 0.1029, "step": 2284 }, { "epoch": 0.64, "learning_rate": 1.5760274608034142e-05, - "loss": 0.2917, + "loss": 0.019, "step": 2285 }, { "epoch": 0.64, "learning_rate": 1.5758419148344004e-05, - "loss": 0.1909, + "loss": 0.0512, "step": 2286 }, { "epoch": 0.64, "learning_rate": 1.5756563688653866e-05, - "loss": 0.2916, + "loss": 0.0621, "step": 2287 }, { "epoch": 0.64, "learning_rate": 1.5754708228963728e-05, - "loss": 0.1911, + "loss": 0.1429, "step": 2288 }, { "epoch": 0.64, "learning_rate": 1.5752852769273586e-05, - "loss": 0.0914, + "loss": 0.0885, "step": 2289 }, { "epoch": 0.64, "learning_rate": 1.575099730958345e-05, - "loss": 0.1404, + "loss": 0.1969, "step": 2290 }, { "epoch": 0.64, "learning_rate": 1.5749141849893313e-05, - "loss": 0.138, + "loss": 0.1058, "step": 2291 }, { "epoch": 0.64, "learning_rate": 1.5747286390203175e-05, - "loss": 0.0913, + "loss": 0.0681, "step": 2292 }, { "epoch": 0.64, "learning_rate": 1.5745430930513034e-05, - "loss": 0.1929, + "loss": 0.0157, "step": 2293 }, { "epoch": 0.64, "learning_rate": 1.57435754708229e-05, - "loss": 0.2875, + "loss": 0.0161, "step": 2294 }, { "epoch": 0.64, "learning_rate": 1.574172001113276e-05, - "loss": 0.2368, + "loss": 0.0317, "step": 2295 }, { "epoch": 0.64, "learning_rate": 1.573986455144262e-05, - "loss": 0.2879, + "loss": 0.0911, "step": 2296 }, { "epoch": 0.64, "learning_rate": 1.573800909175248e-05, - "loss": 0.0929, + "loss": 0.1432, "step": 2297 }, { "epoch": 0.64, "learning_rate": 1.5736153632062346e-05, - "loss": 0.2363, + "loss": 0.0245, "step": 2298 }, { "epoch": 0.64, "learning_rate": 1.5734298172372205e-05, - "loss": 0.0938, + "loss": 0.0453, "step": 2299 }, { "epoch": 0.64, "learning_rate": 1.5732442712682067e-05, - "loss": 0.0943, + "loss": 0.1639, "step": 2300 }, { "epoch": 0.64, "learning_rate": 1.573058725299193e-05, - "loss": 0.2429, + "loss": 0.1566, "step": 2301 }, { "epoch": 0.64, "learning_rate": 1.5728731793301794e-05, - "loss": 0.1919, + "loss": 0.1503, "step": 2302 }, { "epoch": 0.64, "learning_rate": 1.5726876333611652e-05, - "loss": 0.1456, + "loss": 0.0209, "step": 2303 }, { "epoch": 0.64, "learning_rate": 1.5725020873921514e-05, - "loss": 0.1909, + "loss": 0.0118, "step": 2304 }, { "epoch": 0.64, "learning_rate": 1.5723165414231376e-05, - "loss": 0.0461, + "loss": 0.0546, "step": 2305 }, { "epoch": 0.64, "learning_rate": 1.5721309954541238e-05, - "loss": 0.1398, + "loss": 0.2187, "step": 2306 }, { "epoch": 0.64, "learning_rate": 1.57194544948511e-05, - "loss": 0.0953, + "loss": 0.0621, "step": 2307 }, { "epoch": 0.64, "learning_rate": 1.5717599035160962e-05, - "loss": 0.0914, + "loss": 0.0214, "step": 2308 }, { "epoch": 0.64, "learning_rate": 1.5715743575470824e-05, - "loss": 0.2321, + "loss": 0.1131, "step": 2309 }, { "epoch": 0.64, "learning_rate": 1.5713888115780685e-05, - "loss": 0.2386, + "loss": 0.062, "step": 2310 }, { "epoch": 0.64, "learning_rate": 1.5712032656090547e-05, - "loss": 0.0394, + "loss": 0.1087, "step": 2311 }, { "epoch": 0.64, "learning_rate": 1.571017719640041e-05, - "loss": 0.0894, + "loss": 0.1127, "step": 2312 }, { "epoch": 0.64, "learning_rate": 1.570832173671027e-05, - "loss": 0.1393, + "loss": 0.0895, "step": 2313 }, { "epoch": 0.64, "learning_rate": 1.5706466277020133e-05, - "loss": 0.0854, + "loss": 0.1147, "step": 2314 }, { "epoch": 0.64, "learning_rate": 1.5704610817329995e-05, - "loss": 0.0355, + "loss": 0.1447, "step": 2315 }, { "epoch": 0.64, "learning_rate": 1.5702755357639857e-05, - "loss": 0.035, + "loss": 0.0777, "step": 2316 }, { "epoch": 0.64, "learning_rate": 1.570089989794972e-05, - "loss": 0.1914, + "loss": 0.0818, "step": 2317 }, { "epoch": 0.65, "learning_rate": 1.569904443825958e-05, - "loss": 0.1408, + "loss": 0.0205, "step": 2318 }, { "epoch": 0.65, "learning_rate": 1.5697188978569442e-05, - "loss": 0.1431, + "loss": 0.0886, "step": 2319 }, { "epoch": 0.65, "learning_rate": 1.5695333518879304e-05, - "loss": 0.2471, + "loss": 0.0947, "step": 2320 }, { "epoch": 0.65, "learning_rate": 1.5693478059189166e-05, - "loss": 0.0853, + "loss": 0.022, "step": 2321 }, { "epoch": 0.65, "learning_rate": 1.5691622599499028e-05, - "loss": 0.0289, + "loss": 0.0229, "step": 2322 }, { "epoch": 0.65, "learning_rate": 1.568976713980889e-05, - "loss": 0.3077, + "loss": 0.0728, "step": 2323 }, { "epoch": 0.65, "learning_rate": 1.568791168011875e-05, - "loss": 0.1965, + "loss": 0.1904, "step": 2324 }, { "epoch": 0.65, "learning_rate": 1.568605622042861e-05, - "loss": 0.3012, + "loss": 0.0226, "step": 2325 }, { "epoch": 0.65, "learning_rate": 1.5684200760738475e-05, - "loss": 0.2431, + "loss": 0.1065, "step": 2326 }, { "epoch": 0.65, "learning_rate": 1.5682345301048337e-05, - "loss": 0.1968, + "loss": 0.02, "step": 2327 }, { "epoch": 0.65, "learning_rate": 1.5680489841358196e-05, - "loss": 0.0301, + "loss": 0.1142, "step": 2328 }, { "epoch": 0.65, "learning_rate": 1.5678634381668058e-05, - "loss": 0.1425, + "loss": 0.0934, "step": 2329 }, { "epoch": 0.65, "learning_rate": 1.5676778921977923e-05, - "loss": 0.1417, + "loss": 0.0905, "step": 2330 }, { "epoch": 0.65, "learning_rate": 1.5674923462287785e-05, - "loss": 0.0873, + "loss": 0.0193, "step": 2331 }, { "epoch": 0.65, "learning_rate": 1.5673068002597643e-05, - "loss": 0.1927, + "loss": 0.2152, "step": 2332 }, { "epoch": 0.65, "learning_rate": 1.5671212542907505e-05, - "loss": 0.3004, + "loss": 0.1053, "step": 2333 }, { "epoch": 0.65, "learning_rate": 1.566935708321737e-05, - "loss": 0.3554, + "loss": 0.1221, "step": 2334 }, { "epoch": 0.65, "learning_rate": 1.566750162352723e-05, - "loss": 0.19, + "loss": 0.0208, "step": 2335 }, { "epoch": 0.65, "learning_rate": 1.566564616383709e-05, - "loss": 0.0858, + "loss": 0.2155, "step": 2336 }, { "epoch": 0.65, "learning_rate": 1.5663790704146953e-05, - "loss": 0.2969, + "loss": 0.0214, "step": 2337 }, { "epoch": 0.65, "learning_rate": 1.5661935244456818e-05, - "loss": 0.2468, + "loss": 0.1162, "step": 2338 }, { "epoch": 0.65, "learning_rate": 1.5660079784766676e-05, - "loss": 0.2957, + "loss": 0.1, "step": 2339 }, { "epoch": 0.65, "learning_rate": 1.5658224325076538e-05, - "loss": 0.0402, + "loss": 0.1974, "step": 2340 }, { "epoch": 0.65, "learning_rate": 1.56563688653864e-05, - "loss": 0.2375, + "loss": 0.1162, "step": 2341 }, { "epoch": 0.65, "learning_rate": 1.5654513405696262e-05, - "loss": 0.1918, + "loss": 0.1163, "step": 2342 }, { "epoch": 0.65, "learning_rate": 1.5652657946006124e-05, - "loss": 0.1377, + "loss": 0.0646, "step": 2343 }, { "epoch": 0.65, "learning_rate": 1.5650802486315986e-05, - "loss": 0.0936, + "loss": 0.147, "step": 2344 }, { "epoch": 0.65, "learning_rate": 1.5648947026625848e-05, - "loss": 0.09, + "loss": 0.2244, "step": 2345 }, { "epoch": 0.65, "learning_rate": 1.564709156693571e-05, - "loss": 0.2381, + "loss": 0.1616, "step": 2346 }, { "epoch": 0.65, "learning_rate": 1.564523610724557e-05, - "loss": 0.0994, + "loss": 0.0711, "step": 2347 }, { "epoch": 0.65, "learning_rate": 1.5643380647555433e-05, - "loss": 0.1873, + "loss": 0.0644, "step": 2348 }, { "epoch": 0.65, "learning_rate": 1.5641525187865295e-05, - "loss": 0.096, + "loss": 0.125, "step": 2349 }, { "epoch": 0.65, "learning_rate": 1.5639669728175157e-05, - "loss": 0.0954, + "loss": 0.2036, "step": 2350 }, { "epoch": 0.65, "learning_rate": 1.563781426848502e-05, - "loss": 0.1482, + "loss": 0.0345, "step": 2351 }, { "epoch": 0.65, "learning_rate": 1.563595880879488e-05, - "loss": 0.2398, + "loss": 0.0312, "step": 2352 }, { "epoch": 0.65, "learning_rate": 1.5634103349104743e-05, - "loss": 0.1399, + "loss": 0.1491, "step": 2353 }, { "epoch": 0.66, "learning_rate": 1.5632247889414604e-05, - "loss": 0.2918, + "loss": 0.0556, "step": 2354 }, { "epoch": 0.66, "learning_rate": 1.5630392429724466e-05, - "loss": 0.1417, + "loss": 0.0458, "step": 2355 }, { "epoch": 0.66, "learning_rate": 1.5628536970034328e-05, - "loss": 0.0424, + "loss": 0.022, "step": 2356 }, { "epoch": 0.66, "learning_rate": 1.5626681510344187e-05, - "loss": 0.24, + "loss": 0.0993, "step": 2357 }, { "epoch": 0.66, "learning_rate": 1.5624826050654052e-05, - "loss": 0.0907, + "loss": 0.0902, "step": 2358 }, { "epoch": 0.66, "learning_rate": 1.5622970590963914e-05, - "loss": 0.0408, + "loss": 0.1613, "step": 2359 }, { "epoch": 0.66, "learning_rate": 1.5621115131273776e-05, - "loss": 0.0945, + "loss": 0.2031, "step": 2360 }, { "epoch": 0.66, "learning_rate": 1.5619259671583634e-05, - "loss": 0.1354, + "loss": 0.0361, "step": 2361 }, { "epoch": 0.66, "learning_rate": 1.56174042118935e-05, - "loss": 0.1922, + "loss": 0.1009, "step": 2362 }, { "epoch": 0.66, "learning_rate": 1.561554875220336e-05, - "loss": 0.1408, + "loss": 0.1567, "step": 2363 }, { "epoch": 0.66, "learning_rate": 1.561369329251322e-05, - "loss": 0.4029, + "loss": 0.0566, "step": 2364 }, { "epoch": 0.66, "learning_rate": 1.561183783282308e-05, - "loss": 0.0353, + "loss": 0.0372, "step": 2365 }, { "epoch": 0.66, "learning_rate": 1.5609982373132947e-05, - "loss": 0.1425, + "loss": 0.1025, "step": 2366 }, { "epoch": 0.66, "learning_rate": 1.560812691344281e-05, - "loss": 0.185, + "loss": 0.0951, "step": 2367 }, { "epoch": 0.66, "learning_rate": 1.5606271453752667e-05, - "loss": 0.1409, + "loss": 0.0199, "step": 2368 }, { "epoch": 0.66, "learning_rate": 1.560441599406253e-05, - "loss": 0.1849, + "loss": 0.1437, "step": 2369 }, { "epoch": 0.66, "learning_rate": 1.5602560534372394e-05, - "loss": 0.1446, + "loss": 0.0155, "step": 2370 }, { "epoch": 0.66, "learning_rate": 1.5600705074682253e-05, - "loss": 0.1926, + "loss": 0.2177, "step": 2371 }, { "epoch": 0.66, "learning_rate": 1.5598849614992115e-05, - "loss": 0.0872, + "loss": 0.0459, "step": 2372 }, { "epoch": 0.66, "learning_rate": 1.5596994155301977e-05, - "loss": 0.0362, + "loss": 0.0624, "step": 2373 }, { "epoch": 0.66, "learning_rate": 1.559513869561184e-05, - "loss": 0.0342, + "loss": 0.0608, "step": 2374 }, { "epoch": 0.66, "learning_rate": 1.55932832359217e-05, - "loss": 0.2485, + "loss": 0.0676, "step": 2375 }, { "epoch": 0.66, "learning_rate": 1.5591427776231562e-05, - "loss": 0.1421, + "loss": 0.0283, "step": 2376 }, { "epoch": 0.66, "learning_rate": 1.5589572316541424e-05, - "loss": 0.1363, + "loss": 0.0191, "step": 2377 }, { "epoch": 0.66, "learning_rate": 1.5587716856851286e-05, - "loss": 0.1958, + "loss": 0.0719, "step": 2378 }, { "epoch": 0.66, "learning_rate": 1.5585861397161148e-05, - "loss": 0.1896, + "loss": 0.0641, "step": 2379 }, { "epoch": 0.66, "learning_rate": 1.558400593747101e-05, - "loss": 0.1934, + "loss": 0.1773, "step": 2380 }, { "epoch": 0.66, "learning_rate": 1.558215047778087e-05, - "loss": 0.1894, + "loss": 0.171, "step": 2381 }, { "epoch": 0.66, "learning_rate": 1.5580295018090733e-05, - "loss": 0.1974, + "loss": 0.0654, "step": 2382 }, { "epoch": 0.66, "learning_rate": 1.5578439558400595e-05, - "loss": 0.1923, + "loss": 0.1658, "step": 2383 }, { "epoch": 0.66, "learning_rate": 1.5576584098710457e-05, - "loss": 0.1371, + "loss": 0.0958, "step": 2384 }, { "epoch": 0.66, "learning_rate": 1.557472863902032e-05, - "loss": 0.1949, + "loss": 0.0958, "step": 2385 }, { "epoch": 0.66, "learning_rate": 1.557287317933018e-05, - "loss": 0.1394, + "loss": 0.0945, "step": 2386 }, { "epoch": 0.66, "learning_rate": 1.5571017719640043e-05, - "loss": 0.0867, + "loss": 0.1671, "step": 2387 }, { "epoch": 0.66, "learning_rate": 1.5569162259949905e-05, - "loss": 0.1356, + "loss": 0.0201, "step": 2388 }, { "epoch": 0.66, "learning_rate": 1.5567306800259766e-05, - "loss": 0.0864, + "loss": 0.0736, "step": 2389 }, { "epoch": 0.67, "learning_rate": 1.556545134056963e-05, - "loss": 0.196, + "loss": 0.0296, "step": 2390 }, { "epoch": 0.67, "learning_rate": 1.556359588087949e-05, - "loss": 0.2956, + "loss": 0.0805, "step": 2391 }, { "epoch": 0.67, "learning_rate": 1.5561740421189352e-05, - "loss": 0.0354, + "loss": 0.1048, "step": 2392 }, { "epoch": 0.67, "learning_rate": 1.555988496149921e-05, - "loss": 0.1397, + "loss": 0.0667, "step": 2393 }, { "epoch": 0.67, "learning_rate": 1.5558029501809076e-05, - "loss": 0.1395, + "loss": 0.0635, "step": 2394 }, { "epoch": 0.67, "learning_rate": 1.5556174042118938e-05, - "loss": 0.1384, + "loss": 0.0365, "step": 2395 }, { "epoch": 0.67, "learning_rate": 1.5554318582428796e-05, - "loss": 0.1406, + "loss": 0.0895, "step": 2396 }, { "epoch": 0.67, "learning_rate": 1.5552463122738658e-05, - "loss": 0.0871, + "loss": 0.0253, "step": 2397 }, { "epoch": 0.67, "learning_rate": 1.5550607663048523e-05, - "loss": 0.1955, + "loss": 0.1393, "step": 2398 }, { "epoch": 0.67, "learning_rate": 1.5548752203358385e-05, - "loss": 0.1389, + "loss": 0.1599, "step": 2399 }, { "epoch": 0.67, "learning_rate": 1.5546896743668244e-05, - "loss": 0.0882, + "loss": 0.0288, "step": 2400 }, { "epoch": 0.67, "learning_rate": 1.5545041283978106e-05, - "loss": 0.1407, + "loss": 0.1678, "step": 2401 }, { "epoch": 0.67, "learning_rate": 1.554318582428797e-05, - "loss": 0.19, + "loss": 0.32, "step": 2402 }, { "epoch": 0.67, "learning_rate": 1.554133036459783e-05, - "loss": 0.2413, + "loss": 0.1526, "step": 2403 }, { "epoch": 0.67, "learning_rate": 1.553947490490769e-05, - "loss": 0.1402, + "loss": 0.2034, "step": 2404 }, { "epoch": 0.67, "learning_rate": 1.5537619445217553e-05, - "loss": 0.1362, + "loss": 0.0167, "step": 2405 }, { "epoch": 0.67, "learning_rate": 1.5535763985527418e-05, - "loss": 0.2946, + "loss": 0.1222, "step": 2406 }, { "epoch": 0.67, "learning_rate": 1.5533908525837277e-05, - "loss": 0.1393, + "loss": 0.0621, "step": 2407 }, { "epoch": 0.67, "learning_rate": 1.553205306614714e-05, - "loss": 0.2394, + "loss": 0.0366, "step": 2408 }, { "epoch": 0.67, "learning_rate": 1.5530197606457e-05, - "loss": 0.0892, + "loss": 0.0614, "step": 2409 }, { "epoch": 0.67, "learning_rate": 1.5528342146766862e-05, - "loss": 0.1424, + "loss": 0.0917, "step": 2410 }, { "epoch": 0.67, "learning_rate": 1.5526486687076724e-05, - "loss": 0.3945, + "loss": 0.0631, "step": 2411 }, { "epoch": 0.67, "learning_rate": 1.5524631227386586e-05, - "loss": 0.1408, + "loss": 0.1503, "step": 2412 }, { "epoch": 0.67, "learning_rate": 1.5522775767696448e-05, - "loss": 0.1946, + "loss": 0.0285, "step": 2413 }, { "epoch": 0.67, "learning_rate": 1.552092030800631e-05, - "loss": 0.1397, + "loss": 0.084, "step": 2414 }, { "epoch": 0.67, "learning_rate": 1.5519064848316172e-05, - "loss": 0.138, + "loss": 0.0504, "step": 2415 }, { "epoch": 0.67, "learning_rate": 1.5517209388626034e-05, - "loss": 0.0911, + "loss": 0.0662, "step": 2416 }, { "epoch": 0.67, "learning_rate": 1.5515353928935895e-05, - "loss": 0.187, + "loss": 0.0265, "step": 2417 }, { "epoch": 0.67, "learning_rate": 1.5513498469245757e-05, - "loss": 0.1485, + "loss": 0.0653, "step": 2418 }, { "epoch": 0.67, "learning_rate": 1.551164300955562e-05, - "loss": 0.1416, + "loss": 0.0608, "step": 2419 }, { "epoch": 0.67, "learning_rate": 1.550978754986548e-05, - "loss": 0.1913, + "loss": 0.1191, "step": 2420 }, { "epoch": 0.67, "learning_rate": 1.5507932090175343e-05, - "loss": 0.2442, + "loss": 0.0652, "step": 2421 }, { "epoch": 0.67, "learning_rate": 1.5506076630485205e-05, - "loss": 0.1908, + "loss": 0.1339, "step": 2422 }, { "epoch": 0.67, "learning_rate": 1.5504221170795067e-05, - "loss": 0.0939, + "loss": 0.1726, "step": 2423 }, { "epoch": 0.67, "learning_rate": 1.550236571110493e-05, - "loss": 0.0403, + "loss": 0.0649, "step": 2424 }, { "epoch": 0.67, "learning_rate": 1.5500510251414787e-05, - "loss": 0.0906, + "loss": 0.1177, "step": 2425 }, { "epoch": 0.68, "learning_rate": 1.5498654791724652e-05, - "loss": 0.1399, + "loss": 0.0661, "step": 2426 }, { "epoch": 0.68, "learning_rate": 1.5496799332034514e-05, - "loss": 0.1409, + "loss": 0.0593, "step": 2427 }, { "epoch": 0.68, "learning_rate": 1.5494943872344376e-05, - "loss": 0.1421, + "loss": 0.1539, "step": 2428 }, { "epoch": 0.68, "learning_rate": 1.5493088412654235e-05, - "loss": 0.1929, + "loss": 0.0205, "step": 2429 }, { "epoch": 0.68, "learning_rate": 1.5491232952964096e-05, - "loss": 0.1415, + "loss": 0.0968, "step": 2430 }, { "epoch": 0.68, "learning_rate": 1.548937749327396e-05, - "loss": 0.1412, + "loss": 0.0668, "step": 2431 }, { "epoch": 0.68, "learning_rate": 1.548752203358382e-05, - "loss": 0.1951, + "loss": 0.1566, "step": 2432 }, { "epoch": 0.68, "learning_rate": 1.5485666573893682e-05, - "loss": 0.0907, + "loss": 0.0654, "step": 2433 }, { "epoch": 0.68, "learning_rate": 1.5483811114203544e-05, - "loss": 0.0887, + "loss": 0.0697, "step": 2434 }, { "epoch": 0.68, "learning_rate": 1.548195565451341e-05, - "loss": 0.1383, + "loss": 0.1695, "step": 2435 }, { "epoch": 0.68, "learning_rate": 1.5480100194823268e-05, - "loss": 0.1372, + "loss": 0.0575, "step": 2436 }, { "epoch": 0.68, "learning_rate": 1.547824473513313e-05, - "loss": 0.2489, + "loss": 0.0302, "step": 2437 }, { "epoch": 0.68, "learning_rate": 1.547638927544299e-05, - "loss": 0.0881, + "loss": 0.189, "step": 2438 }, { "epoch": 0.68, "learning_rate": 1.5474533815752853e-05, - "loss": 0.0884, + "loss": 0.0562, "step": 2439 }, { "epoch": 0.68, "learning_rate": 1.5472678356062715e-05, - "loss": 0.1924, + "loss": 0.1246, "step": 2440 }, { "epoch": 0.68, "learning_rate": 1.5470822896372577e-05, - "loss": 0.1443, + "loss": 0.2075, "step": 2441 }, { "epoch": 0.68, "learning_rate": 1.546896743668244e-05, - "loss": 0.1914, + "loss": 0.1105, "step": 2442 }, { "epoch": 0.68, "learning_rate": 1.54671119769923e-05, - "loss": 0.0322, + "loss": 0.1281, "step": 2443 }, { "epoch": 0.68, "learning_rate": 1.5465256517302163e-05, - "loss": 0.0836, + "loss": 0.0209, "step": 2444 }, { "epoch": 0.68, "learning_rate": 1.5463401057612024e-05, - "loss": 0.0845, + "loss": 0.0685, "step": 2445 }, { "epoch": 0.68, "learning_rate": 1.5461545597921886e-05, - "loss": 0.138, + "loss": 0.1196, "step": 2446 }, { "epoch": 0.68, "learning_rate": 1.5459690138231748e-05, - "loss": 0.1368, + "loss": 0.0688, "step": 2447 }, { "epoch": 0.68, "learning_rate": 1.545783467854161e-05, - "loss": 0.2507, + "loss": 0.1526, "step": 2448 }, { "epoch": 0.68, "learning_rate": 1.5455979218851472e-05, - "loss": 0.1855, + "loss": 0.0796, "step": 2449 }, { "epoch": 0.68, "learning_rate": 1.5454123759161334e-05, - "loss": 0.188, + "loss": 0.0325, "step": 2450 }, { "epoch": 0.68, "learning_rate": 1.5452268299471196e-05, - "loss": 0.2498, + "loss": 0.1298, "step": 2451 }, { "epoch": 0.68, "learning_rate": 1.5450412839781058e-05, - "loss": 0.3578, + "loss": 0.0901, "step": 2452 }, { "epoch": 0.68, "learning_rate": 1.544855738009092e-05, - "loss": 0.0296, + "loss": 0.0238, "step": 2453 }, { "epoch": 0.68, "learning_rate": 1.5446701920400778e-05, - "loss": 0.2434, + "loss": 0.1033, "step": 2454 }, { "epoch": 0.68, "learning_rate": 1.5444846460710643e-05, - "loss": 0.1391, + "loss": 0.1219, "step": 2455 }, { "epoch": 0.68, "learning_rate": 1.5442991001020505e-05, - "loss": 0.0338, + "loss": 0.0696, "step": 2456 }, { "epoch": 0.68, "learning_rate": 1.5441135541330367e-05, - "loss": 0.3997, + "loss": 0.2103, "step": 2457 }, { "epoch": 0.68, "learning_rate": 1.5439280081640225e-05, - "loss": 0.085, + "loss": 0.0146, "step": 2458 }, { "epoch": 0.68, "learning_rate": 1.543742462195009e-05, - "loss": 0.1396, + "loss": 0.0893, "step": 2459 }, { "epoch": 0.68, "learning_rate": 1.5435569162259953e-05, - "loss": 0.0875, + "loss": 0.0223, "step": 2460 }, { "epoch": 0.68, "learning_rate": 1.543371370256981e-05, - "loss": 0.1421, + "loss": 0.1427, "step": 2461 }, { "epoch": 0.69, "learning_rate": 1.5431858242879673e-05, - "loss": 0.3469, + "loss": 0.0192, "step": 2462 }, { "epoch": 0.69, "learning_rate": 1.5430002783189538e-05, - "loss": 0.1845, + "loss": 0.0362, "step": 2463 }, { "epoch": 0.69, "learning_rate": 1.54281473234994e-05, - "loss": 0.0872, + "loss": 0.2569, "step": 2464 }, { "epoch": 0.69, "learning_rate": 1.542629186380926e-05, - "loss": 0.0894, + "loss": 0.0168, "step": 2465 }, { "epoch": 0.69, "learning_rate": 1.542443640411912e-05, - "loss": 0.0911, + "loss": 0.1707, "step": 2466 }, { "epoch": 0.69, "learning_rate": 1.5422580944428986e-05, - "loss": 0.0903, + "loss": 0.0362, "step": 2467 }, { "epoch": 0.69, "learning_rate": 1.5420725484738844e-05, - "loss": 0.1879, + "loss": 0.1124, "step": 2468 }, { "epoch": 0.69, "learning_rate": 1.5418870025048706e-05, - "loss": 0.0858, + "loss": 0.1752, "step": 2469 }, { "epoch": 0.69, "learning_rate": 1.5417014565358568e-05, - "loss": 0.2395, + "loss": 0.0895, "step": 2470 }, { "epoch": 0.69, "learning_rate": 1.541515910566843e-05, - "loss": 0.1375, + "loss": 0.0425, "step": 2471 }, { "epoch": 0.69, "learning_rate": 1.541330364597829e-05, - "loss": 0.1938, + "loss": 0.0759, "step": 2472 }, { "epoch": 0.69, "learning_rate": 1.5411448186288153e-05, - "loss": 0.192, + "loss": 0.049, "step": 2473 }, { "epoch": 0.69, "learning_rate": 1.5409592726598015e-05, - "loss": 0.1413, + "loss": 0.0811, "step": 2474 }, { "epoch": 0.69, "learning_rate": 1.5407737266907877e-05, - "loss": 0.1403, + "loss": 0.1843, "step": 2475 }, { "epoch": 0.69, "learning_rate": 1.540588180721774e-05, - "loss": 0.1863, + "loss": 0.1219, "step": 2476 }, { "epoch": 0.69, "learning_rate": 1.54040263475276e-05, - "loss": 0.037, + "loss": 0.0296, "step": 2477 }, { "epoch": 0.69, "learning_rate": 1.5402170887837463e-05, - "loss": 0.0904, + "loss": 0.0638, "step": 2478 }, { "epoch": 0.69, "learning_rate": 1.5400315428147325e-05, - "loss": 0.2459, + "loss": 0.0829, "step": 2479 }, { "epoch": 0.69, "learning_rate": 1.5398459968457187e-05, - "loss": 0.1378, + "loss": 0.0187, "step": 2480 }, { "epoch": 0.69, "learning_rate": 1.539660450876705e-05, - "loss": 0.1918, + "loss": 0.0134, "step": 2481 }, { "epoch": 0.69, "learning_rate": 1.539474904907691e-05, - "loss": 0.1416, + "loss": 0.2184, "step": 2482 }, { "epoch": 0.69, "learning_rate": 1.5392893589386772e-05, - "loss": 0.2395, + "loss": 0.1717, "step": 2483 }, { "epoch": 0.69, "learning_rate": 1.5391038129696634e-05, - "loss": 0.1845, + "loss": 0.0117, "step": 2484 }, { "epoch": 0.69, "learning_rate": 1.5389182670006496e-05, - "loss": 0.1389, + "loss": 0.2155, "step": 2485 }, { "epoch": 0.69, "learning_rate": 1.5387327210316358e-05, - "loss": 0.0871, + "loss": 0.0168, "step": 2486 }, { "epoch": 0.69, "learning_rate": 1.538547175062622e-05, - "loss": 0.2428, + "loss": 0.0153, "step": 2487 }, { "epoch": 0.69, "learning_rate": 1.538361629093608e-05, - "loss": 0.2908, + "loss": 0.0649, "step": 2488 }, { "epoch": 0.69, "learning_rate": 1.5381760831245943e-05, - "loss": 0.0901, + "loss": 0.0455, "step": 2489 }, { "epoch": 0.69, "learning_rate": 1.5379905371555802e-05, - "loss": 0.1355, + "loss": 0.1578, "step": 2490 }, { "epoch": 0.69, "learning_rate": 1.5378049911865667e-05, - "loss": 0.1348, + "loss": 0.0435, "step": 2491 }, { "epoch": 0.69, "learning_rate": 1.537619445217553e-05, - "loss": 0.0877, + "loss": 0.1545, "step": 2492 }, { "epoch": 0.69, "learning_rate": 1.537433899248539e-05, - "loss": 0.0887, + "loss": 0.1227, "step": 2493 }, { "epoch": 0.69, "learning_rate": 1.537248353279525e-05, - "loss": 0.091, + "loss": 0.0165, "step": 2494 }, { "epoch": 0.69, "learning_rate": 1.5370628073105115e-05, - "loss": 0.0888, + "loss": 0.0162, "step": 2495 }, { "epoch": 0.69, "learning_rate": 1.5368772613414976e-05, - "loss": 0.0928, + "loss": 0.0715, "step": 2496 }, { "epoch": 0.69, "learning_rate": 1.5366917153724835e-05, - "loss": 0.0914, + "loss": 0.0661, "step": 2497 }, { "epoch": 0.7, "learning_rate": 1.5365061694034697e-05, - "loss": 0.1914, + "loss": 0.0679, "step": 2498 }, { "epoch": 0.7, "learning_rate": 1.5363206234344562e-05, - "loss": 0.0352, + "loss": 0.0664, "step": 2499 }, { "epoch": 0.7, "learning_rate": 1.536135077465442e-05, - "loss": 0.3416, + "loss": 0.0808, "step": 2500 }, { "epoch": 0.7, "learning_rate": 1.5359495314964282e-05, - "loss": 0.0884, + "loss": 0.013, "step": 2501 }, { "epoch": 0.7, "learning_rate": 1.5357639855274144e-05, - "loss": 0.1911, + "loss": 0.0109, "step": 2502 }, { "epoch": 0.7, "learning_rate": 1.535578439558401e-05, - "loss": 0.1373, + "loss": 0.0804, "step": 2503 }, { "epoch": 0.7, "learning_rate": 1.5353928935893868e-05, - "loss": 0.1384, + "loss": 0.1499, "step": 2504 }, { "epoch": 0.7, "learning_rate": 1.535207347620373e-05, - "loss": 0.0841, + "loss": 0.2043, "step": 2505 }, { "epoch": 0.7, "learning_rate": 1.5350218016513592e-05, - "loss": 0.3513, + "loss": 0.0661, "step": 2506 }, { "epoch": 0.7, "learning_rate": 1.5348362556823454e-05, - "loss": 0.1388, + "loss": 0.0289, "step": 2507 }, { "epoch": 0.7, "learning_rate": 1.5346507097133316e-05, - "loss": 0.244, + "loss": 0.0193, "step": 2508 }, { "epoch": 0.7, "learning_rate": 1.5344651637443177e-05, - "loss": 0.1353, + "loss": 0.1222, "step": 2509 }, { "epoch": 0.7, "learning_rate": 1.534279617775304e-05, - "loss": 0.1869, + "loss": 0.0184, "step": 2510 }, { "epoch": 0.7, "learning_rate": 1.53409407180629e-05, - "loss": 0.035, + "loss": 0.0669, "step": 2511 }, { "epoch": 0.7, "learning_rate": 1.5339085258372763e-05, - "loss": 0.3048, + "loss": 0.1499, "step": 2512 }, { "epoch": 0.7, "learning_rate": 1.5337229798682625e-05, - "loss": 0.2415, + "loss": 0.1253, "step": 2513 }, { "epoch": 0.7, "learning_rate": 1.5335374338992487e-05, - "loss": 0.2432, + "loss": 0.0639, "step": 2514 }, { "epoch": 0.7, "learning_rate": 1.533351887930235e-05, - "loss": 0.1386, + "loss": 0.143, "step": 2515 }, { "epoch": 0.7, "learning_rate": 1.533166341961221e-05, - "loss": 0.2396, + "loss": 0.1032, "step": 2516 }, { "epoch": 0.7, "learning_rate": 1.5329807959922072e-05, - "loss": 0.2447, + "loss": 0.022, "step": 2517 }, { "epoch": 0.7, "learning_rate": 1.5327952500231934e-05, - "loss": 0.1437, + "loss": 0.0617, "step": 2518 }, { "epoch": 0.7, "learning_rate": 1.5326097040541796e-05, - "loss": 0.0399, + "loss": 0.0151, "step": 2519 }, { "epoch": 0.7, "learning_rate": 1.5324241580851658e-05, - "loss": 0.1362, + "loss": 0.0606, "step": 2520 }, { "epoch": 0.7, "learning_rate": 1.532238612116152e-05, - "loss": 0.142, + "loss": 0.1775, "step": 2521 }, { "epoch": 0.7, "learning_rate": 1.532053066147138e-05, - "loss": 0.2389, + "loss": 0.1435, "step": 2522 }, { "epoch": 0.7, "learning_rate": 1.5318675201781244e-05, - "loss": 0.1928, + "loss": 0.0238, "step": 2523 }, { "epoch": 0.7, "learning_rate": 1.5316819742091105e-05, - "loss": 0.2874, + "loss": 0.228, "step": 2524 }, { "epoch": 0.7, "learning_rate": 1.5314964282400967e-05, - "loss": 0.2355, + "loss": 0.0939, "step": 2525 }, { "epoch": 0.7, "learning_rate": 1.5313108822710826e-05, - "loss": 0.2883, + "loss": 0.2054, "step": 2526 }, { "epoch": 0.7, "learning_rate": 1.531125336302069e-05, - "loss": 0.1431, + "loss": 0.1005, "step": 2527 }, { "epoch": 0.7, "learning_rate": 1.5309397903330553e-05, - "loss": 0.1439, + "loss": 0.0864, "step": 2528 }, { "epoch": 0.7, "learning_rate": 1.530754244364041e-05, - "loss": 0.0969, + "loss": 0.06, "step": 2529 }, { "epoch": 0.7, "learning_rate": 1.5305686983950273e-05, - "loss": 0.0469, + "loss": 0.0235, "step": 2530 }, { "epoch": 0.7, "learning_rate": 1.530383152426014e-05, - "loss": 0.1914, + "loss": 0.0929, "step": 2531 }, { "epoch": 0.7, "learning_rate": 1.530197606457e-05, - "loss": 0.1438, + "loss": 0.0217, "step": 2532 }, { "epoch": 0.7, "learning_rate": 1.530012060487986e-05, - "loss": 0.1446, + "loss": 0.1289, "step": 2533 }, { "epoch": 0.71, "learning_rate": 1.529826514518972e-05, - "loss": 0.1934, + "loss": 0.0713, "step": 2534 }, { "epoch": 0.71, "learning_rate": 1.5296409685499586e-05, - "loss": 0.2405, + "loss": 0.1153, "step": 2535 }, { "epoch": 0.71, "learning_rate": 1.5294554225809445e-05, - "loss": 0.0923, + "loss": 0.144, "step": 2536 }, { "epoch": 0.71, "learning_rate": 1.5292698766119306e-05, - "loss": 0.0912, + "loss": 0.1666, "step": 2537 }, { "epoch": 0.71, "learning_rate": 1.5290843306429168e-05, - "loss": 0.2393, + "loss": 0.0894, "step": 2538 }, { "epoch": 0.71, "learning_rate": 1.528898784673903e-05, - "loss": 0.0917, + "loss": 0.3268, "step": 2539 }, { "epoch": 0.71, "learning_rate": 1.5287132387048892e-05, - "loss": 0.1441, + "loss": 0.1502, "step": 2540 }, { "epoch": 0.71, "learning_rate": 1.5285276927358754e-05, - "loss": 0.2449, + "loss": 0.0776, "step": 2541 }, { "epoch": 0.71, "learning_rate": 1.5283421467668616e-05, - "loss": 0.1452, + "loss": 0.1015, "step": 2542 }, { "epoch": 0.71, "learning_rate": 1.5281566007978478e-05, - "loss": 0.1402, + "loss": 0.0285, "step": 2543 }, { "epoch": 0.71, "learning_rate": 1.527971054828834e-05, - "loss": 0.14, + "loss": 0.0368, "step": 2544 }, { "epoch": 0.71, "learning_rate": 1.52778550885982e-05, - "loss": 0.1429, + "loss": 0.112, "step": 2545 }, { "epoch": 0.71, "learning_rate": 1.5275999628908063e-05, - "loss": 0.0903, + "loss": 0.1741, "step": 2546 }, { "epoch": 0.71, "learning_rate": 1.5274144169217925e-05, - "loss": 0.0918, + "loss": 0.0711, "step": 2547 }, { "epoch": 0.71, "learning_rate": 1.5272288709527787e-05, - "loss": 0.1361, + "loss": 0.0841, "step": 2548 }, { "epoch": 0.71, "learning_rate": 1.527043324983765e-05, - "loss": 0.1882, + "loss": 0.1753, "step": 2549 }, { "epoch": 0.71, "learning_rate": 1.526857779014751e-05, - "loss": 0.3377, + "loss": 0.0239, "step": 2550 }, { "epoch": 0.71, "learning_rate": 1.5266722330457373e-05, - "loss": 0.29, + "loss": 0.1951, "step": 2551 }, { "epoch": 0.71, "learning_rate": 1.5264866870767234e-05, - "loss": 0.1938, + "loss": 0.0821, "step": 2552 }, { "epoch": 0.71, "learning_rate": 1.5263011411077096e-05, - "loss": 0.04, + "loss": 0.0776, "step": 2553 }, { "epoch": 0.71, "learning_rate": 1.5261155951386958e-05, - "loss": 0.2401, + "loss": 0.2117, "step": 2554 }, { "epoch": 0.71, "learning_rate": 1.525930049169682e-05, - "loss": 0.1917, + "loss": 0.0844, "step": 2555 }, { "epoch": 0.71, "learning_rate": 1.5257445032006682e-05, - "loss": 0.1905, + "loss": 0.158, "step": 2556 }, { "epoch": 0.71, "learning_rate": 1.5255589572316542e-05, - "loss": 0.0385, + "loss": 0.136, "step": 2557 }, { "epoch": 0.71, "learning_rate": 1.5253734112626404e-05, - "loss": 0.3958, + "loss": 0.0221, "step": 2558 }, { "epoch": 0.71, "learning_rate": 1.5251878652936268e-05, - "loss": 0.1365, + "loss": 0.0617, "step": 2559 }, { "epoch": 0.71, "learning_rate": 1.5250023193246128e-05, - "loss": 0.0396, + "loss": 0.0336, "step": 2560 }, { "epoch": 0.71, "learning_rate": 1.524816773355599e-05, - "loss": 0.0396, + "loss": 0.1787, "step": 2561 }, { "epoch": 0.71, "learning_rate": 1.5246312273865851e-05, - "loss": 0.0398, + "loss": 0.1771, "step": 2562 }, { "epoch": 0.71, "learning_rate": 1.5244456814175715e-05, - "loss": 0.0891, + "loss": 0.1563, "step": 2563 }, { "epoch": 0.71, "learning_rate": 1.5242601354485575e-05, - "loss": 0.1416, + "loss": 0.0598, "step": 2564 }, { "epoch": 0.71, "learning_rate": 1.5240745894795437e-05, - "loss": 0.0889, + "loss": 0.1261, "step": 2565 }, { "epoch": 0.71, "learning_rate": 1.5238890435105297e-05, - "loss": 0.1384, + "loss": 0.0677, "step": 2566 }, { "epoch": 0.71, "learning_rate": 1.523703497541516e-05, - "loss": 0.0884, + "loss": 0.0342, "step": 2567 }, { "epoch": 0.71, "learning_rate": 1.5235179515725023e-05, - "loss": 0.1934, + "loss": 0.0759, "step": 2568 }, { "epoch": 0.72, "learning_rate": 1.5233324056034885e-05, - "loss": 0.2489, + "loss": 0.0244, "step": 2569 }, { "epoch": 0.72, "learning_rate": 1.5231468596344745e-05, - "loss": 0.0338, + "loss": 0.0272, "step": 2570 }, { "epoch": 0.72, "learning_rate": 1.5229613136654608e-05, - "loss": 0.24, + "loss": 0.1451, "step": 2571 }, { "epoch": 0.72, "learning_rate": 1.522775767696447e-05, - "loss": 0.2416, + "loss": 0.1005, "step": 2572 }, { "epoch": 0.72, "learning_rate": 1.522590221727433e-05, - "loss": 0.1387, + "loss": 0.0247, "step": 2573 }, { "epoch": 0.72, "learning_rate": 1.5224046757584192e-05, - "loss": 0.289, + "loss": 0.0159, "step": 2574 }, { "epoch": 0.72, "learning_rate": 1.5222191297894056e-05, - "loss": 0.0847, + "loss": 0.0174, "step": 2575 }, { "epoch": 0.72, "learning_rate": 1.5220335838203918e-05, - "loss": 0.1412, + "loss": 0.2246, "step": 2576 }, { "epoch": 0.72, "learning_rate": 1.5218480378513778e-05, - "loss": 0.1937, + "loss": 0.1251, "step": 2577 }, { "epoch": 0.72, "learning_rate": 1.521662491882364e-05, - "loss": 0.1367, + "loss": 0.0617, "step": 2578 }, { "epoch": 0.72, "learning_rate": 1.5214769459133503e-05, - "loss": 0.1895, + "loss": 0.0596, "step": 2579 }, { "epoch": 0.72, "learning_rate": 1.5212913999443363e-05, - "loss": 0.1395, + "loss": 0.1294, "step": 2580 }, { "epoch": 0.72, "learning_rate": 1.5211058539753225e-05, - "loss": 0.1432, + "loss": 0.0161, "step": 2581 }, { "epoch": 0.72, "learning_rate": 1.5209203080063085e-05, - "loss": 0.3444, + "loss": 0.1527, "step": 2582 }, { "epoch": 0.72, "learning_rate": 1.5207347620372947e-05, - "loss": 0.0889, + "loss": 0.0697, "step": 2583 }, { "epoch": 0.72, "learning_rate": 1.5205492160682811e-05, - "loss": 0.1941, + "loss": 0.1015, "step": 2584 }, { "epoch": 0.72, "learning_rate": 1.5203636700992673e-05, - "loss": 0.1927, + "loss": 0.0131, "step": 2585 }, { "epoch": 0.72, "learning_rate": 1.5201781241302533e-05, - "loss": 0.2423, + "loss": 0.0132, "step": 2586 }, { "epoch": 0.72, "learning_rate": 1.5199925781612395e-05, - "loss": 0.0889, + "loss": 0.0124, "step": 2587 }, { "epoch": 0.72, "learning_rate": 1.5198070321922258e-05, - "loss": 0.0375, + "loss": 0.1623, "step": 2588 }, { "epoch": 0.72, "learning_rate": 1.5196214862232119e-05, - "loss": 0.0375, + "loss": 0.0528, "step": 2589 }, { "epoch": 0.72, "learning_rate": 1.519435940254198e-05, - "loss": 0.1344, + "loss": 0.1034, "step": 2590 }, { "epoch": 0.72, "learning_rate": 1.5192503942851842e-05, - "loss": 0.395, + "loss": 0.0909, "step": 2591 }, { "epoch": 0.72, "learning_rate": 1.5190648483161706e-05, - "loss": 0.0895, + "loss": 0.2021, "step": 2592 }, { "epoch": 0.72, "learning_rate": 1.5188793023471566e-05, - "loss": 0.1387, + "loss": 0.0462, "step": 2593 }, { "epoch": 0.72, "learning_rate": 1.5186937563781428e-05, - "loss": 0.1332, + "loss": 0.2368, "step": 2594 }, { "epoch": 0.72, "learning_rate": 1.5185082104091288e-05, - "loss": 0.0822, + "loss": 0.1104, "step": 2595 }, { "epoch": 0.72, "learning_rate": 1.5183226644401152e-05, - "loss": 0.2414, + "loss": 0.1185, "step": 2596 }, { "epoch": 0.72, "learning_rate": 1.5181371184711014e-05, - "loss": 0.1387, + "loss": 0.0664, "step": 2597 }, { "epoch": 0.72, "learning_rate": 1.5179515725020875e-05, - "loss": 0.2423, + "loss": 0.1054, "step": 2598 }, { "epoch": 0.72, "learning_rate": 1.5177660265330736e-05, - "loss": 0.1943, + "loss": 0.0238, "step": 2599 }, { "epoch": 0.72, "learning_rate": 1.51758048056406e-05, - "loss": 0.1789, + "loss": 0.0263, "step": 2600 }, { "epoch": 0.72, "learning_rate": 1.5173949345950461e-05, - "loss": 0.0356, + "loss": 0.0835, "step": 2601 }, { "epoch": 0.72, "learning_rate": 1.5172093886260321e-05, - "loss": 0.0375, + "loss": 0.0808, "step": 2602 }, { "epoch": 0.72, "learning_rate": 1.5170238426570183e-05, - "loss": 0.088, + "loss": 0.0256, "step": 2603 }, { "epoch": 0.72, "learning_rate": 1.5168382966880047e-05, - "loss": 0.1184, + "loss": 0.0641, "step": 2604 }, { "epoch": 0.73, "learning_rate": 1.5166527507189907e-05, - "loss": 0.1381, + "loss": 0.1097, "step": 2605 }, { "epoch": 0.73, "learning_rate": 1.5164672047499769e-05, - "loss": 0.086, + "loss": 0.0274, "step": 2606 }, { "epoch": 0.73, "learning_rate": 1.516281658780963e-05, - "loss": 0.0879, + "loss": 0.1563, "step": 2607 }, { "epoch": 0.73, "learning_rate": 1.5160961128119494e-05, - "loss": 0.0856, + "loss": 0.0642, "step": 2608 }, { "epoch": 0.73, "learning_rate": 1.5159105668429354e-05, - "loss": 0.2099, + "loss": 0.1177, "step": 2609 }, { "epoch": 0.73, "learning_rate": 1.5157250208739216e-05, - "loss": 0.082, + "loss": 0.244, "step": 2610 }, { "epoch": 0.73, "learning_rate": 1.5155394749049076e-05, - "loss": 0.1378, + "loss": 0.2763, "step": 2611 }, { "epoch": 0.73, "learning_rate": 1.515353928935894e-05, - "loss": 0.1139, + "loss": 0.14, "step": 2612 }, { "epoch": 0.73, "learning_rate": 1.5151683829668802e-05, - "loss": 0.1345, + "loss": 0.0648, "step": 2613 }, { "epoch": 0.73, "learning_rate": 1.5149828369978664e-05, - "loss": 0.1434, + "loss": 0.0847, "step": 2614 }, { "epoch": 0.73, "learning_rate": 1.5147972910288524e-05, - "loss": 0.3562, + "loss": 0.0244, "step": 2615 }, { "epoch": 0.73, "learning_rate": 1.5146117450598387e-05, - "loss": 0.0836, + "loss": 0.0329, "step": 2616 }, { "epoch": 0.73, "learning_rate": 1.514426199090825e-05, - "loss": 0.1435, + "loss": 0.1227, "step": 2617 }, { "epoch": 0.73, "learning_rate": 1.514240653121811e-05, - "loss": 0.2011, + "loss": 0.0236, "step": 2618 }, { "epoch": 0.73, "learning_rate": 1.5140551071527971e-05, - "loss": 0.0879, + "loss": 0.0613, "step": 2619 }, { "epoch": 0.73, "learning_rate": 1.5138695611837835e-05, - "loss": 0.2507, + "loss": 0.043, "step": 2620 }, { "epoch": 0.73, "learning_rate": 1.5136840152147697e-05, - "loss": 0.0886, + "loss": 0.2793, "step": 2621 }, { "epoch": 0.73, "learning_rate": 1.5134984692457557e-05, - "loss": 0.1404, + "loss": 0.1374, "step": 2622 }, { "epoch": 0.73, "learning_rate": 1.5133129232767419e-05, - "loss": 0.1866, + "loss": 0.0662, "step": 2623 }, { "epoch": 0.73, "learning_rate": 1.5131273773077282e-05, - "loss": 0.2431, + "loss": 0.0599, "step": 2624 }, { "epoch": 0.73, "learning_rate": 1.5129418313387143e-05, - "loss": 0.2771, + "loss": 0.0289, "step": 2625 }, { "epoch": 0.73, "learning_rate": 1.5127562853697004e-05, - "loss": 0.1995, + "loss": 0.0228, "step": 2626 }, { "epoch": 0.73, "learning_rate": 1.5125707394006866e-05, - "loss": 0.2863, + "loss": 0.1013, "step": 2627 }, { "epoch": 0.73, "learning_rate": 1.5123851934316728e-05, - "loss": 0.1091, + "loss": 0.0162, "step": 2628 }, { "epoch": 0.73, "learning_rate": 1.512199647462659e-05, - "loss": 0.0921, + "loss": 0.0629, "step": 2629 }, { "epoch": 0.73, "learning_rate": 1.5120141014936452e-05, - "loss": 0.088, + "loss": 0.0947, "step": 2630 }, { "epoch": 0.73, "learning_rate": 1.5118285555246312e-05, - "loss": 0.0376, + "loss": 0.0533, "step": 2631 }, { "epoch": 0.73, "learning_rate": 1.5116430095556176e-05, - "loss": 0.1372, + "loss": 0.0128, "step": 2632 }, { "epoch": 0.73, "learning_rate": 1.5114574635866037e-05, - "loss": 0.2516, + "loss": 0.0538, "step": 2633 }, { "epoch": 0.73, "learning_rate": 1.5112719176175898e-05, - "loss": 0.1383, + "loss": 0.0954, "step": 2634 }, { "epoch": 0.73, "learning_rate": 1.511086371648576e-05, - "loss": 0.1907, + "loss": 0.0074, "step": 2635 }, { "epoch": 0.73, "learning_rate": 1.5109008256795623e-05, - "loss": 0.1341, + "loss": 0.1234, "step": 2636 }, { "epoch": 0.73, "learning_rate": 1.5107152797105485e-05, - "loss": 0.1946, + "loss": 0.1768, "step": 2637 }, { "epoch": 0.73, "learning_rate": 1.5105297337415345e-05, - "loss": 0.1485, + "loss": 0.0205, "step": 2638 }, { "epoch": 0.73, "learning_rate": 1.5103441877725207e-05, - "loss": 0.1967, + "loss": 0.1876, "step": 2639 }, { "epoch": 0.73, "learning_rate": 1.510158641803507e-05, - "loss": 0.1925, + "loss": 0.0559, "step": 2640 }, { "epoch": 0.74, "learning_rate": 1.509973095834493e-05, - "loss": 0.1959, + "loss": 0.0108, "step": 2641 }, { "epoch": 0.74, "learning_rate": 1.5097875498654793e-05, - "loss": 0.0364, + "loss": 0.0858, "step": 2642 }, { "epoch": 0.74, "learning_rate": 1.5096020038964655e-05, - "loss": 0.0862, + "loss": 0.0676, "step": 2643 }, { "epoch": 0.74, "learning_rate": 1.5094164579274518e-05, - "loss": 0.1399, + "loss": 0.1587, "step": 2644 }, { "epoch": 0.74, "learning_rate": 1.5092309119584378e-05, - "loss": 0.3491, + "loss": 0.0647, "step": 2645 }, { "epoch": 0.74, "learning_rate": 1.509045365989424e-05, - "loss": 0.0887, + "loss": 0.0333, "step": 2646 }, { "epoch": 0.74, "learning_rate": 1.50885982002041e-05, - "loss": 0.1403, + "loss": 0.1625, "step": 2647 }, { "epoch": 0.74, "learning_rate": 1.5086742740513964e-05, - "loss": 0.0873, + "loss": 0.0215, "step": 2648 }, { "epoch": 0.74, "learning_rate": 1.5084887280823826e-05, - "loss": 0.0876, + "loss": 0.1065, "step": 2649 }, { "epoch": 0.74, "learning_rate": 1.5083031821133688e-05, - "loss": 0.1398, + "loss": 0.0193, "step": 2650 }, { "epoch": 0.74, "learning_rate": 1.5081176361443548e-05, - "loss": 0.0328, + "loss": 0.0743, "step": 2651 }, { "epoch": 0.74, "learning_rate": 1.5079320901753411e-05, - "loss": 0.033, + "loss": 0.0143, "step": 2652 }, { "epoch": 0.74, "learning_rate": 1.5077465442063273e-05, - "loss": 0.1944, + "loss": 0.0563, "step": 2653 }, { "epoch": 0.74, "learning_rate": 1.5075609982373133e-05, - "loss": 0.1947, + "loss": 0.1295, "step": 2654 }, { "epoch": 0.74, "learning_rate": 1.5073754522682995e-05, - "loss": 0.0855, + "loss": 0.2078, "step": 2655 }, { "epoch": 0.74, "learning_rate": 1.5071899062992859e-05, - "loss": 0.1412, + "loss": 0.0256, "step": 2656 }, { "epoch": 0.74, "learning_rate": 1.5070043603302719e-05, - "loss": 0.3052, + "loss": 0.061, "step": 2657 }, { "epoch": 0.74, "learning_rate": 1.5068188143612581e-05, - "loss": 0.1432, + "loss": 0.1057, "step": 2658 }, { "epoch": 0.74, "learning_rate": 1.5066332683922443e-05, - "loss": 0.0823, + "loss": 0.0269, "step": 2659 }, { "epoch": 0.74, "learning_rate": 1.5064477224232306e-05, - "loss": 0.1386, + "loss": 0.0693, "step": 2660 }, { "epoch": 0.74, "learning_rate": 1.5062621764542166e-05, - "loss": 0.0865, + "loss": 0.0783, "step": 2661 }, { "epoch": 0.74, "learning_rate": 1.5060766304852028e-05, - "loss": 0.1965, + "loss": 0.055, "step": 2662 }, { "epoch": 0.74, "learning_rate": 1.5058910845161889e-05, - "loss": 0.1391, + "loss": 0.0659, "step": 2663 }, { "epoch": 0.74, "learning_rate": 1.5057055385471752e-05, - "loss": 0.1409, + "loss": 0.0516, "step": 2664 }, { "epoch": 0.74, "learning_rate": 1.5055199925781614e-05, - "loss": 0.141, + "loss": 0.0208, "step": 2665 }, { "epoch": 0.74, "learning_rate": 1.5053344466091476e-05, - "loss": 0.3038, + "loss": 0.0511, "step": 2666 }, { "epoch": 0.74, "learning_rate": 1.5051489006401336e-05, - "loss": 0.1974, + "loss": 0.0569, "step": 2667 }, { "epoch": 0.74, "learning_rate": 1.50496335467112e-05, - "loss": 0.1931, + "loss": 0.1495, "step": 2668 }, { "epoch": 0.74, "learning_rate": 1.5047778087021061e-05, - "loss": 0.0362, + "loss": 0.0084, "step": 2669 }, { "epoch": 0.74, "learning_rate": 1.5045922627330922e-05, - "loss": 0.2472, + "loss": 0.0592, "step": 2670 }, { "epoch": 0.74, "learning_rate": 1.5044067167640783e-05, - "loss": 0.0359, + "loss": 0.1111, "step": 2671 }, { "epoch": 0.74, "learning_rate": 1.5042211707950647e-05, - "loss": 0.1913, + "loss": 0.0094, "step": 2672 }, { "epoch": 0.74, "learning_rate": 1.5040356248260509e-05, - "loss": 0.2455, + "loss": 0.0751, "step": 2673 }, { "epoch": 0.74, "learning_rate": 1.5038500788570369e-05, - "loss": 0.2499, + "loss": 0.0709, "step": 2674 }, { "epoch": 0.74, "learning_rate": 1.5036645328880231e-05, - "loss": 0.0874, + "loss": 0.0889, "step": 2675 }, { "epoch": 0.74, "learning_rate": 1.5034789869190095e-05, - "loss": 0.2952, + "loss": 0.1204, "step": 2676 }, { "epoch": 0.75, "learning_rate": 1.5032934409499955e-05, - "loss": 0.2416, + "loss": 0.0055, "step": 2677 }, { "epoch": 0.75, "learning_rate": 1.5031078949809817e-05, - "loss": 0.2418, + "loss": 0.0044, "step": 2678 }, { "epoch": 0.75, "learning_rate": 1.5029223490119678e-05, - "loss": 0.1892, + "loss": 0.137, "step": 2679 }, { "epoch": 0.75, "learning_rate": 1.502736803042954e-05, - "loss": 0.1393, + "loss": 0.1131, "step": 2680 }, { "epoch": 0.75, "learning_rate": 1.5025512570739402e-05, - "loss": 0.288, + "loss": 0.0556, "step": 2681 }, { "epoch": 0.75, "learning_rate": 1.5023657111049264e-05, - "loss": 0.2417, + "loss": 0.0068, "step": 2682 }, { "epoch": 0.75, "learning_rate": 1.5021801651359124e-05, - "loss": 0.3722, + "loss": 0.0932, "step": 2683 }, { "epoch": 0.75, "learning_rate": 1.5019946191668988e-05, - "loss": 0.194, + "loss": 0.0588, "step": 2684 }, { "epoch": 0.75, "learning_rate": 1.501809073197885e-05, - "loss": 0.2367, + "loss": 0.0606, "step": 2685 }, { "epoch": 0.75, "learning_rate": 1.501623527228871e-05, - "loss": 0.2368, + "loss": 0.1361, "step": 2686 }, { "epoch": 0.75, "learning_rate": 1.5014379812598572e-05, - "loss": 0.1512, + "loss": 0.113, "step": 2687 }, { "epoch": 0.75, "learning_rate": 1.5012524352908435e-05, - "loss": 0.102, + "loss": 0.1208, "step": 2688 }, { "epoch": 0.75, "learning_rate": 1.5010668893218297e-05, - "loss": 0.1007, + "loss": 0.0911, "step": 2689 }, { "epoch": 0.75, "learning_rate": 1.5008813433528157e-05, - "loss": 0.1888, + "loss": 0.0598, "step": 2690 }, { "epoch": 0.75, "learning_rate": 1.500695797383802e-05, - "loss": 0.1926, + "loss": 0.2298, "step": 2691 }, { "epoch": 0.75, "learning_rate": 1.5005102514147883e-05, - "loss": 0.1889, + "loss": 0.0928, "step": 2692 }, { "epoch": 0.75, "learning_rate": 1.5003247054457743e-05, - "loss": 0.0954, + "loss": 0.0758, "step": 2693 }, { "epoch": 0.75, "learning_rate": 1.5001391594767605e-05, - "loss": 0.0465, + "loss": 0.0585, "step": 2694 }, { "epoch": 0.75, "learning_rate": 1.4999536135077467e-05, - "loss": 0.0929, + "loss": 0.1098, "step": 2695 }, { "epoch": 0.75, "learning_rate": 1.499768067538733e-05, - "loss": 0.0408, + "loss": 0.1672, "step": 2696 }, { "epoch": 0.75, "learning_rate": 1.499582521569719e-05, - "loss": 0.1952, + "loss": 0.1087, "step": 2697 }, { "epoch": 0.75, "learning_rate": 1.4993969756007052e-05, - "loss": 0.1413, + "loss": 0.1363, "step": 2698 }, { "epoch": 0.75, "learning_rate": 1.4992114296316912e-05, - "loss": 0.1958, + "loss": 0.1218, "step": 2699 }, { "epoch": 0.75, "learning_rate": 1.4990258836626776e-05, - "loss": 0.1883, + "loss": 0.0994, "step": 2700 }, { "epoch": 0.75, "learning_rate": 1.4988403376936638e-05, - "loss": 0.189, + "loss": 0.0695, "step": 2701 }, { "epoch": 0.75, "learning_rate": 1.4986547917246498e-05, - "loss": 0.1458, + "loss": 0.102, "step": 2702 }, { "epoch": 0.75, "learning_rate": 1.498469245755636e-05, - "loss": 0.1394, + "loss": 0.0936, "step": 2703 }, { "epoch": 0.75, "learning_rate": 1.4982836997866224e-05, - "loss": 0.0873, + "loss": 0.1072, "step": 2704 }, { "epoch": 0.75, "learning_rate": 1.4980981538176085e-05, - "loss": 0.0865, + "loss": 0.0587, "step": 2705 }, { "epoch": 0.75, "learning_rate": 1.4979126078485946e-05, - "loss": 0.0329, + "loss": 0.0996, "step": 2706 }, { "epoch": 0.75, "learning_rate": 1.4977270618795807e-05, - "loss": 0.0817, + "loss": 0.0746, "step": 2707 }, { "epoch": 0.75, "learning_rate": 1.4975415159105671e-05, - "loss": 0.0878, + "loss": 0.0204, "step": 2708 }, { "epoch": 0.75, "learning_rate": 1.4973559699415531e-05, - "loss": 0.0305, + "loss": 0.1161, "step": 2709 }, { "epoch": 0.75, "learning_rate": 1.4971704239725393e-05, - "loss": 0.1389, + "loss": 0.0388, "step": 2710 }, { "epoch": 0.75, "learning_rate": 1.4969848780035255e-05, - "loss": 0.0291, + "loss": 0.0554, "step": 2711 }, { "epoch": 0.75, "learning_rate": 1.4967993320345118e-05, - "loss": 0.1408, + "loss": 0.1504, "step": 2712 }, { "epoch": 0.76, "learning_rate": 1.4966137860654979e-05, - "loss": 0.0812, + "loss": 0.0414, "step": 2713 }, { "epoch": 0.76, "learning_rate": 1.496428240096484e-05, - "loss": 0.0822, + "loss": 0.0145, "step": 2714 }, { "epoch": 0.76, "learning_rate": 1.49624269412747e-05, - "loss": 0.0839, + "loss": 0.0614, "step": 2715 }, { "epoch": 0.76, "learning_rate": 1.4960571481584564e-05, - "loss": 0.3084, + "loss": 0.0384, "step": 2716 }, { "epoch": 0.76, "learning_rate": 1.4958716021894426e-05, - "loss": 0.0251, + "loss": 0.0965, "step": 2717 }, { "epoch": 0.76, "learning_rate": 1.4956860562204288e-05, - "loss": 0.0242, + "loss": 0.1329, "step": 2718 }, { "epoch": 0.76, "learning_rate": 1.4955005102514148e-05, - "loss": 0.198, + "loss": 0.2522, "step": 2719 }, { "epoch": 0.76, "learning_rate": 1.4953149642824012e-05, - "loss": 0.2537, + "loss": 0.0123, "step": 2720 }, { "epoch": 0.76, "learning_rate": 1.4951294183133874e-05, - "loss": 0.1943, + "loss": 0.102, "step": 2721 }, { "epoch": 0.76, "learning_rate": 1.4949438723443734e-05, - "loss": 0.1361, + "loss": 0.0118, "step": 2722 }, { "epoch": 0.76, "learning_rate": 1.4947583263753596e-05, - "loss": 0.1397, + "loss": 0.0869, "step": 2723 }, { "epoch": 0.76, "learning_rate": 1.494572780406346e-05, - "loss": 0.0829, + "loss": 0.0844, "step": 2724 }, { "epoch": 0.76, "learning_rate": 1.494387234437332e-05, - "loss": 0.0247, + "loss": 0.1232, "step": 2725 }, { "epoch": 0.76, "learning_rate": 1.4942016884683181e-05, - "loss": 0.1969, + "loss": 0.0195, "step": 2726 }, { "epoch": 0.76, "learning_rate": 1.4940161424993043e-05, - "loss": 0.0251, + "loss": 0.0268, "step": 2727 }, { "epoch": 0.76, "learning_rate": 1.4938305965302907e-05, - "loss": 0.313, + "loss": 0.0603, "step": 2728 }, { "epoch": 0.76, "learning_rate": 1.4936450505612767e-05, - "loss": 0.025, + "loss": 0.0231, "step": 2729 }, { "epoch": 0.76, "learning_rate": 1.4934595045922629e-05, - "loss": 0.1407, + "loss": 0.055, "step": 2730 }, { "epoch": 0.76, "learning_rate": 1.4932739586232489e-05, - "loss": 0.1393, + "loss": 0.1314, "step": 2731 }, { "epoch": 0.76, "learning_rate": 1.4930884126542353e-05, - "loss": 0.0831, + "loss": 0.058, "step": 2732 }, { "epoch": 0.76, "learning_rate": 1.4929028666852214e-05, - "loss": 0.1415, + "loss": 0.0733, "step": 2733 }, { "epoch": 0.76, "learning_rate": 1.4927173207162076e-05, - "loss": 0.2541, + "loss": 0.0127, "step": 2734 }, { "epoch": 0.76, "learning_rate": 1.4925317747471936e-05, - "loss": 0.1413, + "loss": 0.1098, "step": 2735 }, { "epoch": 0.76, "learning_rate": 1.4923462287781798e-05, - "loss": 0.1878, + "loss": 0.1151, "step": 2736 }, { "epoch": 0.76, "learning_rate": 1.4921606828091662e-05, - "loss": 0.1927, + "loss": 0.1191, "step": 2737 }, { "epoch": 0.76, "learning_rate": 1.4919751368401522e-05, - "loss": 0.2496, + "loss": 0.094, "step": 2738 }, { "epoch": 0.76, "learning_rate": 1.4917895908711384e-05, - "loss": 0.0836, + "loss": 0.1036, "step": 2739 }, { "epoch": 0.76, "learning_rate": 1.4916040449021246e-05, - "loss": 0.2516, + "loss": 0.1017, "step": 2740 }, { "epoch": 0.76, "learning_rate": 1.491418498933111e-05, - "loss": 0.3033, + "loss": 0.0981, "step": 2741 }, { "epoch": 0.76, "learning_rate": 1.491232952964097e-05, - "loss": 0.1335, + "loss": 0.162, "step": 2742 }, { "epoch": 0.76, "learning_rate": 1.4910474069950831e-05, - "loss": 0.0855, + "loss": 0.1787, "step": 2743 }, { "epoch": 0.76, "learning_rate": 1.4908618610260692e-05, - "loss": 0.1948, + "loss": 0.1045, "step": 2744 }, { "epoch": 0.76, "learning_rate": 1.4906763150570555e-05, - "loss": 0.0344, + "loss": 0.1231, "step": 2745 }, { "epoch": 0.76, "learning_rate": 1.4904907690880417e-05, - "loss": 0.1904, + "loss": 0.0853, "step": 2746 }, { "epoch": 0.76, "learning_rate": 1.4903052231190279e-05, - "loss": 0.0872, + "loss": 0.106, "step": 2747 }, { "epoch": 0.76, "learning_rate": 1.4901196771500139e-05, - "loss": 0.0894, + "loss": 0.1516, "step": 2748 }, { "epoch": 0.77, "learning_rate": 1.4899341311810003e-05, - "loss": 0.2414, + "loss": 0.1578, "step": 2749 }, { "epoch": 0.77, "learning_rate": 1.4897485852119864e-05, - "loss": 0.1953, + "loss": 0.0986, "step": 2750 }, { "epoch": 0.77, "learning_rate": 1.4895630392429725e-05, - "loss": 0.2433, + "loss": 0.1234, "step": 2751 }, { "epoch": 0.77, "learning_rate": 1.4893774932739587e-05, - "loss": 0.2423, + "loss": 0.0494, "step": 2752 }, { "epoch": 0.77, "learning_rate": 1.489191947304945e-05, - "loss": 0.1368, + "loss": 0.0445, "step": 2753 }, { "epoch": 0.77, "learning_rate": 1.489006401335931e-05, - "loss": 0.3891, + "loss": 0.1085, "step": 2754 }, { "epoch": 0.77, "learning_rate": 1.4888208553669172e-05, - "loss": 0.1875, + "loss": 0.067, "step": 2755 }, { "epoch": 0.77, "learning_rate": 1.4886353093979034e-05, - "loss": 0.0921, + "loss": 0.0766, "step": 2756 }, { "epoch": 0.77, "learning_rate": 1.4884497634288898e-05, - "loss": 0.1417, + "loss": 0.0585, "step": 2757 }, { "epoch": 0.77, "learning_rate": 1.4882642174598758e-05, - "loss": 0.1858, + "loss": 0.1575, "step": 2758 }, { "epoch": 0.77, "learning_rate": 1.488078671490862e-05, - "loss": 0.2429, + "loss": 0.0942, "step": 2759 }, { "epoch": 0.77, "learning_rate": 1.487893125521848e-05, - "loss": 0.2421, + "loss": 0.0548, "step": 2760 }, { "epoch": 0.77, "learning_rate": 1.4877075795528343e-05, - "loss": 0.1405, + "loss": 0.1302, "step": 2761 }, { "epoch": 0.77, "learning_rate": 1.4875220335838205e-05, - "loss": 0.0456, + "loss": 0.1436, "step": 2762 }, { "epoch": 0.77, "learning_rate": 1.4873364876148067e-05, - "loss": 0.1424, + "loss": 0.1222, "step": 2763 }, { "epoch": 0.77, "learning_rate": 1.4871509416457927e-05, - "loss": 0.1463, + "loss": 0.1879, "step": 2764 }, { "epoch": 0.77, "learning_rate": 1.4869653956767791e-05, - "loss": 0.2332, + "loss": 0.0182, "step": 2765 }, { "epoch": 0.77, "learning_rate": 1.4867798497077653e-05, - "loss": 0.0939, + "loss": 0.0532, "step": 2766 }, { "epoch": 0.77, "learning_rate": 1.4865943037387513e-05, - "loss": 0.0941, + "loss": 0.0697, "step": 2767 }, { "epoch": 0.77, "learning_rate": 1.4864087577697375e-05, - "loss": 0.1456, + "loss": 0.1437, "step": 2768 }, { "epoch": 0.77, "learning_rate": 1.4862232118007238e-05, - "loss": 0.2421, + "loss": 0.062, "step": 2769 }, { "epoch": 0.77, "learning_rate": 1.48603766583171e-05, - "loss": 0.1796, + "loss": 0.177, "step": 2770 }, { "epoch": 0.77, "learning_rate": 1.485852119862696e-05, - "loss": 0.194, + "loss": 0.1267, "step": 2771 }, { "epoch": 0.77, "learning_rate": 1.4856665738936822e-05, - "loss": 0.093, + "loss": 0.0235, "step": 2772 }, { "epoch": 0.77, "learning_rate": 1.4854810279246686e-05, - "loss": 0.1901, + "loss": 0.0641, "step": 2773 }, { "epoch": 0.77, "learning_rate": 1.4852954819556546e-05, - "loss": 0.2387, + "loss": 0.0222, "step": 2774 }, { "epoch": 0.77, "learning_rate": 1.4851099359866408e-05, - "loss": 0.1901, + "loss": 0.2195, "step": 2775 }, { "epoch": 0.77, "learning_rate": 1.484924390017627e-05, - "loss": 0.0917, + "loss": 0.0254, "step": 2776 }, { "epoch": 0.77, "learning_rate": 1.4847388440486132e-05, - "loss": 0.0902, + "loss": 0.0244, "step": 2777 }, { "epoch": 0.77, "learning_rate": 1.4845532980795993e-05, - "loss": 0.1429, + "loss": 0.0821, "step": 2778 }, { "epoch": 0.77, "learning_rate": 1.4843677521105855e-05, - "loss": 0.1838, + "loss": 0.1392, "step": 2779 }, { "epoch": 0.77, "learning_rate": 1.4841822061415716e-05, - "loss": 0.2428, + "loss": 0.0257, "step": 2780 }, { "epoch": 0.77, "learning_rate": 1.4839966601725579e-05, - "loss": 0.0909, + "loss": 0.1406, "step": 2781 }, { "epoch": 0.77, "learning_rate": 1.4838111142035441e-05, - "loss": 0.1916, + "loss": 0.2371, "step": 2782 }, { "epoch": 0.77, "learning_rate": 1.4836255682345301e-05, - "loss": 0.1379, + "loss": 0.0833, "step": 2783 }, { "epoch": 0.77, "learning_rate": 1.4834400222655163e-05, - "loss": 0.1382, + "loss": 0.0772, "step": 2784 }, { "epoch": 0.78, "learning_rate": 1.4832544762965027e-05, - "loss": 0.1379, + "loss": 0.0521, "step": 2785 }, { "epoch": 0.78, "learning_rate": 1.4830689303274888e-05, - "loss": 0.1382, + "loss": 0.1666, "step": 2786 }, { "epoch": 0.78, "learning_rate": 1.4828833843584749e-05, - "loss": 0.249, + "loss": 0.066, "step": 2787 }, { "epoch": 0.78, "learning_rate": 1.482697838389461e-05, - "loss": 0.2467, + "loss": 0.1656, "step": 2788 }, { "epoch": 0.78, "learning_rate": 1.4825122924204474e-05, - "loss": 0.1405, + "loss": 0.0215, "step": 2789 }, { "epoch": 0.78, "learning_rate": 1.4823267464514334e-05, - "loss": 0.0849, + "loss": 0.1187, "step": 2790 }, { "epoch": 0.78, "learning_rate": 1.4821412004824196e-05, - "loss": 0.189, + "loss": 0.0971, "step": 2791 }, { "epoch": 0.78, "learning_rate": 1.4819556545134058e-05, - "loss": 0.1369, + "loss": 0.1006, "step": 2792 }, { "epoch": 0.78, "learning_rate": 1.4817701085443922e-05, - "loss": 0.0903, + "loss": 0.0709, "step": 2793 }, { "epoch": 0.78, "learning_rate": 1.4815845625753782e-05, - "loss": 0.1461, + "loss": 0.0732, "step": 2794 }, { "epoch": 0.78, "learning_rate": 1.4813990166063644e-05, - "loss": 0.1425, + "loss": 0.0302, "step": 2795 }, { "epoch": 0.78, "learning_rate": 1.4812134706373504e-05, - "loss": 0.1897, + "loss": 0.1225, "step": 2796 }, { "epoch": 0.78, "learning_rate": 1.4810279246683367e-05, - "loss": 0.2481, + "loss": 0.1012, "step": 2797 }, { "epoch": 0.78, "learning_rate": 1.480842378699323e-05, - "loss": 0.0888, + "loss": 0.0717, "step": 2798 }, { "epoch": 0.78, "learning_rate": 1.4806568327303091e-05, - "loss": 0.1908, + "loss": 0.0242, "step": 2799 }, { "epoch": 0.78, "learning_rate": 1.4804712867612951e-05, - "loss": 0.1396, + "loss": 0.0879, "step": 2800 }, { "epoch": 0.78, "learning_rate": 1.4802857407922815e-05, - "loss": 0.089, + "loss": 0.0246, "step": 2801 }, { "epoch": 0.78, "learning_rate": 1.4801001948232677e-05, - "loss": 0.1426, + "loss": 0.0207, "step": 2802 }, { "epoch": 0.78, "learning_rate": 1.4799146488542537e-05, - "loss": 0.0376, + "loss": 0.0981, "step": 2803 }, { "epoch": 0.78, "learning_rate": 1.4797291028852399e-05, - "loss": 0.1947, + "loss": 0.0475, "step": 2804 }, { "epoch": 0.78, "learning_rate": 1.4795435569162262e-05, - "loss": 0.2463, + "loss": 0.0158, "step": 2805 }, { "epoch": 0.78, "learning_rate": 1.4793580109472122e-05, - "loss": 0.1407, + "loss": 0.0166, "step": 2806 }, { "epoch": 0.78, "learning_rate": 1.4791724649781984e-05, - "loss": 0.0869, + "loss": 0.0821, "step": 2807 }, { "epoch": 0.78, "learning_rate": 1.4789869190091846e-05, - "loss": 0.2908, + "loss": 0.0102, "step": 2808 }, { "epoch": 0.78, "learning_rate": 1.478801373040171e-05, - "loss": 0.0892, + "loss": 0.1325, "step": 2809 }, { "epoch": 0.78, "learning_rate": 1.478615827071157e-05, - "loss": 0.0864, + "loss": 0.1455, "step": 2810 }, { "epoch": 0.78, "learning_rate": 1.4784302811021432e-05, - "loss": 0.1403, + "loss": 0.2387, "step": 2811 }, { "epoch": 0.78, "learning_rate": 1.4782447351331292e-05, - "loss": 0.1372, + "loss": 0.0079, "step": 2812 }, { "epoch": 0.78, "learning_rate": 1.4780591891641156e-05, - "loss": 0.0349, + "loss": 0.0618, "step": 2813 }, { "epoch": 0.78, "learning_rate": 1.4778736431951017e-05, - "loss": 0.2943, + "loss": 0.0721, "step": 2814 }, { "epoch": 0.78, "learning_rate": 1.477688097226088e-05, - "loss": 0.2447, + "loss": 0.1241, "step": 2815 }, { "epoch": 0.78, "learning_rate": 1.477502551257074e-05, - "loss": 0.0867, + "loss": 0.097, "step": 2816 }, { "epoch": 0.78, "learning_rate": 1.4773170052880603e-05, - "loss": 0.0883, + "loss": 0.1128, "step": 2817 }, { "epoch": 0.78, "learning_rate": 1.4771314593190465e-05, - "loss": 0.0874, + "loss": 0.1021, "step": 2818 }, { "epoch": 0.78, "learning_rate": 1.4769459133500325e-05, - "loss": 0.0333, + "loss": 0.0527, "step": 2819 }, { "epoch": 0.78, "learning_rate": 1.4767603673810187e-05, - "loss": 0.1413, + "loss": 0.053, "step": 2820 }, { "epoch": 0.79, "learning_rate": 1.476574821412005e-05, - "loss": 0.2465, + "loss": 0.1306, "step": 2821 }, { "epoch": 0.79, "learning_rate": 1.476389275442991e-05, - "loss": 0.0816, + "loss": 0.1393, "step": 2822 }, { "epoch": 0.79, "learning_rate": 1.4762037294739773e-05, - "loss": 0.2477, + "loss": 0.0884, "step": 2823 }, { "epoch": 0.79, "learning_rate": 1.4760181835049634e-05, - "loss": 0.1365, + "loss": 0.1444, "step": 2824 }, { "epoch": 0.79, "learning_rate": 1.4758326375359498e-05, - "loss": 0.1857, + "loss": 0.0995, "step": 2825 }, { "epoch": 0.79, "learning_rate": 1.4756470915669358e-05, - "loss": 0.1393, + "loss": 0.0388, "step": 2826 }, { "epoch": 0.79, "learning_rate": 1.475461545597922e-05, - "loss": 0.1992, + "loss": 0.1283, "step": 2827 }, { "epoch": 0.79, "learning_rate": 1.475275999628908e-05, - "loss": 0.1944, + "loss": 0.1159, "step": 2828 }, { "epoch": 0.79, "learning_rate": 1.4750904536598944e-05, - "loss": 0.1906, + "loss": 0.0712, "step": 2829 }, { "epoch": 0.79, "learning_rate": 1.4749049076908806e-05, - "loss": 0.1377, + "loss": 0.0998, "step": 2830 }, { "epoch": 0.79, "learning_rate": 1.4747193617218668e-05, - "loss": 0.3009, + "loss": 0.0329, "step": 2831 }, { "epoch": 0.79, "learning_rate": 1.4745338157528528e-05, - "loss": 0.1947, + "loss": 0.0319, "step": 2832 }, { "epoch": 0.79, "learning_rate": 1.4743482697838391e-05, - "loss": 0.1391, + "loss": 0.2335, "step": 2833 }, { "epoch": 0.79, "learning_rate": 1.4741627238148253e-05, - "loss": 0.2434, + "loss": 0.0933, "step": 2834 }, { "epoch": 0.79, "learning_rate": 1.4739771778458113e-05, - "loss": 0.0357, + "loss": 0.09, "step": 2835 }, { "epoch": 0.79, "learning_rate": 1.4737916318767975e-05, - "loss": 0.2902, + "loss": 0.0693, "step": 2836 }, { "epoch": 0.79, "learning_rate": 1.4736060859077839e-05, - "loss": 0.087, + "loss": 0.063, "step": 2837 }, { "epoch": 0.79, "learning_rate": 1.47342053993877e-05, - "loss": 0.0871, + "loss": 0.0166, "step": 2838 }, { "epoch": 0.79, "learning_rate": 1.473234993969756e-05, - "loss": 0.0912, + "loss": 0.0823, "step": 2839 }, { "epoch": 0.79, "learning_rate": 1.4730494480007423e-05, - "loss": 0.1394, + "loss": 0.1181, "step": 2840 }, { "epoch": 0.79, "learning_rate": 1.4728639020317286e-05, - "loss": 0.2413, + "loss": 0.0491, "step": 2841 }, { "epoch": 0.79, "learning_rate": 1.4726783560627146e-05, - "loss": 0.2916, + "loss": 0.0193, "step": 2842 }, { "epoch": 0.79, "learning_rate": 1.4724928100937008e-05, - "loss": 0.2425, + "loss": 0.2266, "step": 2843 }, { "epoch": 0.79, "learning_rate": 1.472307264124687e-05, - "loss": 0.0881, + "loss": 0.0851, "step": 2844 }, { "epoch": 0.79, "learning_rate": 1.4721217181556732e-05, - "loss": 0.1407, + "loss": 0.0686, "step": 2845 }, { "epoch": 0.79, "learning_rate": 1.4719361721866594e-05, - "loss": 0.194, + "loss": 0.1038, "step": 2846 }, { "epoch": 0.79, "learning_rate": 1.4717506262176456e-05, - "loss": 0.1974, + "loss": 0.0272, "step": 2847 }, { "epoch": 0.79, "learning_rate": 1.4715650802486316e-05, - "loss": 0.1435, + "loss": 0.0776, "step": 2848 }, { "epoch": 0.79, "learning_rate": 1.471379534279618e-05, - "loss": 0.1925, + "loss": 0.0564, "step": 2849 }, { "epoch": 0.79, "learning_rate": 1.4711939883106041e-05, - "loss": 0.0418, + "loss": 0.0683, "step": 2850 }, { "epoch": 0.79, "learning_rate": 1.4710084423415902e-05, - "loss": 0.1426, + "loss": 0.1127, "step": 2851 }, { "epoch": 0.79, "learning_rate": 1.4708228963725763e-05, - "loss": 0.0406, + "loss": 0.1237, "step": 2852 }, { "epoch": 0.79, "learning_rate": 1.4706373504035627e-05, - "loss": 0.0915, + "loss": 0.0326, "step": 2853 }, { "epoch": 0.79, "learning_rate": 1.4704518044345489e-05, - "loss": 0.0912, + "loss": 0.0487, "step": 2854 }, { "epoch": 0.79, "learning_rate": 1.4702662584655349e-05, - "loss": 0.3952, + "loss": 0.0331, "step": 2855 }, { "epoch": 0.79, "learning_rate": 1.4700807124965211e-05, - "loss": 0.039, + "loss": 0.1299, "step": 2856 }, { "epoch": 0.8, "learning_rate": 1.4698951665275074e-05, - "loss": 0.1922, + "loss": 0.2108, "step": 2857 }, { "epoch": 0.8, "learning_rate": 1.4697096205584935e-05, - "loss": 0.089, + "loss": 0.0158, "step": 2858 }, { "epoch": 0.8, "learning_rate": 1.4695240745894797e-05, - "loss": 0.3372, + "loss": 0.2149, "step": 2859 }, { "epoch": 0.8, "learning_rate": 1.4693385286204658e-05, - "loss": 0.1413, + "loss": 0.071, "step": 2860 }, { "epoch": 0.8, "learning_rate": 1.4691529826514522e-05, - "loss": 0.1881, + "loss": 0.1018, "step": 2861 }, { "epoch": 0.8, "learning_rate": 1.4689674366824382e-05, - "loss": 0.353, + "loss": 0.0637, "step": 2862 }, { "epoch": 0.8, "learning_rate": 1.4687818907134244e-05, - "loss": 0.3994, + "loss": 0.0239, "step": 2863 }, { "epoch": 0.8, "learning_rate": 1.4685963447444104e-05, - "loss": 0.1395, + "loss": 0.068, "step": 2864 }, { "epoch": 0.8, "learning_rate": 1.4684107987753968e-05, - "loss": 0.139, + "loss": 0.0625, "step": 2865 }, { "epoch": 0.8, "learning_rate": 1.468225252806383e-05, - "loss": 0.1388, + "loss": 0.059, "step": 2866 }, { "epoch": 0.8, "learning_rate": 1.4680397068373691e-05, - "loss": 0.1395, + "loss": 0.1227, "step": 2867 }, { "epoch": 0.8, "learning_rate": 1.4678541608683552e-05, - "loss": 0.0923, + "loss": 0.0791, "step": 2868 }, { "epoch": 0.8, "learning_rate": 1.4676686148993415e-05, - "loss": 0.0896, + "loss": 0.0603, "step": 2869 }, { "epoch": 0.8, "learning_rate": 1.4674830689303277e-05, - "loss": 0.1882, + "loss": 0.1224, "step": 2870 }, { "epoch": 0.8, "learning_rate": 1.4672975229613137e-05, - "loss": 0.0926, + "loss": 0.0142, "step": 2871 }, { "epoch": 0.8, "learning_rate": 1.4671119769922999e-05, - "loss": 0.2896, + "loss": 0.1475, "step": 2872 }, { "epoch": 0.8, "learning_rate": 1.4669264310232863e-05, - "loss": 0.1902, + "loss": 0.0658, "step": 2873 }, { "epoch": 0.8, "learning_rate": 1.4667408850542723e-05, - "loss": 0.1837, + "loss": 0.0847, "step": 2874 }, { "epoch": 0.8, "learning_rate": 1.4665553390852585e-05, - "loss": 0.0908, + "loss": 0.0158, "step": 2875 }, { "epoch": 0.8, "learning_rate": 1.4663697931162447e-05, - "loss": 0.2404, + "loss": 0.0653, "step": 2876 }, { "epoch": 0.8, "learning_rate": 1.466184247147231e-05, - "loss": 0.2369, + "loss": 0.1096, "step": 2877 }, { "epoch": 0.8, "learning_rate": 1.465998701178217e-05, - "loss": 0.2337, + "loss": 0.1161, "step": 2878 }, { "epoch": 0.8, "learning_rate": 1.4658131552092032e-05, - "loss": 0.1873, + "loss": 0.0874, "step": 2879 }, { "epoch": 0.8, "learning_rate": 1.4656276092401892e-05, - "loss": 0.091, + "loss": 0.1797, "step": 2880 }, { "epoch": 0.8, "learning_rate": 1.4654420632711756e-05, - "loss": 0.2354, + "loss": 0.1633, "step": 2881 }, { "epoch": 0.8, "learning_rate": 1.4652565173021618e-05, - "loss": 0.1346, + "loss": 0.2145, "step": 2882 }, { "epoch": 0.8, "learning_rate": 1.465070971333148e-05, - "loss": 0.1388, + "loss": 0.0694, "step": 2883 }, { "epoch": 0.8, "learning_rate": 1.464885425364134e-05, - "loss": 0.1919, + "loss": 0.1041, "step": 2884 }, { "epoch": 0.8, "learning_rate": 1.4646998793951202e-05, - "loss": 0.0915, + "loss": 0.0715, "step": 2885 }, { "epoch": 0.8, "learning_rate": 1.4645143334261065e-05, - "loss": 0.1398, + "loss": 0.0396, "step": 2886 }, { "epoch": 0.8, "learning_rate": 1.4643287874570926e-05, - "loss": 0.1405, + "loss": 0.0615, "step": 2887 }, { "epoch": 0.8, "learning_rate": 1.4641432414880787e-05, - "loss": 0.0891, + "loss": 0.2253, "step": 2888 }, { "epoch": 0.8, "learning_rate": 1.463957695519065e-05, - "loss": 0.0914, + "loss": 0.0781, "step": 2889 }, { "epoch": 0.8, "learning_rate": 1.4637721495500513e-05, - "loss": 0.1384, + "loss": 0.0679, "step": 2890 }, { "epoch": 0.8, "learning_rate": 1.4635866035810373e-05, - "loss": 0.141, + "loss": 0.1249, "step": 2891 }, { "epoch": 0.8, "learning_rate": 1.4634010576120235e-05, - "loss": 0.1401, + "loss": 0.1139, "step": 2892 }, { "epoch": 0.81, "learning_rate": 1.4632155116430095e-05, - "loss": 0.1916, + "loss": 0.0699, "step": 2893 }, { "epoch": 0.81, "learning_rate": 1.4630299656739959e-05, - "loss": 0.1933, + "loss": 0.0883, "step": 2894 }, { "epoch": 0.81, "learning_rate": 1.462844419704982e-05, - "loss": 0.1417, + "loss": 0.0242, "step": 2895 }, { "epoch": 0.81, "learning_rate": 1.4626588737359682e-05, - "loss": 0.1356, + "loss": 0.0358, "step": 2896 }, { "epoch": 0.81, "learning_rate": 1.4624733277669543e-05, - "loss": 0.0869, + "loss": 0.1134, "step": 2897 }, { "epoch": 0.81, "learning_rate": 1.4622877817979406e-05, - "loss": 0.1396, + "loss": 0.1039, "step": 2898 }, { "epoch": 0.81, "learning_rate": 1.4621022358289268e-05, - "loss": 0.1402, + "loss": 0.0558, "step": 2899 }, { "epoch": 0.81, "learning_rate": 1.4619166898599128e-05, - "loss": 0.1899, + "loss": 0.0609, "step": 2900 }, { "epoch": 0.81, "learning_rate": 1.461731143890899e-05, - "loss": 0.1953, + "loss": 0.0153, "step": 2901 }, { "epoch": 0.81, "learning_rate": 1.4615455979218854e-05, - "loss": 0.0901, + "loss": 0.0144, "step": 2902 }, { "epoch": 0.81, "learning_rate": 1.4613600519528714e-05, - "loss": 0.3465, + "loss": 0.0536, "step": 2903 }, { "epoch": 0.81, "learning_rate": 1.4611745059838576e-05, - "loss": 0.1927, + "loss": 0.1043, "step": 2904 }, { "epoch": 0.81, "learning_rate": 1.4609889600148437e-05, - "loss": 0.0865, + "loss": 0.0143, "step": 2905 }, { "epoch": 0.81, "learning_rate": 1.4608034140458301e-05, - "loss": 0.091, + "loss": 0.0434, "step": 2906 }, { "epoch": 0.81, "learning_rate": 1.4606178680768161e-05, - "loss": 0.1457, + "loss": 0.0268, "step": 2907 }, { "epoch": 0.81, "learning_rate": 1.4604323221078023e-05, - "loss": 0.1981, + "loss": 0.0062, "step": 2908 }, { "epoch": 0.81, "learning_rate": 1.4602467761387883e-05, - "loss": 0.1457, + "loss": 0.0059, "step": 2909 }, { "epoch": 0.81, "learning_rate": 1.4600612301697747e-05, - "loss": 0.1455, + "loss": 0.0072, "step": 2910 }, { "epoch": 0.81, "learning_rate": 1.4598756842007609e-05, - "loss": 0.1928, + "loss": 0.0277, "step": 2911 }, { "epoch": 0.81, "learning_rate": 1.459690138231747e-05, - "loss": 0.2857, + "loss": 0.0031, "step": 2912 }, { "epoch": 0.81, "learning_rate": 1.459504592262733e-05, - "loss": 0.1912, + "loss": 0.0513, "step": 2913 }, { "epoch": 0.81, "learning_rate": 1.4593190462937194e-05, - "loss": 0.0354, + "loss": 0.1785, "step": 2914 }, { "epoch": 0.81, "learning_rate": 1.4591335003247056e-05, - "loss": 0.2913, + "loss": 0.1587, "step": 2915 }, { "epoch": 0.81, "learning_rate": 1.4589479543556916e-05, - "loss": 0.0362, + "loss": 0.0038, "step": 2916 }, { "epoch": 0.81, "learning_rate": 1.4587624083866778e-05, - "loss": 0.0896, + "loss": 0.0027, "step": 2917 }, { "epoch": 0.81, "learning_rate": 1.4585768624176642e-05, - "loss": 0.198, + "loss": 0.053, "step": 2918 }, { "epoch": 0.81, "learning_rate": 1.4583913164486504e-05, - "loss": 0.1901, + "loss": 0.0285, "step": 2919 }, { "epoch": 0.81, "learning_rate": 1.4582057704796364e-05, - "loss": 0.0362, + "loss": 0.1312, "step": 2920 }, { "epoch": 0.81, "learning_rate": 1.4580202245106226e-05, - "loss": 0.0875, + "loss": 0.0502, "step": 2921 }, { "epoch": 0.81, "learning_rate": 1.457834678541609e-05, - "loss": 0.0858, + "loss": 0.0996, "step": 2922 }, { "epoch": 0.81, "learning_rate": 1.457649132572595e-05, - "loss": 0.1393, + "loss": 0.1783, "step": 2923 }, { "epoch": 0.81, "learning_rate": 1.4574635866035811e-05, - "loss": 0.0841, + "loss": 0.1451, "step": 2924 }, { "epoch": 0.81, "learning_rate": 1.4572780406345673e-05, - "loss": 0.0879, + "loss": 0.1479, "step": 2925 }, { "epoch": 0.81, "learning_rate": 1.4570924946655535e-05, - "loss": 0.0863, + "loss": 0.1067, "step": 2926 }, { "epoch": 0.81, "learning_rate": 1.4569069486965397e-05, - "loss": 0.1421, + "loss": 0.1054, "step": 2927 }, { "epoch": 0.81, "learning_rate": 1.4567214027275259e-05, - "loss": 0.0868, + "loss": 0.0665, "step": 2928 }, { "epoch": 0.82, "learning_rate": 1.4565358567585119e-05, - "loss": 0.0855, + "loss": 0.0356, "step": 2929 }, { "epoch": 0.82, "learning_rate": 1.4563503107894983e-05, - "loss": 0.0845, + "loss": 0.097, "step": 2930 }, { "epoch": 0.82, "learning_rate": 1.4561647648204844e-05, - "loss": 0.032, + "loss": 0.1327, "step": 2931 }, { "epoch": 0.82, "learning_rate": 1.4559792188514705e-05, - "loss": 0.2495, + "loss": 0.0635, "step": 2932 }, { "epoch": 0.82, "learning_rate": 1.4557936728824566e-05, - "loss": 0.1973, + "loss": 0.0358, "step": 2933 }, { "epoch": 0.82, "learning_rate": 1.455608126913443e-05, - "loss": 0.0848, + "loss": 0.0765, "step": 2934 }, { "epoch": 0.82, "learning_rate": 1.4554225809444292e-05, - "loss": 0.1379, + "loss": 0.038, "step": 2935 }, { "epoch": 0.82, "learning_rate": 1.4552370349754152e-05, - "loss": 0.1983, + "loss": 0.177, "step": 2936 }, { "epoch": 0.82, "learning_rate": 1.4550514890064014e-05, - "loss": 0.1433, + "loss": 0.0226, "step": 2937 }, { "epoch": 0.82, "learning_rate": 1.4548659430373878e-05, - "loss": 0.2501, + "loss": 0.0184, "step": 2938 }, { "epoch": 0.82, "learning_rate": 1.4546803970683738e-05, - "loss": 0.027, + "loss": 0.0865, "step": 2939 }, { "epoch": 0.82, "learning_rate": 1.45449485109936e-05, - "loss": 0.135, + "loss": 0.0691, "step": 2940 }, { "epoch": 0.82, "learning_rate": 1.4543093051303461e-05, - "loss": 0.0849, + "loss": 0.02, "step": 2941 }, { "epoch": 0.82, "learning_rate": 1.4541237591613323e-05, - "loss": 0.029, + "loss": 0.2262, "step": 2942 }, { "epoch": 0.82, "learning_rate": 1.4539382131923185e-05, - "loss": 0.1934, + "loss": 0.1263, "step": 2943 }, { "epoch": 0.82, "learning_rate": 1.4537526672233047e-05, - "loss": 0.1962, + "loss": 0.0298, "step": 2944 }, { "epoch": 0.82, "learning_rate": 1.4535671212542907e-05, - "loss": 0.0806, + "loss": 0.0182, "step": 2945 }, { "epoch": 0.82, "learning_rate": 1.453381575285277e-05, - "loss": 0.1371, + "loss": 0.0607, "step": 2946 }, { "epoch": 0.82, "learning_rate": 1.4531960293162633e-05, - "loss": 0.0275, + "loss": 0.0684, "step": 2947 }, { "epoch": 0.82, "learning_rate": 1.4530104833472493e-05, - "loss": 0.138, + "loss": 0.0375, "step": 2948 }, { "epoch": 0.82, "learning_rate": 1.4528249373782355e-05, - "loss": 0.0846, + "loss": 0.1294, "step": 2949 }, { "epoch": 0.82, "learning_rate": 1.4526393914092218e-05, - "loss": 0.3042, + "loss": 0.0149, "step": 2950 }, { "epoch": 0.82, "learning_rate": 1.452453845440208e-05, - "loss": 0.1924, + "loss": 0.0636, "step": 2951 }, { "epoch": 0.82, "learning_rate": 1.452268299471194e-05, - "loss": 0.0836, + "loss": 0.1405, "step": 2952 }, { "epoch": 0.82, "learning_rate": 1.4520827535021802e-05, - "loss": 0.1976, + "loss": 0.1297, "step": 2953 }, { "epoch": 0.82, "learning_rate": 1.4518972075331666e-05, - "loss": 0.1355, + "loss": 0.1465, "step": 2954 }, { "epoch": 0.82, "learning_rate": 1.4517116615641526e-05, - "loss": 0.311, + "loss": 0.0638, "step": 2955 }, { "epoch": 0.82, "learning_rate": 1.4515261155951388e-05, - "loss": 0.1437, + "loss": 0.0141, "step": 2956 }, { "epoch": 0.82, "learning_rate": 1.451340569626125e-05, - "loss": 0.0838, + "loss": 0.0943, "step": 2957 }, { "epoch": 0.82, "learning_rate": 1.4511550236571113e-05, - "loss": 0.2563, + "loss": 0.0565, "step": 2958 }, { "epoch": 0.82, "learning_rate": 1.4509694776880973e-05, - "loss": 0.0862, + "loss": 0.0143, "step": 2959 }, { "epoch": 0.82, "learning_rate": 1.4507839317190835e-05, - "loss": 0.0875, + "loss": 0.1306, "step": 2960 }, { "epoch": 0.82, "learning_rate": 1.4505983857500695e-05, - "loss": 0.1326, + "loss": 0.0216, "step": 2961 }, { "epoch": 0.82, "learning_rate": 1.4504128397810559e-05, - "loss": 0.0863, + "loss": 0.0479, "step": 2962 }, { "epoch": 0.82, "learning_rate": 1.4502272938120421e-05, - "loss": 0.1932, + "loss": 0.0608, "step": 2963 }, { "epoch": 0.82, "learning_rate": 1.4500417478430283e-05, - "loss": 0.2447, + "loss": 0.0794, "step": 2964 }, { "epoch": 0.83, "learning_rate": 1.4498562018740143e-05, - "loss": 0.1969, + "loss": 0.0197, "step": 2965 }, { "epoch": 0.83, "learning_rate": 1.4496706559050007e-05, - "loss": 0.2432, + "loss": 0.0802, "step": 2966 }, { "epoch": 0.83, "learning_rate": 1.4494851099359868e-05, - "loss": 0.2453, + "loss": 0.0638, "step": 2967 }, { "epoch": 0.83, "learning_rate": 1.4492995639669729e-05, - "loss": 0.1894, + "loss": 0.0419, "step": 2968 }, { "epoch": 0.83, "learning_rate": 1.449114017997959e-05, - "loss": 0.2929, + "loss": 0.1068, "step": 2969 }, { "epoch": 0.83, "learning_rate": 1.4489284720289454e-05, - "loss": 0.1389, + "loss": 0.0341, "step": 2970 }, { "epoch": 0.83, "learning_rate": 1.4487429260599314e-05, - "loss": 0.1407, + "loss": 0.0139, "step": 2971 }, { "epoch": 0.83, "learning_rate": 1.4485573800909176e-05, - "loss": 0.0383, + "loss": 0.0654, "step": 2972 }, { "epoch": 0.83, "learning_rate": 1.4483718341219038e-05, - "loss": 0.1424, + "loss": 0.0165, "step": 2973 }, { "epoch": 0.83, "learning_rate": 1.4481862881528901e-05, - "loss": 0.0905, + "loss": 0.0142, "step": 2974 }, { "epoch": 0.83, "learning_rate": 1.4480007421838762e-05, - "loss": 0.0915, + "loss": 0.0926, "step": 2975 }, { "epoch": 0.83, "learning_rate": 1.4478151962148624e-05, - "loss": 0.1872, + "loss": 0.163, "step": 2976 }, { "epoch": 0.83, "learning_rate": 1.4476296502458484e-05, - "loss": 0.1901, + "loss": 0.2603, "step": 2977 }, { "epoch": 0.83, "learning_rate": 1.4474441042768347e-05, - "loss": 0.0399, + "loss": 0.0651, "step": 2978 }, { "epoch": 0.83, "learning_rate": 1.4472585583078209e-05, - "loss": 0.0886, + "loss": 0.0152, "step": 2979 }, { "epoch": 0.83, "learning_rate": 1.4470730123388071e-05, - "loss": 0.0882, + "loss": 0.0095, "step": 2980 }, { "epoch": 0.83, "learning_rate": 1.4468874663697931e-05, - "loss": 0.2362, + "loss": 0.0622, "step": 2981 }, { "epoch": 0.83, "learning_rate": 1.4467019204007795e-05, - "loss": 0.0881, + "loss": 0.0176, "step": 2982 }, { "epoch": 0.83, "learning_rate": 1.4465163744317657e-05, - "loss": 0.1905, + "loss": 0.2451, "step": 2983 }, { "epoch": 0.83, "learning_rate": 1.4463308284627517e-05, - "loss": 0.1382, + "loss": 0.2832, "step": 2984 }, { "epoch": 0.83, "learning_rate": 1.4461452824937379e-05, - "loss": 0.1872, + "loss": 0.0596, "step": 2985 }, { "epoch": 0.83, "learning_rate": 1.4459597365247242e-05, - "loss": 0.0856, + "loss": 0.1179, "step": 2986 }, { "epoch": 0.83, "learning_rate": 1.4457741905557104e-05, - "loss": 0.246, + "loss": 0.0575, "step": 2987 }, { "epoch": 0.83, "learning_rate": 1.4455886445866964e-05, - "loss": 0.2428, + "loss": 0.0221, "step": 2988 }, { "epoch": 0.83, "learning_rate": 1.4454030986176826e-05, - "loss": 0.1377, + "loss": 0.0947, "step": 2989 }, { "epoch": 0.83, "learning_rate": 1.445217552648669e-05, - "loss": 0.1916, + "loss": 0.1205, "step": 2990 }, { "epoch": 0.83, "learning_rate": 1.445032006679655e-05, - "loss": 0.0903, + "loss": 0.1733, "step": 2991 }, { "epoch": 0.83, "learning_rate": 1.4448464607106412e-05, - "loss": 0.2448, + "loss": 0.1546, "step": 2992 }, { "epoch": 0.83, "learning_rate": 1.4446609147416274e-05, - "loss": 0.0368, + "loss": 0.0654, "step": 2993 }, { "epoch": 0.83, "learning_rate": 1.4444753687726136e-05, - "loss": 0.241, + "loss": 0.0202, "step": 2994 }, { "epoch": 0.83, "learning_rate": 1.4442898228035997e-05, - "loss": 0.1376, + "loss": 0.1414, "step": 2995 }, { "epoch": 0.83, "learning_rate": 1.444104276834586e-05, - "loss": 0.2428, + "loss": 0.0694, "step": 2996 }, { "epoch": 0.83, "learning_rate": 1.443918730865572e-05, - "loss": 0.1936, + "loss": 0.0684, "step": 2997 }, { "epoch": 0.83, "learning_rate": 1.4437331848965583e-05, - "loss": 0.3398, + "loss": 0.0526, "step": 2998 }, { "epoch": 0.83, "learning_rate": 1.4435476389275445e-05, - "loss": 0.092, + "loss": 0.0677, "step": 2999 }, { "epoch": 0.83, "learning_rate": 1.4433620929585305e-05, - "loss": 0.2434, + "loss": 0.0257, "step": 3000 }, { "epoch": 0.84, "learning_rate": 1.4431765469895167e-05, - "loss": 0.2919, + "loss": 0.2032, "step": 3001 }, { "epoch": 0.84, "learning_rate": 1.442991001020503e-05, - "loss": 0.1417, + "loss": 0.073, "step": 3002 }, { "epoch": 0.84, "learning_rate": 1.4428054550514892e-05, - "loss": 0.0943, + "loss": 0.0699, "step": 3003 }, { "epoch": 0.84, "learning_rate": 1.4426199090824753e-05, - "loss": 0.0946, + "loss": 0.0326, "step": 3004 }, { "epoch": 0.84, "learning_rate": 1.4424343631134614e-05, - "loss": 0.1385, + "loss": 0.0971, "step": 3005 }, { "epoch": 0.84, "learning_rate": 1.4422488171444478e-05, - "loss": 0.186, + "loss": 0.08, "step": 3006 }, { "epoch": 0.84, "learning_rate": 1.4420632711754338e-05, - "loss": 0.3847, + "loss": 0.0822, "step": 3007 }, { "epoch": 0.84, "learning_rate": 1.44187772520642e-05, - "loss": 0.2404, + "loss": 0.0194, "step": 3008 }, { "epoch": 0.84, "learning_rate": 1.4416921792374062e-05, - "loss": 0.192, + "loss": 0.0202, "step": 3009 }, { "epoch": 0.84, "learning_rate": 1.4415066332683925e-05, - "loss": 0.2309, + "loss": 0.0139, "step": 3010 }, { "epoch": 0.84, "learning_rate": 1.4413210872993786e-05, - "loss": 0.1395, + "loss": 0.0753, "step": 3011 }, { "epoch": 0.84, "learning_rate": 1.4411355413303647e-05, - "loss": 0.326, + "loss": 0.0593, "step": 3012 }, { "epoch": 0.84, "learning_rate": 1.4409499953613508e-05, - "loss": 0.0541, + "loss": 0.1633, "step": 3013 }, { "epoch": 0.84, "learning_rate": 1.4407644493923371e-05, - "loss": 0.1001, + "loss": 0.0137, "step": 3014 }, { "epoch": 0.84, "learning_rate": 1.4405789034233233e-05, - "loss": 0.0944, + "loss": 0.0916, "step": 3015 }, { "epoch": 0.84, "learning_rate": 1.4403933574543095e-05, - "loss": 0.1889, + "loss": 0.0691, "step": 3016 }, { "epoch": 0.84, "learning_rate": 1.4402078114852955e-05, - "loss": 0.1934, + "loss": 0.1383, "step": 3017 }, { "epoch": 0.84, "learning_rate": 1.4400222655162819e-05, - "loss": 0.0976, + "loss": 0.0116, "step": 3018 }, { "epoch": 0.84, "learning_rate": 1.439836719547268e-05, - "loss": 0.1405, + "loss": 0.0823, "step": 3019 }, { "epoch": 0.84, "learning_rate": 1.439651173578254e-05, - "loss": 0.2891, + "loss": 0.0166, "step": 3020 }, { "epoch": 0.84, "learning_rate": 1.4394656276092403e-05, - "loss": 0.1898, + "loss": 0.0203, "step": 3021 }, { "epoch": 0.84, "learning_rate": 1.4392800816402266e-05, - "loss": 0.0939, + "loss": 0.066, "step": 3022 }, { "epoch": 0.84, "learning_rate": 1.4390945356712126e-05, - "loss": 0.2785, + "loss": 0.0688, "step": 3023 }, { "epoch": 0.84, "learning_rate": 1.4389089897021988e-05, - "loss": 0.1375, + "loss": 0.045, "step": 3024 }, { "epoch": 0.84, "learning_rate": 1.438723443733185e-05, - "loss": 0.0906, + "loss": 0.0139, "step": 3025 }, { "epoch": 0.84, "learning_rate": 1.4385378977641714e-05, - "loss": 0.1425, + "loss": 0.2548, "step": 3026 }, { "epoch": 0.84, "learning_rate": 1.4383523517951574e-05, - "loss": 0.0897, + "loss": 0.1146, "step": 3027 }, { "epoch": 0.84, "learning_rate": 1.4381668058261436e-05, - "loss": 0.1868, + "loss": 0.0682, "step": 3028 }, { "epoch": 0.84, "learning_rate": 1.4379812598571296e-05, - "loss": 0.1368, + "loss": 0.1651, "step": 3029 }, { "epoch": 0.84, "learning_rate": 1.437795713888116e-05, - "loss": 0.0883, + "loss": 0.0134, "step": 3030 }, { "epoch": 0.84, "learning_rate": 1.4376101679191021e-05, - "loss": 0.1417, + "loss": 0.062, "step": 3031 }, { "epoch": 0.84, "learning_rate": 1.4374246219500883e-05, - "loss": 0.2384, + "loss": 0.1078, "step": 3032 }, { "epoch": 0.84, "learning_rate": 1.4372390759810743e-05, - "loss": 0.0905, + "loss": 0.0146, "step": 3033 }, { "epoch": 0.84, "learning_rate": 1.4370535300120607e-05, - "loss": 0.3474, + "loss": 0.0592, "step": 3034 }, { "epoch": 0.84, "learning_rate": 1.4368679840430469e-05, - "loss": 0.0884, + "loss": 0.2167, "step": 3035 }, { "epoch": 0.84, "learning_rate": 1.4366824380740329e-05, - "loss": 0.0868, + "loss": 0.1249, "step": 3036 }, { "epoch": 0.85, "learning_rate": 1.4364968921050191e-05, - "loss": 0.1894, + "loss": 0.1364, "step": 3037 }, { "epoch": 0.85, "learning_rate": 1.4363113461360053e-05, - "loss": 0.0869, + "loss": 0.1656, "step": 3038 }, { "epoch": 0.85, "learning_rate": 1.4361258001669916e-05, - "loss": 0.0371, + "loss": 0.0212, "step": 3039 }, { "epoch": 0.85, "learning_rate": 1.4359402541979776e-05, - "loss": 0.3974, + "loss": 0.1067, "step": 3040 }, { "epoch": 0.85, "learning_rate": 1.4357547082289638e-05, - "loss": 0.0888, + "loss": 0.1033, "step": 3041 }, { "epoch": 0.85, "learning_rate": 1.4355691622599499e-05, - "loss": 0.0868, + "loss": 0.0202, "step": 3042 }, { "epoch": 0.85, "learning_rate": 1.4353836162909362e-05, - "loss": 0.0891, + "loss": 0.0679, "step": 3043 }, { "epoch": 0.85, "learning_rate": 1.4351980703219224e-05, - "loss": 0.1974, + "loss": 0.1174, "step": 3044 }, { "epoch": 0.85, "learning_rate": 1.4350125243529086e-05, - "loss": 0.1921, + "loss": 0.0728, "step": 3045 }, { "epoch": 0.85, "learning_rate": 1.4348269783838946e-05, - "loss": 0.2398, + "loss": 0.0295, "step": 3046 }, { "epoch": 0.85, "learning_rate": 1.434641432414881e-05, - "loss": 0.1899, + "loss": 0.0798, "step": 3047 }, { "epoch": 0.85, "learning_rate": 1.4344558864458671e-05, - "loss": 0.0343, + "loss": 0.0308, "step": 3048 }, { "epoch": 0.85, "learning_rate": 1.4342703404768532e-05, - "loss": 0.1956, + "loss": 0.1334, "step": 3049 }, { "epoch": 0.85, "learning_rate": 1.4340847945078393e-05, - "loss": 0.0875, + "loss": 0.0573, "step": 3050 }, { "epoch": 0.85, "learning_rate": 1.4338992485388257e-05, - "loss": 0.1938, + "loss": 0.1212, "step": 3051 }, { "epoch": 0.85, "learning_rate": 1.4337137025698117e-05, - "loss": 0.0846, + "loss": 0.0346, "step": 3052 }, { "epoch": 0.85, "learning_rate": 1.4335281566007979e-05, - "loss": 0.0871, + "loss": 0.1231, "step": 3053 }, { "epoch": 0.85, "learning_rate": 1.4333426106317841e-05, - "loss": 0.3484, + "loss": 0.0372, "step": 3054 }, { "epoch": 0.85, "learning_rate": 1.4331570646627705e-05, - "loss": 0.1455, + "loss": 0.0101, "step": 3055 }, { "epoch": 0.85, "learning_rate": 1.4329715186937565e-05, - "loss": 0.135, + "loss": 0.0666, "step": 3056 }, { "epoch": 0.85, "learning_rate": 1.4327859727247427e-05, - "loss": 0.1948, + "loss": 0.0132, "step": 3057 }, { "epoch": 0.85, "learning_rate": 1.4326004267557287e-05, - "loss": 0.4538, + "loss": 0.1643, "step": 3058 }, { "epoch": 0.85, "learning_rate": 1.432414880786715e-05, - "loss": 0.2957, + "loss": 0.0145, "step": 3059 }, { "epoch": 0.85, "learning_rate": 1.4322293348177012e-05, - "loss": 0.0355, + "loss": 0.2079, "step": 3060 }, { "epoch": 0.85, "learning_rate": 1.4320437888486874e-05, - "loss": 0.0893, + "loss": 0.0117, "step": 3061 }, { "epoch": 0.85, "learning_rate": 1.4318582428796734e-05, - "loss": 0.0372, + "loss": 0.0612, "step": 3062 }, { "epoch": 0.85, "learning_rate": 1.4316726969106598e-05, - "loss": 0.242, + "loss": 0.1083, "step": 3063 }, { "epoch": 0.85, "learning_rate": 1.431487150941646e-05, - "loss": 0.1916, + "loss": 0.0232, "step": 3064 }, { "epoch": 0.85, "learning_rate": 1.431301604972632e-05, - "loss": 0.0903, + "loss": 0.1144, "step": 3065 }, { "epoch": 0.85, "learning_rate": 1.4311160590036182e-05, - "loss": 0.1411, + "loss": 0.0543, "step": 3066 }, { "epoch": 0.85, "learning_rate": 1.4309305130346045e-05, - "loss": 0.0909, + "loss": 0.0719, "step": 3067 }, { "epoch": 0.85, "learning_rate": 1.4307449670655905e-05, - "loss": 0.0871, + "loss": 0.0088, "step": 3068 }, { "epoch": 0.85, "learning_rate": 1.4305594210965767e-05, - "loss": 0.1884, + "loss": 0.0091, "step": 3069 }, { "epoch": 0.85, "learning_rate": 1.430373875127563e-05, - "loss": 0.2476, + "loss": 0.0089, "step": 3070 }, { "epoch": 0.85, "learning_rate": 1.4301883291585493e-05, - "loss": 0.1894, + "loss": 0.1489, "step": 3071 }, { "epoch": 0.85, "learning_rate": 1.4300027831895353e-05, - "loss": 0.1907, + "loss": 0.2164, "step": 3072 }, { "epoch": 0.86, "learning_rate": 1.4298172372205215e-05, - "loss": 0.089, + "loss": 0.2021, "step": 3073 }, { "epoch": 0.86, "learning_rate": 1.4296316912515075e-05, - "loss": 0.287, + "loss": 0.1237, "step": 3074 }, { "epoch": 0.86, "learning_rate": 1.4294461452824939e-05, - "loss": 0.1893, + "loss": 0.0583, "step": 3075 }, { "epoch": 0.86, "learning_rate": 1.42926059931348e-05, - "loss": 0.1909, + "loss": 0.0953, "step": 3076 }, { "epoch": 0.86, "learning_rate": 1.4290750533444662e-05, - "loss": 0.1408, + "loss": 0.2002, "step": 3077 }, { "epoch": 0.86, "learning_rate": 1.4288895073754522e-05, - "loss": 0.1891, + "loss": 0.0122, "step": 3078 }, { "epoch": 0.86, "learning_rate": 1.4287039614064386e-05, - "loss": 0.2385, + "loss": 0.0692, "step": 3079 }, { "epoch": 0.86, "learning_rate": 1.4285184154374248e-05, - "loss": 0.1398, + "loss": 0.0161, "step": 3080 }, { "epoch": 0.86, "learning_rate": 1.4283328694684108e-05, - "loss": 0.2386, + "loss": 0.1243, "step": 3081 }, { "epoch": 0.86, "learning_rate": 1.428147323499397e-05, - "loss": 0.294, + "loss": 0.0936, "step": 3082 }, { "epoch": 0.86, "learning_rate": 1.4279617775303834e-05, - "loss": 0.0895, + "loss": 0.1063, "step": 3083 }, { "epoch": 0.86, "learning_rate": 1.4277762315613695e-05, - "loss": 0.0915, + "loss": 0.1009, "step": 3084 }, { "epoch": 0.86, "learning_rate": 1.4275906855923556e-05, - "loss": 0.044, + "loss": 0.125, "step": 3085 }, { "epoch": 0.86, "learning_rate": 1.4274051396233417e-05, - "loss": 0.0422, + "loss": 0.1268, "step": 3086 }, { "epoch": 0.86, "learning_rate": 1.4272195936543281e-05, - "loss": 0.0905, + "loss": 0.0988, "step": 3087 }, { "epoch": 0.86, "learning_rate": 1.4270340476853141e-05, - "loss": 0.1916, + "loss": 0.0579, "step": 3088 }, { "epoch": 0.86, "learning_rate": 1.4268485017163003e-05, - "loss": 0.1427, + "loss": 0.0668, "step": 3089 }, { "epoch": 0.86, "learning_rate": 1.4266629557472865e-05, - "loss": 0.2816, + "loss": 0.0725, "step": 3090 }, { "epoch": 0.86, "learning_rate": 1.4264774097782727e-05, - "loss": 0.1875, + "loss": 0.0823, "step": 3091 }, { "epoch": 0.86, "learning_rate": 1.4262918638092589e-05, - "loss": 0.0891, + "loss": 0.0631, "step": 3092 }, { "epoch": 0.86, "learning_rate": 1.426106317840245e-05, - "loss": 0.0909, + "loss": 0.063, "step": 3093 }, { "epoch": 0.86, "learning_rate": 1.425920771871231e-05, - "loss": 0.1931, + "loss": 0.104, "step": 3094 }, { "epoch": 0.86, "learning_rate": 1.4257352259022174e-05, - "loss": 0.1431, + "loss": 0.0246, "step": 3095 }, { "epoch": 0.86, "learning_rate": 1.4255496799332036e-05, - "loss": 0.0888, + "loss": 0.0203, "step": 3096 }, { "epoch": 0.86, "learning_rate": 1.4253641339641896e-05, - "loss": 0.1411, + "loss": 0.2355, "step": 3097 }, { "epoch": 0.86, "learning_rate": 1.4251785879951758e-05, - "loss": 0.0885, + "loss": 0.0623, "step": 3098 }, { "epoch": 0.86, "learning_rate": 1.4249930420261622e-05, - "loss": 0.1955, + "loss": 0.1141, "step": 3099 }, { "epoch": 0.86, "learning_rate": 1.4248074960571484e-05, - "loss": 0.1349, + "loss": 0.0326, "step": 3100 }, { "epoch": 0.86, "learning_rate": 1.4246219500881344e-05, - "loss": 0.0852, + "loss": 0.0236, "step": 3101 }, { "epoch": 0.86, "learning_rate": 1.4244364041191206e-05, - "loss": 0.0362, + "loss": 0.0534, "step": 3102 }, { "epoch": 0.86, "learning_rate": 1.424250858150107e-05, - "loss": 0.1414, + "loss": 0.0896, "step": 3103 }, { "epoch": 0.86, "learning_rate": 1.424065312181093e-05, - "loss": 0.1406, + "loss": 0.0522, "step": 3104 }, { "epoch": 0.86, "learning_rate": 1.4238797662120791e-05, - "loss": 0.1392, + "loss": 0.1273, "step": 3105 }, { "epoch": 0.86, "learning_rate": 1.4236942202430653e-05, - "loss": 0.184, + "loss": 0.0111, "step": 3106 }, { "epoch": 0.86, "learning_rate": 1.4235086742740517e-05, - "loss": 0.2429, + "loss": 0.0255, "step": 3107 }, { "epoch": 0.87, "learning_rate": 1.4233231283050377e-05, - "loss": 0.2528, + "loss": 0.0158, "step": 3108 }, { "epoch": 0.87, "learning_rate": 1.4231375823360239e-05, - "loss": 0.1949, + "loss": 0.02, "step": 3109 }, { "epoch": 0.87, "learning_rate": 1.4229520363670099e-05, - "loss": 0.3587, + "loss": 0.1161, "step": 3110 }, { "epoch": 0.87, "learning_rate": 1.4227664903979963e-05, - "loss": 0.1948, + "loss": 0.0506, "step": 3111 }, { "epoch": 0.87, "learning_rate": 1.4225809444289824e-05, - "loss": 0.14, + "loss": 0.1055, "step": 3112 }, { "epoch": 0.87, "learning_rate": 1.4223953984599686e-05, - "loss": 0.0895, + "loss": 0.0397, "step": 3113 }, { "epoch": 0.87, "learning_rate": 1.4222098524909546e-05, - "loss": 0.24, + "loss": 0.1235, "step": 3114 }, { "epoch": 0.87, "learning_rate": 1.422024306521941e-05, - "loss": 0.0897, + "loss": 0.1477, "step": 3115 }, { "epoch": 0.87, "learning_rate": 1.4218387605529272e-05, - "loss": 0.2395, + "loss": 0.0137, "step": 3116 }, { "epoch": 0.87, "learning_rate": 1.4216532145839132e-05, - "loss": 0.0888, + "loss": 0.0438, "step": 3117 }, { "epoch": 0.87, "learning_rate": 1.4214676686148994e-05, - "loss": 0.0884, + "loss": 0.0928, "step": 3118 }, { "epoch": 0.87, "learning_rate": 1.4212821226458857e-05, - "loss": 0.1958, + "loss": 0.0926, "step": 3119 }, { "epoch": 0.87, "learning_rate": 1.4210965766768718e-05, - "loss": 0.0877, + "loss": 0.0696, "step": 3120 }, { "epoch": 0.87, "learning_rate": 1.420911030707858e-05, - "loss": 0.1401, + "loss": 0.1537, "step": 3121 }, { "epoch": 0.87, "learning_rate": 1.4207254847388441e-05, - "loss": 0.1372, + "loss": 0.0514, "step": 3122 }, { "epoch": 0.87, "learning_rate": 1.4205399387698305e-05, - "loss": 0.1855, + "loss": 0.1404, "step": 3123 }, { "epoch": 0.87, "learning_rate": 1.4203543928008165e-05, - "loss": 0.088, + "loss": 0.0187, "step": 3124 }, { "epoch": 0.87, "learning_rate": 1.4201688468318027e-05, - "loss": 0.2502, + "loss": 0.1848, "step": 3125 }, { "epoch": 0.87, "learning_rate": 1.4199833008627887e-05, - "loss": 0.2991, + "loss": 0.0117, "step": 3126 }, { "epoch": 0.87, "learning_rate": 1.419797754893775e-05, - "loss": 0.139, + "loss": 0.0958, "step": 3127 }, { "epoch": 0.87, "learning_rate": 1.4196122089247613e-05, - "loss": 0.0889, + "loss": 0.0488, "step": 3128 }, { "epoch": 0.87, "learning_rate": 1.4194266629557474e-05, - "loss": 0.1334, + "loss": 0.0222, "step": 3129 }, { "epoch": 0.87, "learning_rate": 1.4192411169867335e-05, - "loss": 0.1378, + "loss": 0.0186, "step": 3130 }, { "epoch": 0.87, "learning_rate": 1.4190555710177198e-05, - "loss": 0.2986, + "loss": 0.1964, "step": 3131 }, { "epoch": 0.87, "learning_rate": 1.418870025048706e-05, - "loss": 0.1423, + "loss": 0.1226, "step": 3132 }, { "epoch": 0.87, "learning_rate": 1.418684479079692e-05, - "loss": 0.1857, + "loss": 0.0215, "step": 3133 }, { "epoch": 0.87, "learning_rate": 1.4184989331106782e-05, - "loss": 0.1887, + "loss": 0.0713, "step": 3134 }, { "epoch": 0.87, "learning_rate": 1.4183133871416646e-05, - "loss": 0.1989, + "loss": 0.1805, "step": 3135 }, { "epoch": 0.87, "learning_rate": 1.4181278411726508e-05, - "loss": 0.344, + "loss": 0.0217, "step": 3136 }, { "epoch": 0.87, "learning_rate": 1.4179422952036368e-05, - "loss": 0.0887, + "loss": 0.068, "step": 3137 }, { "epoch": 0.87, "learning_rate": 1.417756749234623e-05, - "loss": 0.1939, + "loss": 0.0368, "step": 3138 }, { "epoch": 0.87, "learning_rate": 1.4175712032656093e-05, - "loss": 0.1383, + "loss": 0.0684, "step": 3139 }, { "epoch": 0.87, "learning_rate": 1.4173856572965953e-05, - "loss": 0.0949, + "loss": 0.0981, "step": 3140 }, { "epoch": 0.87, "learning_rate": 1.4172001113275815e-05, - "loss": 0.0429, + "loss": 0.0325, "step": 3141 }, { "epoch": 0.87, "learning_rate": 1.4170145653585677e-05, - "loss": 0.1937, + "loss": 0.0612, "step": 3142 }, { "epoch": 0.87, "learning_rate": 1.4168290193895539e-05, - "loss": 0.1423, + "loss": 0.1016, "step": 3143 }, { "epoch": 0.88, "learning_rate": 1.41664347342054e-05, - "loss": 0.092, + "loss": 0.1218, "step": 3144 }, { "epoch": 0.88, "learning_rate": 1.4164579274515263e-05, - "loss": 0.289, + "loss": 0.2063, "step": 3145 }, { "epoch": 0.88, "learning_rate": 1.4162723814825123e-05, - "loss": 0.1408, + "loss": 0.0207, "step": 3146 }, { "epoch": 0.88, "learning_rate": 1.4160868355134986e-05, - "loss": 0.0419, + "loss": 0.203, "step": 3147 }, { "epoch": 0.88, "learning_rate": 1.4159012895444848e-05, - "loss": 0.1385, + "loss": 0.059, "step": 3148 }, { "epoch": 0.88, "learning_rate": 1.4157157435754709e-05, - "loss": 0.135, + "loss": 0.1738, "step": 3149 }, { "epoch": 0.88, "learning_rate": 1.415530197606457e-05, - "loss": 0.0403, + "loss": 0.1184, "step": 3150 }, { "epoch": 0.88, "learning_rate": 1.4153446516374434e-05, - "loss": 0.1869, + "loss": 0.0242, "step": 3151 }, { "epoch": 0.88, "learning_rate": 1.4151591056684296e-05, - "loss": 0.09, + "loss": 0.0211, "step": 3152 }, { "epoch": 0.88, "learning_rate": 1.4149735596994156e-05, - "loss": 0.1399, + "loss": 0.048, "step": 3153 }, { "epoch": 0.88, "learning_rate": 1.4147880137304018e-05, - "loss": 0.1387, + "loss": 0.0274, "step": 3154 }, { "epoch": 0.88, "learning_rate": 1.4146024677613881e-05, - "loss": 0.2941, + "loss": 0.0689, "step": 3155 }, { "epoch": 0.88, "learning_rate": 1.4144169217923742e-05, - "loss": 0.142, + "loss": 0.1136, "step": 3156 }, { "epoch": 0.88, "learning_rate": 1.4142313758233603e-05, - "loss": 0.089, + "loss": 0.061, "step": 3157 }, { "epoch": 0.88, "learning_rate": 1.4140458298543465e-05, - "loss": 0.1407, + "loss": 0.2634, "step": 3158 }, { "epoch": 0.88, "learning_rate": 1.4138602838853329e-05, - "loss": 0.2431, + "loss": 0.0191, "step": 3159 }, { "epoch": 0.88, "learning_rate": 1.4136747379163189e-05, - "loss": 0.246, + "loss": 0.2015, "step": 3160 }, { "epoch": 0.88, "learning_rate": 1.4134891919473051e-05, - "loss": 0.1953, + "loss": 0.0179, "step": 3161 }, { "epoch": 0.88, "learning_rate": 1.4133036459782911e-05, - "loss": 0.0883, + "loss": 0.0195, "step": 3162 }, { "epoch": 0.88, "learning_rate": 1.4131181000092775e-05, - "loss": 0.0837, + "loss": 0.0508, "step": 3163 }, { "epoch": 0.88, "learning_rate": 1.4129325540402637e-05, - "loss": 0.0808, + "loss": 0.0199, "step": 3164 }, { "epoch": 0.88, "learning_rate": 1.4127470080712498e-05, - "loss": 0.1926, + "loss": 0.0804, "step": 3165 }, { "epoch": 0.88, "learning_rate": 1.4125614621022359e-05, - "loss": 0.1401, + "loss": 0.0654, "step": 3166 }, { "epoch": 0.88, "learning_rate": 1.4123759161332222e-05, - "loss": 0.1901, + "loss": 0.1657, "step": 3167 }, { "epoch": 0.88, "learning_rate": 1.4121903701642084e-05, - "loss": 0.3551, + "loss": 0.1358, "step": 3168 }, { "epoch": 0.88, "learning_rate": 1.4120048241951944e-05, - "loss": 0.085, + "loss": 0.0617, "step": 3169 }, { "epoch": 0.88, "learning_rate": 1.4118192782261806e-05, - "loss": 0.2414, + "loss": 0.0594, "step": 3170 }, { "epoch": 0.88, "learning_rate": 1.411633732257167e-05, - "loss": 0.0371, + "loss": 0.1108, "step": 3171 }, { "epoch": 0.88, "learning_rate": 1.411448186288153e-05, - "loss": 0.0875, + "loss": 0.0773, "step": 3172 }, { "epoch": 0.88, "learning_rate": 1.4112626403191392e-05, - "loss": 0.0368, + "loss": 0.0506, "step": 3173 }, { "epoch": 0.88, "learning_rate": 1.4110770943501254e-05, - "loss": 0.1872, + "loss": 0.0635, "step": 3174 }, { "epoch": 0.88, "learning_rate": 1.4108915483811117e-05, - "loss": 0.0922, + "loss": 0.0517, "step": 3175 }, { "epoch": 0.88, "learning_rate": 1.4107060024120977e-05, - "loss": 0.1937, + "loss": 0.0885, "step": 3176 }, { "epoch": 0.88, "learning_rate": 1.410520456443084e-05, - "loss": 0.0348, + "loss": 0.0195, "step": 3177 }, { "epoch": 0.88, "learning_rate": 1.41033491047407e-05, - "loss": 0.191, + "loss": 0.1001, "step": 3178 }, { "epoch": 0.88, "learning_rate": 1.4101493645050563e-05, - "loss": 0.1383, + "loss": 0.0837, "step": 3179 }, { "epoch": 0.89, "learning_rate": 1.4099638185360425e-05, - "loss": 0.19, + "loss": 0.0112, "step": 3180 }, { "epoch": 0.89, "learning_rate": 1.4097782725670287e-05, - "loss": 0.2944, + "loss": 0.1106, "step": 3181 }, { "epoch": 0.89, "learning_rate": 1.4095927265980147e-05, - "loss": 0.086, + "loss": 0.0361, "step": 3182 }, { "epoch": 0.89, "learning_rate": 1.409407180629001e-05, - "loss": 0.2406, + "loss": 0.1781, "step": 3183 }, { "epoch": 0.89, "learning_rate": 1.4092216346599872e-05, - "loss": 0.138, + "loss": 0.1238, "step": 3184 }, { "epoch": 0.89, "learning_rate": 1.4090360886909732e-05, - "loss": 0.1362, + "loss": 0.129, "step": 3185 }, { "epoch": 0.89, "learning_rate": 1.4088505427219594e-05, - "loss": 0.2991, + "loss": 0.0616, "step": 3186 }, { "epoch": 0.89, "learning_rate": 1.4086649967529458e-05, - "loss": 0.1381, + "loss": 0.0556, "step": 3187 }, { "epoch": 0.89, "learning_rate": 1.4084794507839318e-05, - "loss": 0.142, + "loss": 0.073, "step": 3188 }, { "epoch": 0.89, "learning_rate": 1.408293904814918e-05, - "loss": 0.1936, + "loss": 0.0142, "step": 3189 }, { "epoch": 0.89, "learning_rate": 1.4081083588459042e-05, - "loss": 0.1893, + "loss": 0.0994, "step": 3190 }, { "epoch": 0.89, "learning_rate": 1.4079228128768902e-05, - "loss": 0.1414, + "loss": 0.0668, "step": 3191 }, { "epoch": 0.89, "learning_rate": 1.4077372669078766e-05, - "loss": 0.2483, + "loss": 0.0541, "step": 3192 }, { "epoch": 0.89, "learning_rate": 1.4075517209388627e-05, - "loss": 0.189, + "loss": 0.1254, "step": 3193 }, { "epoch": 0.89, "learning_rate": 1.4073661749698488e-05, - "loss": 0.1368, + "loss": 0.0827, "step": 3194 }, { "epoch": 0.89, "learning_rate": 1.407180629000835e-05, - "loss": 0.1918, + "loss": 0.0482, "step": 3195 }, { "epoch": 0.89, "learning_rate": 1.4069950830318213e-05, - "loss": 0.0882, + "loss": 0.0239, "step": 3196 }, { "epoch": 0.89, "learning_rate": 1.4068095370628075e-05, - "loss": 0.1898, + "loss": 0.118, "step": 3197 }, { "epoch": 0.89, "learning_rate": 1.4066239910937935e-05, - "loss": 0.3908, + "loss": 0.1409, "step": 3198 }, { "epoch": 0.89, "learning_rate": 1.4064384451247797e-05, - "loss": 0.1375, + "loss": 0.0501, "step": 3199 }, { "epoch": 0.89, "learning_rate": 1.406252899155766e-05, - "loss": 0.1894, + "loss": 0.1427, "step": 3200 }, { "epoch": 0.89, "learning_rate": 1.406067353186752e-05, - "loss": 0.0448, + "loss": 0.0721, "step": 3201 }, { "epoch": 0.89, "learning_rate": 1.4058818072177383e-05, - "loss": 0.0918, + "loss": 0.1672, "step": 3202 }, { "epoch": 0.89, "learning_rate": 1.4056962612487244e-05, - "loss": 0.0929, + "loss": 0.0277, "step": 3203 }, { "epoch": 0.89, "learning_rate": 1.4055107152797108e-05, - "loss": 0.1877, + "loss": 0.0868, "step": 3204 }, { "epoch": 0.89, "learning_rate": 1.4053251693106968e-05, - "loss": 0.2373, + "loss": 0.1297, "step": 3205 }, { "epoch": 0.89, "learning_rate": 1.405139623341683e-05, - "loss": 0.1865, + "loss": 0.0988, "step": 3206 }, { "epoch": 0.89, "learning_rate": 1.404954077372669e-05, - "loss": 0.186, + "loss": 0.0891, "step": 3207 }, { "epoch": 0.89, "learning_rate": 1.4047685314036554e-05, - "loss": 0.1454, + "loss": 0.0623, "step": 3208 }, { "epoch": 0.89, "learning_rate": 1.4045829854346416e-05, - "loss": 0.091, + "loss": 0.0268, "step": 3209 }, { "epoch": 0.89, "learning_rate": 1.4043974394656278e-05, - "loss": 0.1412, + "loss": 0.1342, "step": 3210 }, { "epoch": 0.89, "learning_rate": 1.4042118934966138e-05, - "loss": 0.2436, + "loss": 0.0651, "step": 3211 }, { "epoch": 0.89, "learning_rate": 1.4040263475276001e-05, - "loss": 0.2936, + "loss": 0.0317, "step": 3212 }, { "epoch": 0.89, "learning_rate": 1.4038408015585863e-05, - "loss": 0.1383, + "loss": 0.0189, "step": 3213 }, { "epoch": 0.89, "learning_rate": 1.4036552555895723e-05, - "loss": 0.0414, + "loss": 0.0606, "step": 3214 }, { "epoch": 0.89, "learning_rate": 1.4034697096205585e-05, - "loss": 0.1923, + "loss": 0.019, "step": 3215 }, { "epoch": 0.9, "learning_rate": 1.4032841636515449e-05, - "loss": 0.2855, + "loss": 0.0551, "step": 3216 }, { "epoch": 0.9, "learning_rate": 1.4030986176825309e-05, - "loss": 0.0862, + "loss": 0.1807, "step": 3217 }, { "epoch": 0.9, "learning_rate": 1.402913071713517e-05, - "loss": 0.1854, + "loss": 0.0698, "step": 3218 }, { "epoch": 0.9, "learning_rate": 1.4027275257445033e-05, - "loss": 0.1385, + "loss": 0.0682, "step": 3219 }, { "epoch": 0.9, "learning_rate": 1.4025419797754896e-05, - "loss": 0.2433, + "loss": 0.0353, "step": 3220 }, { "epoch": 0.9, "learning_rate": 1.4023564338064756e-05, - "loss": 0.1892, + "loss": 0.1306, "step": 3221 }, { "epoch": 0.9, "learning_rate": 1.4021708878374618e-05, - "loss": 0.1897, + "loss": 0.0794, "step": 3222 }, { "epoch": 0.9, "learning_rate": 1.4019853418684478e-05, - "loss": 0.1345, + "loss": 0.0116, "step": 3223 }, { "epoch": 0.9, "learning_rate": 1.4017997958994342e-05, - "loss": 0.238, + "loss": 0.0738, "step": 3224 }, { "epoch": 0.9, "learning_rate": 1.4016142499304204e-05, - "loss": 0.1872, + "loss": 0.159, "step": 3225 }, { "epoch": 0.9, "learning_rate": 1.4014287039614066e-05, - "loss": 0.1897, + "loss": 0.2471, "step": 3226 }, { "epoch": 0.9, "learning_rate": 1.4012431579923926e-05, - "loss": 0.0889, + "loss": 0.0938, "step": 3227 }, { "epoch": 0.9, "learning_rate": 1.401057612023379e-05, - "loss": 0.2373, + "loss": 0.1635, "step": 3228 }, { "epoch": 0.9, "learning_rate": 1.4008720660543651e-05, - "loss": 0.0945, + "loss": 0.0431, "step": 3229 }, { "epoch": 0.9, "learning_rate": 1.4006865200853512e-05, - "loss": 0.0453, + "loss": 0.0892, "step": 3230 }, { "epoch": 0.9, "learning_rate": 1.4005009741163373e-05, - "loss": 0.1432, + "loss": 0.0294, "step": 3231 }, { "epoch": 0.9, "learning_rate": 1.4003154281473237e-05, - "loss": 0.284, + "loss": 0.082, "step": 3232 }, { "epoch": 0.9, "learning_rate": 1.4001298821783099e-05, - "loss": 0.187, + "loss": 0.1238, "step": 3233 }, { "epoch": 0.9, "learning_rate": 1.3999443362092959e-05, - "loss": 0.1872, + "loss": 0.0296, "step": 3234 }, { "epoch": 0.9, "learning_rate": 1.3997587902402821e-05, - "loss": 0.339, + "loss": 0.0715, "step": 3235 }, { "epoch": 0.9, "learning_rate": 1.3995732442712684e-05, - "loss": 0.3292, + "loss": 0.0301, "step": 3236 }, { "epoch": 0.9, "learning_rate": 1.3993876983022545e-05, - "loss": 0.2357, + "loss": 0.0739, "step": 3237 }, { "epoch": 0.9, "learning_rate": 1.3992021523332407e-05, - "loss": 0.0946, + "loss": 0.0503, "step": 3238 }, { "epoch": 0.9, "learning_rate": 1.3990166063642268e-05, - "loss": 0.1375, + "loss": 0.023, "step": 3239 }, { "epoch": 0.9, "learning_rate": 1.398831060395213e-05, - "loss": 0.0481, + "loss": 0.1101, "step": 3240 }, { "epoch": 0.9, "learning_rate": 1.3986455144261992e-05, - "loss": 0.2289, + "loss": 0.0674, "step": 3241 }, { "epoch": 0.9, "learning_rate": 1.3984599684571854e-05, - "loss": 0.0988, + "loss": 0.0549, "step": 3242 }, { "epoch": 0.9, "learning_rate": 1.3982744224881714e-05, - "loss": 0.0964, + "loss": 0.024, "step": 3243 }, { "epoch": 0.9, "learning_rate": 1.3980888765191578e-05, - "loss": 0.0951, + "loss": 0.0937, "step": 3244 }, { "epoch": 0.9, "learning_rate": 1.397903330550144e-05, - "loss": 0.3858, + "loss": 0.0171, "step": 3245 }, { "epoch": 0.9, "learning_rate": 1.39771778458113e-05, - "loss": 0.2351, + "loss": 0.0568, "step": 3246 }, { "epoch": 0.9, "learning_rate": 1.3975322386121162e-05, - "loss": 0.0447, + "loss": 0.1426, "step": 3247 }, { "epoch": 0.9, "learning_rate": 1.3973466926431025e-05, - "loss": 0.1904, + "loss": 0.0162, "step": 3248 }, { "epoch": 0.9, "learning_rate": 1.3971611466740887e-05, - "loss": 0.0944, + "loss": 0.0922, "step": 3249 }, { "epoch": 0.9, "learning_rate": 1.3969756007050747e-05, - "loss": 0.1462, + "loss": 0.104, "step": 3250 }, { "epoch": 0.9, "learning_rate": 1.3967900547360609e-05, - "loss": 0.2892, + "loss": 0.202, "step": 3251 }, { "epoch": 0.91, "learning_rate": 1.3966045087670473e-05, - "loss": 0.0897, + "loss": 0.0883, "step": 3252 }, { "epoch": 0.91, "learning_rate": 1.3964189627980333e-05, - "loss": 0.2381, + "loss": 0.0813, "step": 3253 }, { "epoch": 0.91, "learning_rate": 1.3962334168290195e-05, - "loss": 0.1404, + "loss": 0.0664, "step": 3254 }, { "epoch": 0.91, "learning_rate": 1.3960478708600057e-05, - "loss": 0.1863, + "loss": 0.0701, "step": 3255 }, { "epoch": 0.91, "learning_rate": 1.395862324890992e-05, - "loss": 0.0935, + "loss": 0.1106, "step": 3256 }, { "epoch": 0.91, "learning_rate": 1.395676778921978e-05, - "loss": 0.1943, + "loss": 0.1297, "step": 3257 }, { "epoch": 0.91, "learning_rate": 1.3954912329529642e-05, - "loss": 0.1408, + "loss": 0.0569, "step": 3258 }, { "epoch": 0.91, "learning_rate": 1.3953056869839502e-05, - "loss": 0.0915, + "loss": 0.1619, "step": 3259 }, { "epoch": 0.91, "learning_rate": 1.3951201410149366e-05, - "loss": 0.1402, + "loss": 0.2024, "step": 3260 }, { "epoch": 0.91, "learning_rate": 1.3949345950459228e-05, - "loss": 0.0386, + "loss": 0.0261, "step": 3261 }, { "epoch": 0.91, "learning_rate": 1.394749049076909e-05, - "loss": 0.0893, + "loss": 0.108, "step": 3262 }, { "epoch": 0.91, "learning_rate": 1.394563503107895e-05, - "loss": 0.0901, + "loss": 0.0887, "step": 3263 }, { "epoch": 0.91, "learning_rate": 1.3943779571388813e-05, - "loss": 0.1903, + "loss": 0.0239, "step": 3264 }, { "epoch": 0.91, "learning_rate": 1.3941924111698675e-05, - "loss": 0.1408, + "loss": 0.029, "step": 3265 }, { "epoch": 0.91, "learning_rate": 1.3940068652008535e-05, - "loss": 0.3468, + "loss": 0.036, "step": 3266 }, { "epoch": 0.91, "learning_rate": 1.3938213192318397e-05, - "loss": 0.2396, + "loss": 0.0456, "step": 3267 }, { "epoch": 0.91, "learning_rate": 1.3936357732628261e-05, - "loss": 0.1847, + "loss": 0.0256, "step": 3268 }, { "epoch": 0.91, "learning_rate": 1.3934502272938121e-05, - "loss": 0.0867, + "loss": 0.1113, "step": 3269 }, { "epoch": 0.91, "learning_rate": 1.3932646813247983e-05, - "loss": 0.2413, + "loss": 0.0713, "step": 3270 }, { "epoch": 0.91, "learning_rate": 1.3930791353557845e-05, - "loss": 0.1395, + "loss": 0.0429, "step": 3271 }, { "epoch": 0.91, "learning_rate": 1.3928935893867708e-05, - "loss": 0.2402, + "loss": 0.2337, "step": 3272 }, { "epoch": 0.91, "learning_rate": 1.3927080434177569e-05, - "loss": 0.0878, + "loss": 0.1197, "step": 3273 }, { "epoch": 0.91, "learning_rate": 1.392522497448743e-05, - "loss": 0.1418, + "loss": 0.0649, "step": 3274 }, { "epoch": 0.91, "learning_rate": 1.392336951479729e-05, - "loss": 0.2451, + "loss": 0.0164, "step": 3275 }, { "epoch": 0.91, "learning_rate": 1.3921514055107154e-05, - "loss": 0.2374, + "loss": 0.058, "step": 3276 }, { "epoch": 0.91, "learning_rate": 1.3919658595417016e-05, - "loss": 0.1383, + "loss": 0.0225, "step": 3277 }, { "epoch": 0.91, "learning_rate": 1.3917803135726878e-05, - "loss": 0.1395, + "loss": 0.0137, "step": 3278 }, { "epoch": 0.91, "learning_rate": 1.3915947676036738e-05, - "loss": 0.0385, + "loss": 0.0996, "step": 3279 }, { "epoch": 0.91, "learning_rate": 1.3914092216346602e-05, - "loss": 0.1408, + "loss": 0.013, "step": 3280 }, { "epoch": 0.91, "learning_rate": 1.3912236756656464e-05, - "loss": 0.0919, + "loss": 0.2657, "step": 3281 }, { "epoch": 0.91, "learning_rate": 1.3910381296966324e-05, - "loss": 0.1377, + "loss": 0.3283, "step": 3282 }, { "epoch": 0.91, "learning_rate": 1.3908525837276186e-05, - "loss": 0.0883, + "loss": 0.1002, "step": 3283 }, { "epoch": 0.91, "learning_rate": 1.390667037758605e-05, - "loss": 0.2414, + "loss": 0.1301, "step": 3284 }, { "epoch": 0.91, "learning_rate": 1.3904814917895911e-05, - "loss": 0.2948, + "loss": 0.1011, "step": 3285 }, { "epoch": 0.91, "learning_rate": 1.3902959458205771e-05, - "loss": 0.0866, + "loss": 0.0583, "step": 3286 }, { "epoch": 0.91, "learning_rate": 1.3901103998515633e-05, - "loss": 0.087, + "loss": 0.1008, "step": 3287 }, { "epoch": 0.92, "learning_rate": 1.3899248538825497e-05, - "loss": 0.2413, + "loss": 0.0993, "step": 3288 }, { "epoch": 0.92, "learning_rate": 1.3897393079135357e-05, - "loss": 0.0373, + "loss": 0.0216, "step": 3289 }, { "epoch": 0.92, "learning_rate": 1.3895537619445219e-05, - "loss": 0.088, + "loss": 0.0673, "step": 3290 }, { "epoch": 0.92, "learning_rate": 1.389368215975508e-05, - "loss": 0.1358, + "loss": 0.0715, "step": 3291 }, { "epoch": 0.92, "learning_rate": 1.3891826700064942e-05, - "loss": 0.4028, + "loss": 0.0245, "step": 3292 }, { "epoch": 0.92, "learning_rate": 1.3889971240374804e-05, - "loss": 0.1413, + "loss": 0.025, "step": 3293 }, { "epoch": 0.92, "learning_rate": 1.3888115780684666e-05, - "loss": 0.235, + "loss": 0.0793, "step": 3294 }, { "epoch": 0.92, "learning_rate": 1.3886260320994526e-05, - "loss": 0.0878, + "loss": 0.0438, "step": 3295 }, { "epoch": 0.92, "learning_rate": 1.388440486130439e-05, - "loss": 0.346, + "loss": 0.0208, "step": 3296 }, { "epoch": 0.92, "learning_rate": 1.3882549401614252e-05, - "loss": 0.139, + "loss": 0.0153, "step": 3297 }, { "epoch": 0.92, "learning_rate": 1.3880693941924112e-05, - "loss": 0.1942, + "loss": 0.0647, "step": 3298 }, { "epoch": 0.92, "learning_rate": 1.3878838482233974e-05, - "loss": 0.2438, + "loss": 0.0209, "step": 3299 }, { "epoch": 0.92, "learning_rate": 1.3876983022543837e-05, - "loss": 0.0898, + "loss": 0.06, "step": 3300 }, { "epoch": 0.92, "learning_rate": 1.38751275628537e-05, - "loss": 0.0917, + "loss": 0.1171, "step": 3301 }, { "epoch": 0.92, "learning_rate": 1.387327210316356e-05, - "loss": 0.2395, + "loss": 0.0666, "step": 3302 }, { "epoch": 0.92, "learning_rate": 1.3871416643473421e-05, - "loss": 0.1384, + "loss": 0.1569, "step": 3303 }, { "epoch": 0.92, "learning_rate": 1.3869561183783285e-05, - "loss": 0.1437, + "loss": 0.0127, "step": 3304 }, { "epoch": 0.92, "learning_rate": 1.3867705724093145e-05, - "loss": 0.1392, + "loss": 0.0099, "step": 3305 }, { "epoch": 0.92, "learning_rate": 1.3865850264403007e-05, - "loss": 0.0884, + "loss": 0.013, "step": 3306 }, { "epoch": 0.92, "learning_rate": 1.3863994804712869e-05, - "loss": 0.1927, + "loss": 0.1594, "step": 3307 }, { "epoch": 0.92, "learning_rate": 1.386213934502273e-05, - "loss": 0.1918, + "loss": 0.1088, "step": 3308 }, { "epoch": 0.92, "learning_rate": 1.3860283885332593e-05, - "loss": 0.0387, + "loss": 0.0534, "step": 3309 }, { "epoch": 0.92, "learning_rate": 1.3858428425642454e-05, - "loss": 0.141, + "loss": 0.1987, "step": 3310 }, { "epoch": 0.92, "learning_rate": 1.3856572965952315e-05, - "loss": 0.2924, + "loss": 0.119, "step": 3311 }, { "epoch": 0.92, "learning_rate": 1.3854717506262178e-05, - "loss": 0.1354, + "loss": 0.1136, "step": 3312 }, { "epoch": 0.92, "learning_rate": 1.385286204657204e-05, - "loss": 0.1429, + "loss": 0.0599, "step": 3313 }, { "epoch": 0.92, "learning_rate": 1.38510065868819e-05, - "loss": 0.1908, + "loss": 0.2213, "step": 3314 }, { "epoch": 0.92, "learning_rate": 1.3849151127191762e-05, - "loss": 0.0403, + "loss": 0.1, "step": 3315 }, { "epoch": 0.92, "learning_rate": 1.3847295667501626e-05, - "loss": 0.1903, + "loss": 0.0242, "step": 3316 }, { "epoch": 0.92, "learning_rate": 1.3845440207811488e-05, - "loss": 0.0923, + "loss": 0.0694, "step": 3317 }, { "epoch": 0.92, "learning_rate": 1.3843584748121348e-05, - "loss": 0.0388, + "loss": 0.0596, "step": 3318 }, { "epoch": 0.92, "learning_rate": 1.384172928843121e-05, - "loss": 0.1938, + "loss": 0.0878, "step": 3319 }, { "epoch": 0.92, "learning_rate": 1.3839873828741073e-05, - "loss": 0.0374, + "loss": 0.0286, "step": 3320 }, { "epoch": 0.92, "learning_rate": 1.3838018369050933e-05, - "loss": 0.0861, + "loss": 0.1957, "step": 3321 }, { "epoch": 0.92, "learning_rate": 1.3836162909360795e-05, - "loss": 0.1377, + "loss": 0.02, "step": 3322 }, { "epoch": 0.92, "learning_rate": 1.3834307449670657e-05, - "loss": 0.2479, + "loss": 0.0862, "step": 3323 }, { "epoch": 0.93, "learning_rate": 1.383245198998052e-05, - "loss": 0.0871, + "loss": 0.1362, "step": 3324 }, { "epoch": 0.93, "learning_rate": 1.383059653029038e-05, - "loss": 0.0869, + "loss": 0.0224, "step": 3325 }, { "epoch": 0.93, "learning_rate": 1.3828741070600243e-05, - "loss": 0.0853, + "loss": 0.0559, "step": 3326 }, { "epoch": 0.93, "learning_rate": 1.3826885610910103e-05, - "loss": 0.2427, + "loss": 0.0609, "step": 3327 }, { "epoch": 0.93, "learning_rate": 1.3825030151219966e-05, - "loss": 0.1438, + "loss": 0.0553, "step": 3328 }, { "epoch": 0.93, "learning_rate": 1.3823174691529828e-05, - "loss": 0.0319, + "loss": 0.1048, "step": 3329 }, { "epoch": 0.93, "learning_rate": 1.382131923183969e-05, - "loss": 0.0311, + "loss": 0.209, "step": 3330 }, { "epoch": 0.93, "learning_rate": 1.381946377214955e-05, - "loss": 0.1961, + "loss": 0.0997, "step": 3331 }, { "epoch": 0.93, "learning_rate": 1.3817608312459414e-05, - "loss": 0.3567, + "loss": 0.1355, "step": 3332 }, { "epoch": 0.93, "learning_rate": 1.3815752852769276e-05, - "loss": 0.0858, + "loss": 0.0615, "step": 3333 }, { "epoch": 0.93, "learning_rate": 1.3813897393079136e-05, - "loss": 0.086, + "loss": 0.1038, "step": 3334 }, { "epoch": 0.93, "learning_rate": 1.3812041933388998e-05, - "loss": 0.1426, + "loss": 0.1204, "step": 3335 }, { "epoch": 0.93, "learning_rate": 1.3810186473698861e-05, - "loss": 0.1426, + "loss": 0.0807, "step": 3336 }, { "epoch": 0.93, "learning_rate": 1.3808331014008722e-05, - "loss": 0.3072, + "loss": 0.2308, "step": 3337 }, { "epoch": 0.93, "learning_rate": 1.3806475554318583e-05, - "loss": 0.195, + "loss": 0.1033, "step": 3338 }, { "epoch": 0.93, "learning_rate": 1.3804620094628445e-05, - "loss": 0.0302, + "loss": 0.0938, "step": 3339 }, { "epoch": 0.93, "learning_rate": 1.3802764634938305e-05, - "loss": 0.1419, + "loss": 0.0644, "step": 3340 }, { "epoch": 0.93, "learning_rate": 1.3800909175248169e-05, - "loss": 0.0844, + "loss": 0.0618, "step": 3341 }, { "epoch": 0.93, "learning_rate": 1.3799053715558031e-05, - "loss": 0.3003, + "loss": 0.0919, "step": 3342 }, { "epoch": 0.93, "learning_rate": 1.3797198255867891e-05, - "loss": 0.0859, + "loss": 0.0491, "step": 3343 }, { "epoch": 0.93, "learning_rate": 1.3795342796177753e-05, - "loss": 0.0318, + "loss": 0.0568, "step": 3344 }, { "epoch": 0.93, "learning_rate": 1.3793487336487616e-05, - "loss": 0.3016, + "loss": 0.0505, "step": 3345 }, { "epoch": 0.93, "learning_rate": 1.3791631876797478e-05, - "loss": 0.1921, + "loss": 0.0896, "step": 3346 }, { "epoch": 0.93, "learning_rate": 1.3789776417107339e-05, - "loss": 0.194, + "loss": 0.0983, "step": 3347 }, { "epoch": 0.93, "learning_rate": 1.37879209574172e-05, - "loss": 0.0853, + "loss": 0.0428, "step": 3348 }, { "epoch": 0.93, "learning_rate": 1.3786065497727064e-05, - "loss": 0.1417, + "loss": 0.2701, "step": 3349 }, { "epoch": 0.93, "learning_rate": 1.3784210038036924e-05, - "loss": 0.2429, + "loss": 0.2988, "step": 3350 }, { "epoch": 0.93, "learning_rate": 1.3782354578346786e-05, - "loss": 0.3014, + "loss": 0.0535, "step": 3351 }, { "epoch": 0.93, "learning_rate": 1.3780499118656648e-05, - "loss": 0.1873, + "loss": 0.1732, "step": 3352 }, { "epoch": 0.93, "learning_rate": 1.3778643658966511e-05, - "loss": 0.1392, + "loss": 0.1498, "step": 3353 }, { "epoch": 0.93, "learning_rate": 1.3776788199276372e-05, - "loss": 0.2991, + "loss": 0.0263, "step": 3354 }, { "epoch": 0.93, "learning_rate": 1.3774932739586234e-05, - "loss": 0.0873, + "loss": 0.1087, "step": 3355 }, { "epoch": 0.93, "learning_rate": 1.3773077279896094e-05, - "loss": 0.1896, + "loss": 0.0559, "step": 3356 }, { "epoch": 0.93, "learning_rate": 1.3771221820205957e-05, - "loss": 0.0896, + "loss": 0.073, "step": 3357 }, { "epoch": 0.93, "learning_rate": 1.3769366360515819e-05, - "loss": 0.1902, + "loss": 0.0879, "step": 3358 }, { "epoch": 0.93, "learning_rate": 1.3767510900825681e-05, - "loss": 0.2353, + "loss": 0.061, "step": 3359 }, { "epoch": 0.94, "learning_rate": 1.3765655441135541e-05, - "loss": 0.1324, + "loss": 0.1546, "step": 3360 }, { "epoch": 0.94, "learning_rate": 1.3763799981445405e-05, - "loss": 0.1928, + "loss": 0.1254, "step": 3361 }, { "epoch": 0.94, "learning_rate": 1.3761944521755267e-05, - "loss": 0.0949, + "loss": 0.0295, "step": 3362 }, { "epoch": 0.94, "learning_rate": 1.3760089062065127e-05, - "loss": 0.1427, + "loss": 0.0497, "step": 3363 }, { "epoch": 0.94, "learning_rate": 1.3758233602374989e-05, - "loss": 0.1873, + "loss": 0.0594, "step": 3364 }, { "epoch": 0.94, "learning_rate": 1.3756378142684852e-05, - "loss": 0.2811, + "loss": 0.0269, "step": 3365 }, { "epoch": 0.94, "learning_rate": 1.3754522682994712e-05, - "loss": 0.1455, + "loss": 0.0667, "step": 3366 }, { "epoch": 0.94, "learning_rate": 1.3752667223304574e-05, - "loss": 0.0947, + "loss": 0.0234, "step": 3367 }, { "epoch": 0.94, "learning_rate": 1.3750811763614436e-05, - "loss": 0.0903, + "loss": 0.1025, "step": 3368 }, { "epoch": 0.94, "learning_rate": 1.37489563039243e-05, - "loss": 0.046, + "loss": 0.0203, "step": 3369 }, { "epoch": 0.94, "learning_rate": 1.374710084423416e-05, - "loss": 0.2396, + "loss": 0.1105, "step": 3370 }, { "epoch": 0.94, "learning_rate": 1.3745245384544022e-05, - "loss": 0.0895, + "loss": 0.1616, "step": 3371 }, { "epoch": 0.94, "learning_rate": 1.3743389924853882e-05, - "loss": 0.0421, + "loss": 0.0631, "step": 3372 }, { "epoch": 0.94, "learning_rate": 1.3741534465163745e-05, - "loss": 0.1395, + "loss": 0.1521, "step": 3373 }, { "epoch": 0.94, "learning_rate": 1.3739679005473607e-05, - "loss": 0.1902, + "loss": 0.0173, "step": 3374 }, { "epoch": 0.94, "learning_rate": 1.373782354578347e-05, - "loss": 0.1411, + "loss": 0.0642, "step": 3375 }, { "epoch": 0.94, "learning_rate": 1.373596808609333e-05, - "loss": 0.2926, + "loss": 0.1252, "step": 3376 }, { "epoch": 0.94, "learning_rate": 1.3734112626403193e-05, - "loss": 0.0373, + "loss": 0.0613, "step": 3377 }, { "epoch": 0.94, "learning_rate": 1.3732257166713055e-05, - "loss": 0.2455, + "loss": 0.0935, "step": 3378 }, { "epoch": 0.94, "learning_rate": 1.3730401707022915e-05, - "loss": 0.1893, + "loss": 0.0144, "step": 3379 }, { "epoch": 0.94, "learning_rate": 1.3728546247332777e-05, - "loss": 0.0892, + "loss": 0.0699, "step": 3380 }, { "epoch": 0.94, "learning_rate": 1.372669078764264e-05, - "loss": 0.0358, + "loss": 0.0177, "step": 3381 }, { "epoch": 0.94, "learning_rate": 1.3724835327952502e-05, - "loss": 0.1398, + "loss": 0.1725, "step": 3382 }, { "epoch": 0.94, "learning_rate": 1.3722979868262362e-05, - "loss": 0.1424, + "loss": 0.0939, "step": 3383 }, { "epoch": 0.94, "learning_rate": 1.3721124408572224e-05, - "loss": 0.0858, + "loss": 0.0382, "step": 3384 }, { "epoch": 0.94, "learning_rate": 1.3719268948882088e-05, - "loss": 0.1416, + "loss": 0.119, "step": 3385 }, { "epoch": 0.94, "learning_rate": 1.3717413489191948e-05, - "loss": 0.139, + "loss": 0.0584, "step": 3386 }, { "epoch": 0.94, "learning_rate": 1.371555802950181e-05, - "loss": 0.0844, + "loss": 0.0364, "step": 3387 }, { "epoch": 0.94, "learning_rate": 1.3713702569811672e-05, - "loss": 0.0846, + "loss": 0.0241, "step": 3388 }, { "epoch": 0.94, "learning_rate": 1.3711847110121534e-05, - "loss": 0.1994, + "loss": 0.0502, "step": 3389 }, { "epoch": 0.94, "learning_rate": 1.3709991650431396e-05, - "loss": 0.1359, + "loss": 0.0579, "step": 3390 }, { "epoch": 0.94, "learning_rate": 1.3708136190741257e-05, - "loss": 0.242, + "loss": 0.1322, "step": 3391 }, { "epoch": 0.94, "learning_rate": 1.3706280731051118e-05, - "loss": 0.1948, + "loss": 0.0634, "step": 3392 }, { "epoch": 0.94, "learning_rate": 1.3704425271360981e-05, - "loss": 0.082, + "loss": 0.0646, "step": 3393 }, { "epoch": 0.94, "learning_rate": 1.3702569811670843e-05, - "loss": 0.0307, + "loss": 0.0538, "step": 3394 }, { "epoch": 0.94, "learning_rate": 1.3700714351980703e-05, - "loss": 0.0848, + "loss": 0.0211, "step": 3395 }, { "epoch": 0.95, "learning_rate": 1.3698858892290565e-05, - "loss": 0.0869, + "loss": 0.0614, "step": 3396 }, { "epoch": 0.95, "learning_rate": 1.3697003432600429e-05, - "loss": 0.2449, + "loss": 0.0874, "step": 3397 }, { "epoch": 0.95, "learning_rate": 1.369514797291029e-05, - "loss": 0.0858, + "loss": 0.1349, "step": 3398 }, { "epoch": 0.95, "learning_rate": 1.369329251322015e-05, - "loss": 0.138, + "loss": 0.0148, "step": 3399 }, { "epoch": 0.95, "learning_rate": 1.3691437053530013e-05, - "loss": 0.1909, + "loss": 0.0859, "step": 3400 }, { "epoch": 0.95, "learning_rate": 1.3689581593839876e-05, - "loss": 0.1912, + "loss": 0.1305, "step": 3401 }, { "epoch": 0.95, "learning_rate": 1.3687726134149736e-05, - "loss": 0.1406, + "loss": 0.0572, "step": 3402 }, { "epoch": 0.95, "learning_rate": 1.3685870674459598e-05, - "loss": 0.0826, + "loss": 0.1113, "step": 3403 }, { "epoch": 0.95, "learning_rate": 1.368401521476946e-05, - "loss": 0.1382, + "loss": 0.0339, "step": 3404 }, { "epoch": 0.95, "learning_rate": 1.3682159755079324e-05, - "loss": 0.251, + "loss": 0.1702, "step": 3405 }, { "epoch": 0.95, "learning_rate": 1.3680304295389184e-05, - "loss": 0.2499, + "loss": 0.0596, "step": 3406 }, { "epoch": 0.95, "learning_rate": 1.3678448835699046e-05, - "loss": 0.2451, + "loss": 0.0581, "step": 3407 }, { "epoch": 0.95, "learning_rate": 1.3676593376008906e-05, - "loss": 0.0869, + "loss": 0.016, "step": 3408 }, { "epoch": 0.95, "learning_rate": 1.367473791631877e-05, - "loss": 0.3028, + "loss": 0.0744, "step": 3409 }, { "epoch": 0.95, "learning_rate": 1.3672882456628631e-05, - "loss": 0.0834, + "loss": 0.0669, "step": 3410 }, { "epoch": 0.95, "learning_rate": 1.3671026996938493e-05, - "loss": 0.1399, + "loss": 0.06, "step": 3411 }, { "epoch": 0.95, "learning_rate": 1.3669171537248353e-05, - "loss": 0.1916, + "loss": 0.0483, "step": 3412 }, { "epoch": 0.95, "learning_rate": 1.3667316077558217e-05, - "loss": 0.0337, + "loss": 0.0103, "step": 3413 }, { "epoch": 0.95, "learning_rate": 1.3665460617868079e-05, - "loss": 0.246, + "loss": 0.098, "step": 3414 }, { "epoch": 0.95, "learning_rate": 1.3663605158177939e-05, - "loss": 0.1391, + "loss": 0.0542, "step": 3415 }, { "epoch": 0.95, "learning_rate": 1.36617496984878e-05, - "loss": 0.0884, + "loss": 0.0654, "step": 3416 }, { "epoch": 0.95, "learning_rate": 1.3659894238797664e-05, - "loss": 0.1416, + "loss": 0.0991, "step": 3417 }, { "epoch": 0.95, "learning_rate": 1.3658038779107525e-05, - "loss": 0.2965, + "loss": 0.1221, "step": 3418 }, { "epoch": 0.95, "learning_rate": 1.3656183319417386e-05, - "loss": 0.1434, + "loss": 0.0484, "step": 3419 }, { "epoch": 0.95, "learning_rate": 1.3654327859727248e-05, - "loss": 0.1385, + "loss": 0.1649, "step": 3420 }, { "epoch": 0.95, "learning_rate": 1.3652472400037112e-05, - "loss": 0.2408, + "loss": 0.1003, "step": 3421 }, { "epoch": 0.95, "learning_rate": 1.3650616940346972e-05, - "loss": 0.1852, + "loss": 0.0141, "step": 3422 }, { "epoch": 0.95, "learning_rate": 1.3648761480656834e-05, - "loss": 0.0396, + "loss": 0.0497, "step": 3423 }, { "epoch": 0.95, "learning_rate": 1.3646906020966694e-05, - "loss": 0.0911, + "loss": 0.0158, "step": 3424 }, { "epoch": 0.95, "learning_rate": 1.3645050561276558e-05, - "loss": 0.1923, + "loss": 0.0125, "step": 3425 }, { "epoch": 0.95, "learning_rate": 1.364319510158642e-05, - "loss": 0.0874, + "loss": 0.0517, "step": 3426 }, { "epoch": 0.95, "learning_rate": 1.3641339641896281e-05, - "loss": 0.0377, + "loss": 0.0303, "step": 3427 }, { "epoch": 0.95, "learning_rate": 1.3639484182206142e-05, - "loss": 0.0903, + "loss": 0.0194, "step": 3428 }, { "epoch": 0.95, "learning_rate": 1.3637628722516005e-05, - "loss": 0.1375, + "loss": 0.0692, "step": 3429 }, { "epoch": 0.95, "learning_rate": 1.3635773262825867e-05, - "loss": 0.2442, + "loss": 0.0125, "step": 3430 }, { "epoch": 0.95, "learning_rate": 1.3633917803135727e-05, - "loss": 0.0361, + "loss": 0.056, "step": 3431 }, { "epoch": 0.96, "learning_rate": 1.3632062343445589e-05, - "loss": 0.1906, + "loss": 0.2221, "step": 3432 }, { "epoch": 0.96, "learning_rate": 1.3630206883755453e-05, - "loss": 0.1384, + "loss": 0.1815, "step": 3433 }, { "epoch": 0.96, "learning_rate": 1.3628351424065313e-05, - "loss": 0.1428, + "loss": 0.0575, "step": 3434 }, { "epoch": 0.96, "learning_rate": 1.3626495964375175e-05, - "loss": 0.0847, + "loss": 0.1275, "step": 3435 }, { "epoch": 0.96, "learning_rate": 1.3624640504685037e-05, - "loss": 0.1422, + "loss": 0.2304, "step": 3436 }, { "epoch": 0.96, "learning_rate": 1.36227850449949e-05, - "loss": 0.139, + "loss": 0.0539, "step": 3437 }, { "epoch": 0.96, "learning_rate": 1.362092958530476e-05, - "loss": 0.1934, + "loss": 0.0532, "step": 3438 }, { "epoch": 0.96, "learning_rate": 1.3619074125614622e-05, - "loss": 0.239, + "loss": 0.0516, "step": 3439 }, { "epoch": 0.96, "learning_rate": 1.3617218665924482e-05, - "loss": 0.1836, + "loss": 0.0192, "step": 3440 }, { "epoch": 0.96, "learning_rate": 1.3615363206234346e-05, - "loss": 0.3534, + "loss": 0.0213, "step": 3441 }, { "epoch": 0.96, "learning_rate": 1.3613507746544208e-05, - "loss": 0.1379, + "loss": 0.1157, "step": 3442 }, { "epoch": 0.96, "learning_rate": 1.361165228685407e-05, - "loss": 0.1859, + "loss": 0.0221, "step": 3443 }, { "epoch": 0.96, "learning_rate": 1.360979682716393e-05, - "loss": 0.3026, + "loss": 0.1822, "step": 3444 }, { "epoch": 0.96, "learning_rate": 1.3607941367473793e-05, - "loss": 0.359, + "loss": 0.0714, "step": 3445 }, { "epoch": 0.96, "learning_rate": 1.3606085907783655e-05, - "loss": 0.1393, + "loss": 0.0785, "step": 3446 }, { "epoch": 0.96, "learning_rate": 1.3604230448093515e-05, - "loss": 0.0392, + "loss": 0.0882, "step": 3447 }, { "epoch": 0.96, "learning_rate": 1.3602374988403377e-05, - "loss": 0.1456, + "loss": 0.1014, "step": 3448 }, { "epoch": 0.96, "learning_rate": 1.3600519528713241e-05, - "loss": 0.1891, + "loss": 0.0208, "step": 3449 }, { "epoch": 0.96, "learning_rate": 1.3598664069023103e-05, - "loss": 0.1902, + "loss": 0.0248, "step": 3450 }, { "epoch": 0.96, "learning_rate": 1.3596808609332963e-05, - "loss": 0.1393, + "loss": 0.101, "step": 3451 }, { "epoch": 0.96, "learning_rate": 1.3594953149642825e-05, - "loss": 0.0803, + "loss": 0.0309, "step": 3452 }, { "epoch": 0.96, "learning_rate": 1.3593097689952688e-05, - "loss": 0.1001, + "loss": 0.0605, "step": 3453 }, { "epoch": 0.96, "learning_rate": 1.3591242230262549e-05, - "loss": 0.1968, + "loss": 0.0901, "step": 3454 }, { "epoch": 0.96, "learning_rate": 1.358938677057241e-05, - "loss": 0.2399, + "loss": 0.0896, "step": 3455 }, { "epoch": 0.96, "learning_rate": 1.3587531310882272e-05, - "loss": 0.1909, + "loss": 0.0694, "step": 3456 }, { "epoch": 0.96, "learning_rate": 1.3585675851192134e-05, - "loss": 0.1411, + "loss": 0.1677, "step": 3457 }, { "epoch": 0.96, "learning_rate": 1.3583820391501996e-05, - "loss": 0.0922, + "loss": 0.0524, "step": 3458 }, { "epoch": 0.96, "learning_rate": 1.3581964931811858e-05, - "loss": 0.1929, + "loss": 0.0658, "step": 3459 }, { "epoch": 0.96, "learning_rate": 1.3580109472121718e-05, - "loss": 0.0965, + "loss": 0.1023, "step": 3460 }, { "epoch": 0.96, "learning_rate": 1.3578254012431582e-05, - "loss": 0.0426, + "loss": 0.1023, "step": 3461 }, { "epoch": 0.96, "learning_rate": 1.3576398552741443e-05, - "loss": 0.0931, + "loss": 0.0253, "step": 3462 }, { "epoch": 0.96, "learning_rate": 1.3574543093051304e-05, - "loss": 0.1967, + "loss": 0.0332, "step": 3463 }, { "epoch": 0.96, "learning_rate": 1.3572687633361166e-05, - "loss": 0.1899, + "loss": 0.0268, "step": 3464 }, { "epoch": 0.96, "learning_rate": 1.3570832173671029e-05, - "loss": 0.0375, + "loss": 0.0655, "step": 3465 }, { "epoch": 0.96, "learning_rate": 1.3568976713980891e-05, - "loss": 0.0871, + "loss": 0.0936, "step": 3466 }, { "epoch": 0.96, "learning_rate": 1.3567121254290751e-05, - "loss": 0.141, + "loss": 0.0253, "step": 3467 }, { "epoch": 0.97, "learning_rate": 1.3565265794600613e-05, - "loss": 0.1936, + "loss": 0.1215, "step": 3468 }, { "epoch": 0.97, "learning_rate": 1.3563410334910477e-05, - "loss": 0.244, + "loss": 0.0603, "step": 3469 }, { "epoch": 0.97, "learning_rate": 1.3561554875220337e-05, - "loss": 0.1386, + "loss": 0.0773, "step": 3470 }, { "epoch": 0.97, "learning_rate": 1.3559699415530199e-05, - "loss": 0.1956, + "loss": 0.0614, "step": 3471 }, { "epoch": 0.97, "learning_rate": 1.355784395584006e-05, - "loss": 0.0334, + "loss": 0.0988, "step": 3472 }, { "epoch": 0.97, "learning_rate": 1.3555988496149924e-05, - "loss": 0.139, + "loss": 0.1275, "step": 3473 }, { "epoch": 0.97, "learning_rate": 1.3554133036459784e-05, - "loss": 0.3525, + "loss": 0.0663, "step": 3474 }, { "epoch": 0.97, "learning_rate": 1.3552277576769646e-05, - "loss": 0.0349, + "loss": 0.1246, "step": 3475 }, { "epoch": 0.97, "learning_rate": 1.3550422117079506e-05, - "loss": 0.1413, + "loss": 0.102, "step": 3476 }, { "epoch": 0.97, "learning_rate": 1.354856665738937e-05, - "loss": 0.142, + "loss": 0.0618, "step": 3477 }, { "epoch": 0.97, "learning_rate": 1.3546711197699232e-05, - "loss": 0.1411, + "loss": 0.1284, "step": 3478 }, { "epoch": 0.97, "learning_rate": 1.3544855738009094e-05, - "loss": 0.1381, + "loss": 0.015, "step": 3479 }, { "epoch": 0.97, "learning_rate": 1.3543000278318954e-05, - "loss": 0.2516, + "loss": 0.0191, "step": 3480 }, { "epoch": 0.97, "learning_rate": 1.3541144818628817e-05, - "loss": 0.287, + "loss": 0.1104, "step": 3481 }, { "epoch": 0.97, "learning_rate": 1.353928935893868e-05, - "loss": 0.0331, + "loss": 0.0601, "step": 3482 }, { "epoch": 0.97, "learning_rate": 1.353743389924854e-05, - "loss": 0.0816, + "loss": 0.1219, "step": 3483 }, { "epoch": 0.97, "learning_rate": 1.3535578439558401e-05, - "loss": 0.1385, + "loss": 0.2037, "step": 3484 }, { "epoch": 0.97, "learning_rate": 1.3533722979868265e-05, - "loss": 0.0328, + "loss": 0.0222, "step": 3485 }, { "epoch": 0.97, "learning_rate": 1.3531867520178125e-05, - "loss": 0.2485, + "loss": 0.0189, "step": 3486 }, { "epoch": 0.97, "learning_rate": 1.3530012060487987e-05, - "loss": 0.0844, + "loss": 0.0418, "step": 3487 }, { "epoch": 0.97, "learning_rate": 1.3528156600797849e-05, - "loss": 0.1958, + "loss": 0.1183, "step": 3488 }, { "epoch": 0.97, "learning_rate": 1.3526301141107712e-05, - "loss": 0.087, + "loss": 0.1521, "step": 3489 }, { "epoch": 0.97, "learning_rate": 1.3524445681417572e-05, - "loss": 0.1386, + "loss": 0.0109, "step": 3490 }, { "epoch": 0.97, "learning_rate": 1.3522590221727434e-05, - "loss": 0.1929, + "loss": 0.0588, "step": 3491 }, { "epoch": 0.97, "learning_rate": 1.3520734762037295e-05, - "loss": 0.1386, + "loss": 0.0432, "step": 3492 }, { "epoch": 0.97, "learning_rate": 1.3518879302347156e-05, - "loss": 0.0314, + "loss": 0.2196, "step": 3493 }, { "epoch": 0.97, "learning_rate": 1.351702384265702e-05, - "loss": 0.1384, + "loss": 0.0895, "step": 3494 }, { "epoch": 0.97, "learning_rate": 1.3515168382966882e-05, - "loss": 0.3039, + "loss": 0.0182, "step": 3495 }, { "epoch": 0.97, "learning_rate": 1.3513312923276742e-05, - "loss": 0.0874, + "loss": 0.0545, "step": 3496 }, { "epoch": 0.97, "learning_rate": 1.3511457463586604e-05, - "loss": 0.1403, + "loss": 0.0946, "step": 3497 }, { "epoch": 0.97, "learning_rate": 1.3509602003896467e-05, - "loss": 0.0877, + "loss": 0.0706, "step": 3498 }, { "epoch": 0.97, "learning_rate": 1.3507746544206328e-05, - "loss": 0.0849, + "loss": 0.0577, "step": 3499 }, { "epoch": 0.97, "learning_rate": 1.350589108451619e-05, - "loss": 0.1372, + "loss": 0.1129, "step": 3500 }, { "epoch": 0.97, "learning_rate": 1.3504035624826051e-05, - "loss": 0.1978, + "loss": 0.0593, "step": 3501 }, { "epoch": 0.97, "learning_rate": 1.3502180165135915e-05, - "loss": 0.245, + "loss": 0.2103, "step": 3502 }, { "epoch": 0.97, "learning_rate": 1.3500324705445775e-05, - "loss": 0.1931, + "loss": 0.0643, "step": 3503 }, { "epoch": 0.98, "learning_rate": 1.3498469245755637e-05, - "loss": 0.3005, + "loss": 0.0627, "step": 3504 }, { "epoch": 0.98, "learning_rate": 1.3496613786065497e-05, - "loss": 0.0313, + "loss": 0.0613, "step": 3505 }, { "epoch": 0.98, "learning_rate": 1.349475832637536e-05, - "loss": 0.1399, + "loss": 0.1242, "step": 3506 }, { "epoch": 0.98, "learning_rate": 1.3492902866685223e-05, - "loss": 0.0879, + "loss": 0.099, "step": 3507 }, { "epoch": 0.98, "learning_rate": 1.3491047406995084e-05, - "loss": 0.1889, + "loss": 0.0185, "step": 3508 }, { "epoch": 0.98, "learning_rate": 1.3489191947304945e-05, - "loss": 0.1378, + "loss": 0.0192, "step": 3509 }, { "epoch": 0.98, "learning_rate": 1.3487336487614808e-05, - "loss": 0.0906, + "loss": 0.1411, "step": 3510 }, { "epoch": 0.98, "learning_rate": 1.348548102792467e-05, - "loss": 0.0892, + "loss": 0.0196, "step": 3511 }, { "epoch": 0.98, "learning_rate": 1.348362556823453e-05, - "loss": 0.143, + "loss": 0.095, "step": 3512 }, { "epoch": 0.98, "learning_rate": 1.3481770108544392e-05, - "loss": 0.0884, + "loss": 0.0725, "step": 3513 }, { "epoch": 0.98, "learning_rate": 1.3479914648854256e-05, - "loss": 0.1385, + "loss": 0.016, "step": 3514 }, { "epoch": 0.98, "learning_rate": 1.3478059189164116e-05, - "loss": 0.1411, + "loss": 0.1284, "step": 3515 }, { "epoch": 0.98, "learning_rate": 1.3476203729473978e-05, - "loss": 0.0868, + "loss": 0.1985, "step": 3516 }, { "epoch": 0.98, "learning_rate": 1.347434826978384e-05, - "loss": 0.0859, + "loss": 0.089, "step": 3517 }, { "epoch": 0.98, "learning_rate": 1.3472492810093703e-05, - "loss": 0.0324, + "loss": 0.0202, "step": 3518 }, { "epoch": 0.98, "learning_rate": 1.3470637350403563e-05, - "loss": 0.1945, + "loss": 0.0543, "step": 3519 }, { "epoch": 0.98, "learning_rate": 1.3468781890713425e-05, - "loss": 0.0883, + "loss": 0.0529, "step": 3520 }, { "epoch": 0.98, "learning_rate": 1.3466926431023285e-05, - "loss": 0.1401, + "loss": 0.0326, "step": 3521 }, { "epoch": 0.98, "learning_rate": 1.3465070971333149e-05, - "loss": 0.1959, + "loss": 0.0555, "step": 3522 }, { "epoch": 0.98, "learning_rate": 1.346321551164301e-05, - "loss": 0.0297, + "loss": 0.0918, "step": 3523 }, { "epoch": 0.98, "learning_rate": 1.3461360051952873e-05, - "loss": 0.1369, + "loss": 0.056, "step": 3524 }, { "epoch": 0.98, "learning_rate": 1.3459504592262733e-05, - "loss": 0.1929, + "loss": 0.0262, "step": 3525 }, { "epoch": 0.98, "learning_rate": 1.3457649132572596e-05, - "loss": 0.1372, + "loss": 0.145, "step": 3526 }, { "epoch": 0.98, "learning_rate": 1.3455793672882458e-05, - "loss": 0.0843, + "loss": 0.2898, "step": 3527 }, { "epoch": 0.98, "learning_rate": 1.3453938213192318e-05, - "loss": 0.1398, + "loss": 0.0646, "step": 3528 }, { "epoch": 0.98, "learning_rate": 1.345208275350218e-05, - "loss": 0.0275, + "loss": 0.0091, "step": 3529 }, { "epoch": 0.98, "learning_rate": 1.3450227293812044e-05, - "loss": 0.0844, + "loss": 0.0535, "step": 3530 }, { "epoch": 0.98, "learning_rate": 1.3448371834121906e-05, - "loss": 0.0838, + "loss": 0.0679, "step": 3531 }, { "epoch": 0.98, "learning_rate": 1.3446516374431766e-05, - "loss": 0.1427, + "loss": 0.0908, "step": 3532 }, { "epoch": 0.98, "learning_rate": 1.3444660914741628e-05, - "loss": 0.3025, + "loss": 0.0977, "step": 3533 }, { "epoch": 0.98, "learning_rate": 1.3442805455051491e-05, - "loss": 0.028, + "loss": 0.0465, "step": 3534 }, { "epoch": 0.98, "learning_rate": 1.3440949995361352e-05, - "loss": 0.0824, + "loss": 0.1136, "step": 3535 }, { "epoch": 0.98, "learning_rate": 1.3439094535671213e-05, - "loss": 0.2548, + "loss": 0.101, "step": 3536 }, { "epoch": 0.98, "learning_rate": 1.3437239075981074e-05, - "loss": 0.252, + "loss": 0.0954, "step": 3537 }, { "epoch": 0.98, "learning_rate": 1.3435383616290937e-05, - "loss": 0.0836, + "loss": 0.0843, "step": 3538 }, { "epoch": 0.98, "learning_rate": 1.3433528156600799e-05, - "loss": 0.0271, + "loss": 0.0544, "step": 3539 }, { "epoch": 0.99, "learning_rate": 1.3431672696910661e-05, - "loss": 0.1385, + "loss": 0.0256, "step": 3540 }, { "epoch": 0.99, "learning_rate": 1.3429817237220521e-05, - "loss": 0.0278, + "loss": 0.06, "step": 3541 }, { "epoch": 0.99, "learning_rate": 1.3427961777530385e-05, - "loss": 0.0854, + "loss": 0.02, "step": 3542 }, { "epoch": 0.99, "learning_rate": 1.3426106317840247e-05, - "loss": 0.0852, + "loss": 0.1058, "step": 3543 }, { "epoch": 0.99, "learning_rate": 1.3424250858150107e-05, - "loss": 0.0833, + "loss": 0.2259, "step": 3544 }, { "epoch": 0.99, "learning_rate": 1.3422395398459969e-05, - "loss": 0.0276, + "loss": 0.0209, "step": 3545 }, { "epoch": 0.99, "learning_rate": 1.3420539938769832e-05, - "loss": 0.0854, + "loss": 0.0153, "step": 3546 }, { "epoch": 0.99, "learning_rate": 1.3418684479079694e-05, - "loss": 0.0838, + "loss": 0.0203, "step": 3547 }, { "epoch": 0.99, "learning_rate": 1.3416829019389554e-05, - "loss": 0.0832, + "loss": 0.0659, "step": 3548 }, { "epoch": 0.99, "learning_rate": 1.3414973559699416e-05, - "loss": 0.1411, + "loss": 0.2351, "step": 3549 }, { "epoch": 0.99, "learning_rate": 1.341311810000928e-05, - "loss": 0.083, + "loss": 0.0955, "step": 3550 }, { "epoch": 0.99, "learning_rate": 1.341126264031914e-05, - "loss": 0.1972, + "loss": 0.068, "step": 3551 }, { "epoch": 0.99, "learning_rate": 1.3409407180629002e-05, - "loss": 0.0254, + "loss": 0.1686, "step": 3552 }, { "epoch": 0.99, "learning_rate": 1.3407551720938864e-05, - "loss": 0.0231, + "loss": 0.0088, "step": 3553 }, { "epoch": 0.99, "learning_rate": 1.3405696261248725e-05, - "loss": 0.0808, + "loss": 0.065, "step": 3554 }, { "epoch": 0.99, "learning_rate": 1.3403840801558587e-05, - "loss": 0.2561, + "loss": 0.0766, "step": 3555 }, { "epoch": 0.99, "learning_rate": 1.340198534186845e-05, - "loss": 0.1996, + "loss": 0.0211, "step": 3556 }, { "epoch": 0.99, "learning_rate": 1.340012988217831e-05, - "loss": 0.0821, + "loss": 0.0241, "step": 3557 }, { "epoch": 0.99, "learning_rate": 1.3398274422488173e-05, - "loss": 0.0811, + "loss": 0.0776, "step": 3558 }, { "epoch": 0.99, "learning_rate": 1.3396418962798035e-05, - "loss": 0.1391, + "loss": 0.016, "step": 3559 }, { "epoch": 0.99, "learning_rate": 1.3394563503107895e-05, - "loss": 0.1929, + "loss": 0.0786, "step": 3560 }, { "epoch": 0.99, "learning_rate": 1.3392708043417757e-05, - "loss": 0.2025, + "loss": 0.1187, "step": 3561 }, { "epoch": 0.99, "learning_rate": 1.339085258372762e-05, - "loss": 0.1393, + "loss": 0.2772, "step": 3562 }, { "epoch": 0.99, "learning_rate": 1.3388997124037482e-05, - "loss": 0.0849, + "loss": 0.0181, "step": 3563 }, { "epoch": 0.99, "learning_rate": 1.3387141664347342e-05, - "loss": 0.0232, + "loss": 0.0759, "step": 3564 }, { "epoch": 0.99, "learning_rate": 1.3385286204657204e-05, - "loss": 0.1371, + "loss": 0.0224, "step": 3565 }, { "epoch": 0.99, "learning_rate": 1.3383430744967068e-05, - "loss": 0.1982, + "loss": 0.0587, "step": 3566 }, { "epoch": 0.99, "learning_rate": 1.3381575285276928e-05, - "loss": 0.2008, + "loss": 0.0149, "step": 3567 }, { "epoch": 0.99, "learning_rate": 1.337971982558679e-05, - "loss": 0.0822, + "loss": 0.0123, "step": 3568 }, { "epoch": 0.99, "learning_rate": 1.3377864365896652e-05, - "loss": 0.1973, + "loss": 0.1851, "step": 3569 }, { "epoch": 0.99, "learning_rate": 1.3376008906206515e-05, - "loss": 0.0255, + "loss": 0.0362, "step": 3570 }, { "epoch": 0.99, "learning_rate": 1.3374153446516376e-05, - "loss": 0.1394, + "loss": 0.0687, "step": 3571 }, { "epoch": 0.99, "learning_rate": 1.3372297986826237e-05, - "loss": 0.0808, + "loss": 0.1331, "step": 3572 }, { "epoch": 0.99, "learning_rate": 1.3370442527136098e-05, - "loss": 0.1394, + "loss": 0.2052, "step": 3573 }, { "epoch": 0.99, "learning_rate": 1.3368587067445961e-05, - "loss": 0.2551, + "loss": 0.1056, "step": 3574 }, { "epoch": 0.99, "learning_rate": 1.3366731607755823e-05, - "loss": 0.1438, + "loss": 0.02, "step": 3575 }, { "epoch": 1.0, "learning_rate": 1.3364876148065685e-05, - "loss": 0.1408, + "loss": 0.1132, "step": 3576 }, { "epoch": 1.0, "learning_rate": 1.3363020688375545e-05, - "loss": 0.142, + "loss": 0.0714, "step": 3577 }, { "epoch": 1.0, "learning_rate": 1.3361165228685409e-05, - "loss": 0.1431, + "loss": 0.0885, "step": 3578 }, { "epoch": 1.0, "learning_rate": 1.335930976899527e-05, - "loss": 0.3031, + "loss": 0.0602, "step": 3579 }, { "epoch": 1.0, "learning_rate": 1.335745430930513e-05, - "loss": 0.03, + "loss": 0.1419, "step": 3580 }, { "epoch": 1.0, "learning_rate": 1.3355598849614993e-05, - "loss": 0.0291, + "loss": 0.1483, "step": 3581 }, { "epoch": 1.0, "learning_rate": 1.3353743389924856e-05, - "loss": 0.1958, + "loss": 0.2047, "step": 3582 }, { "epoch": 1.0, "learning_rate": 1.3351887930234716e-05, - "loss": 0.3051, + "loss": 0.1558, "step": 3583 }, { "epoch": 1.0, "learning_rate": 1.3350032470544578e-05, - "loss": 0.3044, + "loss": 0.0981, "step": 3584 }, { "epoch": 1.0, "learning_rate": 1.334817701085444e-05, - "loss": 0.1934, + "loss": 0.0751, "step": 3585 }, { "epoch": 1.0, "learning_rate": 1.3346321551164304e-05, - "loss": 0.1385, + "loss": 0.1334, "step": 3586 }, { "epoch": 1.0, "learning_rate": 1.3344466091474164e-05, - "loss": 0.0344, + "loss": 0.1741, "step": 3587 }, { "epoch": 1.0, "learning_rate": 1.3342610631784026e-05, - "loss": 0.0841, + "loss": 0.0741, "step": 3588 }, { "epoch": 1.0, "learning_rate": 1.3340755172093886e-05, - "loss": 0.138, + "loss": 0.118, "step": 3589 }, { "epoch": 1.0, "learning_rate": 1.333889971240375e-05, - "loss": 0.1417, + "loss": 0.0697, "step": 3590 }, { "epoch": 1.0, "learning_rate": 1.3337044252713611e-05, - "loss": 0.0346, + "loss": 0.0506, "step": 3591 }, { "epoch": 1.0, "learning_rate": 1.3335188793023473e-05, - "loss": 0.1959, + "loss": 0.0991, "step": 3592 }, { "epoch": 1.0, "learning_rate": 1.3333333333333333e-05, - "loss": 0.4093, + "loss": 0.0759, "step": 3593 }, { "epoch": 1.0, "learning_rate": 1.3331477873643197e-05, - "loss": 0.0882, + "loss": 0.067, "step": 3594 }, { "epoch": 1.0, "learning_rate": 1.3329622413953059e-05, - "loss": 0.1888, + "loss": 0.1084, "step": 3595 }, { "epoch": 1.0, "learning_rate": 1.3327766954262919e-05, - "loss": 0.1377, + "loss": 0.2516, "step": 3596 }, { "epoch": 1.0, "learning_rate": 1.332591149457278e-05, - "loss": 0.139, + "loss": 0.0861, "step": 3597 }, { "epoch": 1.0, "learning_rate": 1.3324056034882644e-05, - "loss": 0.1403, + "loss": 0.0609, "step": 3598 }, { "epoch": 1.0, "learning_rate": 1.3322200575192506e-05, - "loss": 0.1379, + "loss": 0.081, "step": 3599 }, { "epoch": 1.0, "learning_rate": 1.3320345115502366e-05, - "loss": 0.1945, + "loss": 0.0417, "step": 3600 }, { "epoch": 1.0, "learning_rate": 1.3318489655812228e-05, - "loss": 0.1378, + "loss": 0.1091, "step": 3601 }, { "epoch": 1.0, "learning_rate": 1.3316634196122092e-05, - "loss": 0.2417, + "loss": 0.1901, "step": 3602 }, { "epoch": 1.0, "learning_rate": 1.3314778736431952e-05, - "loss": 0.1891, + "loss": 0.0713, "step": 3603 }, { "epoch": 1.0, "learning_rate": 1.3312923276741814e-05, - "loss": 0.0881, + "loss": 0.0247, "step": 3604 }, { "epoch": 1.0, "learning_rate": 1.3311067817051676e-05, - "loss": 0.1424, + "loss": 0.1091, "step": 3605 }, { "epoch": 1.0, "learning_rate": 1.3309212357361538e-05, - "loss": 0.1926, + "loss": 0.0189, "step": 3606 }, { "epoch": 1.0, "learning_rate": 1.33073568976714e-05, - "loss": 0.0876, + "loss": 0.0256, "step": 3607 }, { "epoch": 1.0, "learning_rate": 1.3305501437981261e-05, - "loss": 0.1863, + "loss": 0.0873, "step": 3608 }, { "epoch": 1.0, "learning_rate": 1.3303645978291122e-05, - "loss": 0.1414, + "loss": 0.0188, "step": 3609 }, { "epoch": 1.0, "learning_rate": 1.3301790518600985e-05, - "loss": 0.2941, + "loss": 0.0248, "step": 3610 }, { "epoch": 1.01, "learning_rate": 1.3299935058910847e-05, - "loss": 0.1912, + "loss": 0.0845, "step": 3611 }, { "epoch": 1.01, "learning_rate": 1.3298079599220707e-05, - "loss": 0.087, + "loss": 0.0101, "step": 3612 }, { "epoch": 1.01, "learning_rate": 1.3296224139530569e-05, - "loss": 0.338, + "loss": 0.0515, "step": 3613 }, { "epoch": 1.01, "learning_rate": 1.3294368679840433e-05, - "loss": 0.1375, + "loss": 0.0145, "step": 3614 }, { "epoch": 1.01, "learning_rate": 1.3292513220150294e-05, - "loss": 0.1968, + "loss": 0.0081, "step": 3615 }, { "epoch": 1.01, "learning_rate": 1.3290657760460155e-05, - "loss": 0.286, + "loss": 0.1699, "step": 3616 }, { "epoch": 1.01, "learning_rate": 1.3288802300770016e-05, - "loss": 0.1382, + "loss": 0.0559, "step": 3617 }, { "epoch": 1.01, "learning_rate": 1.328694684107988e-05, - "loss": 0.0932, + "loss": 0.0071, "step": 3618 }, { "epoch": 1.01, "learning_rate": 1.328509138138974e-05, - "loss": 0.1865, + "loss": 0.0266, "step": 3619 }, { "epoch": 1.01, "learning_rate": 1.3283235921699602e-05, - "loss": 0.2364, + "loss": 0.0082, "step": 3620 }, { "epoch": 1.01, "learning_rate": 1.3281380462009464e-05, - "loss": 0.1436, + "loss": 0.0885, "step": 3621 }, { "epoch": 1.01, "learning_rate": 1.3279525002319328e-05, - "loss": 0.1473, + "loss": 0.0515, "step": 3622 }, { "epoch": 1.01, "learning_rate": 1.3277669542629188e-05, - "loss": 0.1387, + "loss": 0.0054, "step": 3623 }, { "epoch": 1.01, "learning_rate": 1.327581408293905e-05, - "loss": 0.1944, + "loss": 0.0701, "step": 3624 }, { "epoch": 1.01, "learning_rate": 1.327395862324891e-05, - "loss": 0.2368, + "loss": 0.0097, "step": 3625 }, { "epoch": 1.01, "learning_rate": 1.3272103163558773e-05, - "loss": 0.0944, + "loss": 0.1414, "step": 3626 }, { "epoch": 1.01, "learning_rate": 1.3270247703868635e-05, - "loss": 0.0457, + "loss": 0.0747, "step": 3627 }, { "epoch": 1.01, "learning_rate": 1.3268392244178497e-05, - "loss": 0.1888, + "loss": 0.0271, "step": 3628 }, { "epoch": 1.01, "learning_rate": 1.3266536784488357e-05, - "loss": 0.0926, + "loss": 0.009, "step": 3629 }, { "epoch": 1.01, "learning_rate": 1.326468132479822e-05, - "loss": 0.1931, + "loss": 0.1751, "step": 3630 }, { "epoch": 1.01, "learning_rate": 1.3262825865108083e-05, - "loss": 0.0923, + "loss": 0.0684, "step": 3631 }, { "epoch": 1.01, "learning_rate": 1.3260970405417943e-05, - "loss": 0.1409, + "loss": 0.0956, "step": 3632 }, { "epoch": 1.01, "learning_rate": 1.3259114945727805e-05, - "loss": 0.137, + "loss": 0.0547, "step": 3633 }, { "epoch": 1.01, "learning_rate": 1.3257259486037668e-05, - "loss": 0.1401, + "loss": 0.0582, "step": 3634 }, { "epoch": 1.01, "learning_rate": 1.3255404026347528e-05, - "loss": 0.2927, + "loss": 0.0185, "step": 3635 }, { "epoch": 1.01, "learning_rate": 1.325354856665739e-05, - "loss": 0.142, + "loss": 0.1134, "step": 3636 }, { "epoch": 1.01, "learning_rate": 1.3251693106967252e-05, - "loss": 0.2451, + "loss": 0.0372, "step": 3637 }, { "epoch": 1.01, "learning_rate": 1.3249837647277116e-05, - "loss": 0.0379, + "loss": 0.1949, "step": 3638 }, { "epoch": 1.01, "learning_rate": 1.3247982187586976e-05, - "loss": 0.1413, + "loss": 0.0139, "step": 3639 }, { "epoch": 1.01, "learning_rate": 1.3246126727896838e-05, - "loss": 0.0862, + "loss": 0.0219, "step": 3640 }, { "epoch": 1.01, "learning_rate": 1.3244271268206698e-05, - "loss": 0.0356, + "loss": 0.013, "step": 3641 }, { "epoch": 1.01, "learning_rate": 1.3242415808516562e-05, - "loss": 0.0902, + "loss": 0.0649, "step": 3642 }, { "epoch": 1.01, "learning_rate": 1.3240560348826423e-05, - "loss": 0.0867, + "loss": 0.0662, "step": 3643 }, { "epoch": 1.01, "learning_rate": 1.3238704889136285e-05, - "loss": 0.087, + "loss": 0.0633, "step": 3644 }, { "epoch": 1.01, "learning_rate": 1.3236849429446145e-05, - "loss": 0.1382, + "loss": 0.0204, "step": 3645 }, { "epoch": 1.01, "learning_rate": 1.3234993969756007e-05, - "loss": 0.0324, + "loss": 0.0629, "step": 3646 }, { "epoch": 1.02, "learning_rate": 1.3233138510065871e-05, - "loss": 0.1376, + "loss": 0.0517, "step": 3647 }, { "epoch": 1.02, "learning_rate": 1.3231283050375731e-05, - "loss": 0.2425, + "loss": 0.1309, "step": 3648 }, { "epoch": 1.02, "learning_rate": 1.3229427590685593e-05, - "loss": 0.3618, + "loss": 0.1694, "step": 3649 }, { "epoch": 1.02, "learning_rate": 1.3227572130995455e-05, - "loss": 0.1945, + "loss": 0.1513, "step": 3650 }, { "epoch": 1.02, "learning_rate": 1.3225716671305318e-05, - "loss": 0.1372, + "loss": 0.0142, "step": 3651 }, { "epoch": 1.02, "learning_rate": 1.3223861211615179e-05, - "loss": 0.0844, + "loss": 0.0764, "step": 3652 }, { "epoch": 1.02, "learning_rate": 1.322200575192504e-05, - "loss": 0.2465, + "loss": 0.0926, "step": 3653 }, { "epoch": 1.02, "learning_rate": 1.32201502922349e-05, - "loss": 0.1407, + "loss": 0.0616, "step": 3654 }, { "epoch": 1.02, "learning_rate": 1.3218294832544764e-05, - "loss": 0.0844, + "loss": 0.1134, "step": 3655 }, { "epoch": 1.02, "learning_rate": 1.3216439372854626e-05, - "loss": 0.0854, + "loss": 0.101, "step": 3656 }, { "epoch": 1.02, "learning_rate": 1.3214583913164486e-05, - "loss": 0.1404, + "loss": 0.0786, "step": 3657 }, { "epoch": 1.02, "learning_rate": 1.3212728453474348e-05, - "loss": 0.0314, + "loss": 0.0249, "step": 3658 }, { "epoch": 1.02, "learning_rate": 1.3210872993784212e-05, - "loss": 0.0318, + "loss": 0.1624, "step": 3659 }, { "epoch": 1.02, "learning_rate": 1.3209017534094074e-05, - "loss": 0.1387, + "loss": 0.0243, "step": 3660 }, { "epoch": 1.02, "learning_rate": 1.3207162074403934e-05, - "loss": 0.0302, + "loss": 0.0258, "step": 3661 }, { "epoch": 1.02, "learning_rate": 1.3205306614713796e-05, - "loss": 0.0868, + "loss": 0.0619, "step": 3662 }, { "epoch": 1.02, "learning_rate": 1.3203451155023659e-05, - "loss": 0.138, + "loss": 0.066, "step": 3663 }, { "epoch": 1.02, "learning_rate": 1.320159569533352e-05, - "loss": 0.0841, + "loss": 0.0245, "step": 3664 }, { "epoch": 1.02, "learning_rate": 1.3199740235643381e-05, - "loss": 0.0836, + "loss": 0.1215, "step": 3665 }, { "epoch": 1.02, "learning_rate": 1.3197884775953243e-05, - "loss": 0.1383, + "loss": 0.1076, "step": 3666 }, { "epoch": 1.02, "learning_rate": 1.3196029316263107e-05, - "loss": 0.1424, + "loss": 0.1283, "step": 3667 }, { "epoch": 1.02, "learning_rate": 1.3194173856572967e-05, - "loss": 0.085, + "loss": 0.1034, "step": 3668 }, { "epoch": 1.02, "learning_rate": 1.3192318396882829e-05, - "loss": 0.0278, + "loss": 0.0287, "step": 3669 }, { "epoch": 1.02, "learning_rate": 1.3190462937192689e-05, - "loss": 0.2026, + "loss": 0.0227, "step": 3670 }, { "epoch": 1.02, "learning_rate": 1.3188607477502552e-05, - "loss": 0.1393, + "loss": 0.1412, "step": 3671 }, { "epoch": 1.02, "learning_rate": 1.3186752017812414e-05, - "loss": 0.1441, + "loss": 0.0683, "step": 3672 }, { "epoch": 1.02, "learning_rate": 1.3184896558122276e-05, - "loss": 0.0839, + "loss": 0.0699, "step": 3673 }, { "epoch": 1.02, "learning_rate": 1.3183041098432136e-05, - "loss": 0.1967, + "loss": 0.0933, "step": 3674 }, { "epoch": 1.02, "learning_rate": 1.3181185638742e-05, - "loss": 0.1411, + "loss": 0.0655, "step": 3675 }, { "epoch": 1.02, "learning_rate": 1.3179330179051862e-05, - "loss": 0.1394, + "loss": 0.2068, "step": 3676 }, { "epoch": 1.02, "learning_rate": 1.3177474719361722e-05, - "loss": 0.0825, + "loss": 0.0603, "step": 3677 }, { "epoch": 1.02, "learning_rate": 1.3175619259671584e-05, - "loss": 0.1355, + "loss": 0.0647, "step": 3678 }, { "epoch": 1.02, "learning_rate": 1.3173763799981447e-05, - "loss": 0.2003, + "loss": 0.1638, "step": 3679 }, { "epoch": 1.02, "learning_rate": 1.3171908340291308e-05, - "loss": 0.1989, + "loss": 0.269, "step": 3680 }, { "epoch": 1.02, "learning_rate": 1.317005288060117e-05, - "loss": 0.0818, + "loss": 0.2056, "step": 3681 }, { "epoch": 1.02, "learning_rate": 1.3168197420911031e-05, - "loss": 0.1412, + "loss": 0.0781, "step": 3682 }, { "epoch": 1.03, "learning_rate": 1.3166341961220895e-05, - "loss": 0.1375, + "loss": 0.0182, "step": 3683 }, { "epoch": 1.03, "learning_rate": 1.3164486501530755e-05, - "loss": 0.1369, + "loss": 0.0115, "step": 3684 }, { "epoch": 1.03, "learning_rate": 1.3162631041840617e-05, - "loss": 0.1415, + "loss": 0.0218, "step": 3685 }, { "epoch": 1.03, "learning_rate": 1.3160775582150477e-05, - "loss": 0.1996, + "loss": 0.0292, "step": 3686 }, { "epoch": 1.03, "learning_rate": 1.315892012246034e-05, - "loss": 0.2017, + "loss": 0.114, "step": 3687 }, { "epoch": 1.03, "learning_rate": 1.3157064662770203e-05, - "loss": 0.139, + "loss": 0.1259, "step": 3688 }, { "epoch": 1.03, "learning_rate": 1.3155209203080064e-05, - "loss": 0.1977, + "loss": 0.0618, "step": 3689 }, { "epoch": 1.03, "learning_rate": 1.3153353743389925e-05, - "loss": 0.1405, + "loss": 0.1108, "step": 3690 }, { "epoch": 1.03, "learning_rate": 1.3151498283699788e-05, - "loss": 0.4702, + "loss": 0.0717, "step": 3691 }, { "epoch": 1.03, "learning_rate": 1.314964282400965e-05, - "loss": 0.1362, + "loss": 0.0512, "step": 3692 }, { "epoch": 1.03, "learning_rate": 1.314778736431951e-05, - "loss": 0.0844, + "loss": 0.012, "step": 3693 }, { "epoch": 1.03, "learning_rate": 1.3145931904629372e-05, - "loss": 0.0866, + "loss": 0.0528, "step": 3694 }, { "epoch": 1.03, "learning_rate": 1.3144076444939236e-05, - "loss": 0.2499, + "loss": 0.0646, "step": 3695 }, { "epoch": 1.03, "learning_rate": 1.3142220985249097e-05, - "loss": 0.2508, + "loss": 0.1199, "step": 3696 }, { "epoch": 1.03, "learning_rate": 1.3140365525558958e-05, - "loss": 0.0849, + "loss": 0.0225, "step": 3697 }, { "epoch": 1.03, "learning_rate": 1.313851006586882e-05, - "loss": 0.1407, + "loss": 0.09, "step": 3698 }, { "epoch": 1.03, "learning_rate": 1.3136654606178683e-05, - "loss": 0.0343, + "loss": 0.0697, "step": 3699 }, { "epoch": 1.03, "learning_rate": 1.3134799146488543e-05, - "loss": 0.1962, + "loss": 0.1004, "step": 3700 }, { "epoch": 1.03, "learning_rate": 1.3132943686798405e-05, - "loss": 0.0874, + "loss": 0.1302, "step": 3701 }, { "epoch": 1.03, "learning_rate": 1.3131088227108267e-05, - "loss": 0.2486, + "loss": 0.1097, "step": 3702 }, { "epoch": 1.03, "learning_rate": 1.3129232767418129e-05, - "loss": 0.0875, + "loss": 0.091, "step": 3703 }, { "epoch": 1.03, "learning_rate": 1.312737730772799e-05, - "loss": 0.1409, + "loss": 0.0657, "step": 3704 }, { "epoch": 1.03, "learning_rate": 1.3125521848037853e-05, - "loss": 0.0353, + "loss": 0.0354, "step": 3705 }, { "epoch": 1.03, "learning_rate": 1.3123666388347713e-05, - "loss": 0.1932, + "loss": 0.0576, "step": 3706 }, { "epoch": 1.03, "learning_rate": 1.3121810928657576e-05, - "loss": 0.0347, + "loss": 0.1045, "step": 3707 }, { "epoch": 1.03, "learning_rate": 1.3119955468967438e-05, - "loss": 0.2439, + "loss": 0.0544, "step": 3708 }, { "epoch": 1.03, "learning_rate": 1.3118100009277298e-05, - "loss": 0.1895, + "loss": 0.0608, "step": 3709 }, { "epoch": 1.03, "learning_rate": 1.311624454958716e-05, - "loss": 0.0898, + "loss": 0.0514, "step": 3710 }, { "epoch": 1.03, "learning_rate": 1.3114389089897024e-05, - "loss": 0.0866, + "loss": 0.0319, "step": 3711 }, { "epoch": 1.03, "learning_rate": 1.3112533630206886e-05, - "loss": 0.0863, + "loss": 0.1432, "step": 3712 }, { "epoch": 1.03, "learning_rate": 1.3110678170516746e-05, - "loss": 0.0881, + "loss": 0.0343, "step": 3713 }, { "epoch": 1.03, "learning_rate": 1.3108822710826608e-05, - "loss": 0.1434, + "loss": 0.105, "step": 3714 }, { "epoch": 1.03, "learning_rate": 1.3106967251136471e-05, - "loss": 0.1355, + "loss": 0.1345, "step": 3715 }, { "epoch": 1.03, "learning_rate": 1.3105111791446332e-05, - "loss": 0.2442, + "loss": 0.097, "step": 3716 }, { "epoch": 1.03, "learning_rate": 1.3103256331756193e-05, - "loss": 0.0334, + "loss": 0.0993, "step": 3717 }, { "epoch": 1.03, "learning_rate": 1.3101400872066055e-05, - "loss": 0.1871, + "loss": 0.0615, "step": 3718 }, { "epoch": 1.04, "learning_rate": 1.3099545412375919e-05, - "loss": 0.0332, + "loss": 0.0282, "step": 3719 }, { "epoch": 1.04, "learning_rate": 1.3097689952685779e-05, - "loss": 0.1971, + "loss": 0.1234, "step": 3720 }, { "epoch": 1.04, "learning_rate": 1.3095834492995641e-05, - "loss": 0.0882, + "loss": 0.0224, "step": 3721 }, { "epoch": 1.04, "learning_rate": 1.3093979033305501e-05, - "loss": 0.1903, + "loss": 0.1519, "step": 3722 }, { "epoch": 1.04, "learning_rate": 1.3092123573615365e-05, - "loss": 0.0866, + "loss": 0.053, "step": 3723 }, { "epoch": 1.04, "learning_rate": 1.3090268113925226e-05, - "loss": 0.1929, + "loss": 0.0185, "step": 3724 }, { "epoch": 1.04, "learning_rate": 1.3088412654235088e-05, - "loss": 0.1914, + "loss": 0.161, "step": 3725 }, { "epoch": 1.04, "learning_rate": 1.3086557194544949e-05, - "loss": 0.0305, + "loss": 0.0411, "step": 3726 }, { "epoch": 1.04, "learning_rate": 1.3084701734854812e-05, - "loss": 0.0317, + "loss": 0.096, "step": 3727 }, { "epoch": 1.04, "learning_rate": 1.3082846275164674e-05, - "loss": 0.0302, + "loss": 0.0529, "step": 3728 }, { "epoch": 1.04, "learning_rate": 1.3080990815474534e-05, - "loss": 0.1922, + "loss": 0.1079, "step": 3729 }, { "epoch": 1.04, "learning_rate": 1.3079135355784396e-05, - "loss": 0.1951, + "loss": 0.0938, "step": 3730 }, { "epoch": 1.04, "learning_rate": 1.307727989609426e-05, - "loss": 0.0852, + "loss": 0.0805, "step": 3731 }, { "epoch": 1.04, "learning_rate": 1.307542443640412e-05, - "loss": 0.1393, + "loss": 0.0606, "step": 3732 }, { "epoch": 1.04, "learning_rate": 1.3073568976713982e-05, - "loss": 0.1371, + "loss": 0.054, "step": 3733 }, { "epoch": 1.04, "learning_rate": 1.3071713517023843e-05, - "loss": 0.1423, + "loss": 0.0114, "step": 3734 }, { "epoch": 1.04, "learning_rate": 1.3069858057333707e-05, - "loss": 0.2001, + "loss": 0.04, "step": 3735 }, { "epoch": 1.04, "learning_rate": 1.3068002597643567e-05, - "loss": 0.1948, + "loss": 0.0166, "step": 3736 }, { "epoch": 1.04, "learning_rate": 1.3066147137953429e-05, - "loss": 0.3078, + "loss": 0.0551, "step": 3737 }, { "epoch": 1.04, "learning_rate": 1.306429167826329e-05, - "loss": 0.1389, + "loss": 0.0459, "step": 3738 }, { "epoch": 1.04, "learning_rate": 1.3062436218573153e-05, - "loss": 0.0864, + "loss": 0.0773, "step": 3739 }, { "epoch": 1.04, "learning_rate": 1.3060580758883015e-05, - "loss": 0.1913, + "loss": 0.0751, "step": 3740 }, { "epoch": 1.04, "learning_rate": 1.3058725299192877e-05, - "loss": 0.2471, + "loss": 0.0889, "step": 3741 }, { "epoch": 1.04, "learning_rate": 1.3056869839502737e-05, - "loss": 0.084, + "loss": 0.1411, "step": 3742 }, { "epoch": 1.04, "learning_rate": 1.30550143798126e-05, - "loss": 0.1392, + "loss": 0.2186, "step": 3743 }, { "epoch": 1.04, "learning_rate": 1.3053158920122462e-05, - "loss": 0.1924, + "loss": 0.0105, "step": 3744 }, { "epoch": 1.04, "learning_rate": 1.3051303460432322e-05, - "loss": 0.1385, + "loss": 0.1006, "step": 3745 }, { "epoch": 1.04, "learning_rate": 1.3049448000742184e-05, - "loss": 0.1411, + "loss": 0.0578, "step": 3746 }, { "epoch": 1.04, "learning_rate": 1.3047592541052048e-05, - "loss": 0.1901, + "loss": 0.0136, "step": 3747 }, { "epoch": 1.04, "learning_rate": 1.304573708136191e-05, - "loss": 0.1362, + "loss": 0.2098, "step": 3748 }, { "epoch": 1.04, "learning_rate": 1.304388162167177e-05, - "loss": 0.0906, + "loss": 0.06, "step": 3749 }, { "epoch": 1.04, "learning_rate": 1.3042026161981632e-05, - "loss": 0.0856, + "loss": 0.016, "step": 3750 }, { "epoch": 1.04, "learning_rate": 1.3040170702291495e-05, - "loss": 0.1924, + "loss": 0.1407, "step": 3751 }, { "epoch": 1.04, "learning_rate": 1.3038315242601355e-05, - "loss": 0.036, + "loss": 0.0189, "step": 3752 }, { "epoch": 1.04, "learning_rate": 1.3036459782911217e-05, - "loss": 0.1427, + "loss": 0.0522, "step": 3753 }, { "epoch": 1.04, "learning_rate": 1.303460432322108e-05, - "loss": 0.189, + "loss": 0.114, "step": 3754 }, { "epoch": 1.05, "learning_rate": 1.3032748863530941e-05, - "loss": 0.0854, + "loss": 0.1436, "step": 3755 }, { "epoch": 1.05, "learning_rate": 1.3030893403840803e-05, - "loss": 0.1889, + "loss": 0.0472, "step": 3756 }, { "epoch": 1.05, "learning_rate": 1.3029037944150665e-05, - "loss": 0.0868, + "loss": 0.1825, "step": 3757 }, { "epoch": 1.05, "learning_rate": 1.3027182484460525e-05, - "loss": 0.1925, + "loss": 0.0849, "step": 3758 }, { "epoch": 1.05, "learning_rate": 1.3025327024770389e-05, - "loss": 0.1358, + "loss": 0.1088, "step": 3759 }, { "epoch": 1.05, "learning_rate": 1.302347156508025e-05, - "loss": 0.1851, + "loss": 0.2055, "step": 3760 }, { "epoch": 1.05, "learning_rate": 1.302161610539011e-05, - "loss": 0.0873, + "loss": 0.084, "step": 3761 }, { "epoch": 1.05, "learning_rate": 1.3019760645699972e-05, - "loss": 0.1403, + "loss": 0.0216, "step": 3762 }, { "epoch": 1.05, "learning_rate": 1.3017905186009836e-05, - "loss": 0.1941, + "loss": 0.0203, "step": 3763 }, { "epoch": 1.05, "learning_rate": 1.3016049726319698e-05, - "loss": 0.2375, + "loss": 0.1174, "step": 3764 }, { "epoch": 1.05, "learning_rate": 1.3014194266629558e-05, - "loss": 0.0344, + "loss": 0.0894, "step": 3765 }, { "epoch": 1.05, "learning_rate": 1.301233880693942e-05, - "loss": 0.0836, + "loss": 0.0757, "step": 3766 }, { "epoch": 1.05, "learning_rate": 1.3010483347249284e-05, - "loss": 0.034, + "loss": 0.1171, "step": 3767 }, { "epoch": 1.05, "learning_rate": 1.3008627887559144e-05, - "loss": 0.0892, + "loss": 0.1634, "step": 3768 }, { "epoch": 1.05, "learning_rate": 1.3006772427869006e-05, - "loss": 0.1898, + "loss": 0.0235, "step": 3769 }, { "epoch": 1.05, "learning_rate": 1.3004916968178867e-05, - "loss": 0.3047, + "loss": 0.1299, "step": 3770 }, { "epoch": 1.05, "learning_rate": 1.3003061508488731e-05, - "loss": 0.0831, + "loss": 0.0181, "step": 3771 }, { "epoch": 1.05, "learning_rate": 1.3001206048798591e-05, - "loss": 0.0313, + "loss": 0.0562, "step": 3772 }, { "epoch": 1.05, "learning_rate": 1.2999350589108453e-05, - "loss": 0.193, + "loss": 0.1046, "step": 3773 }, { "epoch": 1.05, "learning_rate": 1.2997495129418313e-05, - "loss": 0.0839, + "loss": 0.1089, "step": 3774 }, { "epoch": 1.05, "learning_rate": 1.2995639669728177e-05, - "loss": 0.0897, + "loss": 0.0553, "step": 3775 }, { "epoch": 1.05, "learning_rate": 1.2993784210038039e-05, - "loss": 0.1391, + "loss": 0.1611, "step": 3776 }, { "epoch": 1.05, "learning_rate": 1.2991928750347899e-05, - "loss": 0.2516, + "loss": 0.1451, "step": 3777 }, { "epoch": 1.05, "learning_rate": 1.299007329065776e-05, - "loss": 0.2486, + "loss": 0.067, "step": 3778 }, { "epoch": 1.05, "learning_rate": 1.2988217830967624e-05, - "loss": 0.2989, + "loss": 0.0556, "step": 3779 }, { "epoch": 1.05, "learning_rate": 1.2986362371277486e-05, - "loss": 0.1425, + "loss": 0.0767, "step": 3780 }, { "epoch": 1.05, "learning_rate": 1.2984506911587346e-05, - "loss": 0.1949, + "loss": 0.0568, "step": 3781 }, { "epoch": 1.05, "learning_rate": 1.2982651451897208e-05, - "loss": 0.0849, + "loss": 0.0875, "step": 3782 }, { "epoch": 1.05, "learning_rate": 1.2980795992207072e-05, - "loss": 0.0338, + "loss": 0.1003, "step": 3783 }, { "epoch": 1.05, "learning_rate": 1.2978940532516932e-05, - "loss": 0.0853, + "loss": 0.103, "step": 3784 }, { "epoch": 1.05, "learning_rate": 1.2977085072826794e-05, - "loss": 0.2416, + "loss": 0.0164, "step": 3785 }, { "epoch": 1.05, "learning_rate": 1.2975229613136656e-05, - "loss": 0.1397, + "loss": 0.0201, "step": 3786 }, { "epoch": 1.05, "learning_rate": 1.297337415344652e-05, - "loss": 0.0865, + "loss": 0.0574, "step": 3787 }, { "epoch": 1.05, "learning_rate": 1.297151869375638e-05, - "loss": 0.0839, + "loss": 0.0339, "step": 3788 }, { "epoch": 1.05, "learning_rate": 1.2969663234066241e-05, - "loss": 0.0865, + "loss": 0.0651, "step": 3789 }, { "epoch": 1.05, "learning_rate": 1.2967807774376101e-05, - "loss": 0.1939, + "loss": 0.0531, "step": 3790 }, { "epoch": 1.06, "learning_rate": 1.2965952314685965e-05, - "loss": 0.0346, + "loss": 0.0142, "step": 3791 }, { "epoch": 1.06, "learning_rate": 1.2964096854995827e-05, - "loss": 0.1898, + "loss": 0.0485, "step": 3792 }, { "epoch": 1.06, "learning_rate": 1.2962241395305689e-05, - "loss": 0.0321, + "loss": 0.0365, "step": 3793 }, { "epoch": 1.06, "learning_rate": 1.2960385935615549e-05, - "loss": 0.1904, + "loss": 0.0561, "step": 3794 }, { "epoch": 1.06, "learning_rate": 1.2958530475925413e-05, - "loss": 0.0316, + "loss": 0.1146, "step": 3795 }, { "epoch": 1.06, "learning_rate": 1.2956675016235274e-05, - "loss": 0.0864, + "loss": 0.0811, "step": 3796 }, { "epoch": 1.06, "learning_rate": 1.2954819556545135e-05, - "loss": 0.198, + "loss": 0.133, "step": 3797 }, { "epoch": 1.06, "learning_rate": 1.2952964096854996e-05, - "loss": 0.2462, + "loss": 0.0561, "step": 3798 }, { "epoch": 1.06, "learning_rate": 1.2951108637164858e-05, - "loss": 0.2462, + "loss": 0.0816, "step": 3799 }, { "epoch": 1.06, "learning_rate": 1.294925317747472e-05, - "loss": 0.0306, + "loss": 0.0925, "step": 3800 }, { "epoch": 1.06, "learning_rate": 1.2947397717784582e-05, - "loss": 0.1963, + "loss": 0.0444, "step": 3801 }, { "epoch": 1.06, "learning_rate": 1.2945542258094444e-05, - "loss": 0.0843, + "loss": 0.1219, "step": 3802 }, { "epoch": 1.06, "learning_rate": 1.2943686798404304e-05, - "loss": 0.2513, + "loss": 0.0999, "step": 3803 }, { "epoch": 1.06, "learning_rate": 1.2941831338714168e-05, - "loss": 0.3001, + "loss": 0.0748, "step": 3804 }, { "epoch": 1.06, "learning_rate": 1.293997587902403e-05, - "loss": 0.0845, + "loss": 0.1197, "step": 3805 }, { "epoch": 1.06, "learning_rate": 1.293812041933389e-05, - "loss": 0.1385, + "loss": 0.1355, "step": 3806 }, { "epoch": 1.06, "learning_rate": 1.2936264959643752e-05, - "loss": 0.084, + "loss": 0.0189, "step": 3807 }, { "epoch": 1.06, "learning_rate": 1.2934409499953615e-05, - "loss": 0.1924, + "loss": 0.0184, "step": 3808 }, { "epoch": 1.06, "learning_rate": 1.2932554040263477e-05, - "loss": 0.1361, + "loss": 0.0995, "step": 3809 }, { "epoch": 1.06, "learning_rate": 1.2930698580573337e-05, - "loss": 0.1313, + "loss": 0.03, "step": 3810 }, { "epoch": 1.06, "learning_rate": 1.2928843120883199e-05, - "loss": 0.0331, + "loss": 0.0185, "step": 3811 }, { "epoch": 1.06, "learning_rate": 1.2926987661193063e-05, - "loss": 0.2923, + "loss": 0.0942, "step": 3812 }, { "epoch": 1.06, "learning_rate": 1.2925132201502923e-05, - "loss": 0.1393, + "loss": 0.0465, "step": 3813 }, { "epoch": 1.06, "learning_rate": 1.2923276741812785e-05, - "loss": 0.0842, + "loss": 0.0104, "step": 3814 }, { "epoch": 1.06, "learning_rate": 1.2921421282122647e-05, - "loss": 0.0825, + "loss": 0.0112, "step": 3815 }, { "epoch": 1.06, "learning_rate": 1.291956582243251e-05, - "loss": 0.1373, + "loss": 0.1109, "step": 3816 }, { "epoch": 1.06, "learning_rate": 1.291771036274237e-05, - "loss": 0.1355, + "loss": 0.2626, "step": 3817 }, { "epoch": 1.06, "learning_rate": 1.2915854903052232e-05, - "loss": 0.1369, + "loss": 0.1373, "step": 3818 }, { "epoch": 1.06, "learning_rate": 1.2913999443362092e-05, - "loss": 0.0838, + "loss": 0.0119, "step": 3819 }, { "epoch": 1.06, "learning_rate": 1.2912143983671956e-05, - "loss": 0.1386, + "loss": 0.0131, "step": 3820 }, { "epoch": 1.06, "learning_rate": 1.2910288523981818e-05, - "loss": 0.0886, + "loss": 0.0971, "step": 3821 }, { "epoch": 1.06, "learning_rate": 1.290843306429168e-05, - "loss": 0.1923, + "loss": 0.0613, "step": 3822 }, { "epoch": 1.06, "learning_rate": 1.290657760460154e-05, - "loss": 0.2466, + "loss": 0.0566, "step": 3823 }, { "epoch": 1.06, "learning_rate": 1.2904722144911403e-05, - "loss": 0.1919, + "loss": 0.0102, "step": 3824 }, { "epoch": 1.06, "learning_rate": 1.2902866685221265e-05, - "loss": 0.1957, + "loss": 0.0096, "step": 3825 }, { "epoch": 1.06, "learning_rate": 1.2901011225531125e-05, - "loss": 0.1352, + "loss": 0.0884, "step": 3826 }, { "epoch": 1.07, "learning_rate": 1.2899155765840987e-05, - "loss": 0.1368, + "loss": 0.0092, "step": 3827 }, { "epoch": 1.07, "learning_rate": 1.2897300306150851e-05, - "loss": 0.1401, + "loss": 0.011, "step": 3828 }, { "epoch": 1.07, "learning_rate": 1.2895444846460711e-05, - "loss": 0.1863, + "loss": 0.0066, "step": 3829 }, { "epoch": 1.07, "learning_rate": 1.2893589386770573e-05, - "loss": 0.0852, + "loss": 0.0089, "step": 3830 }, { "epoch": 1.07, "learning_rate": 1.2891733927080435e-05, - "loss": 0.1428, + "loss": 0.0595, "step": 3831 }, { "epoch": 1.07, "learning_rate": 1.2889878467390298e-05, - "loss": 0.1905, + "loss": 0.0827, "step": 3832 }, { "epoch": 1.07, "learning_rate": 1.2888023007700159e-05, - "loss": 0.1447, + "loss": 0.1333, "step": 3833 }, { "epoch": 1.07, "learning_rate": 1.288616754801002e-05, - "loss": 0.1863, + "loss": 0.0754, "step": 3834 }, { "epoch": 1.07, "learning_rate": 1.288431208831988e-05, - "loss": 0.0903, + "loss": 0.0185, "step": 3835 }, { "epoch": 1.07, "learning_rate": 1.2882456628629744e-05, - "loss": 0.0855, + "loss": 0.0915, "step": 3836 }, { "epoch": 1.07, "learning_rate": 1.2880601168939606e-05, - "loss": 0.1402, + "loss": 0.0064, "step": 3837 }, { "epoch": 1.07, "learning_rate": 1.2878745709249468e-05, - "loss": 0.0882, + "loss": 0.0859, "step": 3838 }, { "epoch": 1.07, "learning_rate": 1.2876890249559328e-05, - "loss": 0.0344, + "loss": 0.1047, "step": 3839 }, { "epoch": 1.07, "learning_rate": 1.2875034789869192e-05, - "loss": 0.3016, + "loss": 0.1557, "step": 3840 }, { "epoch": 1.07, "learning_rate": 1.2873179330179053e-05, - "loss": 0.137, + "loss": 0.1363, "step": 3841 }, { "epoch": 1.07, "learning_rate": 1.2871323870488914e-05, - "loss": 0.1448, + "loss": 0.0134, "step": 3842 }, { "epoch": 1.07, "learning_rate": 1.2869468410798776e-05, - "loss": 0.0839, + "loss": 0.1839, "step": 3843 }, { "epoch": 1.07, "learning_rate": 1.2867612951108639e-05, - "loss": 0.0862, + "loss": 0.0925, "step": 3844 }, { "epoch": 1.07, "learning_rate": 1.2865757491418501e-05, - "loss": 0.0317, + "loss": 0.1369, "step": 3845 }, { "epoch": 1.07, "learning_rate": 1.2863902031728361e-05, - "loss": 0.1404, + "loss": 0.0362, "step": 3846 }, { "epoch": 1.07, "learning_rate": 1.2862046572038223e-05, - "loss": 0.1423, + "loss": 0.0238, "step": 3847 }, { "epoch": 1.07, "learning_rate": 1.2860191112348087e-05, - "loss": 0.0838, + "loss": 0.1847, "step": 3848 }, { "epoch": 1.07, "learning_rate": 1.2858335652657947e-05, - "loss": 0.1373, + "loss": 0.0134, "step": 3849 }, { "epoch": 1.07, "learning_rate": 1.2856480192967809e-05, - "loss": 0.0301, + "loss": 0.0924, "step": 3850 }, { "epoch": 1.07, "learning_rate": 1.285462473327767e-05, - "loss": 0.0869, + "loss": 0.1206, "step": 3851 }, { "epoch": 1.07, "learning_rate": 1.2852769273587532e-05, - "loss": 0.0864, + "loss": 0.0614, "step": 3852 }, { "epoch": 1.07, "learning_rate": 1.2850913813897394e-05, - "loss": 0.0844, + "loss": 0.0175, "step": 3853 }, { "epoch": 1.07, "learning_rate": 1.2849058354207256e-05, - "loss": 0.1978, + "loss": 0.0853, "step": 3854 }, { "epoch": 1.07, "learning_rate": 1.2847202894517116e-05, - "loss": 0.0269, + "loss": 0.027, "step": 3855 }, { "epoch": 1.07, "learning_rate": 1.284534743482698e-05, - "loss": 0.0268, + "loss": 0.0247, "step": 3856 }, { "epoch": 1.07, "learning_rate": 1.2843491975136842e-05, - "loss": 0.2585, + "loss": 0.0509, "step": 3857 }, { "epoch": 1.07, "learning_rate": 1.2841636515446702e-05, - "loss": 0.0839, + "loss": 0.1695, "step": 3858 }, { "epoch": 1.07, "learning_rate": 1.2839781055756564e-05, - "loss": 0.1398, + "loss": 0.0277, "step": 3859 }, { "epoch": 1.07, "learning_rate": 1.2837925596066427e-05, - "loss": 0.3029, + "loss": 0.0169, "step": 3860 }, { "epoch": 1.07, "learning_rate": 1.283607013637629e-05, - "loss": 0.0827, + "loss": 0.2458, "step": 3861 }, { "epoch": 1.07, "learning_rate": 1.283421467668615e-05, - "loss": 0.0841, + "loss": 0.0186, "step": 3862 }, { "epoch": 1.08, "learning_rate": 1.2832359216996011e-05, - "loss": 0.2509, + "loss": 0.0164, "step": 3863 }, { "epoch": 1.08, "learning_rate": 1.2830503757305875e-05, - "loss": 0.0833, + "loss": 0.0108, "step": 3864 }, { "epoch": 1.08, "learning_rate": 1.2828648297615735e-05, - "loss": 0.0837, + "loss": 0.1172, "step": 3865 }, { "epoch": 1.08, "learning_rate": 1.2826792837925597e-05, - "loss": 0.1981, + "loss": 0.1065, "step": 3866 }, { "epoch": 1.08, "learning_rate": 1.2824937378235459e-05, - "loss": 0.14, + "loss": 0.1022, "step": 3867 }, { "epoch": 1.08, "learning_rate": 1.2823081918545322e-05, - "loss": 0.0838, + "loss": 0.0145, "step": 3868 }, { "epoch": 1.08, "learning_rate": 1.2821226458855182e-05, - "loss": 0.1984, + "loss": 0.1176, "step": 3869 }, { "epoch": 1.08, "learning_rate": 1.2819370999165044e-05, - "loss": 0.1961, + "loss": 0.0736, "step": 3870 }, { "epoch": 1.08, "learning_rate": 1.2817515539474905e-05, - "loss": 0.0263, + "loss": 0.1379, "step": 3871 }, { "epoch": 1.08, "learning_rate": 1.2815660079784768e-05, - "loss": 0.0265, + "loss": 0.0335, "step": 3872 }, { "epoch": 1.08, "learning_rate": 1.281380462009463e-05, - "loss": 0.0268, + "loss": 0.0763, "step": 3873 }, { "epoch": 1.08, "learning_rate": 1.2811949160404492e-05, - "loss": 0.0842, + "loss": 0.082, "step": 3874 }, { "epoch": 1.08, "learning_rate": 1.2810093700714352e-05, - "loss": 0.0823, + "loss": 0.0175, "step": 3875 }, { "epoch": 1.08, "learning_rate": 1.2808238241024216e-05, - "loss": 0.1388, + "loss": 0.0625, "step": 3876 }, { "epoch": 1.08, "learning_rate": 1.2806382781334077e-05, - "loss": 0.1928, + "loss": 0.1443, "step": 3877 }, { "epoch": 1.08, "learning_rate": 1.2804527321643938e-05, - "loss": 0.0821, + "loss": 0.0483, "step": 3878 }, { "epoch": 1.08, "learning_rate": 1.28026718619538e-05, - "loss": 0.1382, + "loss": 0.0174, "step": 3879 }, { "epoch": 1.08, "learning_rate": 1.2800816402263663e-05, - "loss": 0.1406, + "loss": 0.0218, "step": 3880 }, { "epoch": 1.08, "learning_rate": 1.2798960942573523e-05, - "loss": 0.0822, + "loss": 0.0116, "step": 3881 }, { "epoch": 1.08, "learning_rate": 1.2797105482883385e-05, - "loss": 0.0258, + "loss": 0.0469, "step": 3882 }, { "epoch": 1.08, "learning_rate": 1.2795250023193247e-05, - "loss": 0.1376, + "loss": 0.122, "step": 3883 }, { "epoch": 1.08, "learning_rate": 1.279339456350311e-05, - "loss": 0.0253, + "loss": 0.1724, "step": 3884 }, { "epoch": 1.08, "learning_rate": 1.279153910381297e-05, - "loss": 0.0817, + "loss": 0.0605, "step": 3885 }, { "epoch": 1.08, "learning_rate": 1.2789683644122833e-05, - "loss": 0.1385, + "loss": 0.1414, "step": 3886 }, { "epoch": 1.08, "learning_rate": 1.2787828184432693e-05, - "loss": 0.144, + "loss": 0.0248, "step": 3887 }, { "epoch": 1.08, "learning_rate": 1.2785972724742556e-05, - "loss": 0.1997, + "loss": 0.1055, "step": 3888 }, { "epoch": 1.08, "learning_rate": 1.2784117265052418e-05, - "loss": 0.0244, + "loss": 0.1515, "step": 3889 }, { "epoch": 1.08, "learning_rate": 1.278226180536228e-05, - "loss": 0.0807, + "loss": 0.1934, "step": 3890 }, { "epoch": 1.08, "learning_rate": 1.278040634567214e-05, - "loss": 0.2595, + "loss": 0.0493, "step": 3891 }, { "epoch": 1.08, "learning_rate": 1.2778550885982004e-05, - "loss": 0.024, + "loss": 0.0974, "step": 3892 }, { "epoch": 1.08, "learning_rate": 1.2776695426291866e-05, - "loss": 0.2547, + "loss": 0.0336, "step": 3893 }, { "epoch": 1.08, "learning_rate": 1.2774839966601726e-05, - "loss": 0.081, + "loss": 0.0602, "step": 3894 }, { "epoch": 1.08, "learning_rate": 1.2772984506911588e-05, - "loss": 0.0253, + "loss": 0.0649, "step": 3895 }, { "epoch": 1.08, "learning_rate": 1.2771129047221451e-05, - "loss": 0.2541, + "loss": 0.1293, "step": 3896 }, { "epoch": 1.08, "learning_rate": 1.2769273587531311e-05, - "loss": 0.0796, + "loss": 0.1628, "step": 3897 }, { "epoch": 1.08, "learning_rate": 1.2767418127841173e-05, - "loss": 0.2512, + "loss": 0.0193, "step": 3898 }, { "epoch": 1.09, "learning_rate": 1.2765562668151035e-05, - "loss": 0.1362, + "loss": 0.112, "step": 3899 }, { "epoch": 1.09, "learning_rate": 1.2763707208460899e-05, - "loss": 0.1398, + "loss": 0.0254, "step": 3900 }, { "epoch": 1.09, "learning_rate": 1.2761851748770759e-05, - "loss": 0.1397, + "loss": 0.0213, "step": 3901 }, { "epoch": 1.09, "learning_rate": 1.275999628908062e-05, - "loss": 0.2573, + "loss": 0.0835, "step": 3902 }, { "epoch": 1.09, "learning_rate": 1.2758140829390481e-05, - "loss": 0.1966, + "loss": 0.0217, "step": 3903 }, { "epoch": 1.09, "learning_rate": 1.2756285369700345e-05, - "loss": 0.0814, + "loss": 0.1028, "step": 3904 }, { "epoch": 1.09, "learning_rate": 1.2754429910010206e-05, - "loss": 0.0824, + "loss": 0.1077, "step": 3905 }, { "epoch": 1.09, "learning_rate": 1.2752574450320068e-05, - "loss": 0.418, + "loss": 0.0157, "step": 3906 }, { "epoch": 1.09, "learning_rate": 1.2750718990629928e-05, - "loss": 0.2499, + "loss": 0.1502, "step": 3907 }, { "epoch": 1.09, "learning_rate": 1.2748863530939792e-05, - "loss": 0.1438, + "loss": 0.0275, "step": 3908 }, { "epoch": 1.09, "learning_rate": 1.2747008071249654e-05, - "loss": 0.0847, + "loss": 0.1275, "step": 3909 }, { "epoch": 1.09, "learning_rate": 1.2745152611559514e-05, - "loss": 0.0836, + "loss": 0.0795, "step": 3910 }, { "epoch": 1.09, "learning_rate": 1.2743297151869376e-05, - "loss": 0.1394, + "loss": 0.1065, "step": 3911 }, { "epoch": 1.09, "learning_rate": 1.274144169217924e-05, - "loss": 0.087, + "loss": 0.0103, "step": 3912 }, { "epoch": 1.09, "learning_rate": 1.2739586232489101e-05, - "loss": 0.1369, + "loss": 0.09, "step": 3913 }, { "epoch": 1.09, "learning_rate": 1.2737730772798962e-05, - "loss": 0.1373, + "loss": 0.0876, "step": 3914 }, { "epoch": 1.09, "learning_rate": 1.2735875313108823e-05, - "loss": 0.0836, + "loss": 0.0139, "step": 3915 }, { "epoch": 1.09, "learning_rate": 1.2734019853418687e-05, - "loss": 0.1405, + "loss": 0.0517, "step": 3916 }, { "epoch": 1.09, "learning_rate": 1.2732164393728547e-05, - "loss": 0.032, + "loss": 0.012, "step": 3917 }, { "epoch": 1.09, "learning_rate": 1.2730308934038409e-05, - "loss": 0.0855, + "loss": 0.1016, "step": 3918 }, { "epoch": 1.09, "learning_rate": 1.2728453474348271e-05, - "loss": 0.0842, + "loss": 0.1538, "step": 3919 }, { "epoch": 1.09, "learning_rate": 1.2726598014658133e-05, - "loss": 0.0866, + "loss": 0.0757, "step": 3920 }, { "epoch": 1.09, "learning_rate": 1.2724742554967995e-05, - "loss": 0.1406, + "loss": 0.0484, "step": 3921 }, { "epoch": 1.09, "learning_rate": 1.2722887095277857e-05, - "loss": 0.0308, + "loss": 0.12, "step": 3922 }, { "epoch": 1.09, "learning_rate": 1.2721031635587717e-05, - "loss": 0.0861, + "loss": 0.1204, "step": 3923 }, { "epoch": 1.09, "learning_rate": 1.271917617589758e-05, - "loss": 0.1423, + "loss": 0.056, "step": 3924 }, { "epoch": 1.09, "learning_rate": 1.2717320716207442e-05, - "loss": 0.083, + "loss": 0.1044, "step": 3925 }, { "epoch": 1.09, "learning_rate": 1.2715465256517302e-05, - "loss": 0.085, + "loss": 0.0527, "step": 3926 }, { "epoch": 1.09, "learning_rate": 1.2713609796827164e-05, - "loss": 0.2454, + "loss": 0.0567, "step": 3927 }, { "epoch": 1.09, "learning_rate": 1.2711754337137028e-05, - "loss": 0.2496, + "loss": 0.1662, "step": 3928 }, { "epoch": 1.09, "learning_rate": 1.270989887744689e-05, - "loss": 0.1406, + "loss": 0.1695, "step": 3929 }, { "epoch": 1.09, "learning_rate": 1.270804341775675e-05, - "loss": 0.028, + "loss": 0.1562, "step": 3930 }, { "epoch": 1.09, "learning_rate": 1.2706187958066612e-05, - "loss": 0.1449, + "loss": 0.0612, "step": 3931 }, { "epoch": 1.09, "learning_rate": 1.2704332498376475e-05, - "loss": 0.0855, + "loss": 0.1055, "step": 3932 }, { "epoch": 1.09, "learning_rate": 1.2702477038686335e-05, - "loss": 0.0279, + "loss": 0.0371, "step": 3933 }, { "epoch": 1.09, "learning_rate": 1.2700621578996197e-05, - "loss": 0.2471, + "loss": 0.0331, "step": 3934 }, { "epoch": 1.1, "learning_rate": 1.2698766119306059e-05, - "loss": 0.2498, + "loss": 0.1566, "step": 3935 }, { "epoch": 1.1, "learning_rate": 1.2696910659615923e-05, - "loss": 0.0825, + "loss": 0.13, "step": 3936 }, { "epoch": 1.1, "learning_rate": 1.2695055199925783e-05, - "loss": 0.0865, + "loss": 0.0281, "step": 3937 }, { "epoch": 1.1, "learning_rate": 1.2693199740235645e-05, - "loss": 0.1422, + "loss": 0.0566, "step": 3938 }, { "epoch": 1.1, "learning_rate": 1.2691344280545505e-05, - "loss": 0.2506, + "loss": 0.029, "step": 3939 }, { "epoch": 1.1, "learning_rate": 1.2689488820855369e-05, - "loss": 0.0865, + "loss": 0.024, "step": 3940 }, { "epoch": 1.1, "learning_rate": 1.268763336116523e-05, - "loss": 0.0821, + "loss": 0.083, "step": 3941 }, { "epoch": 1.1, "learning_rate": 1.2685777901475092e-05, - "loss": 0.2468, + "loss": 0.0692, "step": 3942 }, { "epoch": 1.1, "learning_rate": 1.2683922441784952e-05, - "loss": 0.0838, + "loss": 0.0987, "step": 3943 }, { "epoch": 1.1, "learning_rate": 1.2682066982094816e-05, - "loss": 0.1434, + "loss": 0.0196, "step": 3944 }, { "epoch": 1.1, "learning_rate": 1.2680211522404678e-05, - "loss": 0.1375, + "loss": 0.0617, "step": 3945 }, { "epoch": 1.1, "learning_rate": 1.2678356062714538e-05, - "loss": 0.029, + "loss": 0.0905, "step": 3946 }, { "epoch": 1.1, "learning_rate": 1.26765006030244e-05, - "loss": 0.1413, + "loss": 0.1167, "step": 3947 }, { "epoch": 1.1, "learning_rate": 1.2674645143334262e-05, - "loss": 0.1449, + "loss": 0.0661, "step": 3948 }, { "epoch": 1.1, "learning_rate": 1.2672789683644124e-05, - "loss": 0.0283, + "loss": 0.0247, "step": 3949 }, { "epoch": 1.1, "learning_rate": 1.2670934223953986e-05, - "loss": 0.2555, + "loss": 0.1133, "step": 3950 }, { "epoch": 1.1, "learning_rate": 1.2669078764263847e-05, - "loss": 0.0841, + "loss": 0.0835, "step": 3951 }, { "epoch": 1.1, "learning_rate": 1.2667223304573708e-05, - "loss": 0.0869, + "loss": 0.0121, "step": 3952 }, { "epoch": 1.1, "learning_rate": 1.2665367844883571e-05, - "loss": 0.0834, + "loss": 0.0513, "step": 3953 }, { "epoch": 1.1, "learning_rate": 1.2663512385193433e-05, - "loss": 0.2489, + "loss": 0.0969, "step": 3954 }, { "epoch": 1.1, "learning_rate": 1.2661656925503293e-05, - "loss": 0.1403, + "loss": 0.1016, "step": 3955 }, { "epoch": 1.1, "learning_rate": 1.2659801465813155e-05, - "loss": 0.2516, + "loss": 0.1051, "step": 3956 }, { "epoch": 1.1, "learning_rate": 1.2657946006123019e-05, - "loss": 0.1913, + "loss": 0.1122, "step": 3957 }, { "epoch": 1.1, "learning_rate": 1.265609054643288e-05, - "loss": 0.2437, + "loss": 0.1417, "step": 3958 }, { "epoch": 1.1, "learning_rate": 1.265423508674274e-05, - "loss": 0.1883, + "loss": 0.0591, "step": 3959 }, { "epoch": 1.1, "learning_rate": 1.2652379627052603e-05, - "loss": 0.1971, + "loss": 0.1789, "step": 3960 }, { "epoch": 1.1, "learning_rate": 1.2650524167362466e-05, - "loss": 0.0842, + "loss": 0.0173, "step": 3961 }, { "epoch": 1.1, "learning_rate": 1.2648668707672326e-05, - "loss": 0.0848, + "loss": 0.0188, "step": 3962 }, { @@ -23788,40912 +23788,40912 @@ { "epoch": 1.1, "learning_rate": 1.264495778829205e-05, - "loss": 0.0879, + "loss": 0.0825, "step": 3964 }, { "epoch": 1.1, "learning_rate": 1.2643102328601914e-05, - "loss": 0.084, + "loss": 0.1889, "step": 3965 }, { "epoch": 1.1, "learning_rate": 1.2641246868911774e-05, - "loss": 0.138, + "loss": 0.0322, "step": 3966 }, { "epoch": 1.1, "learning_rate": 1.2639391409221636e-05, - "loss": 0.0324, + "loss": 0.0707, "step": 3967 }, { "epoch": 1.1, "learning_rate": 1.2637535949531496e-05, - "loss": 0.0857, + "loss": 0.1363, "step": 3968 }, { "epoch": 1.1, "learning_rate": 1.263568048984136e-05, - "loss": 0.0866, + "loss": 0.0275, "step": 3969 }, { "epoch": 1.1, "learning_rate": 1.2633825030151221e-05, - "loss": 0.1411, + "loss": 0.0217, "step": 3970 }, { "epoch": 1.11, "learning_rate": 1.2631969570461083e-05, - "loss": 0.1366, + "loss": 0.0351, "step": 3971 }, { "epoch": 1.11, "learning_rate": 1.2630114110770943e-05, - "loss": 0.0312, + "loss": 0.0951, "step": 3972 }, { "epoch": 1.11, "learning_rate": 1.2628258651080807e-05, - "loss": 0.1926, + "loss": 0.1691, "step": 3973 }, { "epoch": 1.11, "learning_rate": 1.2626403191390669e-05, - "loss": 0.0304, + "loss": 0.0732, "step": 3974 }, { "epoch": 1.11, "learning_rate": 1.2624547731700529e-05, - "loss": 0.0845, + "loss": 0.0552, "step": 3975 }, { "epoch": 1.11, "learning_rate": 1.262269227201039e-05, - "loss": 0.1414, + "loss": 0.0224, "step": 3976 }, { "epoch": 1.11, "learning_rate": 1.2620836812320254e-05, - "loss": 0.0849, + "loss": 0.0606, "step": 3977 }, { "epoch": 1.11, "learning_rate": 1.2618981352630115e-05, - "loss": 0.0822, + "loss": 0.0609, "step": 3978 }, { "epoch": 1.11, "learning_rate": 1.2617125892939976e-05, - "loss": 0.082, + "loss": 0.0175, "step": 3979 }, { "epoch": 1.11, "learning_rate": 1.2615270433249838e-05, - "loss": 0.084, + "loss": 0.0988, "step": 3980 }, { "epoch": 1.11, "learning_rate": 1.2613414973559702e-05, - "loss": 0.2545, + "loss": 0.0756, "step": 3981 }, { "epoch": 1.11, "learning_rate": 1.2611559513869562e-05, - "loss": 0.1409, + "loss": 0.0891, "step": 3982 }, { "epoch": 1.11, "learning_rate": 1.2609704054179424e-05, - "loss": 0.0844, + "loss": 0.022, "step": 3983 }, { "epoch": 1.11, "learning_rate": 1.2607848594489284e-05, - "loss": 0.3597, + "loss": 0.0497, "step": 3984 }, { "epoch": 1.11, "learning_rate": 1.2605993134799148e-05, - "loss": 0.0848, + "loss": 0.0301, "step": 3985 }, { "epoch": 1.11, "learning_rate": 1.260413767510901e-05, - "loss": 0.0805, + "loss": 0.0569, "step": 3986 }, { "epoch": 1.11, "learning_rate": 1.2602282215418871e-05, - "loss": 0.0843, + "loss": 0.0132, "step": 3987 }, { "epoch": 1.11, "learning_rate": 1.2600426755728732e-05, - "loss": 0.2019, + "loss": 0.1112, "step": 3988 }, { "epoch": 1.11, "learning_rate": 1.2598571296038595e-05, - "loss": 0.0816, + "loss": 0.0157, "step": 3989 }, { "epoch": 1.11, "learning_rate": 1.2596715836348457e-05, - "loss": 0.1409, + "loss": 0.1442, "step": 3990 }, { "epoch": 1.11, "learning_rate": 1.2594860376658317e-05, - "loss": 0.0809, + "loss": 0.0559, "step": 3991 }, { "epoch": 1.11, "learning_rate": 1.2593004916968179e-05, - "loss": 0.1944, + "loss": 0.0143, "step": 3992 }, { "epoch": 1.11, "learning_rate": 1.2591149457278043e-05, - "loss": 0.1948, + "loss": 0.0684, "step": 3993 }, { "epoch": 1.11, "learning_rate": 1.2589293997587904e-05, - "loss": 0.1401, + "loss": 0.0182, "step": 3994 }, { "epoch": 1.11, "learning_rate": 1.2587438537897765e-05, - "loss": 0.3086, + "loss": 0.1145, "step": 3995 }, { "epoch": 1.11, "learning_rate": 1.2585583078207626e-05, - "loss": 0.1425, + "loss": 0.0266, "step": 3996 }, { "epoch": 1.11, "learning_rate": 1.258372761851749e-05, - "loss": 0.0814, + "loss": 0.062, "step": 3997 }, { "epoch": 1.11, "learning_rate": 1.258187215882735e-05, - "loss": 0.3042, + "loss": 0.0732, "step": 3998 }, { "epoch": 1.11, "learning_rate": 1.2580016699137212e-05, - "loss": 0.3029, + "loss": 0.104, "step": 3999 }, { "epoch": 1.11, "learning_rate": 1.2578161239447074e-05, - "loss": 0.087, + "loss": 0.1321, "step": 4000 }, { "epoch": 1.11, "learning_rate": 1.2576305779756936e-05, - "loss": 0.0853, + "loss": 0.1142, "step": 4001 }, { "epoch": 1.11, "learning_rate": 1.2574450320066798e-05, - "loss": 0.2455, + "loss": 0.0712, "step": 4002 }, { "epoch": 1.11, "learning_rate": 1.257259486037666e-05, - "loss": 0.1883, + "loss": 0.141, "step": 4003 }, { "epoch": 1.11, "learning_rate": 1.257073940068652e-05, - "loss": 0.0853, + "loss": 0.055, "step": 4004 }, { "epoch": 1.11, "learning_rate": 1.2568883940996383e-05, - "loss": 0.0861, + "loss": 0.1616, "step": 4005 }, { "epoch": 1.11, "learning_rate": 1.2567028481306245e-05, - "loss": 0.1346, + "loss": 0.1061, "step": 4006 }, { "epoch": 1.12, "learning_rate": 1.2565173021616105e-05, - "loss": 0.1939, + "loss": 0.121, "step": 4007 }, { "epoch": 1.12, "learning_rate": 1.2563317561925967e-05, - "loss": 0.1393, + "loss": 0.1369, "step": 4008 }, { "epoch": 1.12, "learning_rate": 1.256146210223583e-05, - "loss": 0.0353, + "loss": 0.0636, "step": 4009 }, { "epoch": 1.12, "learning_rate": 1.2559606642545693e-05, - "loss": 0.1431, + "loss": 0.0545, "step": 4010 }, { "epoch": 1.12, "learning_rate": 1.2557751182855553e-05, - "loss": 0.0902, + "loss": 0.0153, "step": 4011 }, { "epoch": 1.12, "learning_rate": 1.2555895723165415e-05, - "loss": 0.0887, + "loss": 0.0238, "step": 4012 }, { "epoch": 1.12, "learning_rate": 1.2554040263475278e-05, - "loss": 0.2896, + "loss": 0.033, "step": 4013 }, { "epoch": 1.12, "learning_rate": 1.2552184803785138e-05, - "loss": 0.0365, + "loss": 0.0568, "step": 4014 }, { "epoch": 1.12, "learning_rate": 1.2550329344095e-05, - "loss": 0.1396, + "loss": 0.0187, "step": 4015 }, { "epoch": 1.12, "learning_rate": 1.2548473884404862e-05, - "loss": 0.1403, + "loss": 0.139, "step": 4016 }, { "epoch": 1.12, "learning_rate": 1.2546618424714724e-05, - "loss": 0.0883, + "loss": 0.1013, "step": 4017 }, { "epoch": 1.12, "learning_rate": 1.2544762965024586e-05, - "loss": 0.1941, + "loss": 0.1044, "step": 4018 }, { "epoch": 1.12, "learning_rate": 1.2542907505334448e-05, - "loss": 0.1943, + "loss": 0.097, "step": 4019 }, { "epoch": 1.12, "learning_rate": 1.2541052045644308e-05, - "loss": 0.1401, + "loss": 0.0682, "step": 4020 }, { "epoch": 1.12, "learning_rate": 1.2539196585954172e-05, - "loss": 0.0322, + "loss": 0.2452, "step": 4021 }, { "epoch": 1.12, "learning_rate": 1.2537341126264033e-05, - "loss": 0.1897, + "loss": 0.0163, "step": 4022 }, { "epoch": 1.12, "learning_rate": 1.2535485666573894e-05, - "loss": 0.2456, + "loss": 0.0614, "step": 4023 }, { "epoch": 1.12, "learning_rate": 1.2533630206883755e-05, - "loss": 0.1397, + "loss": 0.1819, "step": 4024 }, { "epoch": 1.12, "learning_rate": 1.2531774747193619e-05, - "loss": 0.087, + "loss": 0.0883, "step": 4025 }, { "epoch": 1.12, "learning_rate": 1.2529919287503481e-05, - "loss": 0.14, + "loss": 0.1542, "step": 4026 }, { "epoch": 1.12, "learning_rate": 1.2528063827813341e-05, - "loss": 0.3006, + "loss": 0.0239, "step": 4027 }, { "epoch": 1.12, "learning_rate": 1.2526208368123203e-05, - "loss": 0.3029, + "loss": 0.094, "step": 4028 }, { "epoch": 1.12, "learning_rate": 1.2524352908433067e-05, - "loss": 0.0884, + "loss": 0.0526, "step": 4029 }, { "epoch": 1.12, "learning_rate": 1.2522497448742927e-05, - "loss": 0.0343, + "loss": 0.0236, "step": 4030 }, { "epoch": 1.12, "learning_rate": 1.2520641989052789e-05, - "loss": 0.0888, + "loss": 0.0184, "step": 4031 }, { "epoch": 1.12, "learning_rate": 1.251878652936265e-05, - "loss": 0.1869, + "loss": 0.0161, "step": 4032 }, { "epoch": 1.12, "learning_rate": 1.2516931069672514e-05, - "loss": 0.0343, + "loss": 0.2993, "step": 4033 }, { "epoch": 1.12, "learning_rate": 1.2515075609982374e-05, - "loss": 0.1412, + "loss": 0.0549, "step": 4034 }, { "epoch": 1.12, "learning_rate": 1.2513220150292236e-05, - "loss": 0.1432, + "loss": 0.0565, "step": 4035 }, { "epoch": 1.12, "learning_rate": 1.2511364690602096e-05, - "loss": 0.1413, + "loss": 0.0161, "step": 4036 }, { "epoch": 1.12, "learning_rate": 1.250950923091196e-05, - "loss": 0.0857, + "loss": 0.1148, "step": 4037 }, { "epoch": 1.12, "learning_rate": 1.2507653771221822e-05, - "loss": 0.033, + "loss": 0.1414, "step": 4038 }, { "epoch": 1.12, "learning_rate": 1.2505798311531684e-05, - "loss": 0.0871, + "loss": 0.0171, "step": 4039 }, { "epoch": 1.12, "learning_rate": 1.2503942851841544e-05, - "loss": 0.1392, + "loss": 0.1172, "step": 4040 }, { "epoch": 1.12, "learning_rate": 1.2502087392151407e-05, - "loss": 0.1983, + "loss": 0.0479, "step": 4041 }, { "epoch": 1.12, "learning_rate": 1.2500231932461269e-05, - "loss": 0.2452, + "loss": 0.1148, "step": 4042 }, { "epoch": 1.13, "learning_rate": 1.249837647277113e-05, - "loss": 0.2973, + "loss": 0.0599, "step": 4043 }, { "epoch": 1.13, "learning_rate": 1.2496521013080991e-05, - "loss": 0.1351, + "loss": 0.1059, "step": 4044 }, { "epoch": 1.13, "learning_rate": 1.2494665553390855e-05, - "loss": 0.2538, + "loss": 0.0118, "step": 4045 }, { "epoch": 1.13, "learning_rate": 1.2492810093700715e-05, - "loss": 0.2487, + "loss": 0.0911, "step": 4046 }, { "epoch": 1.13, "learning_rate": 1.2490954634010577e-05, - "loss": 0.1439, + "loss": 0.0133, "step": 4047 }, { "epoch": 1.13, "learning_rate": 1.2489099174320439e-05, - "loss": 0.0866, + "loss": 0.1291, "step": 4048 }, { "epoch": 1.13, "learning_rate": 1.2487243714630302e-05, - "loss": 0.0337, + "loss": 0.0122, "step": 4049 }, { "epoch": 1.13, "learning_rate": 1.2485388254940162e-05, - "loss": 0.0846, + "loss": 0.0992, "step": 4050 }, { "epoch": 1.13, "learning_rate": 1.2483532795250024e-05, - "loss": 0.0835, + "loss": 0.0154, "step": 4051 }, { "epoch": 1.13, "learning_rate": 1.2481677335559884e-05, - "loss": 0.0328, + "loss": 0.0668, "step": 4052 }, { "epoch": 1.13, "learning_rate": 1.2479821875869748e-05, - "loss": 0.1407, + "loss": 0.2084, "step": 4053 }, { "epoch": 1.13, "learning_rate": 1.247796641617961e-05, - "loss": 0.2486, + "loss": 0.024, "step": 4054 }, { "epoch": 1.13, "learning_rate": 1.2476110956489472e-05, - "loss": 0.1967, + "loss": 0.017, "step": 4055 }, { "epoch": 1.13, "learning_rate": 1.2474255496799332e-05, - "loss": 0.1952, + "loss": 0.0188, "step": 4056 }, { "epoch": 1.13, "learning_rate": 1.2472400037109196e-05, - "loss": 0.1884, + "loss": 0.1048, "step": 4057 }, { "epoch": 1.13, "learning_rate": 1.2470544577419057e-05, - "loss": 0.0883, + "loss": 0.0585, "step": 4058 }, { "epoch": 1.13, "learning_rate": 1.2468689117728918e-05, - "loss": 0.1375, + "loss": 0.0167, "step": 4059 }, { "epoch": 1.13, "learning_rate": 1.246683365803878e-05, - "loss": 0.087, + "loss": 0.2807, "step": 4060 }, { "epoch": 1.13, "learning_rate": 1.2464978198348643e-05, - "loss": 0.1417, + "loss": 0.0367, "step": 4061 }, { "epoch": 1.13, "learning_rate": 1.2463122738658505e-05, - "loss": 0.0846, + "loss": 0.0165, "step": 4062 }, { "epoch": 1.13, "learning_rate": 1.2461267278968365e-05, - "loss": 0.0845, + "loss": 0.2478, "step": 4063 }, { "epoch": 1.13, "learning_rate": 1.2459411819278227e-05, - "loss": 0.0845, + "loss": 0.0184, "step": 4064 }, { "epoch": 1.13, "learning_rate": 1.245755635958809e-05, - "loss": 0.0839, + "loss": 0.0742, "step": 4065 }, { "epoch": 1.13, "learning_rate": 1.245570089989795e-05, - "loss": 0.1424, + "loss": 0.1176, "step": 4066 }, { "epoch": 1.13, "learning_rate": 1.2453845440207813e-05, - "loss": 0.0841, + "loss": 0.1826, "step": 4067 }, { "epoch": 1.13, "learning_rate": 1.2451989980517674e-05, - "loss": 0.0313, + "loss": 0.0249, "step": 4068 }, { "epoch": 1.13, "learning_rate": 1.2450134520827536e-05, - "loss": 0.1903, + "loss": 0.0482, "step": 4069 }, { "epoch": 1.13, "learning_rate": 1.2448279061137398e-05, - "loss": 0.1405, + "loss": 0.0691, "step": 4070 }, { "epoch": 1.13, "learning_rate": 1.244642360144726e-05, - "loss": 0.1395, + "loss": 0.1187, "step": 4071 }, { "epoch": 1.13, "learning_rate": 1.244456814175712e-05, - "loss": 0.0844, + "loss": 0.0259, "step": 4072 }, { "epoch": 1.13, "learning_rate": 1.2442712682066984e-05, - "loss": 0.1338, + "loss": 0.0683, "step": 4073 }, { "epoch": 1.13, "learning_rate": 1.2440857222376846e-05, - "loss": 0.0829, + "loss": 0.1248, "step": 4074 }, { "epoch": 1.13, "learning_rate": 1.2439001762686706e-05, - "loss": 0.0867, + "loss": 0.0235, "step": 4075 }, { "epoch": 1.13, "learning_rate": 1.2437146302996568e-05, - "loss": 0.0862, + "loss": 0.136, "step": 4076 }, { "epoch": 1.13, "learning_rate": 1.2435290843306431e-05, - "loss": 0.0823, + "loss": 0.0667, "step": 4077 }, { "epoch": 1.13, "learning_rate": 1.2433435383616293e-05, - "loss": 0.0821, + "loss": 0.0595, "step": 4078 }, { "epoch": 1.14, "learning_rate": 1.2431579923926153e-05, - "loss": 0.1989, + "loss": 0.2757, "step": 4079 }, { "epoch": 1.14, "learning_rate": 1.2429724464236015e-05, - "loss": 0.1989, + "loss": 0.022, "step": 4080 }, { "epoch": 1.14, "learning_rate": 1.2427869004545879e-05, - "loss": 0.0265, + "loss": 0.1444, "step": 4081 }, { "epoch": 1.14, "learning_rate": 1.2426013544855739e-05, - "loss": 0.0267, + "loss": 0.019, "step": 4082 }, { "epoch": 1.14, "learning_rate": 1.24241580851656e-05, - "loss": 0.3136, + "loss": 0.1694, "step": 4083 }, { "epoch": 1.14, "learning_rate": 1.2422302625475463e-05, - "loss": 0.1379, + "loss": 0.0961, "step": 4084 }, { "epoch": 1.14, "learning_rate": 1.2420447165785326e-05, - "loss": 0.08, + "loss": 0.1071, "step": 4085 }, { "epoch": 1.14, "learning_rate": 1.2418591706095186e-05, - "loss": 0.1411, + "loss": 0.1076, "step": 4086 }, { "epoch": 1.14, "learning_rate": 1.2416736246405048e-05, - "loss": 0.1416, + "loss": 0.0863, "step": 4087 }, { "epoch": 1.14, "learning_rate": 1.2414880786714908e-05, - "loss": 0.1377, + "loss": 0.1457, "step": 4088 }, { "epoch": 1.14, "learning_rate": 1.2413025327024772e-05, - "loss": 0.0833, + "loss": 0.1406, "step": 4089 }, { "epoch": 1.14, "learning_rate": 1.2411169867334634e-05, - "loss": 0.201, + "loss": 0.031, "step": 4090 }, { "epoch": 1.14, "learning_rate": 1.2409314407644496e-05, - "loss": 0.1388, + "loss": 0.1495, "step": 4091 }, { "epoch": 1.14, "learning_rate": 1.2407458947954356e-05, - "loss": 0.1975, + "loss": 0.0633, "step": 4092 }, { "epoch": 1.14, "learning_rate": 1.240560348826422e-05, - "loss": 0.0845, + "loss": 0.0267, "step": 4093 }, { "epoch": 1.14, "learning_rate": 1.2403748028574081e-05, - "loss": 0.1944, + "loss": 0.2095, "step": 4094 }, { "epoch": 1.14, "learning_rate": 1.2401892568883941e-05, - "loss": 0.0269, + "loss": 0.0307, "step": 4095 }, { "epoch": 1.14, "learning_rate": 1.2400037109193803e-05, - "loss": 0.0824, + "loss": 0.0688, "step": 4096 }, { "epoch": 1.14, "learning_rate": 1.2398181649503667e-05, - "loss": 0.0279, + "loss": 0.0723, "step": 4097 }, { "epoch": 1.14, "learning_rate": 1.2396326189813527e-05, - "loss": 0.1936, + "loss": 0.0276, "step": 4098 }, { "epoch": 1.14, "learning_rate": 1.2394470730123389e-05, - "loss": 0.2559, + "loss": 0.0596, "step": 4099 }, { "epoch": 1.14, "learning_rate": 1.2392615270433251e-05, - "loss": 0.0836, + "loss": 0.0981, "step": 4100 }, { "epoch": 1.14, "learning_rate": 1.2390759810743111e-05, - "loss": 0.2516, + "loss": 0.0621, "step": 4101 }, { "epoch": 1.14, "learning_rate": 1.2388904351052975e-05, - "loss": 0.1904, + "loss": 0.0694, "step": 4102 }, { "epoch": 1.14, "learning_rate": 1.2387048891362836e-05, - "loss": 0.0841, + "loss": 0.0205, "step": 4103 }, { "epoch": 1.14, "learning_rate": 1.2385193431672697e-05, - "loss": 0.139, + "loss": 0.0235, "step": 4104 }, { "epoch": 1.14, "learning_rate": 1.2383337971982559e-05, - "loss": 0.1399, + "loss": 0.153, "step": 4105 }, { "epoch": 1.14, "learning_rate": 1.2381482512292422e-05, - "loss": 0.1984, + "loss": 0.158, "step": 4106 }, { "epoch": 1.14, "learning_rate": 1.2379627052602284e-05, - "loss": 0.0855, + "loss": 0.0106, "step": 4107 }, { "epoch": 1.14, "learning_rate": 1.2377771592912144e-05, - "loss": 0.0857, + "loss": 0.0111, "step": 4108 }, { "epoch": 1.14, "learning_rate": 1.2375916133222006e-05, - "loss": 0.2425, + "loss": 0.0754, "step": 4109 }, { "epoch": 1.14, "learning_rate": 1.237406067353187e-05, - "loss": 0.1917, + "loss": 0.0498, "step": 4110 }, { "epoch": 1.14, "learning_rate": 1.237220521384173e-05, - "loss": 0.0853, + "loss": 0.1057, "step": 4111 }, { "epoch": 1.14, "learning_rate": 1.2370349754151592e-05, - "loss": 0.0297, + "loss": 0.0917, "step": 4112 }, { "epoch": 1.14, "learning_rate": 1.2368494294461453e-05, - "loss": 0.2473, + "loss": 0.0554, "step": 4113 }, { "epoch": 1.15, "learning_rate": 1.2366638834771317e-05, - "loss": 0.0859, + "loss": 0.111, "step": 4114 }, { "epoch": 1.15, "learning_rate": 1.2364783375081177e-05, - "loss": 0.1929, + "loss": 0.0941, "step": 4115 }, { "epoch": 1.15, "learning_rate": 1.2362927915391039e-05, - "loss": 0.1918, + "loss": 0.0519, "step": 4116 }, { "epoch": 1.15, "learning_rate": 1.23610724557009e-05, - "loss": 0.1902, + "loss": 0.0079, "step": 4117 }, { "epoch": 1.15, "learning_rate": 1.2359216996010763e-05, - "loss": 0.1986, + "loss": 0.0987, "step": 4118 }, { "epoch": 1.15, "learning_rate": 1.2357361536320625e-05, - "loss": 0.1916, + "loss": 0.1292, "step": 4119 }, { "epoch": 1.15, "learning_rate": 1.2355506076630487e-05, - "loss": 0.195, + "loss": 0.0482, "step": 4120 }, { "epoch": 1.15, "learning_rate": 1.2353650616940347e-05, - "loss": 0.192, + "loss": 0.0169, "step": 4121 }, { "epoch": 1.15, "learning_rate": 1.235179515725021e-05, - "loss": 0.0882, + "loss": 0.0213, "step": 4122 }, { "epoch": 1.15, "learning_rate": 1.2349939697560072e-05, - "loss": 0.0353, + "loss": 0.0892, "step": 4123 }, { "epoch": 1.15, "learning_rate": 1.2348084237869932e-05, - "loss": 0.0342, + "loss": 0.0566, "step": 4124 }, { "epoch": 1.15, "learning_rate": 1.2346228778179794e-05, - "loss": 0.1367, + "loss": 0.0167, "step": 4125 }, { "epoch": 1.15, "learning_rate": 1.2344373318489658e-05, - "loss": 0.0351, + "loss": 0.0136, "step": 4126 }, { "epoch": 1.15, "learning_rate": 1.2342517858799518e-05, - "loss": 0.035, + "loss": 0.0535, "step": 4127 }, { "epoch": 1.15, "learning_rate": 1.234066239910938e-05, - "loss": 0.1391, + "loss": 0.0102, "step": 4128 }, { "epoch": 1.15, "learning_rate": 1.2338806939419242e-05, - "loss": 0.0339, + "loss": 0.1009, "step": 4129 }, { "epoch": 1.15, "learning_rate": 1.2336951479729105e-05, - "loss": 0.1426, + "loss": 0.0161, "step": 4130 }, { "epoch": 1.15, "learning_rate": 1.2335096020038965e-05, - "loss": 0.1396, + "loss": 0.0213, "step": 4131 }, { "epoch": 1.15, "learning_rate": 1.2333240560348827e-05, - "loss": 0.0842, + "loss": 0.1543, "step": 4132 }, { "epoch": 1.15, "learning_rate": 1.2331385100658687e-05, - "loss": 0.0307, + "loss": 0.0528, "step": 4133 }, { "epoch": 1.15, "learning_rate": 1.2329529640968551e-05, - "loss": 0.1972, + "loss": 0.3578, "step": 4134 }, { "epoch": 1.15, "learning_rate": 1.2327674181278413e-05, - "loss": 0.249, + "loss": 0.0732, "step": 4135 }, { "epoch": 1.15, "learning_rate": 1.2325818721588275e-05, - "loss": 0.1449, + "loss": 0.0844, "step": 4136 }, { "epoch": 1.15, "learning_rate": 1.2323963261898135e-05, - "loss": 0.0871, + "loss": 0.0074, "step": 4137 }, { "epoch": 1.15, "learning_rate": 1.2322107802207999e-05, - "loss": 0.1952, + "loss": 0.0105, "step": 4138 }, { "epoch": 1.15, "learning_rate": 1.232025234251786e-05, - "loss": 0.084, + "loss": 0.0065, "step": 4139 }, { "epoch": 1.15, "learning_rate": 1.231839688282772e-05, - "loss": 0.1372, + "loss": 0.0221, "step": 4140 }, { "epoch": 1.15, "learning_rate": 1.2316541423137582e-05, - "loss": 0.2547, + "loss": 0.1892, "step": 4141 }, { "epoch": 1.15, "learning_rate": 1.2314685963447446e-05, - "loss": 0.243, + "loss": 0.0101, "step": 4142 }, { "epoch": 1.15, "learning_rate": 1.2312830503757306e-05, - "loss": 0.138, + "loss": 0.0374, "step": 4143 }, { "epoch": 1.15, "learning_rate": 1.2310975044067168e-05, - "loss": 0.1422, + "loss": 0.1085, "step": 4144 }, { "epoch": 1.15, "learning_rate": 1.230911958437703e-05, - "loss": 0.0848, + "loss": 0.04, "step": 4145 }, { "epoch": 1.15, "learning_rate": 1.2307264124686894e-05, - "loss": 0.1937, + "loss": 0.0569, "step": 4146 }, { "epoch": 1.15, "learning_rate": 1.2305408664996754e-05, - "loss": 0.1909, + "loss": 0.0099, "step": 4147 }, { "epoch": 1.15, "learning_rate": 1.2303553205306616e-05, - "loss": 0.0843, + "loss": 0.0193, "step": 4148 }, { "epoch": 1.15, "learning_rate": 1.2301697745616476e-05, - "loss": 0.0834, + "loss": 0.0745, "step": 4149 }, { "epoch": 1.16, "learning_rate": 1.229984228592634e-05, - "loss": 0.085, + "loss": 0.146, "step": 4150 }, { "epoch": 1.16, "learning_rate": 1.2297986826236201e-05, - "loss": 0.1903, + "loss": 0.0299, "step": 4151 }, { "epoch": 1.16, "learning_rate": 1.2296131366546063e-05, - "loss": 0.0859, + "loss": 0.0149, "step": 4152 }, { "epoch": 1.16, "learning_rate": 1.2294275906855923e-05, - "loss": 0.0835, + "loss": 0.0521, "step": 4153 }, { "epoch": 1.16, "learning_rate": 1.2292420447165787e-05, - "loss": 0.0843, + "loss": 0.0678, "step": 4154 }, { "epoch": 1.16, "learning_rate": 1.2290564987475649e-05, - "loss": 0.1427, + "loss": 0.0097, "step": 4155 }, { "epoch": 1.16, "learning_rate": 1.2288709527785509e-05, - "loss": 0.1927, + "loss": 0.0576, "step": 4156 }, { "epoch": 1.16, "learning_rate": 1.228685406809537e-05, - "loss": 0.0842, + "loss": 0.0269, "step": 4157 }, { "epoch": 1.16, "learning_rate": 1.2284998608405234e-05, - "loss": 0.1943, + "loss": 0.1185, "step": 4158 }, { "epoch": 1.16, "learning_rate": 1.2283143148715096e-05, - "loss": 0.0872, + "loss": 0.0089, "step": 4159 }, { "epoch": 1.16, "learning_rate": 1.2281287689024956e-05, - "loss": 0.2461, + "loss": 0.1063, "step": 4160 }, { "epoch": 1.16, "learning_rate": 1.2279432229334818e-05, - "loss": 0.1871, + "loss": 0.2197, "step": 4161 }, { "epoch": 1.16, "learning_rate": 1.2277576769644682e-05, - "loss": 0.0306, + "loss": 0.1119, "step": 4162 }, { "epoch": 1.16, "learning_rate": 1.2275721309954542e-05, - "loss": 0.1415, + "loss": 0.1628, "step": 4163 }, { "epoch": 1.16, "learning_rate": 1.2273865850264404e-05, - "loss": 0.2521, + "loss": 0.1643, "step": 4164 }, { "epoch": 1.16, "learning_rate": 1.2272010390574266e-05, - "loss": 0.1424, + "loss": 0.0687, "step": 4165 }, { "epoch": 1.16, "learning_rate": 1.2270154930884128e-05, - "loss": 0.031, + "loss": 0.0442, "step": 4166 }, { "epoch": 1.16, "learning_rate": 1.226829947119399e-05, - "loss": 0.0836, + "loss": 0.153, "step": 4167 }, { "epoch": 1.16, "learning_rate": 1.2266444011503851e-05, - "loss": 0.0831, + "loss": 0.0278, "step": 4168 }, { "epoch": 1.16, "learning_rate": 1.2264588551813711e-05, - "loss": 0.0872, + "loss": 0.0884, "step": 4169 }, { "epoch": 1.16, "learning_rate": 1.2262733092123575e-05, - "loss": 0.2493, + "loss": 0.0786, "step": 4170 }, { "epoch": 1.16, "learning_rate": 1.2260877632433437e-05, - "loss": 0.1931, + "loss": 0.0595, "step": 4171 }, { "epoch": 1.16, "learning_rate": 1.2259022172743297e-05, - "loss": 0.0301, + "loss": 0.0309, "step": 4172 }, { "epoch": 1.16, "learning_rate": 1.2257166713053159e-05, - "loss": 0.1393, + "loss": 0.0228, "step": 4173 }, { "epoch": 1.16, "learning_rate": 1.2255311253363022e-05, - "loss": 0.0825, + "loss": 0.0556, "step": 4174 }, { "epoch": 1.16, "learning_rate": 1.2253455793672884e-05, - "loss": 0.1942, + "loss": 0.1329, "step": 4175 }, { "epoch": 1.16, "learning_rate": 1.2251600333982745e-05, - "loss": 0.0848, + "loss": 0.0867, "step": 4176 }, { "epoch": 1.16, "learning_rate": 1.2249744874292606e-05, - "loss": 0.3051, + "loss": 0.0138, "step": 4177 }, { "epoch": 1.16, "learning_rate": 1.224788941460247e-05, - "loss": 0.2479, + "loss": 0.0143, "step": 4178 }, { "epoch": 1.16, "learning_rate": 1.224603395491233e-05, - "loss": 0.1897, + "loss": 0.0669, "step": 4179 }, { "epoch": 1.16, "learning_rate": 1.2244178495222192e-05, - "loss": 0.0858, + "loss": 0.0496, "step": 4180 }, { "epoch": 1.16, "learning_rate": 1.2242323035532054e-05, - "loss": 0.1927, + "loss": 0.1084, "step": 4181 }, { "epoch": 1.16, "learning_rate": 1.2240467575841917e-05, - "loss": 0.1975, + "loss": 0.0544, "step": 4182 }, { "epoch": 1.16, "learning_rate": 1.2238612116151778e-05, - "loss": 0.2569, + "loss": 0.1459, "step": 4183 }, { "epoch": 1.16, "learning_rate": 1.223675665646164e-05, - "loss": 0.0854, + "loss": 0.0424, "step": 4184 }, { "epoch": 1.16, "learning_rate": 1.22349011967715e-05, - "loss": 0.0825, + "loss": 0.064, "step": 4185 }, { "epoch": 1.17, "learning_rate": 1.2233045737081363e-05, - "loss": 0.0866, + "loss": 0.1419, "step": 4186 }, { "epoch": 1.17, "learning_rate": 1.2231190277391225e-05, - "loss": 0.1934, + "loss": 0.1729, "step": 4187 }, { "epoch": 1.17, "learning_rate": 1.2229334817701087e-05, - "loss": 0.1428, + "loss": 0.0096, "step": 4188 }, { "epoch": 1.17, "learning_rate": 1.2227479358010947e-05, - "loss": 0.1932, + "loss": 0.015, "step": 4189 }, { "epoch": 1.17, "learning_rate": 1.222562389832081e-05, - "loss": 0.1356, + "loss": 0.0129, "step": 4190 }, { "epoch": 1.17, "learning_rate": 1.2223768438630673e-05, - "loss": 0.0882, + "loss": 0.1015, "step": 4191 }, { "epoch": 1.17, "learning_rate": 1.2221912978940533e-05, - "loss": 0.1946, + "loss": 0.1448, "step": 4192 }, { "epoch": 1.17, "learning_rate": 1.2220057519250395e-05, - "loss": 0.1389, + "loss": 0.0671, "step": 4193 }, { "epoch": 1.17, "learning_rate": 1.2218202059560258e-05, - "loss": 0.2408, + "loss": 0.1172, "step": 4194 }, { "epoch": 1.17, "learning_rate": 1.2216346599870118e-05, - "loss": 0.0835, + "loss": 0.091, "step": 4195 }, { "epoch": 1.17, "learning_rate": 1.221449114017998e-05, - "loss": 0.0852, + "loss": 0.0219, "step": 4196 }, { "epoch": 1.17, "learning_rate": 1.2212635680489842e-05, - "loss": 0.1964, + "loss": 0.0297, "step": 4197 }, { "epoch": 1.17, "learning_rate": 1.2210780220799706e-05, - "loss": 0.1898, + "loss": 0.0858, "step": 4198 }, { "epoch": 1.17, "learning_rate": 1.2208924761109566e-05, - "loss": 0.1888, + "loss": 0.1582, "step": 4199 }, { "epoch": 1.17, "learning_rate": 1.2207069301419428e-05, - "loss": 0.0355, + "loss": 0.0567, "step": 4200 }, { "epoch": 1.17, "learning_rate": 1.2205213841729288e-05, - "loss": 0.0886, + "loss": 0.0702, "step": 4201 }, { "epoch": 1.17, "learning_rate": 1.2203358382039151e-05, - "loss": 0.1888, + "loss": 0.0687, "step": 4202 }, { "epoch": 1.17, "learning_rate": 1.2201502922349013e-05, - "loss": 0.2938, + "loss": 0.1964, "step": 4203 }, { "epoch": 1.17, "learning_rate": 1.2199647462658875e-05, - "loss": 0.135, + "loss": 0.0295, "step": 4204 }, { "epoch": 1.17, "learning_rate": 1.2197792002968735e-05, - "loss": 0.0914, + "loss": 0.0141, "step": 4205 }, { "epoch": 1.17, "learning_rate": 1.2195936543278599e-05, - "loss": 0.0374, + "loss": 0.0212, "step": 4206 }, { "epoch": 1.17, "learning_rate": 1.219408108358846e-05, - "loss": 0.2406, + "loss": 0.0623, "step": 4207 }, { "epoch": 1.17, "learning_rate": 1.2192225623898321e-05, - "loss": 0.2429, + "loss": 0.1577, "step": 4208 }, { "epoch": 1.17, "learning_rate": 1.2190370164208183e-05, - "loss": 0.1414, + "loss": 0.0618, "step": 4209 }, { "epoch": 1.17, "learning_rate": 1.2188514704518046e-05, - "loss": 0.1895, + "loss": 0.021, "step": 4210 }, { "epoch": 1.17, "learning_rate": 1.2186659244827908e-05, - "loss": 0.0878, + "loss": 0.1423, "step": 4211 }, { "epoch": 1.17, "learning_rate": 1.2184803785137768e-05, - "loss": 0.0356, + "loss": 0.0626, "step": 4212 }, { "epoch": 1.17, "learning_rate": 1.218294832544763e-05, - "loss": 0.135, + "loss": 0.0583, "step": 4213 }, { "epoch": 1.17, "learning_rate": 1.2181092865757494e-05, - "loss": 0.24, + "loss": 0.0171, "step": 4214 }, { "epoch": 1.17, "learning_rate": 1.2179237406067354e-05, - "loss": 0.0351, + "loss": 0.0624, "step": 4215 }, { "epoch": 1.17, "learning_rate": 1.2177381946377216e-05, - "loss": 0.1428, + "loss": 0.1192, "step": 4216 }, { "epoch": 1.17, "learning_rate": 1.2175526486687078e-05, - "loss": 0.0861, + "loss": 0.0671, "step": 4217 }, { "epoch": 1.17, "learning_rate": 1.217367102699694e-05, - "loss": 0.0343, + "loss": 0.1221, "step": 4218 }, { "epoch": 1.17, "learning_rate": 1.2171815567306802e-05, - "loss": 0.0876, + "loss": 0.053, "step": 4219 }, { "epoch": 1.17, "learning_rate": 1.2169960107616663e-05, - "loss": 0.0337, + "loss": 0.0594, "step": 4220 }, { "epoch": 1.17, "learning_rate": 1.2168104647926524e-05, - "loss": 0.0862, + "loss": 0.0457, "step": 4221 }, { "epoch": 1.18, "learning_rate": 1.2166249188236387e-05, - "loss": 0.1362, + "loss": 0.0517, "step": 4222 }, { "epoch": 1.18, "learning_rate": 1.2164393728546249e-05, - "loss": 0.1369, + "loss": 0.0226, "step": 4223 }, { "epoch": 1.18, "learning_rate": 1.216253826885611e-05, - "loss": 0.1422, + "loss": 0.0604, "step": 4224 }, { "epoch": 1.18, "learning_rate": 1.2160682809165971e-05, - "loss": 0.1455, + "loss": 0.0532, "step": 4225 }, { "epoch": 1.18, "learning_rate": 1.2158827349475835e-05, - "loss": 0.1936, + "loss": 0.0119, "step": 4226 }, { "epoch": 1.18, "learning_rate": 1.2156971889785697e-05, - "loss": 0.0874, + "loss": 0.0062, "step": 4227 }, { "epoch": 1.18, "learning_rate": 1.2155116430095557e-05, - "loss": 0.141, + "loss": 0.0143, "step": 4228 }, { "epoch": 1.18, "learning_rate": 1.2153260970405419e-05, - "loss": 0.0829, + "loss": 0.0162, "step": 4229 }, { "epoch": 1.18, "learning_rate": 1.2151405510715282e-05, - "loss": 0.1383, + "loss": 0.1294, "step": 4230 }, { "epoch": 1.18, "learning_rate": 1.2149550051025142e-05, - "loss": 0.1392, + "loss": 0.0453, "step": 4231 }, { "epoch": 1.18, "learning_rate": 1.2147694591335004e-05, - "loss": 0.1407, + "loss": 0.056, "step": 4232 }, { "epoch": 1.18, "learning_rate": 1.2145839131644866e-05, - "loss": 0.1969, + "loss": 0.0575, "step": 4233 }, { "epoch": 1.18, "learning_rate": 1.214398367195473e-05, - "loss": 0.1348, + "loss": 0.0533, "step": 4234 }, { "epoch": 1.18, "learning_rate": 1.214212821226459e-05, - "loss": 0.1885, + "loss": 0.1601, "step": 4235 }, { "epoch": 1.18, "learning_rate": 1.2140272752574452e-05, - "loss": 0.0292, + "loss": 0.1082, "step": 4236 }, { "epoch": 1.18, "learning_rate": 1.2138417292884312e-05, - "loss": 0.198, + "loss": 0.3605, "step": 4237 }, { "epoch": 1.18, "learning_rate": 1.2136561833194175e-05, - "loss": 0.1948, + "loss": 0.0139, "step": 4238 }, { "epoch": 1.18, "learning_rate": 1.2134706373504037e-05, - "loss": 0.0844, + "loss": 0.0456, "step": 4239 }, { "epoch": 1.18, "learning_rate": 1.21328509138139e-05, - "loss": 0.1902, + "loss": 0.0539, "step": 4240 }, { "epoch": 1.18, "learning_rate": 1.213099545412376e-05, - "loss": 0.1381, + "loss": 0.0134, "step": 4241 }, { "epoch": 1.18, "learning_rate": 1.2129139994433623e-05, - "loss": 0.1401, + "loss": 0.0193, "step": 4242 }, { "epoch": 1.18, "learning_rate": 1.2127284534743485e-05, - "loss": 0.1437, + "loss": 0.0574, "step": 4243 }, { "epoch": 1.18, "learning_rate": 1.2125429075053345e-05, - "loss": 0.0849, + "loss": 0.0975, "step": 4244 }, { "epoch": 1.18, "learning_rate": 1.2123573615363207e-05, - "loss": 0.136, + "loss": 0.0166, "step": 4245 }, { "epoch": 1.18, "learning_rate": 1.212171815567307e-05, - "loss": 0.2499, + "loss": 0.0124, "step": 4246 }, { "epoch": 1.18, "learning_rate": 1.211986269598293e-05, - "loss": 0.1428, + "loss": 0.1162, "step": 4247 }, { "epoch": 1.18, "learning_rate": 1.2118007236292792e-05, - "loss": 0.1384, + "loss": 0.0487, "step": 4248 }, { "epoch": 1.18, "learning_rate": 1.2116151776602654e-05, - "loss": 0.0838, + "loss": 0.0526, "step": 4249 }, { "epoch": 1.18, "learning_rate": 1.2114296316912518e-05, - "loss": 0.086, + "loss": 0.0158, "step": 4250 }, { "epoch": 1.18, "learning_rate": 1.2112440857222378e-05, - "loss": 0.0842, + "loss": 0.0463, "step": 4251 }, { "epoch": 1.18, "learning_rate": 1.211058539753224e-05, - "loss": 0.0862, + "loss": 0.0131, "step": 4252 }, { "epoch": 1.18, "learning_rate": 1.21087299378421e-05, - "loss": 0.141, + "loss": 0.0546, "step": 4253 }, { "epoch": 1.18, "learning_rate": 1.2106874478151962e-05, - "loss": 0.0306, + "loss": 0.0096, "step": 4254 }, { "epoch": 1.18, "learning_rate": 1.2105019018461826e-05, - "loss": 0.0847, + "loss": 0.0461, "step": 4255 }, { "epoch": 1.18, "learning_rate": 1.2103163558771687e-05, - "loss": 0.2518, + "loss": 0.0118, "step": 4256 }, { "epoch": 1.18, "learning_rate": 1.2101308099081548e-05, - "loss": 0.1358, + "loss": 0.0371, "step": 4257 }, { "epoch": 1.19, "learning_rate": 1.209945263939141e-05, - "loss": 0.0281, + "loss": 0.1348, "step": 4258 }, { "epoch": 1.19, "learning_rate": 1.2097597179701273e-05, - "loss": 0.0868, + "loss": 0.2103, "step": 4259 }, { "epoch": 1.19, "learning_rate": 1.2095741720011133e-05, - "loss": 0.1977, + "loss": 0.4192, "step": 4260 }, { "epoch": 1.19, "learning_rate": 1.2093886260320995e-05, - "loss": 0.0276, + "loss": 0.0148, "step": 4261 }, { "epoch": 1.19, "learning_rate": 1.2092030800630857e-05, - "loss": 0.2449, + "loss": 0.1814, "step": 4262 }, { "epoch": 1.19, "learning_rate": 1.2090175340940719e-05, - "loss": 0.1402, + "loss": 0.0541, "step": 4263 }, { "epoch": 1.19, "learning_rate": 1.208831988125058e-05, - "loss": 0.1988, + "loss": 0.1429, "step": 4264 }, { "epoch": 1.19, "learning_rate": 1.2086464421560443e-05, - "loss": 0.0841, + "loss": 0.0677, "step": 4265 }, { "epoch": 1.19, "learning_rate": 1.2084608961870303e-05, - "loss": 0.1412, + "loss": 0.1037, "step": 4266 }, { "epoch": 1.19, "learning_rate": 1.2082753502180166e-05, - "loss": 0.1952, + "loss": 0.0567, "step": 4267 }, { "epoch": 1.19, "learning_rate": 1.2080898042490028e-05, - "loss": 0.0289, + "loss": 0.054, "step": 4268 }, { "epoch": 1.19, "learning_rate": 1.2079042582799888e-05, - "loss": 0.0879, + "loss": 0.1316, "step": 4269 }, { "epoch": 1.19, "learning_rate": 1.207718712310975e-05, - "loss": 0.0286, + "loss": 0.0613, "step": 4270 }, { "epoch": 1.19, "learning_rate": 1.2075331663419614e-05, - "loss": 0.1394, + "loss": 0.0321, "step": 4271 }, { "epoch": 1.19, "learning_rate": 1.2073476203729476e-05, - "loss": 0.1412, + "loss": 0.0347, "step": 4272 }, { "epoch": 1.19, "learning_rate": 1.2071620744039336e-05, - "loss": 0.1428, + "loss": 0.1082, "step": 4273 }, { "epoch": 1.19, "learning_rate": 1.2069765284349198e-05, - "loss": 0.0813, + "loss": 0.0366, "step": 4274 }, { "epoch": 1.19, "learning_rate": 1.2067909824659061e-05, - "loss": 0.0859, + "loss": 0.0513, "step": 4275 }, { "epoch": 1.19, "learning_rate": 1.2066054364968921e-05, - "loss": 0.0842, + "loss": 0.0715, "step": 4276 }, { "epoch": 1.19, "learning_rate": 1.2064198905278783e-05, - "loss": 0.2537, + "loss": 0.0795, "step": 4277 }, { "epoch": 1.19, "learning_rate": 1.2062343445588645e-05, - "loss": 0.3062, + "loss": 0.0608, "step": 4278 }, { "epoch": 1.19, "learning_rate": 1.2060487985898509e-05, - "loss": 0.0828, + "loss": 0.0426, "step": 4279 }, { "epoch": 1.19, "learning_rate": 1.2058632526208369e-05, - "loss": 0.1382, + "loss": 0.1172, "step": 4280 }, { "epoch": 1.19, "learning_rate": 1.205677706651823e-05, - "loss": 0.1437, + "loss": 0.0513, "step": 4281 }, { "epoch": 1.19, "learning_rate": 1.2054921606828091e-05, - "loss": 0.0838, + "loss": 0.0107, "step": 4282 }, { "epoch": 1.19, "learning_rate": 1.2053066147137955e-05, - "loss": 0.1425, + "loss": 0.1859, "step": 4283 }, { "epoch": 1.19, "learning_rate": 1.2051210687447816e-05, - "loss": 0.191, + "loss": 0.1453, "step": 4284 }, { "epoch": 1.19, "learning_rate": 1.2049355227757678e-05, - "loss": 0.3106, + "loss": 0.096, "step": 4285 }, { "epoch": 1.19, "learning_rate": 1.2047499768067538e-05, - "loss": 0.0855, + "loss": 0.0133, "step": 4286 }, { "epoch": 1.19, "learning_rate": 1.2045644308377402e-05, - "loss": 0.0844, + "loss": 0.0105, "step": 4287 }, { "epoch": 1.19, "learning_rate": 1.2043788848687264e-05, - "loss": 0.1961, + "loss": 0.1177, "step": 4288 }, { "epoch": 1.19, "learning_rate": 1.2041933388997124e-05, - "loss": 0.0293, + "loss": 0.0767, "step": 4289 }, { "epoch": 1.19, "learning_rate": 1.2040077929306986e-05, - "loss": 0.0854, + "loss": 0.0953, "step": 4290 }, { "epoch": 1.19, "learning_rate": 1.203822246961685e-05, - "loss": 0.0816, + "loss": 0.0595, "step": 4291 }, { "epoch": 1.19, "learning_rate": 1.203636700992671e-05, - "loss": 0.0872, + "loss": 0.1106, "step": 4292 }, { "epoch": 1.19, "learning_rate": 1.2034511550236572e-05, - "loss": 0.0292, + "loss": 0.233, "step": 4293 }, { "epoch": 1.2, "learning_rate": 1.2032656090546433e-05, - "loss": 0.1951, + "loss": 0.1181, "step": 4294 }, { "epoch": 1.2, "learning_rate": 1.2030800630856297e-05, - "loss": 0.1971, + "loss": 0.1486, "step": 4295 }, { "epoch": 1.2, "learning_rate": 1.2028945171166157e-05, - "loss": 0.1375, + "loss": 0.1087, "step": 4296 }, { "epoch": 1.2, "learning_rate": 1.2027089711476019e-05, - "loss": 0.188, + "loss": 0.1937, "step": 4297 }, { "epoch": 1.2, "learning_rate": 1.202523425178588e-05, - "loss": 0.1427, + "loss": 0.2813, "step": 4298 }, { "epoch": 1.2, "learning_rate": 1.2023378792095743e-05, - "loss": 0.14, + "loss": 0.0653, "step": 4299 }, { "epoch": 1.2, "learning_rate": 1.2021523332405605e-05, - "loss": 0.1946, + "loss": 0.0216, "step": 4300 }, { "epoch": 1.2, "learning_rate": 1.2019667872715467e-05, - "loss": 0.193, + "loss": 0.1392, "step": 4301 }, { "epoch": 1.2, "learning_rate": 1.2017812413025327e-05, - "loss": 0.0309, + "loss": 0.0349, "step": 4302 }, { "epoch": 1.2, "learning_rate": 1.201595695333519e-05, - "loss": 0.3601, + "loss": 0.067, "step": 4303 }, { "epoch": 1.2, "learning_rate": 1.2014101493645052e-05, - "loss": 0.0824, + "loss": 0.0757, "step": 4304 }, { "epoch": 1.2, "learning_rate": 1.2012246033954912e-05, - "loss": 0.197, + "loss": 0.0273, "step": 4305 }, { "epoch": 1.2, "learning_rate": 1.2010390574264774e-05, - "loss": 0.0856, + "loss": 0.0684, "step": 4306 }, { "epoch": 1.2, "learning_rate": 1.2008535114574638e-05, - "loss": 0.1406, + "loss": 0.0278, "step": 4307 }, { "epoch": 1.2, "learning_rate": 1.20066796548845e-05, - "loss": 0.0329, + "loss": 0.16, "step": 4308 }, { "epoch": 1.2, "learning_rate": 1.200482419519436e-05, - "loss": 0.1389, + "loss": 0.0232, "step": 4309 }, { "epoch": 1.2, "learning_rate": 1.2002968735504222e-05, - "loss": 0.1387, + "loss": 0.1415, "step": 4310 }, { "epoch": 1.2, "learning_rate": 1.2001113275814085e-05, - "loss": 0.1897, + "loss": 0.0629, "step": 4311 }, { "epoch": 1.2, "learning_rate": 1.1999257816123945e-05, - "loss": 0.087, + "loss": 0.0572, "step": 4312 }, { "epoch": 1.2, "learning_rate": 1.1997402356433807e-05, - "loss": 0.1931, + "loss": 0.0224, "step": 4313 }, { "epoch": 1.2, "learning_rate": 1.1995546896743669e-05, - "loss": 0.1937, + "loss": 0.1585, "step": 4314 }, { "epoch": 1.2, "learning_rate": 1.1993691437053531e-05, - "loss": 0.1409, + "loss": 0.0204, "step": 4315 }, { "epoch": 1.2, "learning_rate": 1.1991835977363393e-05, - "loss": 0.0877, + "loss": 0.129, "step": 4316 }, { "epoch": 1.2, "learning_rate": 1.1989980517673255e-05, - "loss": 0.0852, + "loss": 0.0584, "step": 4317 }, { "epoch": 1.2, "learning_rate": 1.1988125057983115e-05, - "loss": 0.1397, + "loss": 0.0467, "step": 4318 }, { "epoch": 1.2, "learning_rate": 1.1986269598292978e-05, - "loss": 0.1368, + "loss": 0.0237, "step": 4319 }, { "epoch": 1.2, "learning_rate": 1.198441413860284e-05, - "loss": 0.2493, + "loss": 0.0615, "step": 4320 }, { "epoch": 1.2, "learning_rate": 1.19825586789127e-05, - "loss": 0.1441, + "loss": 0.057, "step": 4321 }, { "epoch": 1.2, "learning_rate": 1.1980703219222562e-05, - "loss": 0.0863, + "loss": 0.0163, "step": 4322 }, { "epoch": 1.2, "learning_rate": 1.1978847759532426e-05, - "loss": 0.0332, + "loss": 0.0841, "step": 4323 }, { "epoch": 1.2, "learning_rate": 1.1976992299842288e-05, - "loss": 0.1411, + "loss": 0.0152, "step": 4324 }, { "epoch": 1.2, "learning_rate": 1.1975136840152148e-05, - "loss": 0.0882, + "loss": 0.1546, "step": 4325 }, { "epoch": 1.2, "learning_rate": 1.197328138046201e-05, - "loss": 0.0896, + "loss": 0.1109, "step": 4326 }, { "epoch": 1.2, "learning_rate": 1.1971425920771873e-05, - "loss": 0.0315, + "loss": 0.0653, "step": 4327 }, { "epoch": 1.2, "learning_rate": 1.1969570461081734e-05, - "loss": 0.2466, + "loss": 0.1584, "step": 4328 }, { "epoch": 1.2, "learning_rate": 1.1967715001391595e-05, - "loss": 0.0833, + "loss": 0.03, "step": 4329 }, { "epoch": 1.21, "learning_rate": 1.1965859541701457e-05, - "loss": 0.1929, + "loss": 0.0131, "step": 4330 }, { "epoch": 1.21, "learning_rate": 1.1964004082011321e-05, - "loss": 0.1448, + "loss": 0.0582, "step": 4331 }, { "epoch": 1.21, "learning_rate": 1.1962148622321181e-05, - "loss": 0.1442, + "loss": 0.0185, "step": 4332 }, { "epoch": 1.21, "learning_rate": 1.1960293162631043e-05, - "loss": 0.0299, + "loss": 0.0158, "step": 4333 }, { "epoch": 1.21, "learning_rate": 1.1958437702940903e-05, - "loss": 0.1971, + "loss": 0.0671, "step": 4334 }, { "epoch": 1.21, "learning_rate": 1.1956582243250767e-05, - "loss": 0.1961, + "loss": 0.1185, "step": 4335 }, { "epoch": 1.21, "learning_rate": 1.1954726783560629e-05, - "loss": 0.029, + "loss": 0.0093, "step": 4336 }, { "epoch": 1.21, "learning_rate": 1.195287132387049e-05, - "loss": 0.1925, + "loss": 0.2443, "step": 4337 }, { "epoch": 1.21, "learning_rate": 1.195101586418035e-05, - "loss": 0.0848, + "loss": 0.0112, "step": 4338 }, { "epoch": 1.21, "learning_rate": 1.1949160404490214e-05, - "loss": 0.0835, + "loss": 0.0718, "step": 4339 }, { "epoch": 1.21, "learning_rate": 1.1947304944800076e-05, - "loss": 0.0848, + "loss": 0.0604, "step": 4340 }, { "epoch": 1.21, "learning_rate": 1.1945449485109936e-05, - "loss": 0.0863, + "loss": 0.1391, "step": 4341 }, { "epoch": 1.21, "learning_rate": 1.1943594025419798e-05, - "loss": 0.199, + "loss": 0.0526, "step": 4342 }, { "epoch": 1.21, "learning_rate": 1.1941738565729662e-05, - "loss": 0.1965, + "loss": 0.0952, "step": 4343 }, { "epoch": 1.21, "learning_rate": 1.1939883106039522e-05, - "loss": 0.2515, + "loss": 0.0852, "step": 4344 }, { "epoch": 1.21, "learning_rate": 1.1938027646349384e-05, - "loss": 0.1427, + "loss": 0.0549, "step": 4345 }, { "epoch": 1.21, "learning_rate": 1.1936172186659246e-05, - "loss": 0.1411, + "loss": 0.0669, "step": 4346 }, { "epoch": 1.21, "learning_rate": 1.193431672696911e-05, - "loss": 0.2533, + "loss": 0.0642, "step": 4347 }, { "epoch": 1.21, "learning_rate": 1.193246126727897e-05, - "loss": 0.1936, + "loss": 0.1348, "step": 4348 }, { "epoch": 1.21, "learning_rate": 1.1930605807588831e-05, - "loss": 0.1368, + "loss": 0.1595, "step": 4349 }, { "epoch": 1.21, "learning_rate": 1.1928750347898691e-05, - "loss": 0.0301, + "loss": 0.0546, "step": 4350 }, { "epoch": 1.21, "learning_rate": 1.1926894888208555e-05, - "loss": 0.1378, + "loss": 0.1629, "step": 4351 }, { "epoch": 1.21, "learning_rate": 1.1925039428518417e-05, - "loss": 0.1933, + "loss": 0.0246, "step": 4352 }, { "epoch": 1.21, "learning_rate": 1.1923183968828279e-05, - "loss": 0.031, + "loss": 0.0307, "step": 4353 }, { "epoch": 1.21, "learning_rate": 1.1921328509138139e-05, - "loss": 0.1448, + "loss": 0.0395, "step": 4354 }, { "epoch": 1.21, "learning_rate": 1.1919473049448002e-05, - "loss": 0.1966, + "loss": 0.0198, "step": 4355 }, { "epoch": 1.21, "learning_rate": 1.1917617589757864e-05, - "loss": 0.1882, + "loss": 0.1671, "step": 4356 }, { "epoch": 1.21, "learning_rate": 1.1915762130067724e-05, - "loss": 0.1901, + "loss": 0.0663, "step": 4357 }, { "epoch": 1.21, "learning_rate": 1.1913906670377586e-05, - "loss": 0.0852, + "loss": 0.0733, "step": 4358 }, { "epoch": 1.21, "learning_rate": 1.191205121068745e-05, - "loss": 0.1408, + "loss": 0.0206, "step": 4359 }, { "epoch": 1.21, "learning_rate": 1.1910195750997312e-05, - "loss": 0.0847, + "loss": 0.0395, "step": 4360 }, { "epoch": 1.21, "learning_rate": 1.1908340291307172e-05, - "loss": 0.1385, + "loss": 0.0601, "step": 4361 }, { "epoch": 1.21, "learning_rate": 1.1906484831617034e-05, - "loss": 0.0342, + "loss": 0.2027, "step": 4362 }, { "epoch": 1.21, "learning_rate": 1.1904629371926897e-05, - "loss": 0.0855, + "loss": 0.0606, "step": 4363 }, { "epoch": 1.21, "learning_rate": 1.1902773912236758e-05, - "loss": 0.0871, + "loss": 0.1564, "step": 4364 }, { "epoch": 1.21, "learning_rate": 1.190091845254662e-05, - "loss": 0.1381, + "loss": 0.0292, "step": 4365 }, { "epoch": 1.22, "learning_rate": 1.1899062992856481e-05, - "loss": 0.0319, + "loss": 0.0178, "step": 4366 }, { "epoch": 1.22, "learning_rate": 1.1897207533166343e-05, - "loss": 0.2449, + "loss": 0.0178, "step": 4367 }, { "epoch": 1.22, "learning_rate": 1.1895352073476205e-05, - "loss": 0.1334, + "loss": 0.0462, "step": 4368 }, { "epoch": 1.22, "learning_rate": 1.1893496613786067e-05, - "loss": 0.1921, + "loss": 0.1457, "step": 4369 }, { "epoch": 1.22, "learning_rate": 1.1891641154095927e-05, - "loss": 0.0852, + "loss": 0.0917, "step": 4370 }, { "epoch": 1.22, "learning_rate": 1.188978569440579e-05, - "loss": 0.1372, + "loss": 0.1203, "step": 4371 }, { "epoch": 1.22, "learning_rate": 1.1887930234715653e-05, - "loss": 0.1923, + "loss": 0.0233, "step": 4372 }, { "epoch": 1.22, "learning_rate": 1.1886074775025513e-05, - "loss": 0.1408, + "loss": 0.026, "step": 4373 }, { "epoch": 1.22, "learning_rate": 1.1884219315335375e-05, - "loss": 0.0852, + "loss": 0.1146, "step": 4374 }, { "epoch": 1.22, "learning_rate": 1.1882363855645238e-05, - "loss": 0.0861, + "loss": 0.0163, "step": 4375 }, { "epoch": 1.22, "learning_rate": 1.18805083959551e-05, - "loss": 0.1391, + "loss": 0.1568, "step": 4376 }, { "epoch": 1.22, "learning_rate": 1.187865293626496e-05, - "loss": 0.0853, + "loss": 0.0187, "step": 4377 }, { "epoch": 1.22, "learning_rate": 1.1876797476574822e-05, - "loss": 0.1375, + "loss": 0.1023, "step": 4378 }, { "epoch": 1.22, "learning_rate": 1.1874942016884686e-05, - "loss": 0.2417, + "loss": 0.0967, "step": 4379 }, { "epoch": 1.22, "learning_rate": 1.1873086557194546e-05, - "loss": 0.1961, + "loss": 0.0135, "step": 4380 }, { "epoch": 1.22, "learning_rate": 1.1871231097504408e-05, - "loss": 0.0841, + "loss": 0.0486, "step": 4381 }, { "epoch": 1.22, "learning_rate": 1.186937563781427e-05, - "loss": 0.0844, + "loss": 0.1653, "step": 4382 }, { "epoch": 1.22, "learning_rate": 1.1867520178124131e-05, - "loss": 0.1414, + "loss": 0.1082, "step": 4383 }, { "epoch": 1.22, "learning_rate": 1.1865664718433993e-05, - "loss": 0.1366, + "loss": 0.0775, "step": 4384 }, { "epoch": 1.22, "learning_rate": 1.1863809258743855e-05, - "loss": 0.1931, + "loss": 0.0638, "step": 4385 }, { "epoch": 1.22, "learning_rate": 1.1861953799053715e-05, - "loss": 0.1417, + "loss": 0.0953, "step": 4386 }, { "epoch": 1.22, "learning_rate": 1.1860098339363579e-05, - "loss": 0.4096, + "loss": 0.0169, "step": 4387 }, { "epoch": 1.22, "learning_rate": 1.185824287967344e-05, - "loss": 0.0864, + "loss": 0.0595, "step": 4388 }, { "epoch": 1.22, "learning_rate": 1.1856387419983301e-05, - "loss": 0.1379, + "loss": 0.0874, "step": 4389 }, { "epoch": 1.22, "learning_rate": 1.1854531960293163e-05, - "loss": 0.1906, + "loss": 0.0152, "step": 4390 }, { "epoch": 1.22, "learning_rate": 1.1852676500603026e-05, - "loss": 0.0871, + "loss": 0.1005, "step": 4391 }, { "epoch": 1.22, "learning_rate": 1.1850821040912888e-05, - "loss": 0.0891, + "loss": 0.012, "step": 4392 }, { "epoch": 1.22, "learning_rate": 1.1848965581222748e-05, - "loss": 0.1438, + "loss": 0.1553, "step": 4393 }, { "epoch": 1.22, "learning_rate": 1.184711012153261e-05, - "loss": 0.086, + "loss": 0.078, "step": 4394 }, { "epoch": 1.22, "learning_rate": 1.1845254661842474e-05, - "loss": 0.1439, + "loss": 0.0642, "step": 4395 }, { "epoch": 1.22, "learning_rate": 1.1843399202152334e-05, - "loss": 0.0861, + "loss": 0.0648, "step": 4396 }, { "epoch": 1.22, "learning_rate": 1.1841543742462196e-05, - "loss": 0.138, + "loss": 0.0195, "step": 4397 }, { "epoch": 1.22, "learning_rate": 1.1839688282772058e-05, - "loss": 0.1949, + "loss": 0.1026, "step": 4398 }, { "epoch": 1.22, "learning_rate": 1.1837832823081921e-05, - "loss": 0.0847, + "loss": 0.059, "step": 4399 }, { "epoch": 1.22, "learning_rate": 1.1835977363391782e-05, - "loss": 0.0309, + "loss": 0.1293, "step": 4400 }, { "epoch": 1.22, "learning_rate": 1.1834121903701643e-05, - "loss": 0.1879, + "loss": 0.0359, "step": 4401 }, { "epoch": 1.23, "learning_rate": 1.1832266444011504e-05, - "loss": 0.1909, + "loss": 0.0593, "step": 4402 }, { "epoch": 1.23, "learning_rate": 1.1830410984321365e-05, - "loss": 0.1939, + "loss": 0.0389, "step": 4403 }, { "epoch": 1.23, "learning_rate": 1.1828555524631229e-05, - "loss": 0.2497, + "loss": 0.0876, "step": 4404 }, { "epoch": 1.23, "learning_rate": 1.1826700064941091e-05, - "loss": 0.1964, + "loss": 0.0196, "step": 4405 }, { "epoch": 1.23, "learning_rate": 1.1824844605250951e-05, - "loss": 0.1422, + "loss": 0.1049, "step": 4406 }, { "epoch": 1.23, "learning_rate": 1.1822989145560813e-05, - "loss": 0.302, + "loss": 0.0812, "step": 4407 }, { "epoch": 1.23, "learning_rate": 1.1821133685870676e-05, - "loss": 0.0322, + "loss": 0.0926, "step": 4408 }, { "epoch": 1.23, "learning_rate": 1.1819278226180537e-05, - "loss": 0.1373, + "loss": 0.0621, "step": 4409 }, { "epoch": 1.23, "learning_rate": 1.1817422766490399e-05, - "loss": 0.1372, + "loss": 0.1923, "step": 4410 }, { "epoch": 1.23, "learning_rate": 1.181556730680026e-05, - "loss": 0.192, + "loss": 0.1184, "step": 4411 }, { "epoch": 1.23, "learning_rate": 1.1813711847110122e-05, - "loss": 0.243, + "loss": 0.1847, "step": 4412 }, { "epoch": 1.23, "learning_rate": 1.1811856387419984e-05, - "loss": 0.2438, + "loss": 0.0907, "step": 4413 }, { "epoch": 1.23, "learning_rate": 1.1810000927729846e-05, - "loss": 0.1377, + "loss": 0.2125, "step": 4414 }, { "epoch": 1.23, "learning_rate": 1.1808145468039706e-05, - "loss": 0.2903, + "loss": 0.0722, "step": 4415 }, { "epoch": 1.23, "learning_rate": 1.180629000834957e-05, - "loss": 0.1958, + "loss": 0.1982, "step": 4416 }, { "epoch": 1.23, "learning_rate": 1.1804434548659432e-05, - "loss": 0.0367, + "loss": 0.0892, "step": 4417 }, { "epoch": 1.23, "learning_rate": 1.1802579088969292e-05, - "loss": 0.089, + "loss": 0.0546, "step": 4418 }, { "epoch": 1.23, "learning_rate": 1.1800723629279154e-05, - "loss": 0.0389, + "loss": 0.1011, "step": 4419 }, { "epoch": 1.23, "learning_rate": 1.1798868169589017e-05, - "loss": 0.1408, + "loss": 0.0458, "step": 4420 }, { "epoch": 1.23, "learning_rate": 1.1797012709898879e-05, - "loss": 0.0915, + "loss": 0.119, "step": 4421 }, { "epoch": 1.23, "learning_rate": 1.179515725020874e-05, - "loss": 0.1846, + "loss": 0.0847, "step": 4422 }, { "epoch": 1.23, "learning_rate": 1.1793301790518601e-05, - "loss": 0.0865, + "loss": 0.0574, "step": 4423 }, { "epoch": 1.23, "learning_rate": 1.1791446330828465e-05, - "loss": 0.0372, + "loss": 0.1149, "step": 4424 }, { "epoch": 1.23, "learning_rate": 1.1789590871138325e-05, - "loss": 0.136, + "loss": 0.0617, "step": 4425 }, { "epoch": 1.23, "learning_rate": 1.1787735411448187e-05, - "loss": 0.1425, + "loss": 0.0772, "step": 4426 }, { "epoch": 1.23, "learning_rate": 1.1785879951758049e-05, - "loss": 0.1852, + "loss": 0.0883, "step": 4427 }, { "epoch": 1.23, "learning_rate": 1.1784024492067912e-05, - "loss": 0.2451, + "loss": 0.0539, "step": 4428 }, { "epoch": 1.23, "learning_rate": 1.1782169032377772e-05, - "loss": 0.1372, + "loss": 0.0625, "step": 4429 }, { "epoch": 1.23, "learning_rate": 1.1780313572687634e-05, - "loss": 0.0344, + "loss": 0.1702, "step": 4430 }, { "epoch": 1.23, "learning_rate": 1.1778458112997494e-05, - "loss": 0.1427, + "loss": 0.1112, "step": 4431 }, { "epoch": 1.23, "learning_rate": 1.1776602653307358e-05, - "loss": 0.2418, + "loss": 0.0623, "step": 4432 }, { "epoch": 1.23, "learning_rate": 1.177474719361722e-05, - "loss": 0.0855, + "loss": 0.0671, "step": 4433 }, { "epoch": 1.23, "learning_rate": 1.1772891733927082e-05, - "loss": 0.1916, + "loss": 0.1256, "step": 4434 }, { "epoch": 1.23, "learning_rate": 1.1771036274236942e-05, - "loss": 0.1404, + "loss": 0.0832, "step": 4435 }, { "epoch": 1.23, "learning_rate": 1.1769180814546805e-05, - "loss": 0.1376, + "loss": 0.0724, "step": 4436 }, { "epoch": 1.23, "learning_rate": 1.1767325354856667e-05, - "loss": 0.033, + "loss": 0.0591, "step": 4437 }, { "epoch": 1.24, "learning_rate": 1.1765469895166528e-05, - "loss": 0.1402, + "loss": 0.1747, "step": 4438 }, { "epoch": 1.24, "learning_rate": 1.176361443547639e-05, - "loss": 0.0333, + "loss": 0.0165, "step": 4439 }, { "epoch": 1.24, "learning_rate": 1.1761758975786253e-05, - "loss": 0.0335, + "loss": 0.1088, "step": 4440 }, { "epoch": 1.24, "learning_rate": 1.1759903516096113e-05, - "loss": 0.0826, + "loss": 0.0896, "step": 4441 }, { "epoch": 1.24, "learning_rate": 1.1758048056405975e-05, - "loss": 0.3081, + "loss": 0.0215, "step": 4442 }, { "epoch": 1.24, "learning_rate": 1.1756192596715837e-05, - "loss": 0.1391, + "loss": 0.1401, "step": 4443 }, { "epoch": 1.24, "learning_rate": 1.17543371370257e-05, - "loss": 0.0312, + "loss": 0.113, "step": 4444 }, { "epoch": 1.24, "learning_rate": 1.175248167733556e-05, - "loss": 0.3008, + "loss": 0.1239, "step": 4445 }, { "epoch": 1.24, "learning_rate": 1.1750626217645422e-05, - "loss": 0.0864, + "loss": 0.0181, "step": 4446 }, { "epoch": 1.24, "learning_rate": 1.1748770757955283e-05, - "loss": 0.2517, + "loss": 0.1391, "step": 4447 }, { "epoch": 1.24, "learning_rate": 1.1746915298265146e-05, - "loss": 0.1412, + "loss": 0.0879, "step": 4448 }, { "epoch": 1.24, "learning_rate": 1.1745059838575008e-05, - "loss": 0.084, + "loss": 0.0518, "step": 4449 }, { "epoch": 1.24, "learning_rate": 1.174320437888487e-05, - "loss": 0.1389, + "loss": 0.0632, "step": 4450 }, { "epoch": 1.24, "learning_rate": 1.174134891919473e-05, - "loss": 0.1403, + "loss": 0.1977, "step": 4451 }, { "epoch": 1.24, "learning_rate": 1.1739493459504594e-05, - "loss": 0.0297, + "loss": 0.0252, "step": 4452 }, { "epoch": 1.24, "learning_rate": 1.1737637999814456e-05, - "loss": 0.1404, + "loss": 0.1041, "step": 4453 }, { "epoch": 1.24, "learning_rate": 1.1735782540124316e-05, - "loss": 0.0298, + "loss": 0.0519, "step": 4454 }, { "epoch": 1.24, "learning_rate": 1.1733927080434178e-05, - "loss": 0.0838, + "loss": 0.0794, "step": 4455 }, { "epoch": 1.24, "learning_rate": 1.1732071620744041e-05, - "loss": 0.2526, + "loss": 0.0169, "step": 4456 }, { "epoch": 1.24, "learning_rate": 1.1730216161053903e-05, - "loss": 0.1934, + "loss": 0.0286, "step": 4457 }, { "epoch": 1.24, "learning_rate": 1.1728360701363763e-05, - "loss": 0.194, + "loss": 0.0526, "step": 4458 }, { "epoch": 1.24, "learning_rate": 1.1726505241673625e-05, - "loss": 0.24, + "loss": 0.1058, "step": 4459 }, { "epoch": 1.24, "learning_rate": 1.1724649781983489e-05, - "loss": 0.138, + "loss": 0.1403, "step": 4460 }, { "epoch": 1.24, "learning_rate": 1.1722794322293349e-05, - "loss": 0.1437, + "loss": 0.15, "step": 4461 }, { "epoch": 1.24, "learning_rate": 1.172093886260321e-05, - "loss": 0.1395, + "loss": 0.0616, "step": 4462 }, { "epoch": 1.24, "learning_rate": 1.1719083402913073e-05, - "loss": 0.0844, + "loss": 0.109, "step": 4463 }, { "epoch": 1.24, "learning_rate": 1.1717227943222934e-05, - "loss": 0.1347, + "loss": 0.1751, "step": 4464 }, { "epoch": 1.24, "learning_rate": 1.1715372483532796e-05, - "loss": 0.1344, + "loss": 0.1139, "step": 4465 }, { "epoch": 1.24, "learning_rate": 1.1713517023842658e-05, - "loss": 0.2458, + "loss": 0.0192, "step": 4466 }, { "epoch": 1.24, "learning_rate": 1.1711661564152518e-05, - "loss": 0.0331, + "loss": 0.0289, "step": 4467 }, { "epoch": 1.24, "learning_rate": 1.1709806104462382e-05, - "loss": 0.2478, + "loss": 0.0324, "step": 4468 }, { "epoch": 1.24, "learning_rate": 1.1707950644772244e-05, - "loss": 0.0852, + "loss": 0.025, "step": 4469 }, { "epoch": 1.24, "learning_rate": 1.1706095185082104e-05, - "loss": 0.1378, + "loss": 0.0839, "step": 4470 }, { "epoch": 1.24, "learning_rate": 1.1704239725391966e-05, - "loss": 0.1924, + "loss": 0.0482, "step": 4471 }, { "epoch": 1.24, "learning_rate": 1.170238426570183e-05, - "loss": 0.1407, + "loss": 0.1907, "step": 4472 }, { "epoch": 1.24, "learning_rate": 1.1700528806011691e-05, - "loss": 0.2494, + "loss": 0.0672, "step": 4473 }, { "epoch": 1.25, "learning_rate": 1.1698673346321551e-05, - "loss": 0.0826, + "loss": 0.0769, "step": 4474 }, { "epoch": 1.25, "learning_rate": 1.1696817886631413e-05, - "loss": 0.2934, + "loss": 0.069, "step": 4475 }, { "epoch": 1.25, "learning_rate": 1.1694962426941277e-05, - "loss": 0.142, + "loss": 0.0695, "step": 4476 }, { "epoch": 1.25, "learning_rate": 1.1693106967251137e-05, - "loss": 0.3011, + "loss": 0.0198, "step": 4477 }, { "epoch": 1.25, "learning_rate": 1.1691251507560999e-05, - "loss": 0.1916, + "loss": 0.0147, "step": 4478 }, { "epoch": 1.25, "learning_rate": 1.168939604787086e-05, - "loss": 0.2443, + "loss": 0.0149, "step": 4479 }, { "epoch": 1.25, "learning_rate": 1.1687540588180724e-05, - "loss": 0.1378, + "loss": 0.0152, "step": 4480 }, { "epoch": 1.25, "learning_rate": 1.1685685128490585e-05, - "loss": 0.0864, + "loss": 0.1051, "step": 4481 }, { "epoch": 1.25, "learning_rate": 1.1683829668800446e-05, - "loss": 0.0884, + "loss": 0.0117, "step": 4482 }, { "epoch": 1.25, "learning_rate": 1.1681974209110307e-05, - "loss": 0.0389, + "loss": 0.0824, "step": 4483 }, { "epoch": 1.25, "learning_rate": 1.168011874942017e-05, - "loss": 0.14, + "loss": 0.161, "step": 4484 }, { "epoch": 1.25, "learning_rate": 1.1678263289730032e-05, - "loss": 0.1926, + "loss": 0.1752, "step": 4485 }, { "epoch": 1.25, "learning_rate": 1.1676407830039894e-05, - "loss": 0.0854, + "loss": 0.1227, "step": 4486 }, { "epoch": 1.25, "learning_rate": 1.1674552370349754e-05, - "loss": 0.0873, + "loss": 0.0667, "step": 4487 }, { "epoch": 1.25, "learning_rate": 1.1672696910659618e-05, - "loss": 0.189, + "loss": 0.0926, "step": 4488 }, { "epoch": 1.25, "learning_rate": 1.167084145096948e-05, - "loss": 0.0911, + "loss": 0.1193, "step": 4489 }, { "epoch": 1.25, "learning_rate": 1.166898599127934e-05, - "loss": 0.1432, + "loss": 0.1643, "step": 4490 }, { "epoch": 1.25, "learning_rate": 1.1667130531589202e-05, - "loss": 0.1953, + "loss": 0.0126, "step": 4491 }, { "epoch": 1.25, "learning_rate": 1.1665275071899065e-05, - "loss": 0.0865, + "loss": 0.2928, "step": 4492 }, { "epoch": 1.25, "learning_rate": 1.1663419612208925e-05, - "loss": 0.0879, + "loss": 0.0219, "step": 4493 }, { "epoch": 1.25, "learning_rate": 1.1661564152518787e-05, - "loss": 0.1886, + "loss": 0.0882, "step": 4494 }, { "epoch": 1.25, "learning_rate": 1.1659708692828649e-05, - "loss": 0.2429, + "loss": 0.1279, "step": 4495 }, { "epoch": 1.25, "learning_rate": 1.1657853233138513e-05, - "loss": 0.1431, + "loss": 0.019, "step": 4496 }, { "epoch": 1.25, "learning_rate": 1.1655997773448373e-05, - "loss": 0.1419, + "loss": 0.0615, "step": 4497 }, { "epoch": 1.25, "learning_rate": 1.1654142313758235e-05, - "loss": 0.1399, + "loss": 0.0657, "step": 4498 }, { "epoch": 1.25, "learning_rate": 1.1652286854068095e-05, - "loss": 0.1359, + "loss": 0.0237, "step": 4499 }, { "epoch": 1.25, "learning_rate": 1.1650431394377958e-05, - "loss": 0.1379, + "loss": 0.0733, "step": 4500 }, { "epoch": 1.25, "learning_rate": 1.164857593468782e-05, - "loss": 0.1407, + "loss": 0.075, "step": 4501 }, { "epoch": 1.25, "learning_rate": 1.1646720474997682e-05, - "loss": 0.187, + "loss": 0.0712, "step": 4502 }, { "epoch": 1.25, "learning_rate": 1.1644865015307542e-05, - "loss": 0.0875, + "loss": 0.0221, "step": 4503 }, { "epoch": 1.25, "learning_rate": 1.1643009555617406e-05, - "loss": 0.2394, + "loss": 0.0171, "step": 4504 }, { "epoch": 1.25, "learning_rate": 1.1641154095927268e-05, - "loss": 0.1381, + "loss": 0.1055, "step": 4505 }, { "epoch": 1.25, "learning_rate": 1.1639298636237128e-05, - "loss": 0.0365, + "loss": 0.1147, "step": 4506 }, { "epoch": 1.25, "learning_rate": 1.163744317654699e-05, - "loss": 0.1402, + "loss": 0.0896, "step": 4507 }, { "epoch": 1.25, "learning_rate": 1.1635587716856853e-05, - "loss": 0.2947, + "loss": 0.0209, "step": 4508 }, { "epoch": 1.25, "learning_rate": 1.1633732257166714e-05, - "loss": 0.1404, + "loss": 0.1213, "step": 4509 }, { "epoch": 1.26, "learning_rate": 1.1631876797476575e-05, - "loss": 0.1421, + "loss": 0.0752, "step": 4510 }, { "epoch": 1.26, "learning_rate": 1.1630021337786437e-05, - "loss": 0.0871, + "loss": 0.1267, "step": 4511 }, { "epoch": 1.26, "learning_rate": 1.1628165878096301e-05, - "loss": 0.0344, + "loss": 0.1158, "step": 4512 }, { "epoch": 1.26, "learning_rate": 1.1626310418406161e-05, - "loss": 0.0875, + "loss": 0.0653, "step": 4513 }, { "epoch": 1.26, "learning_rate": 1.1624454958716023e-05, - "loss": 0.244, + "loss": 0.0394, "step": 4514 }, { "epoch": 1.26, "learning_rate": 1.1622599499025883e-05, - "loss": 0.0337, + "loss": 0.1839, "step": 4515 }, { "epoch": 1.26, "learning_rate": 1.1620744039335747e-05, - "loss": 0.2966, + "loss": 0.1648, "step": 4516 }, { "epoch": 1.26, "learning_rate": 1.1618888579645609e-05, - "loss": 0.0864, + "loss": 0.0552, "step": 4517 }, { "epoch": 1.26, "learning_rate": 1.161703311995547e-05, - "loss": 0.1408, + "loss": 0.1277, "step": 4518 }, { "epoch": 1.26, "learning_rate": 1.161517766026533e-05, - "loss": 0.1925, + "loss": 0.016, "step": 4519 }, { "epoch": 1.26, "learning_rate": 1.1613322200575194e-05, - "loss": 0.3508, + "loss": 0.0587, "step": 4520 }, { "epoch": 1.26, "learning_rate": 1.1611466740885056e-05, - "loss": 0.1409, + "loss": 0.0621, "step": 4521 }, { "epoch": 1.26, "learning_rate": 1.1609611281194916e-05, - "loss": 0.1326, + "loss": 0.0766, "step": 4522 }, { "epoch": 1.26, "learning_rate": 1.1607755821504778e-05, - "loss": 0.0331, + "loss": 0.0766, "step": 4523 }, { "epoch": 1.26, "learning_rate": 1.1605900361814642e-05, - "loss": 0.0343, + "loss": 0.07, "step": 4524 }, { "epoch": 1.26, "learning_rate": 1.1604044902124503e-05, - "loss": 0.0891, + "loss": 0.023, "step": 4525 }, { "epoch": 1.26, "learning_rate": 1.1602189442434364e-05, - "loss": 0.1384, + "loss": 0.1281, "step": 4526 }, { "epoch": 1.26, "learning_rate": 1.1600333982744226e-05, - "loss": 0.1915, + "loss": 0.1846, "step": 4527 }, { "epoch": 1.26, "learning_rate": 1.1598478523054089e-05, - "loss": 0.14, + "loss": 0.0104, "step": 4528 }, { "epoch": 1.26, "learning_rate": 1.159662306336395e-05, - "loss": 0.1346, + "loss": 0.0622, "step": 4529 }, { "epoch": 1.26, "learning_rate": 1.1594767603673811e-05, - "loss": 0.1391, + "loss": 0.0542, "step": 4530 }, { "epoch": 1.26, "learning_rate": 1.1592912143983673e-05, - "loss": 0.1386, + "loss": 0.2742, "step": 4531 }, { "epoch": 1.26, "learning_rate": 1.1591056684293535e-05, - "loss": 0.2445, + "loss": 0.0887, "step": 4532 }, { "epoch": 1.26, "learning_rate": 1.1589201224603397e-05, - "loss": 0.0319, + "loss": 0.0147, "step": 4533 }, { "epoch": 1.26, "learning_rate": 1.1587345764913259e-05, - "loss": 0.3053, + "loss": 0.0225, "step": 4534 }, { "epoch": 1.26, "learning_rate": 1.1585490305223119e-05, - "loss": 0.2459, + "loss": 0.2156, "step": 4535 }, { "epoch": 1.26, "learning_rate": 1.1583634845532982e-05, - "loss": 0.0872, + "loss": 0.1261, "step": 4536 }, { "epoch": 1.26, "learning_rate": 1.1581779385842844e-05, - "loss": 0.0344, + "loss": 0.021, "step": 4537 }, { "epoch": 1.26, "learning_rate": 1.1579923926152704e-05, - "loss": 0.0328, + "loss": 0.0647, "step": 4538 }, { "epoch": 1.26, "learning_rate": 1.1578068466462566e-05, - "loss": 0.0849, + "loss": 0.0255, "step": 4539 }, { "epoch": 1.26, "learning_rate": 1.157621300677243e-05, - "loss": 0.1939, + "loss": 0.1302, "step": 4540 }, { "epoch": 1.26, "learning_rate": 1.1574357547082292e-05, - "loss": 0.0873, + "loss": 0.0585, "step": 4541 }, { "epoch": 1.26, "learning_rate": 1.1572502087392152e-05, - "loss": 0.1942, + "loss": 0.0644, "step": 4542 }, { "epoch": 1.26, "learning_rate": 1.1570646627702014e-05, - "loss": 0.0861, + "loss": 0.0872, "step": 4543 }, { "epoch": 1.26, "learning_rate": 1.1568791168011877e-05, - "loss": 0.2468, + "loss": 0.033, "step": 4544 }, { "epoch": 1.26, "learning_rate": 1.1566935708321738e-05, - "loss": 0.0868, + "loss": 0.1017, "step": 4545 }, { "epoch": 1.27, "learning_rate": 1.15650802486316e-05, - "loss": 0.087, + "loss": 0.0976, "step": 4546 }, { "epoch": 1.27, "learning_rate": 1.1563224788941461e-05, - "loss": 0.0856, + "loss": 0.0517, "step": 4547 }, { "epoch": 1.27, "learning_rate": 1.1561369329251325e-05, - "loss": 0.1362, + "loss": 0.0235, "step": 4548 }, { "epoch": 1.27, "learning_rate": 1.1559513869561185e-05, - "loss": 0.0314, + "loss": 0.0898, "step": 4549 }, { "epoch": 1.27, "learning_rate": 1.1557658409871047e-05, - "loss": 0.1368, + "loss": 0.316, "step": 4550 }, { "epoch": 1.27, "learning_rate": 1.1555802950180907e-05, - "loss": 0.1971, + "loss": 0.0238, "step": 4551 }, { "epoch": 1.27, "learning_rate": 1.155394749049077e-05, - "loss": 0.1987, + "loss": 0.0771, "step": 4552 }, { "epoch": 1.27, "learning_rate": 1.1552092030800632e-05, - "loss": 0.2502, + "loss": 0.0245, "step": 4553 }, { "epoch": 1.27, "learning_rate": 1.1550236571110494e-05, - "loss": 0.0305, + "loss": 0.1708, "step": 4554 }, { "epoch": 1.27, "learning_rate": 1.1548381111420355e-05, - "loss": 0.0836, + "loss": 0.0732, "step": 4555 }, { "epoch": 1.27, "learning_rate": 1.1546525651730216e-05, - "loss": 0.1393, + "loss": 0.0549, "step": 4556 }, { "epoch": 1.27, "learning_rate": 1.154467019204008e-05, - "loss": 0.2509, + "loss": 0.1254, "step": 4557 }, { "epoch": 1.27, "learning_rate": 1.154281473234994e-05, - "loss": 0.1411, + "loss": 0.0224, "step": 4558 }, { "epoch": 1.27, "learning_rate": 1.1540959272659802e-05, - "loss": 0.0837, + "loss": 0.2152, "step": 4559 }, { "epoch": 1.27, "learning_rate": 1.1539103812969664e-05, - "loss": 0.0858, + "loss": 0.0226, "step": 4560 }, { "epoch": 1.27, "learning_rate": 1.1537248353279526e-05, - "loss": 0.1393, + "loss": 0.0293, "step": 4561 }, { "epoch": 1.27, "learning_rate": 1.1535392893589388e-05, - "loss": 0.1419, + "loss": 0.1049, "step": 4562 }, { "epoch": 1.27, "learning_rate": 1.153353743389925e-05, - "loss": 0.1424, + "loss": 0.023, "step": 4563 }, { "epoch": 1.27, "learning_rate": 1.153168197420911e-05, - "loss": 0.2508, + "loss": 0.0519, "step": 4564 }, { "epoch": 1.27, "learning_rate": 1.1529826514518973e-05, - "loss": 0.0295, + "loss": 0.0706, "step": 4565 }, { "epoch": 1.27, "learning_rate": 1.1527971054828835e-05, - "loss": 0.0846, + "loss": 0.0233, "step": 4566 }, { "epoch": 1.27, "learning_rate": 1.1526115595138695e-05, - "loss": 0.1397, + "loss": 0.0583, "step": 4567 }, { "epoch": 1.27, "learning_rate": 1.1524260135448557e-05, - "loss": 0.0793, + "loss": 0.083, "step": 4568 }, { "epoch": 1.27, "learning_rate": 1.152240467575842e-05, - "loss": 0.0305, + "loss": 0.0944, "step": 4569 }, { "epoch": 1.27, "learning_rate": 1.1520549216068283e-05, - "loss": 0.1364, + "loss": 0.0482, "step": 4570 }, { "epoch": 1.27, "learning_rate": 1.1518693756378143e-05, - "loss": 0.0838, + "loss": 0.1584, "step": 4571 }, { "epoch": 1.27, "learning_rate": 1.1516838296688005e-05, - "loss": 0.1356, + "loss": 0.1081, "step": 4572 }, { "epoch": 1.27, "learning_rate": 1.1514982836997868e-05, - "loss": 0.0837, + "loss": 0.0189, "step": 4573 }, { "epoch": 1.27, "learning_rate": 1.1513127377307728e-05, - "loss": 0.2518, + "loss": 0.014, "step": 4574 }, { "epoch": 1.27, "learning_rate": 1.151127191761759e-05, - "loss": 0.0841, + "loss": 0.0687, "step": 4575 }, { "epoch": 1.27, "learning_rate": 1.1509416457927452e-05, - "loss": 0.1399, + "loss": 0.1832, "step": 4576 }, { "epoch": 1.27, "learning_rate": 1.1507560998237316e-05, - "loss": 0.1421, + "loss": 0.0145, "step": 4577 }, { "epoch": 1.27, "learning_rate": 1.1505705538547176e-05, - "loss": 0.1365, + "loss": 0.0181, "step": 4578 }, { "epoch": 1.27, "learning_rate": 1.1503850078857038e-05, - "loss": 0.0829, + "loss": 0.1086, "step": 4579 }, { "epoch": 1.27, "learning_rate": 1.1501994619166898e-05, - "loss": 0.0825, + "loss": 0.0163, "step": 4580 }, { "epoch": 1.27, "learning_rate": 1.1500139159476761e-05, - "loss": 0.0835, + "loss": 0.0828, "step": 4581 }, { "epoch": 1.28, "learning_rate": 1.1498283699786623e-05, - "loss": 0.1974, + "loss": 0.1509, "step": 4582 }, { "epoch": 1.28, "learning_rate": 1.1496428240096485e-05, - "loss": 0.1992, + "loss": 0.0548, "step": 4583 }, { "epoch": 1.28, "learning_rate": 1.1494572780406345e-05, - "loss": 0.1338, + "loss": 0.0764, "step": 4584 }, { "epoch": 1.28, "learning_rate": 1.1492717320716209e-05, - "loss": 0.2492, + "loss": 0.0161, "step": 4585 }, { "epoch": 1.28, "learning_rate": 1.149086186102607e-05, - "loss": 0.2512, + "loss": 0.0999, "step": 4586 }, { "epoch": 1.28, "learning_rate": 1.1489006401335931e-05, - "loss": 0.1393, + "loss": 0.1449, "step": 4587 }, { "epoch": 1.28, "learning_rate": 1.1487150941645793e-05, - "loss": 0.36, + "loss": 0.0181, "step": 4588 }, { "epoch": 1.28, "learning_rate": 1.1485295481955656e-05, - "loss": 0.1921, + "loss": 0.1307, "step": 4589 }, { "epoch": 1.28, "learning_rate": 1.1483440022265517e-05, - "loss": 0.1358, + "loss": 0.0713, "step": 4590 }, { "epoch": 1.28, "learning_rate": 1.1481584562575378e-05, - "loss": 0.1896, + "loss": 0.0551, "step": 4591 }, { "epoch": 1.28, "learning_rate": 1.147972910288524e-05, - "loss": 0.3017, + "loss": 0.0306, "step": 4592 }, { "epoch": 1.28, "learning_rate": 1.1477873643195104e-05, - "loss": 0.1944, + "loss": 0.1069, "step": 4593 }, { "epoch": 1.28, "learning_rate": 1.1476018183504964e-05, - "loss": 0.2986, + "loss": 0.064, "step": 4594 }, { "epoch": 1.28, "learning_rate": 1.1474162723814826e-05, - "loss": 0.0893, + "loss": 0.056, "step": 4595 }, { "epoch": 1.28, "learning_rate": 1.1472307264124686e-05, - "loss": 0.189, + "loss": 0.173, "step": 4596 }, { "epoch": 1.28, "learning_rate": 1.147045180443455e-05, - "loss": 0.1408, + "loss": 0.0161, "step": 4597 }, { "epoch": 1.28, "learning_rate": 1.1468596344744412e-05, - "loss": 0.1933, + "loss": 0.1316, "step": 4598 }, { "epoch": 1.28, "learning_rate": 1.1466740885054273e-05, - "loss": 0.1371, + "loss": 0.1665, "step": 4599 }, { "epoch": 1.28, "learning_rate": 1.1464885425364134e-05, - "loss": 0.1392, + "loss": 0.0208, "step": 4600 }, { "epoch": 1.28, "learning_rate": 1.1463029965673997e-05, - "loss": 0.1415, + "loss": 0.1951, "step": 4601 }, { "epoch": 1.28, "learning_rate": 1.1461174505983859e-05, - "loss": 0.1424, + "loss": 0.0726, "step": 4602 }, { "epoch": 1.28, "learning_rate": 1.145931904629372e-05, - "loss": 0.0417, + "loss": 0.0722, "step": 4603 }, { "epoch": 1.28, "learning_rate": 1.1457463586603581e-05, - "loss": 0.1928, + "loss": 0.0681, "step": 4604 }, { "epoch": 1.28, "learning_rate": 1.1455608126913445e-05, - "loss": 0.2893, + "loss": 0.02, "step": 4605 }, { "epoch": 1.28, "learning_rate": 1.1453752667223307e-05, - "loss": 0.0933, + "loss": 0.0883, "step": 4606 }, { "epoch": 1.28, "learning_rate": 1.1451897207533167e-05, - "loss": 0.1377, + "loss": 0.1398, "step": 4607 }, { "epoch": 1.28, "learning_rate": 1.1450041747843029e-05, - "loss": 0.1386, + "loss": 0.0211, "step": 4608 }, { "epoch": 1.28, "learning_rate": 1.1448186288152892e-05, - "loss": 0.1381, + "loss": 0.0833, "step": 4609 }, { "epoch": 1.28, "learning_rate": 1.1446330828462752e-05, - "loss": 0.1869, + "loss": 0.06, "step": 4610 }, { "epoch": 1.28, "learning_rate": 1.1444475368772614e-05, - "loss": 0.1379, + "loss": 0.1823, "step": 4611 }, { "epoch": 1.28, "learning_rate": 1.1442619909082474e-05, - "loss": 0.1417, + "loss": 0.2365, "step": 4612 }, { "epoch": 1.28, "learning_rate": 1.1440764449392338e-05, - "loss": 0.0415, + "loss": 0.1909, "step": 4613 }, { "epoch": 1.28, "learning_rate": 1.14389089897022e-05, - "loss": 0.0926, + "loss": 0.0583, "step": 4614 }, { "epoch": 1.28, "learning_rate": 1.1437053530012062e-05, - "loss": 0.2901, + "loss": 0.195, "step": 4615 }, { "epoch": 1.28, "learning_rate": 1.1435198070321922e-05, - "loss": 0.0389, + "loss": 0.0515, "step": 4616 }, { "epoch": 1.28, "learning_rate": 1.1433342610631785e-05, - "loss": 0.089, + "loss": 0.0375, "step": 4617 }, { "epoch": 1.29, "learning_rate": 1.1431487150941647e-05, - "loss": 0.0393, + "loss": 0.1736, "step": 4618 }, { "epoch": 1.29, "learning_rate": 1.1429631691251507e-05, - "loss": 0.1855, + "loss": 0.0262, "step": 4619 }, { "epoch": 1.29, "learning_rate": 1.142777623156137e-05, - "loss": 0.1415, + "loss": 0.0932, "step": 4620 }, { "epoch": 1.29, "learning_rate": 1.1425920771871233e-05, - "loss": 0.2378, + "loss": 0.0703, "step": 4621 }, { "epoch": 1.29, "learning_rate": 1.1424065312181095e-05, - "loss": 0.0864, + "loss": 0.0912, "step": 4622 }, { "epoch": 1.29, "learning_rate": 1.1422209852490955e-05, - "loss": 0.1949, + "loss": 0.0606, "step": 4623 }, { "epoch": 1.29, "learning_rate": 1.1420354392800817e-05, - "loss": 0.1424, + "loss": 0.0588, "step": 4624 }, { "epoch": 1.29, "learning_rate": 1.141849893311068e-05, - "loss": 0.0337, + "loss": 0.0658, "step": 4625 }, { "epoch": 1.29, "learning_rate": 1.141664347342054e-05, - "loss": 0.245, + "loss": 0.075, "step": 4626 }, { "epoch": 1.29, "learning_rate": 1.1414788013730402e-05, - "loss": 0.1359, + "loss": 0.0524, "step": 4627 }, { "epoch": 1.29, "learning_rate": 1.1412932554040264e-05, - "loss": 0.1866, + "loss": 0.0865, "step": 4628 }, { "epoch": 1.29, "learning_rate": 1.1411077094350126e-05, - "loss": 0.0342, + "loss": 0.1721, "step": 4629 }, { "epoch": 1.29, "learning_rate": 1.1409221634659988e-05, - "loss": 0.191, + "loss": 0.0237, "step": 4630 }, { "epoch": 1.29, "learning_rate": 1.140736617496985e-05, - "loss": 0.1377, + "loss": 0.1132, "step": 4631 }, { "epoch": 1.29, "learning_rate": 1.140551071527971e-05, - "loss": 0.2951, + "loss": 0.1076, "step": 4632 }, { "epoch": 1.29, "learning_rate": 1.1403655255589574e-05, - "loss": 0.0842, + "loss": 0.0244, "step": 4633 }, { "epoch": 1.29, "learning_rate": 1.1401799795899436e-05, - "loss": 0.0328, + "loss": 0.1644, "step": 4634 }, { "epoch": 1.29, "learning_rate": 1.1399944336209296e-05, - "loss": 0.0847, + "loss": 0.0508, "step": 4635 }, { "epoch": 1.29, "learning_rate": 1.1398088876519158e-05, - "loss": 0.0881, + "loss": 0.0138, "step": 4636 }, { "epoch": 1.29, "learning_rate": 1.1396233416829021e-05, - "loss": 0.1414, + "loss": 0.116, "step": 4637 }, { "epoch": 1.29, "learning_rate": 1.1394377957138883e-05, - "loss": 0.2414, + "loss": 0.1698, "step": 4638 }, { "epoch": 1.29, "learning_rate": 1.1392522497448743e-05, - "loss": 0.2476, + "loss": 0.0137, "step": 4639 }, { "epoch": 1.29, "learning_rate": 1.1390667037758605e-05, - "loss": 0.142, + "loss": 0.1986, "step": 4640 }, { "epoch": 1.29, "learning_rate": 1.1388811578068469e-05, - "loss": 0.1424, + "loss": 0.1462, "step": 4641 }, { "epoch": 1.29, "learning_rate": 1.1386956118378329e-05, - "loss": 0.0315, + "loss": 0.1433, "step": 4642 }, { "epoch": 1.29, "learning_rate": 1.138510065868819e-05, - "loss": 0.2495, + "loss": 0.0265, "step": 4643 }, { "epoch": 1.29, "learning_rate": 1.1383245198998053e-05, - "loss": 0.3033, + "loss": 0.1611, "step": 4644 }, { "epoch": 1.29, "learning_rate": 1.1381389739307916e-05, - "loss": 0.2492, + "loss": 0.1128, "step": 4645 }, { "epoch": 1.29, "learning_rate": 1.1379534279617776e-05, - "loss": 0.0863, + "loss": 0.0393, "step": 4646 }, { "epoch": 1.29, "learning_rate": 1.1377678819927638e-05, - "loss": 0.1888, + "loss": 0.0315, "step": 4647 }, { "epoch": 1.29, "learning_rate": 1.1375823360237498e-05, - "loss": 0.1372, + "loss": 0.1151, "step": 4648 }, { "epoch": 1.29, "learning_rate": 1.1373967900547362e-05, - "loss": 0.1397, + "loss": 0.1402, "step": 4649 }, { "epoch": 1.29, "learning_rate": 1.1372112440857224e-05, - "loss": 0.1908, + "loss": 0.063, "step": 4650 }, { "epoch": 1.29, "learning_rate": 1.1370256981167086e-05, - "loss": 0.1906, + "loss": 0.0641, "step": 4651 }, { "epoch": 1.29, "learning_rate": 1.1368401521476946e-05, - "loss": 0.1851, + "loss": 0.0685, "step": 4652 }, { "epoch": 1.3, "learning_rate": 1.136654606178681e-05, - "loss": 0.1396, + "loss": 0.0592, "step": 4653 }, { "epoch": 1.3, "learning_rate": 1.1364690602096671e-05, - "loss": 0.2394, + "loss": 0.0329, "step": 4654 }, { "epoch": 1.3, "learning_rate": 1.1362835142406531e-05, - "loss": 0.1351, + "loss": 0.0637, "step": 4655 }, { "epoch": 1.3, "learning_rate": 1.1360979682716393e-05, - "loss": 0.1918, + "loss": 0.1079, "step": 4656 }, { "epoch": 1.3, "learning_rate": 1.1359124223026257e-05, - "loss": 0.1423, + "loss": 0.1089, "step": 4657 }, { "epoch": 1.3, "learning_rate": 1.1357268763336117e-05, - "loss": 0.2404, + "loss": 0.0727, "step": 4658 }, { "epoch": 1.3, "learning_rate": 1.1355413303645979e-05, - "loss": 0.143, + "loss": 0.0237, "step": 4659 }, { "epoch": 1.3, "learning_rate": 1.135355784395584e-05, - "loss": 0.0398, + "loss": 0.1066, "step": 4660 }, { "epoch": 1.3, "learning_rate": 1.1351702384265704e-05, - "loss": 0.0937, + "loss": 0.2418, "step": 4661 }, { "epoch": 1.3, "learning_rate": 1.1349846924575565e-05, - "loss": 0.19, + "loss": 0.0458, "step": 4662 }, { "epoch": 1.3, "learning_rate": 1.1347991464885426e-05, - "loss": 0.0879, + "loss": 0.0191, "step": 4663 }, { "epoch": 1.3, "learning_rate": 1.1346136005195287e-05, - "loss": 0.1395, + "loss": 0.0144, "step": 4664 }, { "epoch": 1.3, "learning_rate": 1.134428054550515e-05, - "loss": 0.0891, + "loss": 0.022, "step": 4665 }, { "epoch": 1.3, "learning_rate": 1.1342425085815012e-05, - "loss": 0.0867, + "loss": 0.203, "step": 4666 }, { "epoch": 1.3, "learning_rate": 1.1340569626124874e-05, - "loss": 0.0884, + "loss": 0.0056, "step": 4667 }, { "epoch": 1.3, "learning_rate": 1.1338714166434734e-05, - "loss": 0.0369, + "loss": 0.1947, "step": 4668 }, { "epoch": 1.3, "learning_rate": 1.1336858706744598e-05, - "loss": 0.087, + "loss": 0.1227, "step": 4669 }, { "epoch": 1.3, "learning_rate": 1.133500324705446e-05, - "loss": 0.0866, + "loss": 0.0158, "step": 4670 }, { "epoch": 1.3, "learning_rate": 1.133314778736432e-05, - "loss": 0.1913, + "loss": 0.0973, "step": 4671 }, { "epoch": 1.3, "learning_rate": 1.1331292327674182e-05, - "loss": 0.0866, + "loss": 0.0572, "step": 4672 }, { "epoch": 1.3, "learning_rate": 1.1329436867984045e-05, - "loss": 0.1906, + "loss": 0.2054, "step": 4673 }, { "epoch": 1.3, "learning_rate": 1.1327581408293907e-05, - "loss": 0.1937, + "loss": 0.1078, "step": 4674 }, { "epoch": 1.3, "learning_rate": 1.1325725948603767e-05, - "loss": 0.0335, + "loss": 0.0815, "step": 4675 }, { "epoch": 1.3, "learning_rate": 1.1323870488913629e-05, - "loss": 0.0862, + "loss": 0.0246, "step": 4676 }, { "epoch": 1.3, "learning_rate": 1.1322015029223493e-05, - "loss": 0.1393, + "loss": 0.1649, "step": 4677 }, { "epoch": 1.3, "learning_rate": 1.1320159569533353e-05, - "loss": 0.1384, + "loss": 0.03, "step": 4678 }, { "epoch": 1.3, "learning_rate": 1.1318304109843215e-05, - "loss": 0.0829, + "loss": 0.1977, "step": 4679 }, { "epoch": 1.3, "learning_rate": 1.1316448650153076e-05, - "loss": 0.2567, + "loss": 0.1446, "step": 4680 }, { "epoch": 1.3, "learning_rate": 1.1314593190462938e-05, - "loss": 0.1969, + "loss": 0.0961, "step": 4681 }, { "epoch": 1.3, "learning_rate": 1.13127377307728e-05, - "loss": 0.1412, + "loss": 0.0264, "step": 4682 }, { "epoch": 1.3, "learning_rate": 1.1310882271082662e-05, - "loss": 0.1923, + "loss": 0.0784, "step": 4683 }, { "epoch": 1.3, "learning_rate": 1.1309026811392522e-05, - "loss": 0.0828, + "loss": 0.0624, "step": 4684 }, { "epoch": 1.3, "learning_rate": 1.1307171351702386e-05, - "loss": 0.0867, + "loss": 0.083, "step": 4685 }, { "epoch": 1.3, "learning_rate": 1.1305315892012248e-05, - "loss": 0.1402, + "loss": 0.045, "step": 4686 }, { "epoch": 1.3, "learning_rate": 1.1303460432322108e-05, - "loss": 0.1391, + "loss": 0.1207, "step": 4687 }, { "epoch": 1.3, "learning_rate": 1.130160497263197e-05, - "loss": 0.144, + "loss": 0.0571, "step": 4688 }, { "epoch": 1.31, "learning_rate": 1.1299749512941833e-05, - "loss": 0.1928, + "loss": 0.2186, "step": 4689 }, { "epoch": 1.31, "learning_rate": 1.1297894053251695e-05, - "loss": 0.3004, + "loss": 0.0942, "step": 4690 }, { "epoch": 1.31, "learning_rate": 1.1296038593561555e-05, - "loss": 0.2013, + "loss": 0.026, "step": 4691 }, { "epoch": 1.31, "learning_rate": 1.1294183133871417e-05, - "loss": 0.1401, + "loss": 0.0589, "step": 4692 }, { "epoch": 1.31, "learning_rate": 1.129232767418128e-05, - "loss": 0.085, + "loss": 0.0703, "step": 4693 }, { "epoch": 1.31, "learning_rate": 1.1290472214491141e-05, - "loss": 0.1926, + "loss": 0.0809, "step": 4694 }, { "epoch": 1.31, "learning_rate": 1.1288616754801003e-05, - "loss": 0.1924, + "loss": 0.0848, "step": 4695 }, { "epoch": 1.31, "learning_rate": 1.1286761295110865e-05, - "loss": 0.0319, + "loss": 0.1226, "step": 4696 }, { "epoch": 1.31, "learning_rate": 1.1284905835420728e-05, - "loss": 0.0857, + "loss": 0.0418, "step": 4697 }, { "epoch": 1.31, "learning_rate": 1.1283050375730588e-05, - "loss": 0.031, + "loss": 0.0503, "step": 4698 }, { "epoch": 1.31, "learning_rate": 1.128119491604045e-05, - "loss": 0.139, + "loss": 0.1651, "step": 4699 }, { "epoch": 1.31, "learning_rate": 1.127933945635031e-05, - "loss": 0.1386, + "loss": 0.0149, "step": 4700 }, { "epoch": 1.31, "learning_rate": 1.1277483996660174e-05, - "loss": 0.0313, + "loss": 0.0828, "step": 4701 }, { "epoch": 1.31, "learning_rate": 1.1275628536970036e-05, - "loss": 0.1922, + "loss": 0.136, "step": 4702 }, { "epoch": 1.31, "learning_rate": 1.1273773077279898e-05, - "loss": 0.141, + "loss": 0.0546, "step": 4703 }, { "epoch": 1.31, "learning_rate": 1.1271917617589758e-05, - "loss": 0.1414, + "loss": 0.1082, "step": 4704 }, { "epoch": 1.31, "learning_rate": 1.1270062157899622e-05, - "loss": 0.0309, + "loss": 0.0119, "step": 4705 }, { "epoch": 1.31, "learning_rate": 1.1268206698209483e-05, - "loss": 0.0313, + "loss": 0.143, "step": 4706 }, { "epoch": 1.31, "learning_rate": 1.1266351238519344e-05, - "loss": 0.0859, + "loss": 0.017, "step": 4707 }, { "epoch": 1.31, "learning_rate": 1.1264495778829205e-05, - "loss": 0.1391, + "loss": 0.1063, "step": 4708 }, { "epoch": 1.31, "learning_rate": 1.1262640319139067e-05, - "loss": 0.0298, + "loss": 0.0794, "step": 4709 }, { "epoch": 1.31, "learning_rate": 1.126078485944893e-05, - "loss": 0.1398, + "loss": 0.106, "step": 4710 }, { "epoch": 1.31, "learning_rate": 1.1258929399758791e-05, - "loss": 0.3104, + "loss": 0.0736, "step": 4711 }, { "epoch": 1.31, "learning_rate": 1.1257073940068653e-05, - "loss": 0.0812, + "loss": 0.1649, "step": 4712 }, { "epoch": 1.31, "learning_rate": 1.1255218480378513e-05, - "loss": 0.1916, + "loss": 0.0977, "step": 4713 }, { "epoch": 1.31, "learning_rate": 1.1253363020688377e-05, - "loss": 0.082, + "loss": 0.0919, "step": 4714 }, { "epoch": 1.31, "learning_rate": 1.1251507560998239e-05, - "loss": 0.3071, + "loss": 0.0218, "step": 4715 }, { "epoch": 1.31, "learning_rate": 1.1249652101308099e-05, - "loss": 0.0274, + "loss": 0.0511, "step": 4716 }, { "epoch": 1.31, "learning_rate": 1.124779664161796e-05, - "loss": 0.1387, + "loss": 0.0704, "step": 4717 }, { "epoch": 1.31, "learning_rate": 1.1245941181927824e-05, - "loss": 0.14, + "loss": 0.0524, "step": 4718 }, { "epoch": 1.31, "learning_rate": 1.1244085722237686e-05, - "loss": 0.1916, + "loss": 0.1145, "step": 4719 }, { "epoch": 1.31, "learning_rate": 1.1242230262547546e-05, - "loss": 0.0798, + "loss": 0.1355, "step": 4720 }, { "epoch": 1.31, "learning_rate": 1.1240374802857408e-05, - "loss": 0.0273, + "loss": 0.0408, "step": 4721 }, { "epoch": 1.31, "learning_rate": 1.1238519343167272e-05, - "loss": 0.2465, + "loss": 0.0506, "step": 4722 }, { "epoch": 1.31, "learning_rate": 1.1236663883477132e-05, - "loss": 0.1975, + "loss": 0.0275, "step": 4723 }, { "epoch": 1.31, "learning_rate": 1.1234808423786994e-05, - "loss": 0.1374, + "loss": 0.0204, "step": 4724 }, { "epoch": 1.32, "learning_rate": 1.1232952964096856e-05, - "loss": 0.1391, + "loss": 0.1141, "step": 4725 }, { "epoch": 1.32, "learning_rate": 1.1231097504406719e-05, - "loss": 0.0826, + "loss": 0.0434, "step": 4726 }, { "epoch": 1.32, "learning_rate": 1.122924204471658e-05, - "loss": 0.0837, + "loss": 0.1413, "step": 4727 }, { "epoch": 1.32, "learning_rate": 1.1227386585026441e-05, - "loss": 0.0851, + "loss": 0.0147, "step": 4728 }, { "epoch": 1.32, "learning_rate": 1.1225531125336301e-05, - "loss": 0.0839, + "loss": 0.06, "step": 4729 }, { "epoch": 1.32, "learning_rate": 1.1223675665646165e-05, - "loss": 0.0831, + "loss": 0.0781, "step": 4730 }, { "epoch": 1.32, "learning_rate": 1.1221820205956027e-05, - "loss": 0.1963, + "loss": 0.1213, "step": 4731 }, { "epoch": 1.32, "learning_rate": 1.1219964746265887e-05, - "loss": 0.2537, + "loss": 0.1016, "step": 4732 }, { "epoch": 1.32, "learning_rate": 1.1218109286575749e-05, - "loss": 0.2007, + "loss": 0.0198, "step": 4733 }, { "epoch": 1.32, "learning_rate": 1.1216253826885612e-05, - "loss": 0.1427, + "loss": 0.042, "step": 4734 }, { "epoch": 1.32, "learning_rate": 1.1214398367195474e-05, - "loss": 0.0848, + "loss": 0.101, "step": 4735 }, { "epoch": 1.32, "learning_rate": 1.1212542907505334e-05, - "loss": 0.0297, + "loss": 0.0146, "step": 4736 }, { "epoch": 1.32, "learning_rate": 1.1210687447815196e-05, - "loss": 0.2494, + "loss": 0.0532, "step": 4737 }, { "epoch": 1.32, "learning_rate": 1.120883198812506e-05, - "loss": 0.1427, + "loss": 0.1389, "step": 4738 }, { "epoch": 1.32, "learning_rate": 1.120697652843492e-05, - "loss": 0.1389, + "loss": 0.046, "step": 4739 }, { "epoch": 1.32, "learning_rate": 1.1205121068744782e-05, - "loss": 0.0864, + "loss": 0.094, "step": 4740 }, { "epoch": 1.32, "learning_rate": 1.1203265609054644e-05, - "loss": 0.1431, + "loss": 0.1708, "step": 4741 }, { "epoch": 1.32, "learning_rate": 1.1201410149364507e-05, - "loss": 0.0843, + "loss": 0.0536, "step": 4742 }, { "epoch": 1.32, "learning_rate": 1.1199554689674368e-05, - "loss": 0.0304, + "loss": 0.0793, "step": 4743 }, { "epoch": 1.32, "learning_rate": 1.119769922998423e-05, - "loss": 0.0849, + "loss": 0.0878, "step": 4744 }, { "epoch": 1.32, "learning_rate": 1.119584377029409e-05, - "loss": 0.1436, + "loss": 0.0404, "step": 4745 }, { "epoch": 1.32, "learning_rate": 1.1193988310603953e-05, - "loss": 0.1387, + "loss": 0.061, "step": 4746 }, { "epoch": 1.32, "learning_rate": 1.1192132850913815e-05, - "loss": 0.1393, + "loss": 0.0272, "step": 4747 }, { "epoch": 1.32, "learning_rate": 1.1190277391223677e-05, - "loss": 0.2479, + "loss": 0.0966, "step": 4748 }, { "epoch": 1.32, "learning_rate": 1.1188421931533537e-05, - "loss": 0.0286, + "loss": 0.1587, "step": 4749 }, { "epoch": 1.32, "learning_rate": 1.11865664718434e-05, - "loss": 0.1922, + "loss": 0.1043, "step": 4750 }, { "epoch": 1.32, "learning_rate": 1.1184711012153263e-05, - "loss": 0.2538, + "loss": 0.0617, "step": 4751 }, { "epoch": 1.32, "learning_rate": 1.1182855552463123e-05, - "loss": 0.1414, + "loss": 0.1811, "step": 4752 }, { "epoch": 1.32, "learning_rate": 1.1181000092772985e-05, - "loss": 0.0835, + "loss": 0.0749, "step": 4753 }, { "epoch": 1.32, "learning_rate": 1.1179144633082848e-05, - "loss": 0.1396, + "loss": 0.0897, "step": 4754 }, { "epoch": 1.32, "learning_rate": 1.1177289173392708e-05, - "loss": 0.1934, + "loss": 0.0183, "step": 4755 }, { "epoch": 1.32, "learning_rate": 1.117543371370257e-05, - "loss": 0.0847, + "loss": 0.0591, "step": 4756 }, { "epoch": 1.32, "learning_rate": 1.1173578254012432e-05, - "loss": 0.1973, + "loss": 0.0313, "step": 4757 }, { "epoch": 1.32, "learning_rate": 1.1171722794322296e-05, - "loss": 0.2426, + "loss": 0.0713, "step": 4758 }, { "epoch": 1.32, "learning_rate": 1.1169867334632156e-05, - "loss": 0.029, + "loss": 0.105, "step": 4759 }, { "epoch": 1.32, "learning_rate": 1.1168011874942018e-05, - "loss": 0.192, + "loss": 0.0728, "step": 4760 }, { "epoch": 1.33, "learning_rate": 1.1166156415251878e-05, - "loss": 0.3038, + "loss": 0.0813, "step": 4761 }, { "epoch": 1.33, "learning_rate": 1.1164300955561741e-05, - "loss": 0.0869, + "loss": 0.0113, "step": 4762 }, { "epoch": 1.33, "learning_rate": 1.1162445495871603e-05, - "loss": 0.1912, + "loss": 0.1416, "step": 4763 }, { "epoch": 1.33, "learning_rate": 1.1160590036181465e-05, - "loss": 0.0863, + "loss": 0.0179, "step": 4764 }, { "epoch": 1.33, "learning_rate": 1.1158734576491325e-05, - "loss": 0.1385, + "loss": 0.0711, "step": 4765 }, { "epoch": 1.33, "learning_rate": 1.1156879116801189e-05, - "loss": 0.0872, + "loss": 0.0228, "step": 4766 }, { "epoch": 1.33, "learning_rate": 1.115502365711105e-05, - "loss": 0.0315, + "loss": 0.207, "step": 4767 }, { "epoch": 1.33, "learning_rate": 1.1153168197420911e-05, - "loss": 0.1365, + "loss": 0.2517, "step": 4768 }, { "epoch": 1.33, "learning_rate": 1.1151312737730773e-05, - "loss": 0.1375, + "loss": 0.0452, "step": 4769 }, { "epoch": 1.33, "learning_rate": 1.1149457278040636e-05, - "loss": 0.1434, + "loss": 0.0559, "step": 4770 }, { "epoch": 1.33, "learning_rate": 1.1147601818350498e-05, - "loss": 0.3472, + "loss": 0.0878, "step": 4771 }, { "epoch": 1.33, "learning_rate": 1.1145746358660358e-05, - "loss": 0.085, + "loss": 0.0208, "step": 4772 }, { "epoch": 1.33, "learning_rate": 1.114389089897022e-05, - "loss": 0.0851, + "loss": 0.0263, "step": 4773 }, { "epoch": 1.33, "learning_rate": 1.1142035439280084e-05, - "loss": 0.0858, + "loss": 0.0572, "step": 4774 }, { "epoch": 1.33, "learning_rate": 1.1140179979589944e-05, - "loss": 0.0838, + "loss": 0.0599, "step": 4775 }, { "epoch": 1.33, "learning_rate": 1.1138324519899806e-05, - "loss": 0.1394, + "loss": 0.171, "step": 4776 }, { "epoch": 1.33, "learning_rate": 1.1136469060209668e-05, - "loss": 0.0857, + "loss": 0.1173, "step": 4777 }, { "epoch": 1.33, "learning_rate": 1.113461360051953e-05, - "loss": 0.1386, + "loss": 0.1857, "step": 4778 }, { "epoch": 1.33, "learning_rate": 1.1132758140829392e-05, - "loss": 0.0317, + "loss": 0.0532, "step": 4779 }, { "epoch": 1.33, "learning_rate": 1.1130902681139253e-05, - "loss": 0.2483, + "loss": 0.1831, "step": 4780 }, { "epoch": 1.33, "learning_rate": 1.1129047221449114e-05, - "loss": 0.0856, + "loss": 0.0583, "step": 4781 }, { "epoch": 1.33, "learning_rate": 1.1127191761758977e-05, - "loss": 0.0873, + "loss": 0.0909, "step": 4782 }, { "epoch": 1.33, "learning_rate": 1.1125336302068839e-05, - "loss": 0.1904, + "loss": 0.068, "step": 4783 }, { "epoch": 1.33, "learning_rate": 1.11234808423787e-05, - "loss": 0.1383, + "loss": 0.0584, "step": 4784 }, { "epoch": 1.33, "learning_rate": 1.1121625382688561e-05, - "loss": 0.136, + "loss": 0.0911, "step": 4785 }, { "epoch": 1.33, "learning_rate": 1.1119769922998425e-05, - "loss": 0.0305, + "loss": 0.0959, "step": 4786 }, { "epoch": 1.33, "learning_rate": 1.1117914463308286e-05, - "loss": 0.087, + "loss": 0.1147, "step": 4787 }, { "epoch": 1.33, "learning_rate": 1.1116059003618147e-05, - "loss": 0.2482, + "loss": 0.0938, "step": 4788 }, { "epoch": 1.33, "learning_rate": 1.1114203543928009e-05, - "loss": 0.1368, + "loss": 0.086, "step": 4789 }, { "epoch": 1.33, "learning_rate": 1.1112348084237872e-05, - "loss": 0.1406, + "loss": 0.124, "step": 4790 }, { "epoch": 1.33, "learning_rate": 1.1110492624547732e-05, - "loss": 0.1947, + "loss": 0.0778, "step": 4791 }, { "epoch": 1.33, "learning_rate": 1.1108637164857594e-05, - "loss": 0.1941, + "loss": 0.0623, "step": 4792 }, { "epoch": 1.33, "learning_rate": 1.1106781705167456e-05, - "loss": 0.0302, + "loss": 0.0936, "step": 4793 }, { "epoch": 1.33, "learning_rate": 1.110492624547732e-05, - "loss": 0.0852, + "loss": 0.0261, "step": 4794 }, { "epoch": 1.33, "learning_rate": 1.110307078578718e-05, - "loss": 0.1365, + "loss": 0.0604, "step": 4795 }, { "epoch": 1.33, "learning_rate": 1.1101215326097042e-05, - "loss": 0.1946, + "loss": 0.0801, "step": 4796 }, { "epoch": 1.34, "learning_rate": 1.1099359866406902e-05, - "loss": 0.0841, + "loss": 0.0203, "step": 4797 }, { "epoch": 1.34, "learning_rate": 1.1097504406716765e-05, - "loss": 0.1454, + "loss": 0.0239, "step": 4798 }, { "epoch": 1.34, "learning_rate": 1.1095648947026627e-05, - "loss": 0.1391, + "loss": 0.0997, "step": 4799 }, { "epoch": 1.34, "learning_rate": 1.1093793487336489e-05, - "loss": 0.0289, + "loss": 0.0809, "step": 4800 }, { "epoch": 1.34, "learning_rate": 1.109193802764635e-05, - "loss": 0.0291, + "loss": 0.0999, "step": 4801 }, { "epoch": 1.34, "learning_rate": 1.1090082567956213e-05, - "loss": 0.0856, + "loss": 0.0578, "step": 4802 }, { "epoch": 1.34, "learning_rate": 1.1088227108266075e-05, - "loss": 0.0847, + "loss": 0.0905, "step": 4803 }, { "epoch": 1.34, "learning_rate": 1.1086371648575935e-05, - "loss": 0.0826, + "loss": 0.0155, "step": 4804 }, { "epoch": 1.34, "learning_rate": 1.1084516188885797e-05, - "loss": 0.3143, + "loss": 0.1671, "step": 4805 }, { "epoch": 1.34, "learning_rate": 1.108266072919566e-05, - "loss": 0.3067, + "loss": 0.1305, "step": 4806 }, { "epoch": 1.34, "learning_rate": 1.108080526950552e-05, - "loss": 0.2573, + "loss": 0.1691, "step": 4807 }, { "epoch": 1.34, "learning_rate": 1.1078949809815382e-05, - "loss": 0.1395, + "loss": 0.0207, "step": 4808 }, { "epoch": 1.34, "learning_rate": 1.1077094350125244e-05, - "loss": 0.1914, + "loss": 0.1547, "step": 4809 }, { "epoch": 1.34, "learning_rate": 1.1075238890435108e-05, - "loss": 0.1939, + "loss": 0.0562, "step": 4810 }, { "epoch": 1.34, "learning_rate": 1.1073383430744968e-05, - "loss": 0.0863, + "loss": 0.1699, "step": 4811 }, { "epoch": 1.34, "learning_rate": 1.107152797105483e-05, - "loss": 0.1931, + "loss": 0.0121, "step": 4812 }, { "epoch": 1.34, "learning_rate": 1.106967251136469e-05, - "loss": 0.0838, + "loss": 0.1205, "step": 4813 }, { "epoch": 1.34, "learning_rate": 1.1067817051674554e-05, - "loss": 0.1374, + "loss": 0.0149, "step": 4814 }, { "epoch": 1.34, "learning_rate": 1.1065961591984415e-05, - "loss": 0.0843, + "loss": 0.2094, "step": 4815 }, { "epoch": 1.34, "learning_rate": 1.1064106132294277e-05, - "loss": 0.1939, + "loss": 0.0578, "step": 4816 }, { "epoch": 1.34, "learning_rate": 1.1062250672604138e-05, - "loss": 0.0849, + "loss": 0.0218, "step": 4817 }, { "epoch": 1.34, "learning_rate": 1.1060395212914001e-05, - "loss": 0.1376, + "loss": 0.023, "step": 4818 }, { "epoch": 1.34, "learning_rate": 1.1058539753223863e-05, - "loss": 0.0811, + "loss": 0.035, "step": 4819 }, { "epoch": 1.34, "learning_rate": 1.1056684293533723e-05, - "loss": 0.2497, + "loss": 0.0976, "step": 4820 }, { "epoch": 1.34, "learning_rate": 1.1054828833843585e-05, - "loss": 0.031, + "loss": 0.0962, "step": 4821 }, { "epoch": 1.34, "learning_rate": 1.1052973374153449e-05, - "loss": 0.031, + "loss": 0.0455, "step": 4822 }, { "epoch": 1.34, "learning_rate": 1.105111791446331e-05, - "loss": 0.0847, + "loss": 0.0213, "step": 4823 }, { "epoch": 1.34, "learning_rate": 1.104926245477317e-05, - "loss": 0.1935, + "loss": 0.0671, "step": 4824 }, { "epoch": 1.34, "learning_rate": 1.1047406995083032e-05, - "loss": 0.193, + "loss": 0.0182, "step": 4825 }, { "epoch": 1.34, "learning_rate": 1.1045551535392896e-05, - "loss": 0.2492, + "loss": 0.0257, "step": 4826 }, { "epoch": 1.34, "learning_rate": 1.1043696075702756e-05, - "loss": 0.1987, + "loss": 0.0117, "step": 4827 }, { "epoch": 1.34, "learning_rate": 1.1041840616012618e-05, - "loss": 0.0843, + "loss": 0.131, "step": 4828 }, { "epoch": 1.34, "learning_rate": 1.103998515632248e-05, - "loss": 0.1381, + "loss": 0.0162, "step": 4829 }, { "epoch": 1.34, "learning_rate": 1.1038129696632342e-05, - "loss": 0.0878, + "loss": 0.0116, "step": 4830 }, { "epoch": 1.34, "learning_rate": 1.1036274236942204e-05, - "loss": 0.0862, + "loss": 0.018, "step": 4831 }, { "epoch": 1.34, "learning_rate": 1.1034418777252066e-05, - "loss": 0.2995, + "loss": 0.2101, "step": 4832 }, { "epoch": 1.35, "learning_rate": 1.1032563317561926e-05, - "loss": 0.138, + "loss": 0.2285, "step": 4833 }, { "epoch": 1.35, "learning_rate": 1.103070785787179e-05, - "loss": 0.2999, + "loss": 0.1232, "step": 4834 }, { "epoch": 1.35, "learning_rate": 1.1028852398181651e-05, - "loss": 0.2966, + "loss": 0.1192, "step": 4835 }, { "epoch": 1.35, "learning_rate": 1.1026996938491511e-05, - "loss": 0.0334, + "loss": 0.1934, "step": 4836 }, { "epoch": 1.35, "learning_rate": 1.1025141478801373e-05, - "loss": 0.0339, + "loss": 0.0489, "step": 4837 }, { "epoch": 1.35, "learning_rate": 1.1023286019111237e-05, - "loss": 0.0337, + "loss": 0.0161, "step": 4838 }, { "epoch": 1.35, "learning_rate": 1.1021430559421099e-05, - "loss": 0.1418, + "loss": 0.0234, "step": 4839 }, { "epoch": 1.35, "learning_rate": 1.1019575099730959e-05, - "loss": 0.4036, + "loss": 0.1134, "step": 4840 }, { "epoch": 1.35, "learning_rate": 1.101771964004082e-05, - "loss": 0.1372, + "loss": 0.0652, "step": 4841 }, { "epoch": 1.35, "learning_rate": 1.1015864180350684e-05, - "loss": 0.0855, + "loss": 0.0177, "step": 4842 }, { "epoch": 1.35, "learning_rate": 1.1014008720660544e-05, - "loss": 0.0342, + "loss": 0.0282, "step": 4843 }, { "epoch": 1.35, "learning_rate": 1.1012153260970406e-05, - "loss": 0.1384, + "loss": 0.0768, "step": 4844 }, { "epoch": 1.35, "learning_rate": 1.1010297801280268e-05, - "loss": 0.1905, + "loss": 0.1224, "step": 4845 }, { "epoch": 1.35, "learning_rate": 1.1008442341590132e-05, - "loss": 0.0344, + "loss": 0.0997, "step": 4846 }, { "epoch": 1.35, "learning_rate": 1.1006586881899992e-05, - "loss": 0.088, + "loss": 0.0262, "step": 4847 }, { "epoch": 1.35, "learning_rate": 1.1004731422209854e-05, - "loss": 0.0882, + "loss": 0.0161, "step": 4848 }, { "epoch": 1.35, "learning_rate": 1.1002875962519714e-05, - "loss": 0.2473, + "loss": 0.0293, "step": 4849 }, { "epoch": 1.35, "learning_rate": 1.1001020502829578e-05, - "loss": 0.1419, + "loss": 0.2002, "step": 4850 }, { "epoch": 1.35, "learning_rate": 1.099916504313944e-05, - "loss": 0.1929, + "loss": 0.0224, "step": 4851 }, { "epoch": 1.35, "learning_rate": 1.09973095834493e-05, - "loss": 0.1411, + "loss": 0.0147, "step": 4852 }, { "epoch": 1.35, "learning_rate": 1.0995454123759161e-05, - "loss": 0.086, + "loss": 0.0205, "step": 4853 }, { "epoch": 1.35, "learning_rate": 1.0993598664069025e-05, - "loss": 0.2429, + "loss": 0.1961, "step": 4854 }, { "epoch": 1.35, "learning_rate": 1.0991743204378887e-05, - "loss": 0.2943, + "loss": 0.0218, "step": 4855 }, { "epoch": 1.35, "learning_rate": 1.0989887744688747e-05, - "loss": 0.1922, + "loss": 0.1319, "step": 4856 }, { "epoch": 1.35, "learning_rate": 1.0988032284998609e-05, - "loss": 0.1356, + "loss": 0.0184, "step": 4857 }, { "epoch": 1.35, "learning_rate": 1.0986176825308473e-05, - "loss": 0.1379, + "loss": 0.0612, "step": 4858 }, { "epoch": 1.35, "learning_rate": 1.0984321365618333e-05, - "loss": 0.1923, + "loss": 0.0631, "step": 4859 }, { "epoch": 1.35, "learning_rate": 1.0982465905928195e-05, - "loss": 0.0346, + "loss": 0.1403, "step": 4860 }, { "epoch": 1.35, "learning_rate": 1.0980610446238056e-05, - "loss": 0.1891, + "loss": 0.1237, "step": 4861 }, { "epoch": 1.35, "learning_rate": 1.0978754986547917e-05, - "loss": 0.1389, + "loss": 0.0136, "step": 4862 }, { "epoch": 1.35, "learning_rate": 1.097689952685778e-05, - "loss": 0.1416, + "loss": 0.1976, "step": 4863 }, { "epoch": 1.35, "learning_rate": 1.0975044067167642e-05, - "loss": 0.0356, + "loss": 0.0828, "step": 4864 }, { "epoch": 1.35, "learning_rate": 1.0973188607477502e-05, - "loss": 0.1335, + "loss": 0.0108, "step": 4865 }, { "epoch": 1.35, "learning_rate": 1.0971333147787364e-05, - "loss": 0.0355, + "loss": 0.0623, "step": 4866 }, { "epoch": 1.35, "learning_rate": 1.0969477688097228e-05, - "loss": 0.1404, + "loss": 0.0636, "step": 4867 }, { "epoch": 1.35, "learning_rate": 1.096762222840709e-05, - "loss": 0.1405, + "loss": 0.0983, "step": 4868 }, { "epoch": 1.36, "learning_rate": 1.096576676871695e-05, - "loss": 0.087, + "loss": 0.0162, "step": 4869 }, { "epoch": 1.36, "learning_rate": 1.0963911309026812e-05, - "loss": 0.0343, + "loss": 0.0925, "step": 4870 }, { "epoch": 1.36, "learning_rate": 1.0962055849336675e-05, - "loss": 0.249, + "loss": 0.115, "step": 4871 }, { "epoch": 1.36, "learning_rate": 1.0960200389646535e-05, - "loss": 0.0821, + "loss": 0.1468, "step": 4872 }, { "epoch": 1.36, "learning_rate": 1.0958344929956397e-05, - "loss": 0.0884, + "loss": 0.0274, "step": 4873 }, { "epoch": 1.36, "learning_rate": 1.0956489470266259e-05, - "loss": 0.0311, + "loss": 0.0496, "step": 4874 }, { "epoch": 1.36, "learning_rate": 1.0954634010576121e-05, - "loss": 0.193, + "loss": 0.065, "step": 4875 }, { "epoch": 1.36, "learning_rate": 1.0952778550885983e-05, - "loss": 0.1395, + "loss": 0.0167, "step": 4876 }, { "epoch": 1.36, "learning_rate": 1.0950923091195845e-05, - "loss": 0.1395, + "loss": 0.0146, "step": 4877 }, { "epoch": 1.36, "learning_rate": 1.0949067631505705e-05, - "loss": 0.1379, + "loss": 0.0595, "step": 4878 }, { "epoch": 1.36, "learning_rate": 1.0947212171815568e-05, - "loss": 0.0852, + "loss": 0.153, "step": 4879 }, { "epoch": 1.36, "learning_rate": 1.094535671212543e-05, - "loss": 0.0294, + "loss": 0.1683, "step": 4880 }, { "epoch": 1.36, "learning_rate": 1.094350125243529e-05, - "loss": 0.0844, + "loss": 0.0226, "step": 4881 }, { "epoch": 1.36, "learning_rate": 1.0941645792745152e-05, - "loss": 0.2475, + "loss": 0.0212, "step": 4882 }, { "epoch": 1.36, "learning_rate": 1.0939790333055016e-05, - "loss": 0.0292, + "loss": 0.0747, "step": 4883 }, { "epoch": 1.36, "learning_rate": 1.0937934873364878e-05, - "loss": 0.1422, + "loss": 0.1229, "step": 4884 }, { "epoch": 1.36, "learning_rate": 1.0936079413674738e-05, - "loss": 0.1399, + "loss": 0.0776, "step": 4885 }, { "epoch": 1.36, "learning_rate": 1.09342239539846e-05, - "loss": 0.0845, + "loss": 0.0202, "step": 4886 }, { "epoch": 1.36, "learning_rate": 1.0932368494294463e-05, - "loss": 0.0838, + "loss": 0.0212, "step": 4887 }, { "epoch": 1.36, "learning_rate": 1.0930513034604324e-05, - "loss": 0.0846, + "loss": 0.0161, "step": 4888 }, { "epoch": 1.36, "learning_rate": 1.0928657574914185e-05, - "loss": 0.1442, + "loss": 0.1907, "step": 4889 }, { "epoch": 1.36, "learning_rate": 1.0926802115224047e-05, - "loss": 0.2466, + "loss": 0.0181, "step": 4890 }, { "epoch": 1.36, "learning_rate": 1.0924946655533911e-05, - "loss": 0.0265, + "loss": 0.0814, "step": 4891 }, { "epoch": 1.36, "learning_rate": 1.0923091195843771e-05, - "loss": 0.1951, + "loss": 0.1337, "step": 4892 }, { "epoch": 1.36, "learning_rate": 1.0921235736153633e-05, - "loss": 0.1382, + "loss": 0.0585, "step": 4893 }, { "epoch": 1.36, "learning_rate": 1.0919380276463493e-05, - "loss": 0.2011, + "loss": 0.0511, "step": 4894 }, { "epoch": 1.36, "learning_rate": 1.0917524816773357e-05, - "loss": 0.1962, + "loss": 0.0204, "step": 4895 }, { "epoch": 1.36, "learning_rate": 1.0915669357083219e-05, - "loss": 0.2012, + "loss": 0.0575, "step": 4896 }, { "epoch": 1.36, "learning_rate": 1.091381389739308e-05, - "loss": 0.2526, + "loss": 0.0216, "step": 4897 }, { "epoch": 1.36, "learning_rate": 1.091195843770294e-05, - "loss": 0.1396, + "loss": 0.2863, "step": 4898 }, { "epoch": 1.36, "learning_rate": 1.0910102978012804e-05, - "loss": 0.4121, + "loss": 0.1177, "step": 4899 }, { "epoch": 1.36, "learning_rate": 1.0908247518322666e-05, - "loss": 0.138, + "loss": 0.1791, "step": 4900 }, { "epoch": 1.36, "learning_rate": 1.0906392058632526e-05, - "loss": 0.0285, + "loss": 0.0184, "step": 4901 }, { "epoch": 1.36, "learning_rate": 1.0904536598942388e-05, - "loss": 0.0838, + "loss": 0.1488, "step": 4902 }, { "epoch": 1.36, "learning_rate": 1.0902681139252252e-05, - "loss": 0.1411, + "loss": 0.0322, "step": 4903 }, { "epoch": 1.36, "learning_rate": 1.0900825679562112e-05, - "loss": 0.0828, + "loss": 0.1158, "step": 4904 }, { "epoch": 1.37, "learning_rate": 1.0898970219871974e-05, - "loss": 0.0848, + "loss": 0.0286, "step": 4905 }, { "epoch": 1.37, "learning_rate": 1.0897114760181836e-05, - "loss": 0.4066, + "loss": 0.0597, "step": 4906 }, { "epoch": 1.37, "learning_rate": 1.0895259300491699e-05, - "loss": 0.0861, + "loss": 0.0848, "step": 4907 }, { "epoch": 1.37, "learning_rate": 1.089340384080156e-05, - "loss": 0.0832, + "loss": 0.0998, "step": 4908 }, { "epoch": 1.37, "learning_rate": 1.0891548381111421e-05, - "loss": 0.0833, + "loss": 0.0957, "step": 4909 }, { "epoch": 1.37, "learning_rate": 1.0889692921421281e-05, - "loss": 0.1398, + "loss": 0.0191, "step": 4910 }, { "epoch": 1.37, "learning_rate": 1.0887837461731145e-05, - "loss": 0.0306, + "loss": 0.0216, "step": 4911 }, { "epoch": 1.37, "learning_rate": 1.0885982002041007e-05, - "loss": 0.1379, + "loss": 0.097, "step": 4912 }, { "epoch": 1.37, "learning_rate": 1.0884126542350869e-05, - "loss": 0.0303, + "loss": 0.0547, "step": 4913 }, { "epoch": 1.37, "learning_rate": 1.0882271082660729e-05, - "loss": 0.0883, + "loss": 0.0651, "step": 4914 }, { "epoch": 1.37, "learning_rate": 1.0880415622970592e-05, - "loss": 0.1418, + "loss": 0.1234, "step": 4915 }, { "epoch": 1.37, "learning_rate": 1.0878560163280454e-05, - "loss": 0.084, + "loss": 0.0451, "step": 4916 }, { "epoch": 1.37, "learning_rate": 1.0876704703590314e-05, - "loss": 0.1983, + "loss": 0.1031, "step": 4917 }, { "epoch": 1.37, "learning_rate": 1.0874849243900176e-05, - "loss": 0.0867, + "loss": 0.0211, "step": 4918 }, { "epoch": 1.37, "learning_rate": 1.087299378421004e-05, - "loss": 0.0289, + "loss": 0.0157, "step": 4919 }, { "epoch": 1.37, "learning_rate": 1.0871138324519902e-05, - "loss": 0.1362, + "loss": 0.0102, "step": 4920 }, { "epoch": 1.37, "learning_rate": 1.0869282864829762e-05, - "loss": 0.1896, + "loss": 0.0161, "step": 4921 }, { "epoch": 1.37, "learning_rate": 1.0867427405139624e-05, - "loss": 0.2546, + "loss": 0.0971, "step": 4922 }, { "epoch": 1.37, "learning_rate": 1.0865571945449487e-05, - "loss": 0.08, + "loss": 0.0138, "step": 4923 }, { "epoch": 1.37, "learning_rate": 1.0863716485759348e-05, - "loss": 0.1397, + "loss": 0.0134, "step": 4924 }, { "epoch": 1.37, "learning_rate": 1.086186102606921e-05, - "loss": 0.0855, + "loss": 0.0554, "step": 4925 }, { "epoch": 1.37, "learning_rate": 1.0860005566379071e-05, - "loss": 0.195, + "loss": 0.1038, "step": 4926 }, { "epoch": 1.37, "learning_rate": 1.0858150106688933e-05, - "loss": 0.0292, + "loss": 0.089, "step": 4927 }, { "epoch": 1.37, "learning_rate": 1.0856294646998795e-05, - "loss": 0.1962, + "loss": 0.1561, "step": 4928 }, { "epoch": 1.37, "learning_rate": 1.0854439187308657e-05, - "loss": 0.0839, + "loss": 0.1773, "step": 4929 }, { "epoch": 1.37, "learning_rate": 1.0852583727618517e-05, - "loss": 0.0854, + "loss": 0.2002, "step": 4930 }, { "epoch": 1.37, "learning_rate": 1.085072826792838e-05, - "loss": 0.0807, + "loss": 0.0885, "step": 4931 }, { "epoch": 1.37, "learning_rate": 1.0848872808238242e-05, - "loss": 0.1377, + "loss": 0.1021, "step": 4932 }, { "epoch": 1.37, "learning_rate": 1.0847017348548103e-05, - "loss": 0.0277, + "loss": 0.0144, "step": 4933 }, { "epoch": 1.37, "learning_rate": 1.0845161888857965e-05, - "loss": 0.2542, + "loss": 0.1855, "step": 4934 }, { "epoch": 1.37, "learning_rate": 1.0843306429167828e-05, - "loss": 0.1381, + "loss": 0.0549, "step": 4935 }, { "epoch": 1.37, "learning_rate": 1.084145096947769e-05, - "loss": 0.3152, + "loss": 0.0649, "step": 4936 }, { "epoch": 1.37, "learning_rate": 1.083959550978755e-05, - "loss": 0.3053, + "loss": 0.0167, "step": 4937 }, { "epoch": 1.37, "learning_rate": 1.0837740050097412e-05, - "loss": 0.0273, + "loss": 0.034, "step": 4938 }, { "epoch": 1.37, "learning_rate": 1.0835884590407276e-05, - "loss": 0.0851, + "loss": 0.0904, "step": 4939 }, { "epoch": 1.37, "learning_rate": 1.0834029130717136e-05, - "loss": 0.0292, + "loss": 0.0903, "step": 4940 }, { "epoch": 1.38, "learning_rate": 1.0832173671026998e-05, - "loss": 0.3061, + "loss": 0.0201, "step": 4941 }, { "epoch": 1.38, "learning_rate": 1.083031821133686e-05, - "loss": 0.0829, + "loss": 0.0238, "step": 4942 }, { "epoch": 1.38, "learning_rate": 1.0828462751646723e-05, - "loss": 0.0822, + "loss": 0.0143, "step": 4943 }, { "epoch": 1.38, "learning_rate": 1.0826607291956583e-05, - "loss": 0.1342, + "loss": 0.0518, "step": 4944 }, { "epoch": 1.38, "learning_rate": 1.0824751832266445e-05, - "loss": 0.1959, + "loss": 0.0647, "step": 4945 }, { "epoch": 1.38, "learning_rate": 1.0822896372576305e-05, - "loss": 0.0307, + "loss": 0.0564, "step": 4946 }, { "epoch": 1.38, "learning_rate": 1.0821040912886169e-05, - "loss": 0.0838, + "loss": 0.012, "step": 4947 }, { "epoch": 1.38, "learning_rate": 1.081918545319603e-05, - "loss": 0.1362, + "loss": 0.2018, "step": 4948 }, { "epoch": 1.38, "learning_rate": 1.0817329993505893e-05, - "loss": 0.0293, + "loss": 0.0258, "step": 4949 }, { "epoch": 1.38, "learning_rate": 1.0815474533815753e-05, - "loss": 0.1964, + "loss": 0.1049, "step": 4950 }, { "epoch": 1.38, "learning_rate": 1.0813619074125616e-05, - "loss": 0.1919, + "loss": 0.0154, "step": 4951 }, { "epoch": 1.38, "learning_rate": 1.0811763614435478e-05, - "loss": 0.2508, + "loss": 0.0526, "step": 4952 }, { "epoch": 1.38, "learning_rate": 1.0809908154745338e-05, - "loss": 0.141, + "loss": 0.1941, "step": 4953 }, { "epoch": 1.38, "learning_rate": 1.08080526950552e-05, - "loss": 0.0842, + "loss": 0.0457, "step": 4954 }, { "epoch": 1.38, "learning_rate": 1.0806197235365064e-05, - "loss": 0.0848, + "loss": 0.0212, "step": 4955 }, { "epoch": 1.38, "learning_rate": 1.0804341775674924e-05, - "loss": 0.0828, + "loss": 0.0952, "step": 4956 }, { "epoch": 1.38, "learning_rate": 1.0802486315984786e-05, - "loss": 0.0298, + "loss": 0.01, "step": 4957 }, { "epoch": 1.38, "learning_rate": 1.0800630856294648e-05, - "loss": 0.1931, + "loss": 0.0182, "step": 4958 }, { "epoch": 1.38, "learning_rate": 1.0798775396604511e-05, - "loss": 0.1401, + "loss": 0.1983, "step": 4959 }, { "epoch": 1.38, "learning_rate": 1.0796919936914371e-05, - "loss": 0.1969, + "loss": 0.0608, "step": 4960 }, { "epoch": 1.38, "learning_rate": 1.0795064477224233e-05, - "loss": 0.0853, + "loss": 0.0175, "step": 4961 }, { "epoch": 1.38, "learning_rate": 1.0793209017534094e-05, - "loss": 0.0862, + "loss": 0.0192, "step": 4962 }, { "epoch": 1.38, "learning_rate": 1.0791353557843957e-05, - "loss": 0.1421, + "loss": 0.0493, "step": 4963 }, { "epoch": 1.38, "learning_rate": 1.0789498098153819e-05, - "loss": 0.1384, + "loss": 0.0168, "step": 4964 }, { "epoch": 1.38, "learning_rate": 1.078764263846368e-05, - "loss": 0.14, + "loss": 0.0824, "step": 4965 }, { "epoch": 1.38, "learning_rate": 1.0785787178773541e-05, - "loss": 0.0881, + "loss": 0.0598, "step": 4966 }, { "epoch": 1.38, "learning_rate": 1.0783931719083405e-05, - "loss": 0.0853, + "loss": 0.0521, "step": 4967 }, { "epoch": 1.38, "learning_rate": 1.0782076259393266e-05, - "loss": 0.1912, + "loss": 0.151, "step": 4968 }, { "epoch": 1.38, "learning_rate": 1.0780220799703127e-05, - "loss": 0.1331, + "loss": 0.0622, "step": 4969 }, { "epoch": 1.38, "learning_rate": 1.0778365340012988e-05, - "loss": 0.3014, + "loss": 0.0557, "step": 4970 }, { "epoch": 1.38, "learning_rate": 1.0776509880322852e-05, - "loss": 0.1388, + "loss": 0.1069, "step": 4971 }, { "epoch": 1.38, "learning_rate": 1.0774654420632712e-05, - "loss": 0.0294, + "loss": 0.1071, "step": 4972 }, { "epoch": 1.38, "learning_rate": 1.0772798960942574e-05, - "loss": 0.14, + "loss": 0.0848, "step": 4973 }, { "epoch": 1.38, "learning_rate": 1.0770943501252436e-05, - "loss": 0.0807, + "loss": 0.0221, "step": 4974 }, { "epoch": 1.38, "learning_rate": 1.07690880415623e-05, - "loss": 0.0833, + "loss": 0.0934, "step": 4975 }, { "epoch": 1.38, "learning_rate": 1.076723258187216e-05, - "loss": 0.2471, + "loss": 0.1805, "step": 4976 }, { "epoch": 1.39, "learning_rate": 1.0765377122182022e-05, - "loss": 0.0826, + "loss": 0.0108, "step": 4977 }, { "epoch": 1.39, "learning_rate": 1.0763521662491882e-05, - "loss": 0.0847, + "loss": 0.0704, "step": 4978 }, { "epoch": 1.39, "learning_rate": 1.0761666202801745e-05, - "loss": 0.1376, + "loss": 0.028, "step": 4979 }, { "epoch": 1.39, "learning_rate": 1.0759810743111607e-05, - "loss": 0.2489, + "loss": 0.0542, "step": 4980 }, { "epoch": 1.39, "learning_rate": 1.0757955283421469e-05, - "loss": 0.0885, + "loss": 0.0616, "step": 4981 }, { "epoch": 1.39, "learning_rate": 1.075609982373133e-05, - "loss": 0.1932, + "loss": 0.0492, "step": 4982 }, { "epoch": 1.39, "learning_rate": 1.0754244364041193e-05, - "loss": 0.1382, + "loss": 0.0418, "step": 4983 }, { "epoch": 1.39, "learning_rate": 1.0752388904351055e-05, - "loss": 0.1874, + "loss": 0.0213, "step": 4984 }, { "epoch": 1.39, "learning_rate": 1.0750533444660915e-05, - "loss": 0.1961, + "loss": 0.1262, "step": 4985 }, { "epoch": 1.39, "learning_rate": 1.0748677984970777e-05, - "loss": 0.3081, + "loss": 0.1477, "step": 4986 }, { "epoch": 1.39, "learning_rate": 1.074682252528064e-05, - "loss": 0.0851, + "loss": 0.034, "step": 4987 }, { "epoch": 1.39, "learning_rate": 1.0744967065590502e-05, - "loss": 0.1387, + "loss": 0.0256, "step": 4988 }, { "epoch": 1.39, "learning_rate": 1.0743111605900362e-05, - "loss": 0.1409, + "loss": 0.1171, "step": 4989 }, { "epoch": 1.39, "learning_rate": 1.0741256146210224e-05, - "loss": 0.1408, + "loss": 0.0256, "step": 4990 }, { "epoch": 1.39, "learning_rate": 1.0739400686520088e-05, - "loss": 0.0321, + "loss": 0.0756, "step": 4991 }, { "epoch": 1.39, "learning_rate": 1.0737545226829948e-05, - "loss": 0.2513, + "loss": 0.0552, "step": 4992 }, { "epoch": 1.39, "learning_rate": 1.073568976713981e-05, - "loss": 0.1921, + "loss": 0.0754, "step": 4993 }, { "epoch": 1.39, "learning_rate": 1.0733834307449672e-05, - "loss": 0.0875, + "loss": 0.1637, "step": 4994 }, { "epoch": 1.39, "learning_rate": 1.0731978847759534e-05, - "loss": 0.2422, + "loss": 0.0139, "step": 4995 }, { "epoch": 1.39, "learning_rate": 1.0730123388069395e-05, - "loss": 0.189, + "loss": 0.0689, "step": 4996 }, { "epoch": 1.39, "learning_rate": 1.0728267928379257e-05, - "loss": 0.1374, + "loss": 0.0528, "step": 4997 }, { "epoch": 1.39, "learning_rate": 1.0726412468689117e-05, - "loss": 0.0343, + "loss": 0.0171, "step": 4998 }, { "epoch": 1.39, "learning_rate": 1.0724557008998981e-05, - "loss": 0.0358, + "loss": 0.0769, "step": 4999 }, { "epoch": 1.39, "learning_rate": 1.0722701549308843e-05, - "loss": 0.3455, + "loss": 0.0203, "step": 5000 }, { "epoch": 1.39, "learning_rate": 1.0720846089618703e-05, - "loss": 0.0844, + "loss": 0.0718, "step": 5001 }, { "epoch": 1.39, "learning_rate": 1.0718990629928565e-05, - "loss": 0.1423, + "loss": 0.0849, "step": 5002 }, { "epoch": 1.39, "learning_rate": 1.0717135170238428e-05, - "loss": 0.0899, + "loss": 0.05, "step": 5003 }, { "epoch": 1.39, "learning_rate": 1.071527971054829e-05, - "loss": 0.0907, + "loss": 0.1146, "step": 5004 }, { "epoch": 1.39, "learning_rate": 1.071342425085815e-05, - "loss": 0.14, + "loss": 0.0801, "step": 5005 }, { "epoch": 1.39, "learning_rate": 1.0711568791168012e-05, - "loss": 0.1397, + "loss": 0.1599, "step": 5006 }, { "epoch": 1.39, "learning_rate": 1.0709713331477876e-05, - "loss": 0.1388, + "loss": 0.1072, "step": 5007 }, { "epoch": 1.39, "learning_rate": 1.0707857871787736e-05, - "loss": 0.0341, + "loss": 0.1797, "step": 5008 }, { "epoch": 1.39, "learning_rate": 1.0706002412097598e-05, - "loss": 0.1363, + "loss": 0.0777, "step": 5009 }, { "epoch": 1.39, "learning_rate": 1.070414695240746e-05, - "loss": 0.1939, + "loss": 0.0579, "step": 5010 }, { "epoch": 1.39, "learning_rate": 1.070229149271732e-05, - "loss": 0.0322, + "loss": 0.1137, "step": 5011 }, { "epoch": 1.39, "learning_rate": 1.0700436033027184e-05, - "loss": 0.1873, + "loss": 0.1153, "step": 5012 }, { "epoch": 1.4, "learning_rate": 1.0698580573337046e-05, - "loss": 0.034, + "loss": 0.0613, "step": 5013 }, { "epoch": 1.4, "learning_rate": 1.0696725113646906e-05, - "loss": 0.0322, + "loss": 0.0683, "step": 5014 }, { "epoch": 1.4, "learning_rate": 1.0694869653956768e-05, - "loss": 0.302, + "loss": 0.0567, "step": 5015 }, { "epoch": 1.4, "learning_rate": 1.0693014194266631e-05, - "loss": 0.2391, + "loss": 0.1227, "step": 5016 }, { "epoch": 1.4, "learning_rate": 1.0691158734576493e-05, - "loss": 0.132, + "loss": 0.1107, "step": 5017 }, { "epoch": 1.4, "learning_rate": 1.0689303274886353e-05, - "loss": 0.0868, + "loss": 0.0664, "step": 5018 }, { "epoch": 1.4, "learning_rate": 1.0687447815196215e-05, - "loss": 0.1942, + "loss": 0.0664, "step": 5019 }, { "epoch": 1.4, "learning_rate": 1.0685592355506079e-05, - "loss": 0.1944, + "loss": 0.0186, "step": 5020 }, { "epoch": 1.4, "learning_rate": 1.0683736895815939e-05, - "loss": 0.0877, + "loss": 0.0667, "step": 5021 }, { "epoch": 1.4, "learning_rate": 1.06818814361258e-05, - "loss": 0.1347, + "loss": 0.0545, "step": 5022 }, { "epoch": 1.4, "learning_rate": 1.0680025976435663e-05, - "loss": 0.2494, + "loss": 0.0267, "step": 5023 }, { "epoch": 1.4, "learning_rate": 1.0678170516745524e-05, - "loss": 0.2461, + "loss": 0.0378, "step": 5024 }, { "epoch": 1.4, "learning_rate": 1.0676315057055386e-05, - "loss": 0.1417, + "loss": 0.0864, "step": 5025 }, { "epoch": 1.4, "learning_rate": 1.0674459597365248e-05, - "loss": 0.1389, + "loss": 0.0161, "step": 5026 }, { "epoch": 1.4, "learning_rate": 1.0672604137675108e-05, - "loss": 0.1389, + "loss": 0.0591, "step": 5027 }, { "epoch": 1.4, "learning_rate": 1.0670748677984972e-05, - "loss": 0.0873, + "loss": 0.1434, "step": 5028 }, { "epoch": 1.4, "learning_rate": 1.0668893218294834e-05, - "loss": 0.1914, + "loss": 0.0113, "step": 5029 }, { "epoch": 1.4, "learning_rate": 1.0667037758604694e-05, - "loss": 0.0876, + "loss": 0.0148, "step": 5030 }, { "epoch": 1.4, "learning_rate": 1.0665182298914556e-05, - "loss": 0.1926, + "loss": 0.0202, "step": 5031 }, { "epoch": 1.4, "learning_rate": 1.066332683922442e-05, - "loss": 0.034, + "loss": 0.007, "step": 5032 }, { "epoch": 1.4, "learning_rate": 1.0661471379534281e-05, - "loss": 0.0347, + "loss": 0.0187, "step": 5033 }, { "epoch": 1.4, "learning_rate": 1.0659615919844141e-05, - "loss": 0.2449, + "loss": 0.0378, "step": 5034 }, { "epoch": 1.4, "learning_rate": 1.0657760460154003e-05, - "loss": 0.3474, + "loss": 0.15, "step": 5035 }, { "epoch": 1.4, "learning_rate": 1.0655905000463867e-05, - "loss": 0.1408, + "loss": 0.1468, "step": 5036 }, { "epoch": 1.4, "learning_rate": 1.0654049540773727e-05, - "loss": 0.1396, + "loss": 0.1239, "step": 5037 }, { "epoch": 1.4, "learning_rate": 1.0652194081083589e-05, - "loss": 0.0848, + "loss": 0.0213, "step": 5038 }, { "epoch": 1.4, "learning_rate": 1.065033862139345e-05, - "loss": 0.1416, + "loss": 0.0697, "step": 5039 }, { "epoch": 1.4, "learning_rate": 1.0648483161703314e-05, - "loss": 0.0856, + "loss": 0.0142, "step": 5040 }, { "epoch": 1.4, "learning_rate": 1.0646627702013174e-05, - "loss": 0.086, + "loss": 0.1177, "step": 5041 }, { "epoch": 1.4, "learning_rate": 1.0644772242323036e-05, - "loss": 0.1357, + "loss": 0.0135, "step": 5042 }, { "epoch": 1.4, "learning_rate": 1.0642916782632897e-05, - "loss": 0.1367, + "loss": 0.0122, "step": 5043 }, { "epoch": 1.4, "learning_rate": 1.064106132294276e-05, - "loss": 0.1364, + "loss": 0.1111, "step": 5044 }, { "epoch": 1.4, "learning_rate": 1.0639205863252622e-05, - "loss": 0.1438, + "loss": 0.0724, "step": 5045 }, { "epoch": 1.4, "learning_rate": 1.0637350403562484e-05, - "loss": 0.1904, + "loss": 0.1002, "step": 5046 }, { "epoch": 1.4, "learning_rate": 1.0635494943872344e-05, - "loss": 0.1386, + "loss": 0.1263, "step": 5047 }, { "epoch": 1.4, "learning_rate": 1.0633639484182208e-05, - "loss": 0.1951, + "loss": 0.0207, "step": 5048 }, { "epoch": 1.41, "learning_rate": 1.063178402449207e-05, - "loss": 0.2436, + "loss": 0.0888, "step": 5049 }, { "epoch": 1.41, "learning_rate": 1.062992856480193e-05, - "loss": 0.3012, + "loss": 0.0731, "step": 5050 }, { "epoch": 1.41, "learning_rate": 1.0628073105111792e-05, - "loss": 0.1907, + "loss": 0.0584, "step": 5051 }, { "epoch": 1.41, "learning_rate": 1.0626217645421655e-05, - "loss": 0.1908, + "loss": 0.1578, "step": 5052 }, { "epoch": 1.41, "learning_rate": 1.0624362185731515e-05, - "loss": 0.0361, + "loss": 0.1076, "step": 5053 }, { "epoch": 1.41, "learning_rate": 1.0622506726041377e-05, - "loss": 0.0875, + "loss": 0.0255, "step": 5054 }, { "epoch": 1.41, "learning_rate": 1.0620651266351239e-05, - "loss": 0.0901, + "loss": 0.0998, "step": 5055 }, { "epoch": 1.41, "learning_rate": 1.0618795806661103e-05, - "loss": 0.1403, + "loss": 0.0922, "step": 5056 }, { "epoch": 1.41, "learning_rate": 1.0616940346970963e-05, - "loss": 0.0364, + "loss": 0.1171, "step": 5057 }, { "epoch": 1.41, "learning_rate": 1.0615084887280825e-05, - "loss": 0.0363, + "loss": 0.0683, "step": 5058 }, { "epoch": 1.41, "learning_rate": 1.0613229427590685e-05, - "loss": 0.0343, + "loss": 0.0721, "step": 5059 }, { "epoch": 1.41, "learning_rate": 1.0611373967900548e-05, - "loss": 0.1383, + "loss": 0.1216, "step": 5060 }, { "epoch": 1.41, "learning_rate": 1.060951850821041e-05, - "loss": 0.1372, + "loss": 0.0278, "step": 5061 }, { "epoch": 1.41, "learning_rate": 1.0607663048520272e-05, - "loss": 0.0875, + "loss": 0.0274, "step": 5062 }, { "epoch": 1.41, "learning_rate": 1.0605807588830132e-05, - "loss": 0.0834, + "loss": 0.095, "step": 5063 }, { "epoch": 1.41, "learning_rate": 1.0603952129139996e-05, - "loss": 0.1968, + "loss": 0.0835, "step": 5064 }, { "epoch": 1.41, "learning_rate": 1.0602096669449858e-05, - "loss": 0.2456, + "loss": 0.1529, "step": 5065 }, { "epoch": 1.41, "learning_rate": 1.0600241209759718e-05, - "loss": 0.0859, + "loss": 0.1772, "step": 5066 }, { "epoch": 1.41, "learning_rate": 1.059838575006958e-05, - "loss": 0.0884, + "loss": 0.3128, "step": 5067 }, { "epoch": 1.41, "learning_rate": 1.0596530290379443e-05, - "loss": 0.0304, + "loss": 0.1471, "step": 5068 }, { "epoch": 1.41, "learning_rate": 1.0594674830689305e-05, - "loss": 0.1408, + "loss": 0.1388, "step": 5069 }, { "epoch": 1.41, "learning_rate": 1.0592819370999165e-05, - "loss": 0.251, + "loss": 0.027, "step": 5070 }, { "epoch": 1.41, "learning_rate": 1.0590963911309027e-05, - "loss": 0.1893, + "loss": 0.1202, "step": 5071 }, { "epoch": 1.41, "learning_rate": 1.058910845161889e-05, - "loss": 0.0815, + "loss": 0.1147, "step": 5072 }, { "epoch": 1.41, "learning_rate": 1.0587252991928751e-05, - "loss": 0.1431, + "loss": 0.0258, "step": 5073 }, { "epoch": 1.41, "learning_rate": 1.0585397532238613e-05, - "loss": 0.0806, + "loss": 0.0686, "step": 5074 }, { "epoch": 1.41, "learning_rate": 1.0583542072548475e-05, - "loss": 0.2444, + "loss": 0.0283, "step": 5075 }, { "epoch": 1.41, "learning_rate": 1.0581686612858337e-05, - "loss": 0.1931, + "loss": 0.1446, "step": 5076 }, { "epoch": 1.41, "learning_rate": 1.0579831153168198e-05, - "loss": 0.0286, + "loss": 0.0806, "step": 5077 }, { "epoch": 1.41, "learning_rate": 1.057797569347806e-05, - "loss": 0.1365, + "loss": 0.0641, "step": 5078 }, { "epoch": 1.41, "learning_rate": 1.057612023378792e-05, - "loss": 0.1389, + "loss": 0.126, "step": 5079 }, { "epoch": 1.41, "learning_rate": 1.0574264774097784e-05, - "loss": 0.0822, + "loss": 0.1319, "step": 5080 }, { "epoch": 1.41, "learning_rate": 1.0572409314407646e-05, - "loss": 0.1396, + "loss": 0.0541, "step": 5081 }, { "epoch": 1.41, "learning_rate": 1.0570553854717506e-05, - "loss": 0.0843, + "loss": 0.0592, "step": 5082 }, { "epoch": 1.41, "learning_rate": 1.0568698395027368e-05, - "loss": 0.1408, + "loss": 0.1075, "step": 5083 }, { "epoch": 1.41, "learning_rate": 1.0566842935337232e-05, - "loss": 0.2517, + "loss": 0.0589, "step": 5084 }, { "epoch": 1.42, "learning_rate": 1.0564987475647093e-05, - "loss": 0.1848, + "loss": 0.0654, "step": 5085 }, { "epoch": 1.42, "learning_rate": 1.0563132015956954e-05, - "loss": 0.1355, + "loss": 0.1029, "step": 5086 }, { "epoch": 1.42, "learning_rate": 1.0561276556266815e-05, - "loss": 0.0299, + "loss": 0.1149, "step": 5087 }, { "epoch": 1.42, "learning_rate": 1.0559421096576679e-05, - "loss": 0.1375, + "loss": 0.1078, "step": 5088 }, { "epoch": 1.42, "learning_rate": 1.055756563688654e-05, - "loss": 0.1382, + "loss": 0.0585, "step": 5089 }, { "epoch": 1.42, "learning_rate": 1.0555710177196401e-05, - "loss": 0.1407, + "loss": 0.0182, "step": 5090 }, { "epoch": 1.42, "learning_rate": 1.0553854717506263e-05, - "loss": 0.081, + "loss": 0.0543, "step": 5091 }, { "epoch": 1.42, "learning_rate": 1.0551999257816125e-05, - "loss": 0.0879, + "loss": 0.0891, "step": 5092 }, { "epoch": 1.42, "learning_rate": 1.0550143798125987e-05, - "loss": 0.2468, + "loss": 0.2187, "step": 5093 }, { "epoch": 1.42, "learning_rate": 1.0548288338435849e-05, - "loss": 0.0859, + "loss": 0.0176, "step": 5094 }, { "epoch": 1.42, "learning_rate": 1.0546432878745709e-05, - "loss": 0.133, + "loss": 0.0164, "step": 5095 }, { "epoch": 1.42, "learning_rate": 1.0544577419055572e-05, - "loss": 0.2474, + "loss": 0.2568, "step": 5096 }, { "epoch": 1.42, "learning_rate": 1.0542721959365434e-05, - "loss": 0.0301, + "loss": 0.0592, "step": 5097 }, { "epoch": 1.42, "learning_rate": 1.0540866499675294e-05, - "loss": 0.4111, + "loss": 0.1478, "step": 5098 }, { "epoch": 1.42, "learning_rate": 1.0539011039985156e-05, - "loss": 0.138, + "loss": 0.0595, "step": 5099 }, { "epoch": 1.42, "learning_rate": 1.053715558029502e-05, - "loss": 0.1979, + "loss": 0.041, "step": 5100 }, { "epoch": 1.42, "learning_rate": 1.0535300120604882e-05, - "loss": 0.139, + "loss": 0.0799, "step": 5101 }, { "epoch": 1.42, "learning_rate": 1.0533444660914742e-05, - "loss": 0.1384, + "loss": 0.0753, "step": 5102 }, { "epoch": 1.42, "learning_rate": 1.0531589201224604e-05, - "loss": 0.191, + "loss": 0.0545, "step": 5103 }, { "epoch": 1.42, "learning_rate": 1.0529733741534467e-05, - "loss": 0.0838, + "loss": 0.0791, "step": 5104 }, { "epoch": 1.42, "learning_rate": 1.0527878281844327e-05, - "loss": 0.0825, + "loss": 0.0708, "step": 5105 }, { "epoch": 1.42, "learning_rate": 1.052602282215419e-05, - "loss": 0.1943, + "loss": 0.0161, "step": 5106 }, { "epoch": 1.42, "learning_rate": 1.0524167362464051e-05, - "loss": 0.1368, + "loss": 0.0973, "step": 5107 }, { "epoch": 1.42, "learning_rate": 1.0522311902773915e-05, - "loss": 0.089, + "loss": 0.1053, "step": 5108 }, { "epoch": 1.42, "learning_rate": 1.0520456443083775e-05, - "loss": 0.1397, + "loss": 0.0502, "step": 5109 }, { "epoch": 1.42, "learning_rate": 1.0518600983393637e-05, - "loss": 0.0862, + "loss": 0.0328, "step": 5110 }, { "epoch": 1.42, "learning_rate": 1.0516745523703497e-05, - "loss": 0.0327, + "loss": 0.0672, "step": 5111 }, { "epoch": 1.42, "learning_rate": 1.051489006401336e-05, - "loss": 0.1939, + "loss": 0.1255, "step": 5112 }, { "epoch": 1.42, "learning_rate": 1.0513034604323222e-05, - "loss": 0.1422, + "loss": 0.0492, "step": 5113 }, { "epoch": 1.42, "learning_rate": 1.0511179144633084e-05, - "loss": 0.0878, + "loss": 0.0111, "step": 5114 }, { "epoch": 1.42, "learning_rate": 1.0509323684942944e-05, - "loss": 0.1391, + "loss": 0.0555, "step": 5115 }, { "epoch": 1.42, "learning_rate": 1.0507468225252808e-05, - "loss": 0.0868, + "loss": 0.0116, "step": 5116 }, { "epoch": 1.42, "learning_rate": 1.050561276556267e-05, - "loss": 0.3503, + "loss": 0.1205, "step": 5117 }, { "epoch": 1.42, "learning_rate": 1.050375730587253e-05, - "loss": 0.0324, + "loss": 0.0564, "step": 5118 }, { "epoch": 1.42, "learning_rate": 1.0501901846182392e-05, - "loss": 0.0331, + "loss": 0.0132, "step": 5119 }, { "epoch": 1.42, "learning_rate": 1.0500046386492255e-05, - "loss": 0.2516, + "loss": 0.2016, "step": 5120 }, { "epoch": 1.43, "learning_rate": 1.0498190926802116e-05, - "loss": 0.0847, + "loss": 0.1401, "step": 5121 }, { "epoch": 1.43, "learning_rate": 1.0496335467111978e-05, - "loss": 0.1417, + "loss": 0.0733, "step": 5122 }, { "epoch": 1.43, "learning_rate": 1.049448000742184e-05, - "loss": 0.0855, + "loss": 0.1306, "step": 5123 }, { "epoch": 1.43, "learning_rate": 1.0492624547731703e-05, - "loss": 0.0844, + "loss": 0.0541, "step": 5124 }, { "epoch": 1.43, "learning_rate": 1.0490769088041563e-05, - "loss": 0.1379, + "loss": 0.1154, "step": 5125 }, { "epoch": 1.43, "learning_rate": 1.0488913628351425e-05, - "loss": 0.1388, + "loss": 0.218, "step": 5126 }, { "epoch": 1.43, "learning_rate": 1.0487058168661285e-05, - "loss": 0.0855, + "loss": 0.0654, "step": 5127 }, { "epoch": 1.43, "learning_rate": 1.0485202708971149e-05, - "loss": 0.1912, + "loss": 0.1039, "step": 5128 }, { "epoch": 1.43, "learning_rate": 1.048334724928101e-05, - "loss": 0.1918, + "loss": 0.0215, "step": 5129 }, { "epoch": 1.43, "learning_rate": 1.0481491789590873e-05, - "loss": 0.1402, + "loss": 0.0977, "step": 5130 }, { "epoch": 1.43, "learning_rate": 1.0479636329900733e-05, - "loss": 0.084, + "loss": 0.0927, "step": 5131 }, { "epoch": 1.43, "learning_rate": 1.0477780870210596e-05, - "loss": 0.03, + "loss": 0.0229, "step": 5132 }, { "epoch": 1.43, "learning_rate": 1.0475925410520458e-05, - "loss": 0.1372, + "loss": 0.0314, "step": 5133 }, { "epoch": 1.43, "learning_rate": 1.0474069950830318e-05, - "loss": 0.1365, + "loss": 0.0499, "step": 5134 }, { "epoch": 1.43, "learning_rate": 1.047221449114018e-05, - "loss": 0.1971, + "loss": 0.0175, "step": 5135 }, { "epoch": 1.43, "learning_rate": 1.0470359031450044e-05, - "loss": 0.028, + "loss": 0.1299, "step": 5136 }, { "epoch": 1.43, "learning_rate": 1.0468503571759906e-05, - "loss": 0.1329, + "loss": 0.0458, "step": 5137 }, { "epoch": 1.43, "learning_rate": 1.0466648112069766e-05, - "loss": 0.1929, + "loss": 0.023, "step": 5138 }, { "epoch": 1.43, "learning_rate": 1.0464792652379628e-05, - "loss": 0.0837, + "loss": 0.0573, "step": 5139 }, { "epoch": 1.43, "learning_rate": 1.0462937192689491e-05, - "loss": 0.0828, + "loss": 0.0187, "step": 5140 }, { "epoch": 1.43, "learning_rate": 1.0461081732999351e-05, - "loss": 0.1363, + "loss": 0.0193, "step": 5141 }, { "epoch": 1.43, "learning_rate": 1.0459226273309213e-05, - "loss": 0.1374, + "loss": 0.054, "step": 5142 }, { "epoch": 1.43, "learning_rate": 1.0457370813619075e-05, - "loss": 0.0294, + "loss": 0.08, "step": 5143 }, { "epoch": 1.43, "learning_rate": 1.0455515353928937e-05, - "loss": 0.3091, + "loss": 0.0554, "step": 5144 }, { "epoch": 1.43, "learning_rate": 1.0453659894238799e-05, - "loss": 0.1406, + "loss": 0.1117, "step": 5145 }, { "epoch": 1.43, "learning_rate": 1.045180443454866e-05, - "loss": 0.1406, + "loss": 0.0623, "step": 5146 }, { "epoch": 1.43, "learning_rate": 1.0449948974858521e-05, - "loss": 0.083, + "loss": 0.0488, "step": 5147 }, { "epoch": 1.43, "learning_rate": 1.0448093515168384e-05, - "loss": 0.1884, + "loss": 0.0116, "step": 5148 }, { "epoch": 1.43, "learning_rate": 1.0446238055478246e-05, - "loss": 0.0865, + "loss": 0.107, "step": 5149 }, { "epoch": 1.43, "learning_rate": 1.0444382595788107e-05, - "loss": 0.0291, + "loss": 0.1491, "step": 5150 }, { "epoch": 1.43, "learning_rate": 1.0442527136097968e-05, - "loss": 0.2023, + "loss": 0.2144, "step": 5151 }, { "epoch": 1.43, "learning_rate": 1.0440671676407832e-05, - "loss": 0.3039, + "loss": 0.0639, "step": 5152 }, { "epoch": 1.43, "learning_rate": 1.0438816216717694e-05, - "loss": 0.0824, + "loss": 0.1973, "step": 5153 }, { "epoch": 1.43, "learning_rate": 1.0436960757027554e-05, - "loss": 0.138, + "loss": 0.0125, "step": 5154 }, { "epoch": 1.43, "learning_rate": 1.0435105297337416e-05, - "loss": 0.2, + "loss": 0.1607, "step": 5155 }, { "epoch": 1.44, "learning_rate": 1.043324983764728e-05, - "loss": 0.0286, + "loss": 0.0881, "step": 5156 }, { "epoch": 1.44, "learning_rate": 1.043139437795714e-05, - "loss": 0.1405, + "loss": 0.1525, "step": 5157 }, { "epoch": 1.44, "learning_rate": 1.0429538918267001e-05, - "loss": 0.1963, + "loss": 0.0566, "step": 5158 }, { "epoch": 1.44, "learning_rate": 1.0427683458576863e-05, - "loss": 0.1936, + "loss": 0.0582, "step": 5159 }, { "epoch": 1.44, "learning_rate": 1.0425827998886727e-05, - "loss": 0.0831, + "loss": 0.0237, "step": 5160 }, { "epoch": 1.44, "learning_rate": 1.0423972539196587e-05, - "loss": 0.0863, + "loss": 0.0157, "step": 5161 }, { "epoch": 1.44, "learning_rate": 1.0422117079506449e-05, - "loss": 0.1929, + "loss": 0.0981, "step": 5162 }, { "epoch": 1.44, "learning_rate": 1.042026161981631e-05, - "loss": 0.0296, + "loss": 0.0247, "step": 5163 }, { "epoch": 1.44, "learning_rate": 1.0418406160126171e-05, - "loss": 0.2456, + "loss": 0.0313, "step": 5164 }, { "epoch": 1.44, "learning_rate": 1.0416550700436035e-05, - "loss": 0.1385, + "loss": 0.024, "step": 5165 }, { "epoch": 1.44, "learning_rate": 1.0414695240745896e-05, - "loss": 0.0309, + "loss": 0.1279, "step": 5166 }, { "epoch": 1.44, "learning_rate": 1.0412839781055757e-05, - "loss": 0.0313, + "loss": 0.0203, "step": 5167 }, { "epoch": 1.44, "learning_rate": 1.0410984321365619e-05, - "loss": 0.0307, + "loss": 0.0197, "step": 5168 }, { "epoch": 1.44, "learning_rate": 1.0409128861675482e-05, - "loss": 0.0864, + "loss": 0.0947, "step": 5169 }, { "epoch": 1.44, "learning_rate": 1.0407273401985342e-05, - "loss": 0.1925, + "loss": 0.2444, "step": 5170 }, { "epoch": 1.44, "learning_rate": 1.0405417942295204e-05, - "loss": 0.1363, + "loss": 0.1115, "step": 5171 }, { "epoch": 1.44, "learning_rate": 1.0403562482605066e-05, - "loss": 0.0807, + "loss": 0.0854, "step": 5172 }, { "epoch": 1.44, "learning_rate": 1.0401707022914928e-05, - "loss": 0.1392, + "loss": 0.057, "step": 5173 }, { "epoch": 1.44, "learning_rate": 1.039985156322479e-05, - "loss": 0.2575, + "loss": 0.0679, "step": 5174 }, { "epoch": 1.44, "learning_rate": 1.0397996103534652e-05, - "loss": 0.0852, + "loss": 0.0491, "step": 5175 }, { "epoch": 1.44, "learning_rate": 1.0396140643844512e-05, - "loss": 0.1419, + "loss": 0.0946, "step": 5176 }, { "epoch": 1.44, "learning_rate": 1.0394285184154375e-05, - "loss": 0.136, + "loss": 0.061, "step": 5177 }, { "epoch": 1.44, "learning_rate": 1.0392429724464237e-05, - "loss": 0.0812, + "loss": 0.1222, "step": 5178 }, { "epoch": 1.44, "learning_rate": 1.0390574264774097e-05, - "loss": 0.1972, + "loss": 0.0559, "step": 5179 }, { "epoch": 1.44, "learning_rate": 1.038871880508396e-05, - "loss": 0.0847, + "loss": 0.0145, "step": 5180 }, { "epoch": 1.44, "learning_rate": 1.0386863345393823e-05, - "loss": 0.2479, + "loss": 0.0841, "step": 5181 }, { "epoch": 1.44, "learning_rate": 1.0385007885703685e-05, - "loss": 0.028, + "loss": 0.0801, "step": 5182 }, { "epoch": 1.44, "learning_rate": 1.0383152426013545e-05, - "loss": 0.1341, + "loss": 0.0768, "step": 5183 }, { "epoch": 1.44, "learning_rate": 1.0381296966323407e-05, - "loss": 0.0284, + "loss": 0.0529, "step": 5184 }, { "epoch": 1.44, "learning_rate": 1.037944150663327e-05, - "loss": 0.247, + "loss": 0.1092, "step": 5185 }, { "epoch": 1.44, "learning_rate": 1.037758604694313e-05, - "loss": 0.0816, + "loss": 0.0619, "step": 5186 }, { "epoch": 1.44, "learning_rate": 1.0375730587252992e-05, - "loss": 0.0862, + "loss": 0.0179, "step": 5187 }, { "epoch": 1.44, "learning_rate": 1.0373875127562854e-05, - "loss": 0.2472, + "loss": 0.1121, "step": 5188 }, { "epoch": 1.44, "learning_rate": 1.0372019667872718e-05, - "loss": 0.1976, + "loss": 0.0627, "step": 5189 }, { "epoch": 1.44, "learning_rate": 1.0370164208182578e-05, - "loss": 0.0851, + "loss": 0.0578, "step": 5190 }, { "epoch": 1.44, "learning_rate": 1.036830874849244e-05, - "loss": 0.0845, + "loss": 0.0161, "step": 5191 }, { "epoch": 1.45, "learning_rate": 1.03664532888023e-05, - "loss": 0.1895, + "loss": 0.0864, "step": 5192 }, { "epoch": 1.45, "learning_rate": 1.0364597829112164e-05, - "loss": 0.1945, + "loss": 0.1154, "step": 5193 }, { "epoch": 1.45, "learning_rate": 1.0362742369422025e-05, - "loss": 0.3612, + "loss": 0.1957, "step": 5194 }, { "epoch": 1.45, "learning_rate": 1.0360886909731887e-05, - "loss": 0.1942, + "loss": 0.0692, "step": 5195 }, { "epoch": 1.45, "learning_rate": 1.0359031450041747e-05, - "loss": 0.1408, + "loss": 0.0328, "step": 5196 }, { "epoch": 1.45, "learning_rate": 1.0357175990351611e-05, - "loss": 0.139, + "loss": 0.02, "step": 5197 }, { "epoch": 1.45, "learning_rate": 1.0355320530661473e-05, - "loss": 0.0297, + "loss": 0.0192, "step": 5198 }, { "epoch": 1.45, "learning_rate": 1.0353465070971333e-05, - "loss": 0.2994, + "loss": 0.0125, "step": 5199 }, { "epoch": 1.45, "learning_rate": 1.0351609611281195e-05, - "loss": 0.1926, + "loss": 0.0705, "step": 5200 }, { "epoch": 1.45, "learning_rate": 1.0349754151591059e-05, - "loss": 0.2515, + "loss": 0.0195, "step": 5201 }, { "epoch": 1.45, "learning_rate": 1.0347898691900919e-05, - "loss": 0.0321, + "loss": 0.0142, "step": 5202 }, { "epoch": 1.45, "learning_rate": 1.034604323221078e-05, - "loss": 0.2439, + "loss": 0.0949, "step": 5203 }, { "epoch": 1.45, "learning_rate": 1.0344187772520642e-05, - "loss": 0.1385, + "loss": 0.0497, "step": 5204 }, { "epoch": 1.45, "learning_rate": 1.0342332312830506e-05, - "loss": 0.1325, + "loss": 0.0416, "step": 5205 }, { "epoch": 1.45, "learning_rate": 1.0340476853140366e-05, - "loss": 0.0835, + "loss": 0.0176, "step": 5206 }, { "epoch": 1.45, "learning_rate": 1.0338621393450228e-05, - "loss": 0.0879, + "loss": 0.1155, "step": 5207 }, { "epoch": 1.45, "learning_rate": 1.0336765933760088e-05, - "loss": 0.1905, + "loss": 0.1625, "step": 5208 }, { "epoch": 1.45, "learning_rate": 1.0334910474069952e-05, - "loss": 0.0894, + "loss": 0.1447, "step": 5209 }, { "epoch": 1.45, "learning_rate": 1.0333055014379814e-05, - "loss": 0.1367, + "loss": 0.0738, "step": 5210 }, { "epoch": 1.45, "learning_rate": 1.0331199554689676e-05, - "loss": 0.1885, + "loss": 0.0296, "step": 5211 }, { "epoch": 1.45, "learning_rate": 1.0329344094999536e-05, - "loss": 0.0876, + "loss": 0.0898, "step": 5212 }, { "epoch": 1.45, "learning_rate": 1.03274886353094e-05, - "loss": 0.0898, + "loss": 0.0105, "step": 5213 }, { "epoch": 1.45, "learning_rate": 1.0325633175619261e-05, - "loss": 0.1922, + "loss": 0.0736, "step": 5214 }, { "epoch": 1.45, "learning_rate": 1.0323777715929121e-05, - "loss": 0.1358, + "loss": 0.0526, "step": 5215 }, { "epoch": 1.45, "learning_rate": 1.0321922256238983e-05, - "loss": 0.1861, + "loss": 0.0428, "step": 5216 }, { "epoch": 1.45, "learning_rate": 1.0320066796548847e-05, - "loss": 0.0878, + "loss": 0.0893, "step": 5217 }, { "epoch": 1.45, "learning_rate": 1.0318211336858707e-05, - "loss": 0.1427, + "loss": 0.0379, "step": 5218 }, { "epoch": 1.45, "learning_rate": 1.0316355877168569e-05, - "loss": 0.1907, + "loss": 0.0666, "step": 5219 }, { "epoch": 1.45, "learning_rate": 1.031450041747843e-05, - "loss": 0.351, + "loss": 0.1617, "step": 5220 }, { "epoch": 1.45, "learning_rate": 1.0312644957788294e-05, - "loss": 0.0912, + "loss": 0.0586, "step": 5221 }, { "epoch": 1.45, "learning_rate": 1.0310789498098154e-05, - "loss": 0.2411, + "loss": 0.1248, "step": 5222 }, { "epoch": 1.45, "learning_rate": 1.0308934038408016e-05, - "loss": 0.1415, + "loss": 0.2083, "step": 5223 }, { "epoch": 1.45, "learning_rate": 1.0307078578717876e-05, - "loss": 0.1406, + "loss": 0.1799, "step": 5224 }, { "epoch": 1.45, "learning_rate": 1.030522311902774e-05, - "loss": 0.1952, + "loss": 0.0257, "step": 5225 }, { "epoch": 1.45, "learning_rate": 1.0303367659337602e-05, - "loss": 0.0873, + "loss": 0.1355, "step": 5226 }, { "epoch": 1.45, "learning_rate": 1.0301512199647464e-05, - "loss": 0.3462, + "loss": 0.0827, "step": 5227 }, { "epoch": 1.46, "learning_rate": 1.0299656739957324e-05, - "loss": 0.2441, + "loss": 0.0615, "step": 5228 }, { "epoch": 1.46, "learning_rate": 1.0297801280267188e-05, - "loss": 0.0906, + "loss": 0.0227, "step": 5229 }, { "epoch": 1.46, "learning_rate": 1.029594582057705e-05, - "loss": 0.1856, + "loss": 0.064, "step": 5230 }, { "epoch": 1.46, "learning_rate": 1.029409036088691e-05, - "loss": 0.0895, + "loss": 0.0691, "step": 5231 }, { "epoch": 1.46, "learning_rate": 1.0292234901196771e-05, - "loss": 0.0855, + "loss": 0.0773, "step": 5232 }, { "epoch": 1.46, "learning_rate": 1.0290379441506635e-05, - "loss": 0.2955, + "loss": 0.1108, "step": 5233 }, { "epoch": 1.46, "learning_rate": 1.0288523981816497e-05, - "loss": 0.1867, + "loss": 0.0738, "step": 5234 }, { "epoch": 1.46, "learning_rate": 1.0286668522126357e-05, - "loss": 0.0888, + "loss": 0.1371, "step": 5235 }, { "epoch": 1.46, "learning_rate": 1.0284813062436219e-05, - "loss": 0.1398, + "loss": 0.0911, "step": 5236 }, { "epoch": 1.46, "learning_rate": 1.0282957602746082e-05, - "loss": 0.1405, + "loss": 0.0234, "step": 5237 }, { "epoch": 1.46, "learning_rate": 1.0281102143055943e-05, - "loss": 0.1895, + "loss": 0.1253, "step": 5238 }, { "epoch": 1.46, "learning_rate": 1.0279246683365805e-05, - "loss": 0.2464, + "loss": 0.0937, "step": 5239 }, { "epoch": 1.46, "learning_rate": 1.0277391223675666e-05, - "loss": 0.0912, + "loss": 0.1208, "step": 5240 }, { "epoch": 1.46, "learning_rate": 1.0275535763985528e-05, - "loss": 0.1389, + "loss": 0.1043, "step": 5241 }, { "epoch": 1.46, "learning_rate": 1.027368030429539e-05, - "loss": 0.1365, + "loss": 0.0337, "step": 5242 }, { "epoch": 1.46, "learning_rate": 1.0271824844605252e-05, - "loss": 0.141, + "loss": 0.0716, "step": 5243 }, { "epoch": 1.46, "learning_rate": 1.0269969384915112e-05, - "loss": 0.0903, + "loss": 0.0189, "step": 5244 }, { "epoch": 1.46, "learning_rate": 1.0268113925224976e-05, - "loss": 0.091, + "loss": 0.0255, "step": 5245 }, { "epoch": 1.46, "learning_rate": 1.0266258465534838e-05, - "loss": 0.0405, + "loss": 0.0873, "step": 5246 }, { "epoch": 1.46, "learning_rate": 1.0264403005844698e-05, - "loss": 0.0391, + "loss": 0.0462, "step": 5247 }, { "epoch": 1.46, "learning_rate": 1.026254754615456e-05, - "loss": 0.2945, + "loss": 0.2434, "step": 5248 }, { "epoch": 1.46, "learning_rate": 1.0260692086464423e-05, - "loss": 0.0917, + "loss": 0.122, "step": 5249 }, { "epoch": 1.46, "learning_rate": 1.0258836626774285e-05, - "loss": 0.243, + "loss": 0.0193, "step": 5250 }, { "epoch": 1.46, "learning_rate": 1.0256981167084145e-05, - "loss": 0.088, + "loss": 0.0535, "step": 5251 }, { "epoch": 1.46, "learning_rate": 1.0255125707394007e-05, - "loss": 0.2421, + "loss": 0.0492, "step": 5252 }, { "epoch": 1.46, "learning_rate": 1.025327024770387e-05, - "loss": 0.1377, + "loss": 0.1234, "step": 5253 }, { "epoch": 1.46, "learning_rate": 1.0251414788013731e-05, - "loss": 0.0908, + "loss": 0.1003, "step": 5254 }, { "epoch": 1.46, "learning_rate": 1.0249559328323593e-05, - "loss": 0.0875, + "loss": 0.0494, "step": 5255 }, { "epoch": 1.46, "learning_rate": 1.0247703868633455e-05, - "loss": 0.0358, + "loss": 0.0838, "step": 5256 }, { "epoch": 1.46, "learning_rate": 1.0245848408943318e-05, - "loss": 0.1374, + "loss": 0.1495, "step": 5257 }, { "epoch": 1.46, "learning_rate": 1.0243992949253178e-05, - "loss": 0.2438, + "loss": 0.0631, "step": 5258 }, { "epoch": 1.46, "learning_rate": 1.024213748956304e-05, - "loss": 0.1389, + "loss": 0.0224, "step": 5259 }, { "epoch": 1.46, "learning_rate": 1.02402820298729e-05, - "loss": 0.084, + "loss": 0.1822, "step": 5260 }, { "epoch": 1.46, "learning_rate": 1.0238426570182764e-05, - "loss": 0.2988, + "loss": 0.0956, "step": 5261 }, { "epoch": 1.46, "learning_rate": 1.0236571110492626e-05, - "loss": 0.0881, + "loss": 0.0601, "step": 5262 }, { "epoch": 1.46, "learning_rate": 1.0234715650802488e-05, - "loss": 0.1384, + "loss": 0.0755, "step": 5263 }, { "epoch": 1.47, "learning_rate": 1.0232860191112348e-05, - "loss": 0.1886, + "loss": 0.0338, "step": 5264 }, { "epoch": 1.47, "learning_rate": 1.0231004731422211e-05, - "loss": 0.0884, + "loss": 0.0957, "step": 5265 }, { "epoch": 1.47, "learning_rate": 1.0229149271732073e-05, - "loss": 0.1929, + "loss": 0.062, "step": 5266 }, { "epoch": 1.47, "learning_rate": 1.0227293812041934e-05, - "loss": 0.1414, + "loss": 0.0506, "step": 5267 }, { "epoch": 1.47, "learning_rate": 1.0225438352351795e-05, - "loss": 0.1403, + "loss": 0.1321, "step": 5268 }, { "epoch": 1.47, "learning_rate": 1.0223582892661659e-05, - "loss": 0.0865, + "loss": 0.0213, "step": 5269 }, { "epoch": 1.47, "learning_rate": 1.0221727432971519e-05, - "loss": 0.1923, + "loss": 0.1063, "step": 5270 }, { "epoch": 1.47, "learning_rate": 1.0219871973281381e-05, - "loss": 0.1923, + "loss": 0.1454, "step": 5271 }, { "epoch": 1.47, "learning_rate": 1.0218016513591243e-05, - "loss": 0.139, + "loss": 0.1209, "step": 5272 }, { "epoch": 1.47, "learning_rate": 1.0216161053901106e-05, - "loss": 0.0325, + "loss": 0.2124, "step": 5273 }, { "epoch": 1.47, "learning_rate": 1.0214305594210967e-05, - "loss": 0.086, + "loss": 0.0187, "step": 5274 }, { "epoch": 1.47, "learning_rate": 1.0212450134520828e-05, - "loss": 0.088, + "loss": 0.2011, "step": 5275 }, { "epoch": 1.47, "learning_rate": 1.0210594674830689e-05, - "loss": 0.0849, + "loss": 0.1465, "step": 5276 }, { "epoch": 1.47, "learning_rate": 1.0208739215140552e-05, - "loss": 0.2488, + "loss": 0.114, "step": 5277 }, { "epoch": 1.47, "learning_rate": 1.0206883755450414e-05, - "loss": 0.238, + "loss": 0.0433, "step": 5278 }, { "epoch": 1.47, "learning_rate": 1.0205028295760276e-05, - "loss": 0.0831, + "loss": 0.024, "step": 5279 }, { "epoch": 1.47, "learning_rate": 1.0203172836070136e-05, - "loss": 0.141, + "loss": 0.0598, "step": 5280 }, { "epoch": 1.47, "learning_rate": 1.020131737638e-05, - "loss": 0.0869, + "loss": 0.048, "step": 5281 }, { "epoch": 1.47, "learning_rate": 1.0199461916689862e-05, - "loss": 0.0847, + "loss": 0.042, "step": 5282 }, { "epoch": 1.47, "learning_rate": 1.0197606456999722e-05, - "loss": 0.086, + "loss": 0.0164, "step": 5283 }, { "epoch": 1.47, "learning_rate": 1.0195750997309584e-05, - "loss": 0.0296, + "loss": 0.0557, "step": 5284 }, { "epoch": 1.47, "learning_rate": 1.0193895537619447e-05, - "loss": 0.1929, + "loss": 0.0234, "step": 5285 }, { "epoch": 1.47, "learning_rate": 1.0192040077929309e-05, - "loss": 0.0847, + "loss": 0.1214, "step": 5286 }, { "epoch": 1.47, "learning_rate": 1.019018461823917e-05, - "loss": 0.083, + "loss": 0.0605, "step": 5287 }, { "epoch": 1.47, "learning_rate": 1.0188329158549031e-05, - "loss": 0.0297, + "loss": 0.0789, "step": 5288 }, { "epoch": 1.47, "learning_rate": 1.0186473698858895e-05, - "loss": 0.1926, + "loss": 0.0201, "step": 5289 }, { "epoch": 1.47, "learning_rate": 1.0184618239168755e-05, - "loss": 0.0851, + "loss": 0.0544, "step": 5290 }, { "epoch": 1.47, "learning_rate": 1.0182762779478617e-05, - "loss": 0.1384, + "loss": 0.0243, "step": 5291 }, { "epoch": 1.47, "learning_rate": 1.0180907319788479e-05, - "loss": 0.1367, + "loss": 0.066, "step": 5292 }, { "epoch": 1.47, "learning_rate": 1.017905186009834e-05, - "loss": 0.2527, + "loss": 0.1275, "step": 5293 }, { "epoch": 1.47, "learning_rate": 1.0177196400408202e-05, - "loss": 0.0835, + "loss": 0.0717, "step": 5294 }, { "epoch": 1.47, "learning_rate": 1.0175340940718064e-05, - "loss": 0.1947, + "loss": 0.0225, "step": 5295 }, { "epoch": 1.47, "learning_rate": 1.0173485481027924e-05, - "loss": 0.3699, + "loss": 0.2005, "step": 5296 }, { "epoch": 1.47, "learning_rate": 1.0171630021337788e-05, - "loss": 0.3055, + "loss": 0.2521, "step": 5297 }, { "epoch": 1.47, "learning_rate": 1.016977456164765e-05, - "loss": 0.141, + "loss": 0.0217, "step": 5298 }, { "epoch": 1.47, "learning_rate": 1.016791910195751e-05, - "loss": 0.3015, + "loss": 0.0965, "step": 5299 }, { "epoch": 1.48, "learning_rate": 1.0166063642267372e-05, - "loss": 0.1401, + "loss": 0.1854, "step": 5300 }, { "epoch": 1.48, "learning_rate": 1.0164208182577235e-05, - "loss": 0.0861, + "loss": 0.0133, "step": 5301 }, { "epoch": 1.48, "learning_rate": 1.0162352722887097e-05, - "loss": 0.0865, + "loss": 0.1378, "step": 5302 }, { "epoch": 1.48, "learning_rate": 1.0160497263196957e-05, - "loss": 0.0847, + "loss": 0.093, "step": 5303 }, { "epoch": 1.48, "learning_rate": 1.015864180350682e-05, - "loss": 0.2504, + "loss": 0.0798, "step": 5304 }, { "epoch": 1.48, "learning_rate": 1.0156786343816683e-05, - "loss": 0.1422, + "loss": 0.0598, "step": 5305 }, { "epoch": 1.48, "learning_rate": 1.0154930884126543e-05, - "loss": 0.1356, + "loss": 0.0652, "step": 5306 }, { "epoch": 1.48, "learning_rate": 1.0153075424436405e-05, - "loss": 0.0877, + "loss": 0.0195, "step": 5307 }, { "epoch": 1.48, "learning_rate": 1.0151219964746267e-05, - "loss": 0.2467, + "loss": 0.0595, "step": 5308 }, { "epoch": 1.48, "learning_rate": 1.014936450505613e-05, - "loss": 0.1417, + "loss": 0.0536, "step": 5309 }, { "epoch": 1.48, "learning_rate": 1.014750904536599e-05, - "loss": 0.0344, + "loss": 0.1004, "step": 5310 }, { "epoch": 1.48, "learning_rate": 1.0145653585675852e-05, - "loss": 0.035, + "loss": 0.063, "step": 5311 }, { "epoch": 1.48, "learning_rate": 1.0143798125985713e-05, - "loss": 0.1904, + "loss": 0.0546, "step": 5312 }, { "epoch": 1.48, "learning_rate": 1.0141942666295576e-05, - "loss": 0.2441, + "loss": 0.1468, "step": 5313 }, { "epoch": 1.48, "learning_rate": 1.0140087206605438e-05, - "loss": 0.0852, + "loss": 0.0844, "step": 5314 }, { "epoch": 1.48, "learning_rate": 1.01382317469153e-05, - "loss": 0.1374, + "loss": 0.0222, "step": 5315 }, { "epoch": 1.48, "learning_rate": 1.013637628722516e-05, - "loss": 0.1383, + "loss": 0.0205, "step": 5316 }, { "epoch": 1.48, "learning_rate": 1.0134520827535022e-05, - "loss": 0.0352, + "loss": 0.0199, "step": 5317 }, { "epoch": 1.48, "learning_rate": 1.0132665367844886e-05, - "loss": 0.1374, + "loss": 0.1501, "step": 5318 }, { "epoch": 1.48, "learning_rate": 1.0130809908154746e-05, - "loss": 0.138, + "loss": 0.0142, "step": 5319 }, { "epoch": 1.48, "learning_rate": 1.0128954448464608e-05, - "loss": 0.0335, + "loss": 0.0837, "step": 5320 }, { "epoch": 1.48, "learning_rate": 1.012709898877447e-05, - "loss": 0.0888, + "loss": 0.0623, "step": 5321 }, { "epoch": 1.48, "learning_rate": 1.0125243529084331e-05, - "loss": 0.0881, + "loss": 0.1017, "step": 5322 }, { "epoch": 1.48, "learning_rate": 1.0123388069394193e-05, - "loss": 0.1938, + "loss": 0.0232, "step": 5323 }, { "epoch": 1.48, "learning_rate": 1.0121532609704055e-05, - "loss": 0.0872, + "loss": 0.0243, "step": 5324 }, { "epoch": 1.48, "learning_rate": 1.0119677150013915e-05, - "loss": 0.1441, + "loss": 0.0287, "step": 5325 }, { "epoch": 1.48, "learning_rate": 1.0117821690323779e-05, - "loss": 0.2477, + "loss": 0.0475, "step": 5326 }, { "epoch": 1.48, "learning_rate": 1.011596623063364e-05, - "loss": 0.1368, + "loss": 0.0873, "step": 5327 }, { "epoch": 1.48, "learning_rate": 1.0114110770943501e-05, - "loss": 0.0312, + "loss": 0.0875, "step": 5328 }, { "epoch": 1.48, "learning_rate": 1.0112255311253363e-05, - "loss": 0.0308, + "loss": 0.0612, "step": 5329 }, { "epoch": 1.48, "learning_rate": 1.0110399851563226e-05, - "loss": 0.086, + "loss": 0.1316, "step": 5330 }, { "epoch": 1.48, "learning_rate": 1.0108544391873088e-05, - "loss": 0.0846, + "loss": 0.016, "step": 5331 }, { "epoch": 1.48, "learning_rate": 1.0106688932182948e-05, - "loss": 0.1365, + "loss": 0.0558, "step": 5332 }, { "epoch": 1.48, "learning_rate": 1.010483347249281e-05, - "loss": 0.0852, + "loss": 0.0134, "step": 5333 }, { "epoch": 1.48, "learning_rate": 1.0102978012802674e-05, - "loss": 0.135, + "loss": 0.0456, "step": 5334 }, { "epoch": 1.48, "learning_rate": 1.0101122553112534e-05, - "loss": 0.1936, + "loss": 0.0485, "step": 5335 }, { "epoch": 1.49, "learning_rate": 1.0099267093422396e-05, - "loss": 0.1939, + "loss": 0.0938, "step": 5336 }, { "epoch": 1.49, "learning_rate": 1.0097411633732258e-05, - "loss": 0.192, + "loss": 0.105, "step": 5337 }, { "epoch": 1.49, "learning_rate": 1.009555617404212e-05, - "loss": 0.2492, + "loss": 0.0489, "step": 5338 }, { "epoch": 1.49, "learning_rate": 1.0093700714351981e-05, - "loss": 0.2516, + "loss": 0.0741, "step": 5339 }, { "epoch": 1.49, "learning_rate": 1.0091845254661843e-05, - "loss": 0.2978, + "loss": 0.0272, "step": 5340 }, { "epoch": 1.49, "learning_rate": 1.0089989794971703e-05, - "loss": 0.252, + "loss": 0.0963, "step": 5341 }, { "epoch": 1.49, "learning_rate": 1.0088134335281567e-05, - "loss": 0.1934, + "loss": 0.0786, "step": 5342 }, { "epoch": 1.49, "learning_rate": 1.0086278875591429e-05, - "loss": 0.2485, + "loss": 0.0283, "step": 5343 }, { "epoch": 1.49, "learning_rate": 1.0084423415901289e-05, - "loss": 0.0301, + "loss": 0.0129, "step": 5344 }, { "epoch": 1.49, "learning_rate": 1.0082567956211151e-05, - "loss": 0.0311, + "loss": 0.0183, "step": 5345 }, { "epoch": 1.49, "learning_rate": 1.0080712496521015e-05, - "loss": 0.1405, + "loss": 0.0131, "step": 5346 }, { "epoch": 1.49, "learning_rate": 1.0078857036830876e-05, - "loss": 0.1414, + "loss": 0.1178, "step": 5347 }, { "epoch": 1.49, "learning_rate": 1.0077001577140737e-05, - "loss": 0.1424, + "loss": 0.0256, "step": 5348 }, { "epoch": 1.49, "learning_rate": 1.0075146117450598e-05, - "loss": 0.0339, + "loss": 0.0316, "step": 5349 }, { "epoch": 1.49, "learning_rate": 1.0073290657760462e-05, - "loss": 0.1381, + "loss": 0.1281, "step": 5350 }, { "epoch": 1.49, "learning_rate": 1.0071435198070322e-05, - "loss": 0.2455, + "loss": 0.0367, "step": 5351 }, { "epoch": 1.49, "learning_rate": 1.0069579738380184e-05, - "loss": 0.1379, + "loss": 0.0323, "step": 5352 }, { "epoch": 1.49, "learning_rate": 1.0067724278690046e-05, - "loss": 0.3518, + "loss": 0.0146, "step": 5353 }, { "epoch": 1.49, "learning_rate": 1.006586881899991e-05, - "loss": 0.088, + "loss": 0.0533, "step": 5354 }, { "epoch": 1.49, "learning_rate": 1.006401335930977e-05, - "loss": 0.1913, + "loss": 0.4008, "step": 5355 }, { "epoch": 1.49, "learning_rate": 1.0062157899619632e-05, - "loss": 0.134, + "loss": 0.0963, "step": 5356 }, { "epoch": 1.49, "learning_rate": 1.0060302439929492e-05, - "loss": 0.1918, + "loss": 0.1192, "step": 5357 }, { "epoch": 1.49, "learning_rate": 1.0058446980239355e-05, - "loss": 0.0879, + "loss": 0.1775, "step": 5358 }, { "epoch": 1.49, "learning_rate": 1.0056591520549217e-05, - "loss": 0.1904, + "loss": 0.0556, "step": 5359 }, { "epoch": 1.49, "learning_rate": 1.0054736060859079e-05, - "loss": 0.2474, + "loss": 0.1849, "step": 5360 }, { "epoch": 1.49, "learning_rate": 1.005288060116894e-05, - "loss": 0.1944, + "loss": 0.1592, "step": 5361 }, { "epoch": 1.49, "learning_rate": 1.0051025141478803e-05, - "loss": 0.1431, + "loss": 0.0896, "step": 5362 }, { "epoch": 1.49, "learning_rate": 1.0049169681788665e-05, - "loss": 0.09, + "loss": 0.1752, "step": 5363 }, { "epoch": 1.49, "learning_rate": 1.0047314222098525e-05, - "loss": 0.1929, + "loss": 0.1845, "step": 5364 }, { "epoch": 1.49, "learning_rate": 1.0045458762408387e-05, - "loss": 0.0873, + "loss": 0.1078, "step": 5365 }, { "epoch": 1.49, "learning_rate": 1.004360330271825e-05, - "loss": 0.1416, + "loss": 0.0294, "step": 5366 }, { "epoch": 1.49, "learning_rate": 1.004174784302811e-05, - "loss": 0.1394, + "loss": 0.0365, "step": 5367 }, { "epoch": 1.49, "learning_rate": 1.0039892383337972e-05, - "loss": 0.2438, + "loss": 0.0634, "step": 5368 }, { "epoch": 1.49, "learning_rate": 1.0038036923647834e-05, - "loss": 0.1868, + "loss": 0.0344, "step": 5369 }, { "epoch": 1.49, "learning_rate": 1.0036181463957698e-05, - "loss": 0.1381, + "loss": 0.142, "step": 5370 }, { "epoch": 1.49, "learning_rate": 1.0034326004267558e-05, - "loss": 0.196, + "loss": 0.0961, "step": 5371 }, { "epoch": 1.5, "learning_rate": 1.003247054457742e-05, - "loss": 0.146, + "loss": 0.0137, "step": 5372 }, { "epoch": 1.5, "learning_rate": 1.003061508488728e-05, - "loss": 0.0882, + "loss": 0.1149, "step": 5373 }, { "epoch": 1.5, "learning_rate": 1.0028759625197144e-05, - "loss": 0.1398, + "loss": 0.0805, "step": 5374 }, { "epoch": 1.5, "learning_rate": 1.0026904165507005e-05, - "loss": 0.1429, + "loss": 0.0738, "step": 5375 }, { "epoch": 1.5, "learning_rate": 1.0025048705816867e-05, - "loss": 0.1422, + "loss": 0.0667, "step": 5376 }, { "epoch": 1.5, "learning_rate": 1.0023193246126727e-05, - "loss": 0.2388, + "loss": 0.1225, "step": 5377 }, { "epoch": 1.5, "learning_rate": 1.0021337786436591e-05, - "loss": 0.1933, + "loss": 0.0315, "step": 5378 }, { "epoch": 1.5, "learning_rate": 1.0019482326746453e-05, - "loss": 0.039, + "loss": 0.0452, "step": 5379 }, { "epoch": 1.5, "learning_rate": 1.0017626867056313e-05, - "loss": 0.191, + "loss": 0.027, "step": 5380 }, { "epoch": 1.5, "learning_rate": 1.0015771407366175e-05, - "loss": 0.1406, + "loss": 0.1206, "step": 5381 }, { "epoch": 1.5, "learning_rate": 1.0013915947676038e-05, - "loss": 0.1401, + "loss": 0.1174, "step": 5382 }, { "epoch": 1.5, "learning_rate": 1.00120604879859e-05, - "loss": 0.0913, + "loss": 0.1267, "step": 5383 }, { "epoch": 1.5, "learning_rate": 1.001020502829576e-05, - "loss": 0.038, + "loss": 0.0457, "step": 5384 }, { "epoch": 1.5, "learning_rate": 1.0008349568605622e-05, - "loss": 0.0877, + "loss": 0.0605, "step": 5385 }, { "epoch": 1.5, "learning_rate": 1.0006494108915486e-05, - "loss": 0.0371, + "loss": 0.0175, "step": 5386 }, { "epoch": 1.5, "learning_rate": 1.0004638649225346e-05, - "loss": 0.1911, + "loss": 0.0265, "step": 5387 }, { "epoch": 1.5, "learning_rate": 1.0002783189535208e-05, - "loss": 0.0885, + "loss": 0.1394, "step": 5388 }, { "epoch": 1.5, "learning_rate": 1.000092772984507e-05, - "loss": 0.1943, + "loss": 0.1051, "step": 5389 }, { "epoch": 1.5, "learning_rate": 9.999072270154932e-06, - "loss": 0.2394, + "loss": 0.0658, "step": 5390 }, { "epoch": 1.5, "learning_rate": 9.997216810464794e-06, - "loss": 0.0355, + "loss": 0.0871, "step": 5391 }, { "epoch": 1.5, "learning_rate": 9.995361350774655e-06, - "loss": 0.0347, + "loss": 0.1016, "step": 5392 }, { "epoch": 1.5, "learning_rate": 9.993505891084517e-06, - "loss": 0.033, + "loss": 0.1068, "step": 5393 }, { "epoch": 1.5, "learning_rate": 9.99165043139438e-06, - "loss": 0.1998, + "loss": 0.0232, "step": 5394 }, { "epoch": 1.5, "learning_rate": 9.989794971704241e-06, - "loss": 0.0325, + "loss": 0.1059, "step": 5395 }, { "epoch": 1.5, "learning_rate": 9.987939512014101e-06, - "loss": 0.0861, + "loss": 0.0237, "step": 5396 }, { "epoch": 1.5, "learning_rate": 9.986084052323965e-06, - "loss": 0.0292, + "loss": 0.0523, "step": 5397 }, { "epoch": 1.5, "learning_rate": 9.984228592633825e-06, - "loss": 0.1958, + "loss": 0.1995, "step": 5398 }, { "epoch": 1.5, "learning_rate": 9.982373132943689e-06, - "loss": 0.1935, + "loss": 0.2027, "step": 5399 }, { "epoch": 1.5, "learning_rate": 9.980517673253549e-06, - "loss": 0.085, + "loss": 0.0592, "step": 5400 }, { "epoch": 1.5, "learning_rate": 9.97866221356341e-06, - "loss": 0.3077, + "loss": 0.0788, "step": 5401 }, { "epoch": 1.5, "learning_rate": 9.976806753873273e-06, - "loss": 0.0816, + "loss": 0.0636, "step": 5402 }, { "epoch": 1.5, "learning_rate": 9.974951294183134e-06, - "loss": 0.1981, + "loss": 0.1771, "step": 5403 }, { "epoch": 1.5, "learning_rate": 9.973095834492996e-06, - "loss": 0.0285, + "loss": 0.0252, "step": 5404 }, { "epoch": 1.5, "learning_rate": 9.971240374802858e-06, - "loss": 0.4264, + "loss": 0.0422, "step": 5405 }, { "epoch": 1.5, "learning_rate": 9.96938491511272e-06, - "loss": 0.1404, + "loss": 0.0228, "step": 5406 }, { "epoch": 1.5, "learning_rate": 9.967529455422582e-06, - "loss": 0.2994, + "loss": 0.1457, "step": 5407 }, { "epoch": 1.51, "learning_rate": 9.965673995732444e-06, - "loss": 0.0833, + "loss": 0.1166, "step": 5408 }, { "epoch": 1.51, "learning_rate": 9.963818536042306e-06, - "loss": 0.0833, + "loss": 0.0245, "step": 5409 }, { "epoch": 1.51, "learning_rate": 9.961963076352167e-06, - "loss": 0.0864, + "loss": 0.0933, "step": 5410 }, { "epoch": 1.51, "learning_rate": 9.96010761666203e-06, - "loss": 0.0833, + "loss": 0.0106, "step": 5411 }, { "epoch": 1.51, "learning_rate": 9.958252156971891e-06, - "loss": 0.1389, + "loss": 0.0207, "step": 5412 }, { "epoch": 1.51, "learning_rate": 9.956396697281753e-06, - "loss": 0.1384, + "loss": 0.0906, "step": 5413 }, { "epoch": 1.51, "learning_rate": 9.954541237591613e-06, - "loss": 0.1425, + "loss": 0.0473, "step": 5414 }, { "epoch": 1.51, "learning_rate": 9.952685777901477e-06, - "loss": 0.1389, + "loss": 0.0796, "step": 5415 }, { "epoch": 1.51, "learning_rate": 9.950830318211337e-06, - "loss": 0.028, + "loss": 0.086, "step": 5416 }, { "epoch": 1.51, "learning_rate": 9.9489748585212e-06, - "loss": 0.1991, + "loss": 0.0634, "step": 5417 }, { "epoch": 1.51, "learning_rate": 9.94711939883106e-06, - "loss": 0.3128, + "loss": 0.0817, "step": 5418 }, { "epoch": 1.51, "learning_rate": 9.945263939140923e-06, - "loss": 0.0293, + "loss": 0.1864, "step": 5419 }, { "epoch": 1.51, "learning_rate": 9.943408479450784e-06, - "loss": 0.1404, + "loss": 0.0927, "step": 5420 }, { "epoch": 1.51, "learning_rate": 9.941553019760646e-06, - "loss": 0.0833, + "loss": 0.1552, "step": 5421 }, { "epoch": 1.51, "learning_rate": 9.939697560070508e-06, - "loss": 0.0855, + "loss": 0.0468, "step": 5422 }, { "epoch": 1.51, "learning_rate": 9.93784210038037e-06, - "loss": 0.1938, + "loss": 0.0231, "step": 5423 }, { "epoch": 1.51, "learning_rate": 9.935986640690232e-06, - "loss": 0.0857, + "loss": 0.0296, "step": 5424 }, { "epoch": 1.51, "learning_rate": 9.934131181000094e-06, - "loss": 0.1372, + "loss": 0.0765, "step": 5425 }, { "epoch": 1.51, "learning_rate": 9.932275721309956e-06, - "loss": 0.1408, + "loss": 0.0147, "step": 5426 }, { "epoch": 1.51, "learning_rate": 9.930420261619818e-06, - "loss": 0.1953, + "loss": 0.0758, "step": 5427 }, { "epoch": 1.51, "learning_rate": 9.92856480192968e-06, - "loss": 0.0856, + "loss": 0.0134, "step": 5428 }, { "epoch": 1.51, "learning_rate": 9.926709342239541e-06, - "loss": 0.1423, + "loss": 0.0675, "step": 5429 }, { "epoch": 1.51, "learning_rate": 9.924853882549401e-06, - "loss": 0.1419, + "loss": 0.102, "step": 5430 }, { "epoch": 1.51, "learning_rate": 9.922998422859265e-06, - "loss": 0.1425, + "loss": 0.1058, "step": 5431 }, { "epoch": 1.51, "learning_rate": 9.921142963169125e-06, - "loss": 0.1945, + "loss": 0.1405, "step": 5432 }, { "epoch": 1.51, "learning_rate": 9.919287503478989e-06, - "loss": 0.1977, + "loss": 0.0552, "step": 5433 }, { "epoch": 1.51, "learning_rate": 9.917432043788849e-06, - "loss": 0.1365, + "loss": 0.0958, "step": 5434 }, { "epoch": 1.51, "learning_rate": 9.915576584098713e-06, - "loss": 0.0841, + "loss": 0.0263, "step": 5435 }, { "epoch": 1.51, "learning_rate": 9.913721124408573e-06, - "loss": 0.087, + "loss": 0.1458, "step": 5436 }, { "epoch": 1.51, "learning_rate": 9.911865664718435e-06, - "loss": 0.1416, + "loss": 0.0892, "step": 5437 }, { "epoch": 1.51, "learning_rate": 9.910010205028296e-06, - "loss": 0.1379, + "loss": 0.0681, "step": 5438 }, { "epoch": 1.51, "learning_rate": 9.908154745338158e-06, - "loss": 0.1979, + "loss": 0.0418, "step": 5439 }, { "epoch": 1.51, "learning_rate": 9.90629928564802e-06, - "loss": 0.086, + "loss": 0.0164, "step": 5440 }, { "epoch": 1.51, "learning_rate": 9.904443825957882e-06, - "loss": 0.0845, + "loss": 0.0339, "step": 5441 }, { "epoch": 1.51, "learning_rate": 9.902588366267744e-06, - "loss": 0.1895, + "loss": 0.0513, "step": 5442 }, { "epoch": 1.51, "learning_rate": 9.900732906577606e-06, - "loss": 0.1357, + "loss": 0.0221, "step": 5443 }, { "epoch": 1.52, "learning_rate": 9.898877446887468e-06, - "loss": 0.1396, + "loss": 0.0917, "step": 5444 }, { "epoch": 1.52, "learning_rate": 9.89702198719733e-06, - "loss": 0.0862, + "loss": 0.0922, "step": 5445 }, { "epoch": 1.52, "learning_rate": 9.895166527507191e-06, - "loss": 0.1402, + "loss": 0.0625, "step": 5446 }, { "epoch": 1.52, "learning_rate": 9.893311067817053e-06, - "loss": 0.0292, + "loss": 0.0411, "step": 5447 }, { "epoch": 1.52, "learning_rate": 9.891455608126913e-06, - "loss": 0.0848, + "loss": 0.0487, "step": 5448 }, { "epoch": 1.52, "learning_rate": 9.889600148436777e-06, - "loss": 0.0837, + "loss": 0.0155, "step": 5449 }, { "epoch": 1.52, "learning_rate": 9.887744688746637e-06, - "loss": 0.0848, + "loss": 0.1506, "step": 5450 }, { "epoch": 1.52, "learning_rate": 9.8858892290565e-06, - "loss": 0.0859, + "loss": 0.056, "step": 5451 }, { "epoch": 1.52, "learning_rate": 9.884033769366361e-06, - "loss": 0.1417, + "loss": 0.008, "step": 5452 }, { "epoch": 1.52, "learning_rate": 9.882178309676223e-06, - "loss": 0.1388, + "loss": 0.0938, "step": 5453 }, { "epoch": 1.52, "learning_rate": 9.880322849986085e-06, - "loss": 0.2487, + "loss": 0.0673, "step": 5454 }, { "epoch": 1.52, "learning_rate": 9.878467390295947e-06, - "loss": 0.1927, + "loss": 0.0628, "step": 5455 }, { "epoch": 1.52, "learning_rate": 9.876611930605808e-06, - "loss": 0.1983, + "loss": 0.0094, "step": 5456 }, { "epoch": 1.52, "learning_rate": 9.87475647091567e-06, - "loss": 0.0851, + "loss": 0.0548, "step": 5457 }, { "epoch": 1.52, "learning_rate": 9.872901011225532e-06, - "loss": 0.0292, + "loss": 0.211, "step": 5458 }, { "epoch": 1.52, "learning_rate": 9.871045551535394e-06, - "loss": 0.0292, + "loss": 0.1662, "step": 5459 }, { "epoch": 1.52, "learning_rate": 9.869190091845256e-06, - "loss": 0.0289, + "loss": 0.0568, "step": 5460 }, { "epoch": 1.52, "learning_rate": 9.867334632155118e-06, - "loss": 0.1406, + "loss": 0.115, "step": 5461 }, { "epoch": 1.52, "learning_rate": 9.86547917246498e-06, - "loss": 0.1881, + "loss": 0.0145, "step": 5462 }, { "epoch": 1.52, "learning_rate": 9.863623712774842e-06, - "loss": 0.0835, + "loss": 0.2141, "step": 5463 }, { "epoch": 1.52, "learning_rate": 9.861768253084702e-06, - "loss": 0.1943, + "loss": 0.0652, "step": 5464 }, { "epoch": 1.52, "learning_rate": 9.859912793394565e-06, - "loss": 0.1946, + "loss": 0.0179, "step": 5465 }, { "epoch": 1.52, "learning_rate": 9.858057333704425e-06, - "loss": 0.1402, + "loss": 0.0558, "step": 5466 }, { "epoch": 1.52, "learning_rate": 9.856201874014287e-06, - "loss": 0.0826, + "loss": 0.0212, "step": 5467 }, { "epoch": 1.52, "learning_rate": 9.85434641432415e-06, - "loss": 0.0814, + "loss": 0.0876, "step": 5468 }, { "epoch": 1.52, "learning_rate": 9.852490954634011e-06, - "loss": 0.1391, + "loss": 0.0474, "step": 5469 }, { "epoch": 1.52, "learning_rate": 9.850635494943873e-06, - "loss": 0.2504, + "loss": 0.0605, "step": 5470 }, { "epoch": 1.52, "learning_rate": 9.848780035253735e-06, - "loss": 0.3615, + "loss": 0.0139, "step": 5471 }, { "epoch": 1.52, "learning_rate": 9.846924575563597e-06, - "loss": 0.0814, + "loss": 0.0652, "step": 5472 }, { "epoch": 1.52, "learning_rate": 9.845069115873459e-06, - "loss": 0.4231, + "loss": 0.1315, "step": 5473 }, { "epoch": 1.52, "learning_rate": 9.84321365618332e-06, - "loss": 0.1422, + "loss": 0.1499, "step": 5474 }, { "epoch": 1.52, "learning_rate": 9.841358196493182e-06, - "loss": 0.1396, + "loss": 0.0144, "step": 5475 }, { "epoch": 1.52, "learning_rate": 9.839502736803044e-06, - "loss": 0.0853, + "loss": 0.1521, "step": 5476 }, { "epoch": 1.52, "learning_rate": 9.837647277112904e-06, - "loss": 0.1365, + "loss": 0.127, "step": 5477 }, { "epoch": 1.52, "learning_rate": 9.835791817422768e-06, - "loss": 0.1938, + "loss": 0.0159, "step": 5478 }, { "epoch": 1.52, "learning_rate": 9.833936357732628e-06, - "loss": 0.033, + "loss": 0.0279, "step": 5479 }, { "epoch": 1.53, "learning_rate": 9.832080898042492e-06, - "loss": 0.1925, + "loss": 0.0646, "step": 5480 }, { "epoch": 1.53, "learning_rate": 9.830225438352352e-06, - "loss": 0.2465, + "loss": 0.0579, "step": 5481 }, { "epoch": 1.53, "learning_rate": 9.828369978662214e-06, - "loss": 0.2423, + "loss": 0.0177, "step": 5482 }, { "epoch": 1.53, "learning_rate": 9.826514518972076e-06, - "loss": 0.0853, + "loss": 0.0964, "step": 5483 }, { "epoch": 1.53, "learning_rate": 9.824659059281937e-06, - "loss": 0.0876, + "loss": 0.0156, "step": 5484 }, { "epoch": 1.53, "learning_rate": 9.8228035995918e-06, - "loss": 0.1429, + "loss": 0.0171, "step": 5485 }, { "epoch": 1.53, "learning_rate": 9.820948139901661e-06, - "loss": 0.0844, + "loss": 0.0488, "step": 5486 }, { "epoch": 1.53, "learning_rate": 9.819092680211523e-06, - "loss": 0.084, + "loss": 0.045, "step": 5487 }, { "epoch": 1.53, "learning_rate": 9.817237220521385e-06, - "loss": 0.3522, + "loss": 0.0122, "step": 5488 }, { "epoch": 1.53, "learning_rate": 9.815381760831247e-06, - "loss": 0.0857, + "loss": 0.076, "step": 5489 }, { "epoch": 1.53, "learning_rate": 9.813526301141109e-06, - "loss": 0.1414, + "loss": 0.0625, "step": 5490 }, { "epoch": 1.53, "learning_rate": 9.81167084145097e-06, - "loss": 0.1434, + "loss": 0.1299, "step": 5491 }, { "epoch": 1.53, "learning_rate": 9.809815381760832e-06, - "loss": 0.0896, + "loss": 0.058, "step": 5492 }, { "epoch": 1.53, "learning_rate": 9.807959922070693e-06, - "loss": 0.187, + "loss": 0.0938, "step": 5493 }, { "epoch": 1.53, "learning_rate": 9.806104462380556e-06, - "loss": 0.1397, + "loss": 0.0473, "step": 5494 }, { "epoch": 1.53, "learning_rate": 9.804249002690416e-06, - "loss": 0.1402, + "loss": 0.1067, "step": 5495 }, { "epoch": 1.53, "learning_rate": 9.80239354300028e-06, - "loss": 0.2429, + "loss": 0.0892, "step": 5496 }, { "epoch": 1.53, "learning_rate": 9.80053808331014e-06, - "loss": 0.0352, + "loss": 0.0993, "step": 5497 }, { "epoch": 1.53, "learning_rate": 9.798682623620004e-06, - "loss": 0.1421, + "loss": 0.0673, "step": 5498 }, { "epoch": 1.53, "learning_rate": 9.796827163929864e-06, - "loss": 0.1903, + "loss": 0.0598, "step": 5499 }, { "epoch": 1.53, "learning_rate": 9.794971704239726e-06, - "loss": 0.2899, + "loss": 0.0498, "step": 5500 }, { "epoch": 1.53, "learning_rate": 9.793116244549588e-06, - "loss": 0.1418, + "loss": 0.0244, "step": 5501 }, { "epoch": 1.53, "learning_rate": 9.79126078485945e-06, - "loss": 0.1406, + "loss": 0.0168, "step": 5502 }, { "epoch": 1.53, "learning_rate": 9.789405325169311e-06, - "loss": 0.1434, + "loss": 0.1873, "step": 5503 }, { "epoch": 1.53, "learning_rate": 9.787549865479173e-06, - "loss": 0.1396, + "loss": 0.0577, "step": 5504 }, { "epoch": 1.53, "learning_rate": 9.785694405789035e-06, - "loss": 0.0373, + "loss": 0.085, "step": 5505 }, { "epoch": 1.53, "learning_rate": 9.783838946098897e-06, - "loss": 0.0356, + "loss": 0.1775, "step": 5506 }, { "epoch": 1.53, "learning_rate": 9.781983486408759e-06, - "loss": 0.1887, + "loss": 0.0865, "step": 5507 }, { "epoch": 1.53, "learning_rate": 9.78012802671862e-06, - "loss": 0.1388, + "loss": 0.0533, "step": 5508 }, { "epoch": 1.53, "learning_rate": 9.778272567028482e-06, - "loss": 0.0351, + "loss": 0.0562, "step": 5509 }, { "epoch": 1.53, "learning_rate": 9.776417107338344e-06, - "loss": 0.0872, + "loss": 0.2182, "step": 5510 }, { "epoch": 1.53, "learning_rate": 9.774561647648205e-06, - "loss": 0.0866, + "loss": 0.0609, "step": 5511 }, { "epoch": 1.53, "learning_rate": 9.772706187958068e-06, - "loss": 0.1415, + "loss": 0.1132, "step": 5512 }, { "epoch": 1.53, "learning_rate": 9.770850728267928e-06, - "loss": 0.144, + "loss": 0.0737, "step": 5513 }, { "epoch": 1.53, "learning_rate": 9.768995268577792e-06, - "loss": 0.1434, + "loss": 0.0233, "step": 5514 }, { "epoch": 1.53, "learning_rate": 9.767139808887652e-06, - "loss": 0.1389, + "loss": 0.1068, "step": 5515 }, { "epoch": 1.54, "learning_rate": 9.765284349197514e-06, - "loss": 0.1902, + "loss": 0.0541, "step": 5516 }, { "epoch": 1.54, "learning_rate": 9.763428889507376e-06, - "loss": 0.1952, + "loss": 0.0295, "step": 5517 }, { "epoch": 1.54, "learning_rate": 9.761573429817238e-06, - "loss": 0.0843, + "loss": 0.021, "step": 5518 }, { "epoch": 1.54, "learning_rate": 9.7597179701271e-06, - "loss": 0.0869, + "loss": 0.0479, "step": 5519 }, { "epoch": 1.54, "learning_rate": 9.757862510436961e-06, - "loss": 0.1929, + "loss": 0.1204, "step": 5520 }, { "epoch": 1.54, "learning_rate": 9.756007050746823e-06, - "loss": 0.0861, + "loss": 0.1299, "step": 5521 }, { "epoch": 1.54, "learning_rate": 9.754151591056685e-06, - "loss": 0.2458, + "loss": 0.1432, "step": 5522 }, { "epoch": 1.54, "learning_rate": 9.752296131366547e-06, - "loss": 0.0302, + "loss": 0.0273, "step": 5523 }, { "epoch": 1.54, "learning_rate": 9.750440671676409e-06, - "loss": 0.0294, + "loss": 0.0888, "step": 5524 }, { "epoch": 1.54, "learning_rate": 9.74858521198627e-06, - "loss": 0.3054, + "loss": 0.0566, "step": 5525 }, { "epoch": 1.54, "learning_rate": 9.746729752296133e-06, - "loss": 0.1346, + "loss": 0.1232, "step": 5526 }, { "epoch": 1.54, "learning_rate": 9.744874292605993e-06, - "loss": 0.1937, + "loss": 0.1358, "step": 5527 }, { "epoch": 1.54, "learning_rate": 9.743018832915856e-06, - "loss": 0.247, + "loss": 0.0975, "step": 5528 }, { "epoch": 1.54, "learning_rate": 9.741163373225717e-06, - "loss": 0.134, + "loss": 0.0563, "step": 5529 }, { "epoch": 1.54, "learning_rate": 9.73930791353558e-06, - "loss": 0.0305, + "loss": 0.0186, "step": 5530 }, { "epoch": 1.54, "learning_rate": 9.73745245384544e-06, - "loss": 0.0848, + "loss": 0.0534, "step": 5531 }, { "epoch": 1.54, "learning_rate": 9.735596994155304e-06, - "loss": 0.3026, + "loss": 0.0523, "step": 5532 }, { "epoch": 1.54, "learning_rate": 9.733741534465164e-06, - "loss": 0.3013, + "loss": 0.0217, "step": 5533 }, { "epoch": 1.54, "learning_rate": 9.731886074775026e-06, - "loss": 0.0848, + "loss": 0.019, "step": 5534 }, { "epoch": 1.54, "learning_rate": 9.730030615084888e-06, - "loss": 0.2941, + "loss": 0.0146, "step": 5535 }, { "epoch": 1.54, "learning_rate": 9.72817515539475e-06, - "loss": 0.084, + "loss": 0.0639, "step": 5536 }, { "epoch": 1.54, "learning_rate": 9.726319695704611e-06, - "loss": 0.0323, + "loss": 0.0815, "step": 5537 }, { "epoch": 1.54, "learning_rate": 9.724464236014473e-06, - "loss": 0.1907, + "loss": 0.1084, "step": 5538 }, { "epoch": 1.54, "learning_rate": 9.722608776324335e-06, - "loss": 0.0333, + "loss": 0.1023, "step": 5539 }, { "epoch": 1.54, "learning_rate": 9.720753316634197e-06, - "loss": 0.1443, + "loss": 0.0236, "step": 5540 }, { "epoch": 1.54, "learning_rate": 9.718897856944059e-06, - "loss": 0.0861, + "loss": 0.0135, "step": 5541 }, { "epoch": 1.54, "learning_rate": 9.71704239725392e-06, - "loss": 0.2477, + "loss": 0.0131, "step": 5542 }, { "epoch": 1.54, "learning_rate": 9.715186937563783e-06, - "loss": 0.0918, + "loss": 0.1096, "step": 5543 }, { "epoch": 1.54, "learning_rate": 9.713331477873645e-06, - "loss": 0.194, + "loss": 0.0346, "step": 5544 }, { "epoch": 1.54, "learning_rate": 9.711476018183505e-06, - "loss": 0.0874, + "loss": 0.0551, "step": 5545 }, { "epoch": 1.54, "learning_rate": 9.709620558493368e-06, - "loss": 0.0855, + "loss": 0.0481, "step": 5546 }, { "epoch": 1.54, "learning_rate": 9.707765098803228e-06, - "loss": 0.2481, + "loss": 0.0205, "step": 5547 }, { "epoch": 1.54, "learning_rate": 9.705909639113092e-06, - "loss": 0.1401, + "loss": 0.0426, "step": 5548 }, { "epoch": 1.54, "learning_rate": 9.704054179422952e-06, - "loss": 0.2467, + "loss": 0.0968, "step": 5549 }, { "epoch": 1.54, "learning_rate": 9.702198719732814e-06, - "loss": 0.2447, + "loss": 0.0401, "step": 5550 }, { "epoch": 1.54, "learning_rate": 9.700343260042676e-06, - "loss": 0.1369, + "loss": 0.0457, "step": 5551 }, { "epoch": 1.55, "learning_rate": 9.698487800352538e-06, - "loss": 0.0857, + "loss": 0.0473, "step": 5552 }, { "epoch": 1.55, "learning_rate": 9.6966323406624e-06, - "loss": 0.1947, + "loss": 0.0131, "step": 5553 }, { "epoch": 1.55, "learning_rate": 9.694776880972262e-06, - "loss": 0.0832, + "loss": 0.0094, "step": 5554 }, { "epoch": 1.55, "learning_rate": 9.692921421282123e-06, - "loss": 0.253, + "loss": 0.0505, "step": 5555 }, { "epoch": 1.55, "learning_rate": 9.691065961591985e-06, - "loss": 0.1387, + "loss": 0.053, "step": 5556 }, { "epoch": 1.55, "learning_rate": 9.689210501901847e-06, - "loss": 0.1382, + "loss": 0.0816, "step": 5557 }, { "epoch": 1.55, "learning_rate": 9.687355042211709e-06, - "loss": 0.0358, + "loss": 0.0826, "step": 5558 }, { "epoch": 1.55, "learning_rate": 9.685499582521571e-06, - "loss": 0.0868, + "loss": 0.1334, "step": 5559 }, { "epoch": 1.55, "learning_rate": 9.683644122831433e-06, - "loss": 0.1947, + "loss": 0.1038, "step": 5560 }, { "epoch": 1.55, "learning_rate": 9.681788663141295e-06, - "loss": 0.1951, + "loss": 0.1431, "step": 5561 }, { "epoch": 1.55, "learning_rate": 9.679933203451157e-06, - "loss": 0.1409, + "loss": 0.0615, "step": 5562 }, { "epoch": 1.55, "learning_rate": 9.678077743761017e-06, - "loss": 0.1931, + "loss": 0.1222, "step": 5563 }, { "epoch": 1.55, "learning_rate": 9.67622228407088e-06, - "loss": 0.1894, + "loss": 0.0163, "step": 5564 }, { "epoch": 1.55, "learning_rate": 9.67436682438074e-06, - "loss": 0.1924, + "loss": 0.0455, "step": 5565 }, { "epoch": 1.55, "learning_rate": 9.672511364690604e-06, - "loss": 0.1374, + "loss": 0.0563, "step": 5566 }, { "epoch": 1.55, "learning_rate": 9.670655905000464e-06, - "loss": 0.2413, + "loss": 0.1382, "step": 5567 }, { "epoch": 1.55, "learning_rate": 9.668800445310326e-06, - "loss": 0.2416, + "loss": 0.0669, "step": 5568 }, { "epoch": 1.55, "learning_rate": 9.666944985620188e-06, - "loss": 0.1971, + "loss": 0.0321, "step": 5569 }, { "epoch": 1.55, "learning_rate": 9.66508952593005e-06, - "loss": 0.0891, + "loss": 0.0961, "step": 5570 }, { "epoch": 1.55, "learning_rate": 9.663234066239912e-06, - "loss": 0.1407, + "loss": 0.1527, "step": 5571 }, { "epoch": 1.55, "learning_rate": 9.661378606549774e-06, - "loss": 0.0919, + "loss": 0.124, "step": 5572 }, { "epoch": 1.55, "learning_rate": 9.659523146859635e-06, - "loss": 0.0396, + "loss": 0.2176, "step": 5573 }, { "epoch": 1.55, "learning_rate": 9.657667687169497e-06, - "loss": 0.1908, + "loss": 0.1166, "step": 5574 }, { "epoch": 1.55, "learning_rate": 9.65581222747936e-06, - "loss": 0.1406, + "loss": 0.0184, "step": 5575 }, { "epoch": 1.55, "learning_rate": 9.653956767789221e-06, - "loss": 0.14, + "loss": 0.0788, "step": 5576 }, { "epoch": 1.55, "learning_rate": 9.652101308099083e-06, - "loss": 0.1378, + "loss": 0.0254, "step": 5577 }, { "epoch": 1.55, "learning_rate": 9.650245848408945e-06, - "loss": 0.0886, + "loss": 0.0258, "step": 5578 }, { "epoch": 1.55, "learning_rate": 9.648390388718805e-06, - "loss": 0.0898, + "loss": 0.2235, "step": 5579 }, { "epoch": 1.55, "learning_rate": 9.646534929028669e-06, - "loss": 0.2436, + "loss": 0.0982, "step": 5580 }, { "epoch": 1.55, "learning_rate": 9.644679469338529e-06, - "loss": 0.1932, + "loss": 0.0918, "step": 5581 }, { "epoch": 1.55, "learning_rate": 9.642824009648392e-06, - "loss": 0.2423, + "loss": 0.0211, "step": 5582 }, { "epoch": 1.55, "learning_rate": 9.640968549958252e-06, - "loss": 0.2424, + "loss": 0.0197, "step": 5583 }, { "epoch": 1.55, "learning_rate": 9.639113090268114e-06, - "loss": 0.0353, + "loss": 0.0475, "step": 5584 }, { "epoch": 1.55, "learning_rate": 9.637257630577976e-06, - "loss": 0.1968, + "loss": 0.0249, "step": 5585 }, { "epoch": 1.55, "learning_rate": 9.635402170887838e-06, - "loss": 0.1944, + "loss": 0.1544, "step": 5586 }, { "epoch": 1.55, "learning_rate": 9.6335467111977e-06, - "loss": 0.0886, + "loss": 0.1587, "step": 5587 }, { "epoch": 1.56, "learning_rate": 9.631691251507562e-06, - "loss": 0.1404, + "loss": 0.0986, "step": 5588 }, { "epoch": 1.56, "learning_rate": 9.629835791817424e-06, - "loss": 0.1914, + "loss": 0.0515, "step": 5589 }, { "epoch": 1.56, "learning_rate": 9.627980332127286e-06, - "loss": 0.1948, + "loss": 0.049, "step": 5590 }, { "epoch": 1.56, "learning_rate": 9.626124872437147e-06, - "loss": 0.2432, + "loss": 0.0841, "step": 5591 }, { "epoch": 1.56, "learning_rate": 9.62426941274701e-06, - "loss": 0.1425, + "loss": 0.0911, "step": 5592 }, { "epoch": 1.56, "learning_rate": 9.622413953056871e-06, - "loss": 0.1388, + "loss": 0.0742, "step": 5593 }, { "epoch": 1.56, "learning_rate": 9.620558493366733e-06, - "loss": 0.1418, + "loss": 0.0453, "step": 5594 }, { "epoch": 1.56, "learning_rate": 9.618703033676595e-06, - "loss": 0.1944, + "loss": 0.1268, "step": 5595 }, { "epoch": 1.56, "learning_rate": 9.616847573986457e-06, - "loss": 0.2491, + "loss": 0.0846, "step": 5596 }, { "epoch": 1.56, "learning_rate": 9.614992114296317e-06, - "loss": 0.1382, + "loss": 0.0769, "step": 5597 }, { "epoch": 1.56, "learning_rate": 9.61313665460618e-06, - "loss": 0.0371, + "loss": 0.0136, "step": 5598 }, { "epoch": 1.56, "learning_rate": 9.61128119491604e-06, - "loss": 0.0374, + "loss": 0.0959, "step": 5599 }, { "epoch": 1.56, "learning_rate": 9.609425735225904e-06, - "loss": 0.0901, + "loss": 0.0122, "step": 5600 }, { "epoch": 1.56, "learning_rate": 9.607570275535764e-06, - "loss": 0.139, + "loss": 0.0387, "step": 5601 }, { "epoch": 1.56, "learning_rate": 9.605714815845626e-06, - "loss": 0.0871, + "loss": 0.0453, "step": 5602 }, { "epoch": 1.56, "learning_rate": 9.603859356155488e-06, - "loss": 0.0885, + "loss": 0.0137, "step": 5603 }, { "epoch": 1.56, "learning_rate": 9.60200389646535e-06, - "loss": 0.0877, + "loss": 0.101, "step": 5604 }, { "epoch": 1.56, "learning_rate": 9.600148436775212e-06, - "loss": 0.0349, + "loss": 0.0501, "step": 5605 }, { "epoch": 1.56, "learning_rate": 9.598292977085074e-06, - "loss": 0.1942, + "loss": 0.0559, "step": 5606 }, { "epoch": 1.56, "learning_rate": 9.596437517394936e-06, - "loss": 0.1363, + "loss": 0.0137, "step": 5607 }, { "epoch": 1.56, "learning_rate": 9.594582057704798e-06, - "loss": 0.033, + "loss": 0.0093, "step": 5608 }, { "epoch": 1.56, "learning_rate": 9.59272659801466e-06, - "loss": 0.191, + "loss": 0.0535, "step": 5609 }, { "epoch": 1.56, "learning_rate": 9.590871138324521e-06, - "loss": 0.1428, + "loss": 0.0659, "step": 5610 }, { "epoch": 1.56, "learning_rate": 9.589015678634383e-06, - "loss": 0.2918, + "loss": 0.0086, "step": 5611 }, { "epoch": 1.56, "learning_rate": 9.587160218944245e-06, - "loss": 0.0861, + "loss": 0.0105, "step": 5612 }, { "epoch": 1.56, "learning_rate": 9.585304759254105e-06, - "loss": 0.1438, + "loss": 0.1615, "step": 5613 }, { "epoch": 1.56, "learning_rate": 9.583449299563969e-06, - "loss": 0.031, + "loss": 0.0125, "step": 5614 }, { "epoch": 1.56, "learning_rate": 9.581593839873829e-06, - "loss": 0.0894, + "loss": 0.1265, "step": 5615 }, { "epoch": 1.56, "learning_rate": 9.579738380183692e-06, - "loss": 0.0846, + "loss": 0.0565, "step": 5616 }, { "epoch": 1.56, "learning_rate": 9.577882920493553e-06, - "loss": 0.0817, + "loss": 0.1446, "step": 5617 }, { "epoch": 1.56, "learning_rate": 9.576027460803415e-06, - "loss": 0.0302, + "loss": 0.1354, "step": 5618 }, { "epoch": 1.56, "learning_rate": 9.574172001113276e-06, - "loss": 0.1392, + "loss": 0.1448, "step": 5619 }, { "epoch": 1.56, "learning_rate": 9.572316541423138e-06, - "loss": 0.0851, + "loss": 0.0678, "step": 5620 }, { "epoch": 1.56, "learning_rate": 9.570461081733e-06, - "loss": 0.0864, + "loss": 0.09, "step": 5621 }, { "epoch": 1.56, "learning_rate": 9.568605622042862e-06, - "loss": 0.1946, + "loss": 0.1213, "step": 5622 }, { "epoch": 1.56, "learning_rate": 9.566750162352724e-06, - "loss": 0.141, + "loss": 0.0155, "step": 5623 }, { "epoch": 1.57, "learning_rate": 9.564894702662586e-06, - "loss": 0.029, + "loss": 0.0201, "step": 5624 }, { "epoch": 1.57, "learning_rate": 9.563039242972448e-06, - "loss": 0.0803, + "loss": 0.058, "step": 5625 }, { "epoch": 1.57, "learning_rate": 9.561183783282308e-06, - "loss": 0.2504, + "loss": 0.0195, "step": 5626 }, { "epoch": 1.57, "learning_rate": 9.559328323592171e-06, - "loss": 0.1937, + "loss": 0.0125, "step": 5627 }, { "epoch": 1.57, "learning_rate": 9.557472863902032e-06, - "loss": 0.139, + "loss": 0.0179, "step": 5628 }, { "epoch": 1.57, "learning_rate": 9.555617404211895e-06, - "loss": 0.3087, + "loss": 0.0178, "step": 5629 }, { "epoch": 1.57, "learning_rate": 9.553761944521755e-06, - "loss": 0.0842, + "loss": 0.0627, "step": 5630 }, { "epoch": 1.57, "learning_rate": 9.551906484831617e-06, - "loss": 0.0274, + "loss": 0.0193, "step": 5631 }, { "epoch": 1.57, "learning_rate": 9.550051025141479e-06, - "loss": 0.0825, + "loss": 0.0182, "step": 5632 }, { "epoch": 1.57, "learning_rate": 9.548195565451341e-06, - "loss": 0.0269, + "loss": 0.0807, "step": 5633 }, { "epoch": 1.57, "learning_rate": 9.546340105761203e-06, - "loss": 0.2467, + "loss": 0.1807, "step": 5634 }, { "epoch": 1.57, "learning_rate": 9.544484646071065e-06, - "loss": 0.0854, + "loss": 0.1398, "step": 5635 }, { "epoch": 1.57, "learning_rate": 9.542629186380927e-06, - "loss": 0.1329, + "loss": 0.012, "step": 5636 }, { "epoch": 1.57, "learning_rate": 9.540773726690788e-06, - "loss": 0.1405, + "loss": 0.1211, "step": 5637 }, { "epoch": 1.57, "learning_rate": 9.53891826700065e-06, - "loss": 0.1396, + "loss": 0.0614, "step": 5638 }, { "epoch": 1.57, "learning_rate": 9.537062807310512e-06, - "loss": 0.1408, + "loss": 0.0202, "step": 5639 }, { "epoch": 1.57, "learning_rate": 9.535207347620374e-06, - "loss": 0.0845, + "loss": 0.0193, "step": 5640 }, { "epoch": 1.57, "learning_rate": 9.533351887930236e-06, - "loss": 0.0814, + "loss": 0.0682, "step": 5641 }, { "epoch": 1.57, "learning_rate": 9.531496428240096e-06, - "loss": 0.1393, + "loss": 0.013, "step": 5642 }, { "epoch": 1.57, "learning_rate": 9.52964096854996e-06, - "loss": 0.1398, + "loss": 0.0645, "step": 5643 }, { "epoch": 1.57, "learning_rate": 9.52778550885982e-06, - "loss": 0.1417, + "loss": 0.0705, "step": 5644 }, { "epoch": 1.57, "learning_rate": 9.525930049169683e-06, - "loss": 0.085, + "loss": 0.011, "step": 5645 }, { "epoch": 1.57, "learning_rate": 9.524074589479544e-06, - "loss": 0.0855, + "loss": 0.1168, "step": 5646 }, { "epoch": 1.57, "learning_rate": 9.522219129789405e-06, - "loss": 0.195, + "loss": 0.0505, "step": 5647 }, { "epoch": 1.57, "learning_rate": 9.520363670099267e-06, - "loss": 0.1958, + "loss": 0.0664, "step": 5648 }, { "epoch": 1.57, "learning_rate": 9.518508210409129e-06, - "loss": 0.1451, + "loss": 0.016, "step": 5649 }, { "epoch": 1.57, "learning_rate": 9.516652750718991e-06, - "loss": 0.1909, + "loss": 0.12, "step": 5650 }, { "epoch": 1.57, "learning_rate": 9.514797291028853e-06, - "loss": 0.2536, + "loss": 0.1022, "step": 5651 }, { "epoch": 1.57, "learning_rate": 9.512941831338715e-06, - "loss": 0.0824, + "loss": 0.1301, "step": 5652 }, { "epoch": 1.57, "learning_rate": 9.511086371648577e-06, - "loss": 0.2496, + "loss": 0.1377, "step": 5653 }, { "epoch": 1.57, "learning_rate": 9.509230911958438e-06, - "loss": 0.0839, + "loss": 0.0129, "step": 5654 }, { "epoch": 1.57, "learning_rate": 9.5073754522683e-06, - "loss": 0.3104, + "loss": 0.0092, "step": 5655 }, { "epoch": 1.57, "learning_rate": 9.505519992578162e-06, - "loss": 0.1969, + "loss": 0.1208, "step": 5656 }, { "epoch": 1.57, "learning_rate": 9.503664532888024e-06, - "loss": 0.2485, + "loss": 0.0207, "step": 5657 }, { "epoch": 1.57, "learning_rate": 9.501809073197886e-06, - "loss": 0.1972, + "loss": 0.0197, "step": 5658 }, { "epoch": 1.58, "learning_rate": 9.499953613507748e-06, - "loss": 0.1421, + "loss": 0.0237, "step": 5659 }, { "epoch": 1.58, "learning_rate": 9.498098153817608e-06, - "loss": 0.1895, + "loss": 0.0183, "step": 5660 }, { "epoch": 1.58, "learning_rate": 9.496242694127472e-06, - "loss": 0.0861, + "loss": 0.0114, "step": 5661 }, { "epoch": 1.58, "learning_rate": 9.494387234437332e-06, - "loss": 0.034, + "loss": 0.0594, "step": 5662 }, { "epoch": 1.58, "learning_rate": 9.492531774747195e-06, - "loss": 0.2391, + "loss": 0.0631, "step": 5663 }, { "epoch": 1.58, "learning_rate": 9.490676315057055e-06, - "loss": 0.1394, + "loss": 0.0766, "step": 5664 }, { "epoch": 1.58, "learning_rate": 9.488820855366917e-06, - "loss": 0.1916, + "loss": 0.0171, "step": 5665 }, { "epoch": 1.58, "learning_rate": 9.48696539567678e-06, - "loss": 0.134, + "loss": 0.1425, "step": 5666 }, { "epoch": 1.58, "learning_rate": 9.485109935986641e-06, - "loss": 0.137, + "loss": 0.1025, "step": 5667 }, { "epoch": 1.58, "learning_rate": 9.483254476296503e-06, - "loss": 0.0896, + "loss": 0.1075, "step": 5668 }, { "epoch": 1.58, "learning_rate": 9.481399016606365e-06, - "loss": 0.1928, + "loss": 0.1141, "step": 5669 }, { "epoch": 1.58, "learning_rate": 9.479543556916227e-06, - "loss": 0.1896, + "loss": 0.1164, "step": 5670 }, { "epoch": 1.58, "learning_rate": 9.477688097226089e-06, - "loss": 0.0868, + "loss": 0.0929, "step": 5671 }, { "epoch": 1.58, "learning_rate": 9.47583263753595e-06, - "loss": 0.1405, + "loss": 0.0489, "step": 5672 }, { "epoch": 1.58, "learning_rate": 9.473977177845812e-06, - "loss": 0.1926, + "loss": 0.016, "step": 5673 }, { "epoch": 1.58, "learning_rate": 9.472121718155674e-06, - "loss": 0.0857, + "loss": 0.0164, "step": 5674 }, { "epoch": 1.58, "learning_rate": 9.470266258465536e-06, - "loss": 0.1414, + "loss": 0.0763, "step": 5675 }, { "epoch": 1.58, "learning_rate": 9.468410798775396e-06, - "loss": 0.1881, + "loss": 0.0123, "step": 5676 }, { "epoch": 1.58, "learning_rate": 9.46655533908526e-06, - "loss": 0.0373, + "loss": 0.0612, "step": 5677 }, { "epoch": 1.58, "learning_rate": 9.46469987939512e-06, - "loss": 0.1928, + "loss": 0.0145, "step": 5678 }, { "epoch": 1.58, "learning_rate": 9.462844419704984e-06, - "loss": 0.0371, + "loss": 0.0619, "step": 5679 }, { "epoch": 1.58, "learning_rate": 9.460988960014844e-06, - "loss": 0.0864, + "loss": 0.0309, "step": 5680 }, { "epoch": 1.58, "learning_rate": 9.459133500324707e-06, - "loss": 0.1371, + "loss": 0.0155, "step": 5681 }, { "epoch": 1.58, "learning_rate": 9.457278040634567e-06, - "loss": 0.035, + "loss": 0.0911, "step": 5682 }, { "epoch": 1.58, "learning_rate": 9.45542258094443e-06, - "loss": 0.034, + "loss": 0.0101, "step": 5683 }, { "epoch": 1.58, "learning_rate": 9.453567121254291e-06, - "loss": 0.1923, + "loss": 0.159, "step": 5684 }, { "epoch": 1.58, "learning_rate": 9.451711661564153e-06, - "loss": 0.1456, + "loss": 0.0633, "step": 5685 }, { "epoch": 1.58, "learning_rate": 9.449856201874015e-06, - "loss": 0.2455, + "loss": 0.138, "step": 5686 }, { "epoch": 1.58, "learning_rate": 9.448000742183877e-06, - "loss": 0.2432, + "loss": 0.0222, "step": 5687 }, { "epoch": 1.58, "learning_rate": 9.446145282493739e-06, - "loss": 0.085, + "loss": 0.0174, "step": 5688 }, { "epoch": 1.58, "learning_rate": 9.4442898228036e-06, - "loss": 0.1945, + "loss": 0.0625, "step": 5689 }, { "epoch": 1.58, "learning_rate": 9.442434363113462e-06, - "loss": 0.19, + "loss": 0.0538, "step": 5690 }, { "epoch": 1.58, "learning_rate": 9.440578903423324e-06, - "loss": 0.2514, + "loss": 0.0139, "step": 5691 }, { "epoch": 1.58, "learning_rate": 9.438723443733186e-06, - "loss": 0.0833, + "loss": 0.0842, "step": 5692 }, { "epoch": 1.58, "learning_rate": 9.436867984043048e-06, - "loss": 0.1452, + "loss": 0.0159, "step": 5693 }, { "epoch": 1.58, "learning_rate": 9.435012524352908e-06, - "loss": 0.0329, + "loss": 0.0152, "step": 5694 }, { "epoch": 1.59, "learning_rate": 9.433157064662772e-06, - "loss": 0.1911, + "loss": 0.0853, "step": 5695 }, { "epoch": 1.59, "learning_rate": 9.431301604972632e-06, - "loss": 0.1413, + "loss": 0.0119, "step": 5696 }, { "epoch": 1.59, "learning_rate": 9.429446145282496e-06, - "loss": 0.0866, + "loss": 0.0175, "step": 5697 }, { "epoch": 1.59, "learning_rate": 9.427590685592356e-06, - "loss": 0.136, + "loss": 0.0181, "step": 5698 }, { "epoch": 1.59, "learning_rate": 9.425735225902218e-06, - "loss": 0.197, + "loss": 0.0933, "step": 5699 }, { "epoch": 1.59, "learning_rate": 9.42387976621208e-06, - "loss": 0.142, + "loss": 0.0783, "step": 5700 }, { "epoch": 1.59, "learning_rate": 9.422024306521941e-06, - "loss": 0.1393, + "loss": 0.1043, "step": 5701 }, { "epoch": 1.59, "learning_rate": 9.420168846831803e-06, - "loss": 0.0319, + "loss": 0.1102, "step": 5702 }, { "epoch": 1.59, "learning_rate": 9.418313387141665e-06, - "loss": 0.1927, + "loss": 0.012, "step": 5703 }, { "epoch": 1.59, "learning_rate": 9.416457927451527e-06, - "loss": 0.2999, + "loss": 0.0232, "step": 5704 }, { "epoch": 1.59, "learning_rate": 9.414602467761389e-06, - "loss": 0.0332, + "loss": 0.0633, "step": 5705 }, { "epoch": 1.59, "learning_rate": 9.41274700807125e-06, - "loss": 0.1432, + "loss": 0.0823, "step": 5706 }, { "epoch": 1.59, "learning_rate": 9.410891548381113e-06, - "loss": 0.2459, + "loss": 0.1161, "step": 5707 }, { "epoch": 1.59, "learning_rate": 9.409036088690974e-06, - "loss": 0.1391, + "loss": 0.0678, "step": 5708 }, { "epoch": 1.59, "learning_rate": 9.407180629000836e-06, - "loss": 0.1386, + "loss": 0.0537, "step": 5709 }, { "epoch": 1.59, "learning_rate": 9.405325169310696e-06, - "loss": 0.1398, + "loss": 0.0937, "step": 5710 }, { "epoch": 1.59, "learning_rate": 9.40346970962056e-06, - "loss": 0.1399, + "loss": 0.0121, "step": 5711 }, { "epoch": 1.59, "learning_rate": 9.40161424993042e-06, - "loss": 0.1964, + "loss": 0.0503, "step": 5712 }, { "epoch": 1.59, "learning_rate": 9.399758790240284e-06, - "loss": 0.1955, + "loss": 0.0222, "step": 5713 }, { "epoch": 1.59, "learning_rate": 9.397903330550144e-06, - "loss": 0.1411, + "loss": 0.1577, "step": 5714 }, { "epoch": 1.59, "learning_rate": 9.396047870860008e-06, - "loss": 0.1922, + "loss": 0.253, "step": 5715 }, { "epoch": 1.59, "learning_rate": 9.394192411169868e-06, - "loss": 0.0336, + "loss": 0.1521, "step": 5716 }, { "epoch": 1.59, "learning_rate": 9.39233695147973e-06, - "loss": 0.1921, + "loss": 0.2045, "step": 5717 }, { "epoch": 1.59, "learning_rate": 9.390481491789591e-06, - "loss": 0.192, + "loss": 0.1018, "step": 5718 }, { "epoch": 1.59, "learning_rate": 9.388626032099453e-06, - "loss": 0.1417, + "loss": 0.0151, "step": 5719 }, { "epoch": 1.59, "learning_rate": 9.386770572409315e-06, - "loss": 0.1967, + "loss": 0.0489, "step": 5720 }, { "epoch": 1.59, "learning_rate": 9.384915112719177e-06, - "loss": 0.0351, + "loss": 0.0198, "step": 5721 }, { "epoch": 1.59, "learning_rate": 9.383059653029039e-06, - "loss": 0.0874, + "loss": 0.0614, "step": 5722 }, { "epoch": 1.59, "learning_rate": 9.3812041933389e-06, - "loss": 0.0353, + "loss": 0.07, "step": 5723 }, { "epoch": 1.59, "learning_rate": 9.379348733648763e-06, - "loss": 0.1406, + "loss": 0.0476, "step": 5724 }, { "epoch": 1.59, "learning_rate": 9.377493273958625e-06, - "loss": 0.0877, + "loss": 0.0204, "step": 5725 }, { "epoch": 1.59, "learning_rate": 9.375637814268486e-06, - "loss": 0.1921, + "loss": 0.0653, "step": 5726 }, { "epoch": 1.59, "learning_rate": 9.373782354578348e-06, - "loss": 0.1903, + "loss": 0.1144, "step": 5727 }, { "epoch": 1.59, "learning_rate": 9.371926894888208e-06, - "loss": 0.2986, + "loss": 0.1063, "step": 5728 }, { "epoch": 1.59, "learning_rate": 9.370071435198072e-06, - "loss": 0.0861, + "loss": 0.1361, "step": 5729 }, { "epoch": 1.59, "learning_rate": 9.368215975507932e-06, - "loss": 0.1941, + "loss": 0.1616, "step": 5730 }, { "epoch": 1.6, "learning_rate": 9.366360515817796e-06, - "loss": 0.1885, + "loss": 0.1342, "step": 5731 }, { "epoch": 1.6, "learning_rate": 9.364505056127656e-06, - "loss": 0.1381, + "loss": 0.0664, "step": 5732 }, { "epoch": 1.6, "learning_rate": 9.362649596437518e-06, - "loss": 0.0856, + "loss": 0.0566, "step": 5733 }, { "epoch": 1.6, "learning_rate": 9.36079413674738e-06, - "loss": 0.141, + "loss": 0.0199, "step": 5734 }, { "epoch": 1.6, "learning_rate": 9.358938677057242e-06, - "loss": 0.0346, + "loss": 0.0234, "step": 5735 }, { "epoch": 1.6, "learning_rate": 9.357083217367103e-06, - "loss": 0.0882, + "loss": 0.026, "step": 5736 }, { "epoch": 1.6, "learning_rate": 9.355227757676965e-06, - "loss": 0.3017, + "loss": 0.0776, "step": 5737 }, { "epoch": 1.6, "learning_rate": 9.353372297986827e-06, - "loss": 0.0853, + "loss": 0.0477, "step": 5738 }, { "epoch": 1.6, "learning_rate": 9.351516838296689e-06, - "loss": 0.1964, + "loss": 0.2427, "step": 5739 }, { "epoch": 1.6, "learning_rate": 9.349661378606551e-06, - "loss": 0.0849, + "loss": 0.1326, "step": 5740 }, { "epoch": 1.6, "learning_rate": 9.347805918916413e-06, - "loss": 0.2441, + "loss": 0.1326, "step": 5741 }, { "epoch": 1.6, "learning_rate": 9.345950459226275e-06, - "loss": 0.1931, + "loss": 0.0702, "step": 5742 }, { "epoch": 1.6, "learning_rate": 9.344094999536136e-06, - "loss": 0.0338, + "loss": 0.071, "step": 5743 }, { "epoch": 1.6, "learning_rate": 9.342239539845998e-06, - "loss": 0.1941, + "loss": 0.0506, "step": 5744 }, { "epoch": 1.6, "learning_rate": 9.34038408015586e-06, - "loss": 0.2449, + "loss": 0.1191, "step": 5745 }, { "epoch": 1.6, "learning_rate": 9.33852862046572e-06, - "loss": 0.0887, + "loss": 0.1125, "step": 5746 }, { "epoch": 1.6, "learning_rate": 9.336673160775584e-06, - "loss": 0.0336, + "loss": 0.122, "step": 5747 }, { "epoch": 1.6, "learning_rate": 9.334817701085444e-06, - "loss": 0.086, + "loss": 0.2887, "step": 5748 }, { "epoch": 1.6, "learning_rate": 9.332962241395308e-06, - "loss": 0.1928, + "loss": 0.0232, "step": 5749 }, { "epoch": 1.6, "learning_rate": 9.331106781705168e-06, - "loss": 0.0869, + "loss": 0.1498, "step": 5750 }, { "epoch": 1.6, "learning_rate": 9.32925132201503e-06, - "loss": 0.0873, + "loss": 0.176, "step": 5751 }, { "epoch": 1.6, "learning_rate": 9.327395862324892e-06, - "loss": 0.0855, + "loss": 0.072, "step": 5752 }, { "epoch": 1.6, "learning_rate": 9.325540402634754e-06, - "loss": 0.0867, + "loss": 0.1079, "step": 5753 }, { "epoch": 1.6, "learning_rate": 9.323684942944615e-06, - "loss": 0.1414, + "loss": 0.0698, "step": 5754 }, { "epoch": 1.6, "learning_rate": 9.321829483254477e-06, - "loss": 0.0854, + "loss": 0.0193, "step": 5755 }, { "epoch": 1.6, "learning_rate": 9.319974023564339e-06, - "loss": 0.1401, + "loss": 0.0333, "step": 5756 }, { "epoch": 1.6, "learning_rate": 9.318118563874201e-06, - "loss": 0.2487, + "loss": 0.0978, "step": 5757 }, { "epoch": 1.6, "learning_rate": 9.316263104184063e-06, - "loss": 0.0856, + "loss": 0.0251, "step": 5758 }, { "epoch": 1.6, "learning_rate": 9.314407644493925e-06, - "loss": 0.1383, + "loss": 0.0499, "step": 5759 }, { "epoch": 1.6, "learning_rate": 9.312552184803787e-06, - "loss": 0.1947, + "loss": 0.0742, "step": 5760 }, { "epoch": 1.6, "learning_rate": 9.310696725113648e-06, - "loss": 0.083, + "loss": 0.0525, "step": 5761 }, { "epoch": 1.6, "learning_rate": 9.308841265423509e-06, - "loss": 0.1398, + "loss": 0.0417, "step": 5762 }, { "epoch": 1.6, "learning_rate": 9.306985805733372e-06, - "loss": 0.086, + "loss": 0.2728, "step": 5763 }, { "epoch": 1.6, "learning_rate": 9.305130346043232e-06, - "loss": 0.1429, + "loss": 0.2414, "step": 5764 }, { "epoch": 1.6, "learning_rate": 9.303274886353096e-06, - "loss": 0.3541, + "loss": 0.055, "step": 5765 }, { "epoch": 1.6, "learning_rate": 9.301419426662956e-06, - "loss": 0.1402, + "loss": 0.0976, "step": 5766 }, { "epoch": 1.61, "learning_rate": 9.299563966972818e-06, - "loss": 0.0872, + "loss": 0.0146, "step": 5767 }, { "epoch": 1.61, "learning_rate": 9.29770850728268e-06, - "loss": 0.2434, + "loss": 0.1448, "step": 5768 }, { "epoch": 1.61, "learning_rate": 9.295853047592542e-06, - "loss": 0.0844, + "loss": 0.021, "step": 5769 }, { "epoch": 1.61, "learning_rate": 9.293997587902404e-06, - "loss": 0.1956, + "loss": 0.1452, "step": 5770 }, { "epoch": 1.61, "learning_rate": 9.292142128212265e-06, - "loss": 0.1375, + "loss": 0.0436, "step": 5771 }, { "epoch": 1.61, "learning_rate": 9.290286668522127e-06, - "loss": 0.1347, + "loss": 0.1126, "step": 5772 }, { "epoch": 1.61, "learning_rate": 9.288431208831988e-06, - "loss": 0.1439, + "loss": 0.0177, "step": 5773 }, { "epoch": 1.61, "learning_rate": 9.286575749141851e-06, - "loss": 0.0318, + "loss": 0.0797, "step": 5774 }, { "epoch": 1.61, "learning_rate": 9.284720289451711e-06, - "loss": 0.1365, + "loss": 0.078, "step": 5775 }, { "epoch": 1.61, "learning_rate": 9.282864829761575e-06, - "loss": 0.1425, + "loss": 0.0701, "step": 5776 }, { "epoch": 1.61, "learning_rate": 9.281009370071435e-06, - "loss": 0.1881, + "loss": 0.0176, "step": 5777 }, { "epoch": 1.61, "learning_rate": 9.279153910381299e-06, - "loss": 0.1917, + "loss": 0.0475, "step": 5778 }, { "epoch": 1.61, "learning_rate": 9.277298450691159e-06, - "loss": 0.0879, + "loss": 0.0597, "step": 5779 }, { "epoch": 1.61, "learning_rate": 9.27544299100102e-06, - "loss": 0.1365, + "loss": 0.0591, "step": 5780 }, { "epoch": 1.61, "learning_rate": 9.273587531310882e-06, - "loss": 0.2448, + "loss": 0.0729, "step": 5781 }, { "epoch": 1.61, "learning_rate": 9.271732071620744e-06, - "loss": 0.2409, + "loss": 0.0399, "step": 5782 }, { "epoch": 1.61, "learning_rate": 9.269876611930606e-06, - "loss": 0.0852, + "loss": 0.0887, "step": 5783 }, { "epoch": 1.61, "learning_rate": 9.268021152240468e-06, - "loss": 0.0871, + "loss": 0.1275, "step": 5784 }, { "epoch": 1.61, "learning_rate": 9.26616569255033e-06, - "loss": 0.0838, + "loss": 0.073, "step": 5785 }, { "epoch": 1.61, "learning_rate": 9.264310232860192e-06, - "loss": 0.0337, + "loss": 0.1068, "step": 5786 }, { "epoch": 1.61, "learning_rate": 9.262454773170054e-06, - "loss": 0.2479, + "loss": 0.076, "step": 5787 }, { "epoch": 1.61, "learning_rate": 9.260599313479916e-06, - "loss": 0.1384, + "loss": 0.1048, "step": 5788 }, { "epoch": 1.61, "learning_rate": 9.258743853789777e-06, - "loss": 0.0834, + "loss": 0.0168, "step": 5789 }, { "epoch": 1.61, "learning_rate": 9.25688839409964e-06, - "loss": 0.0853, + "loss": 0.0624, "step": 5790 }, { "epoch": 1.61, "learning_rate": 9.2550329344095e-06, - "loss": 0.2431, + "loss": 0.1195, "step": 5791 }, { "epoch": 1.61, "learning_rate": 9.253177474719363e-06, - "loss": 0.0872, + "loss": 0.0939, "step": 5792 }, { "epoch": 1.61, "learning_rate": 9.251322015029223e-06, - "loss": 0.2427, + "loss": 0.0237, "step": 5793 }, { "epoch": 1.61, "learning_rate": 9.249466555339087e-06, - "loss": 0.1954, + "loss": 0.231, "step": 5794 }, { "epoch": 1.61, "learning_rate": 9.247611095648947e-06, - "loss": 0.141, + "loss": 0.0575, "step": 5795 }, { "epoch": 1.61, "learning_rate": 9.245755635958809e-06, - "loss": 0.1902, + "loss": 0.1306, "step": 5796 }, { "epoch": 1.61, "learning_rate": 9.24390017626867e-06, - "loss": 0.0872, + "loss": 0.0478, "step": 5797 }, { "epoch": 1.61, "learning_rate": 9.242044716578533e-06, - "loss": 0.1399, + "loss": 0.1304, "step": 5798 }, { "epoch": 1.61, "learning_rate": 9.240189256888394e-06, - "loss": 0.193, + "loss": 0.1358, "step": 5799 }, { "epoch": 1.61, "learning_rate": 9.238333797198256e-06, - "loss": 0.1899, + "loss": 0.017, "step": 5800 }, { "epoch": 1.61, "learning_rate": 9.236478337508118e-06, - "loss": 0.034, + "loss": 0.0489, "step": 5801 }, { "epoch": 1.61, "learning_rate": 9.23462287781798e-06, - "loss": 0.3006, + "loss": 0.1447, "step": 5802 }, { "epoch": 1.62, "learning_rate": 9.232767418127842e-06, - "loss": 0.1906, + "loss": 0.0637, "step": 5803 }, { "epoch": 1.62, "learning_rate": 9.230911958437704e-06, - "loss": 0.1932, + "loss": 0.0555, "step": 5804 }, { "epoch": 1.62, "learning_rate": 9.229056498747566e-06, - "loss": 0.1395, + "loss": 0.0249, "step": 5805 }, { "epoch": 1.62, "learning_rate": 9.227201039057428e-06, - "loss": 0.1895, + "loss": 0.1763, "step": 5806 }, { "epoch": 1.62, "learning_rate": 9.22534557936729e-06, - "loss": 0.0346, + "loss": 0.2364, "step": 5807 }, { "epoch": 1.62, "learning_rate": 9.223490119677151e-06, - "loss": 0.1881, + "loss": 0.0516, "step": 5808 }, { "epoch": 1.62, "learning_rate": 9.221634659987011e-06, - "loss": 0.2968, + "loss": 0.0259, "step": 5809 }, { "epoch": 1.62, "learning_rate": 9.219779200296875e-06, - "loss": 0.2439, + "loss": 0.1235, "step": 5810 }, { "epoch": 1.62, "learning_rate": 9.217923740606735e-06, - "loss": 0.1901, + "loss": 0.1531, "step": 5811 }, { "epoch": 1.62, "learning_rate": 9.216068280916599e-06, - "loss": 0.0377, + "loss": 0.0201, "step": 5812 }, { "epoch": 1.62, "learning_rate": 9.214212821226459e-06, - "loss": 0.2346, + "loss": 0.0798, "step": 5813 }, { "epoch": 1.62, "learning_rate": 9.21235736153632e-06, - "loss": 0.1898, + "loss": 0.0383, "step": 5814 }, { "epoch": 1.62, "learning_rate": 9.210501901846183e-06, - "loss": 0.0384, + "loss": 0.1217, "step": 5815 }, { "epoch": 1.62, "learning_rate": 9.208646442156045e-06, - "loss": 0.1417, + "loss": 0.1413, "step": 5816 }, { "epoch": 1.62, "learning_rate": 9.206790982465906e-06, - "loss": 0.1854, + "loss": 0.0268, "step": 5817 }, { "epoch": 1.62, "learning_rate": 9.204935522775768e-06, - "loss": 0.0889, + "loss": 0.0242, "step": 5818 }, { "epoch": 1.62, "learning_rate": 9.20308006308563e-06, - "loss": 0.0396, + "loss": 0.092, "step": 5819 }, { "epoch": 1.62, "learning_rate": 9.201224603395492e-06, - "loss": 0.191, + "loss": 0.0274, "step": 5820 }, { "epoch": 1.62, "learning_rate": 9.199369143705354e-06, - "loss": 0.0869, + "loss": 0.0619, "step": 5821 }, { "epoch": 1.62, "learning_rate": 9.197513684015216e-06, - "loss": 0.1405, + "loss": 0.0209, "step": 5822 }, { "epoch": 1.62, "learning_rate": 9.195658224325078e-06, - "loss": 0.1945, + "loss": 0.0899, "step": 5823 }, { "epoch": 1.62, "learning_rate": 9.19380276463494e-06, - "loss": 0.2966, + "loss": 0.1238, "step": 5824 }, { "epoch": 1.62, "learning_rate": 9.1919473049448e-06, - "loss": 0.1925, + "loss": 0.0865, "step": 5825 }, { "epoch": 1.62, "learning_rate": 9.190091845254663e-06, - "loss": 0.0892, + "loss": 0.0758, "step": 5826 }, { "epoch": 1.62, "learning_rate": 9.188236385564523e-06, - "loss": 0.1917, + "loss": 0.159, "step": 5827 }, { "epoch": 1.62, "learning_rate": 9.186380925874387e-06, - "loss": 0.1361, + "loss": 0.0242, "step": 5828 }, { "epoch": 1.62, "learning_rate": 9.184525466184247e-06, - "loss": 0.0361, + "loss": 0.1257, "step": 5829 }, { "epoch": 1.62, "learning_rate": 9.182670006494109e-06, - "loss": 0.0927, + "loss": 0.1147, "step": 5830 }, { "epoch": 1.62, "learning_rate": 9.180814546803971e-06, - "loss": 0.2914, + "loss": 0.1261, "step": 5831 }, { "epoch": 1.62, "learning_rate": 9.178959087113833e-06, - "loss": 0.2964, + "loss": 0.1173, "step": 5832 }, { "epoch": 1.62, "learning_rate": 9.177103627423695e-06, - "loss": 0.2924, + "loss": 0.0722, "step": 5833 }, { "epoch": 1.62, "learning_rate": 9.175248167733557e-06, - "loss": 0.09, + "loss": 0.056, "step": 5834 }, { "epoch": 1.62, "learning_rate": 9.173392708043418e-06, - "loss": 0.19, + "loss": 0.0197, "step": 5835 }, { "epoch": 1.62, "learning_rate": 9.17153724835328e-06, - "loss": 0.0872, + "loss": 0.1821, "step": 5836 }, { "epoch": 1.62, "learning_rate": 9.169681788663142e-06, - "loss": 0.1373, + "loss": 0.054, "step": 5837 }, { "epoch": 1.62, "learning_rate": 9.167826328973004e-06, - "loss": 0.0381, + "loss": 0.0708, "step": 5838 }, { "epoch": 1.63, "learning_rate": 9.165970869282866e-06, - "loss": 0.2409, + "loss": 0.0783, "step": 5839 }, { "epoch": 1.63, "learning_rate": 9.164115409592728e-06, - "loss": 0.1919, + "loss": 0.0139, "step": 5840 }, { "epoch": 1.63, "learning_rate": 9.16225994990259e-06, - "loss": 0.1892, + "loss": 0.0906, "step": 5841 }, { "epoch": 1.63, "learning_rate": 9.160404490212452e-06, - "loss": 0.189, + "loss": 0.0186, "step": 5842 }, { "epoch": 1.63, "learning_rate": 9.158549030522312e-06, - "loss": 0.2401, + "loss": 0.0186, "step": 5843 }, { "epoch": 1.63, "learning_rate": 9.156693570832175e-06, - "loss": 0.1397, + "loss": 0.2157, "step": 5844 }, { "epoch": 1.63, "learning_rate": 9.154838111142035e-06, - "loss": 0.0378, + "loss": 0.0529, "step": 5845 }, { "epoch": 1.63, "learning_rate": 9.152982651451899e-06, - "loss": 0.2853, + "loss": 0.0612, "step": 5846 }, { "epoch": 1.63, "learning_rate": 9.15112719176176e-06, - "loss": 0.1348, + "loss": 0.0237, "step": 5847 }, { "epoch": 1.63, "learning_rate": 9.149271732071621e-06, - "loss": 0.3423, + "loss": 0.2207, "step": 5848 }, { "epoch": 1.63, "learning_rate": 9.147416272381483e-06, - "loss": 0.0922, + "loss": 0.0391, "step": 5849 }, { "epoch": 1.63, "learning_rate": 9.145560812691345e-06, - "loss": 0.09, + "loss": 0.0205, "step": 5850 }, { "epoch": 1.63, "learning_rate": 9.143705353001207e-06, - "loss": 0.1896, + "loss": 0.0172, "step": 5851 }, { "epoch": 1.63, "learning_rate": 9.141849893311069e-06, - "loss": 0.1915, + "loss": 0.0836, "step": 5852 }, { "epoch": 1.63, "learning_rate": 9.13999443362093e-06, - "loss": 0.1422, + "loss": 0.0679, "step": 5853 }, { "epoch": 1.63, "learning_rate": 9.138138973930792e-06, - "loss": 0.043, + "loss": 0.1575, "step": 5854 }, { "epoch": 1.63, "learning_rate": 9.136283514240654e-06, - "loss": 0.1903, + "loss": 0.0205, "step": 5855 }, { "epoch": 1.63, "learning_rate": 9.134428054550516e-06, - "loss": 0.0405, + "loss": 0.1362, "step": 5856 }, { "epoch": 1.63, "learning_rate": 9.132572594860378e-06, - "loss": 0.1418, + "loss": 0.0572, "step": 5857 }, { "epoch": 1.63, "learning_rate": 9.13071713517024e-06, - "loss": 0.1937, + "loss": 0.1341, "step": 5858 }, { "epoch": 1.63, "learning_rate": 9.1288616754801e-06, - "loss": 0.1399, + "loss": 0.0155, "step": 5859 }, { "epoch": 1.63, "learning_rate": 9.127006215789963e-06, - "loss": 0.1845, + "loss": 0.0272, "step": 5860 }, { "epoch": 1.63, "learning_rate": 9.125150756099824e-06, - "loss": 0.0904, + "loss": 0.0692, "step": 5861 }, { "epoch": 1.63, "learning_rate": 9.123295296409687e-06, - "loss": 0.1386, + "loss": 0.1559, "step": 5862 }, { "epoch": 1.63, "learning_rate": 9.121439836719547e-06, - "loss": 0.1922, + "loss": 0.0269, "step": 5863 }, { "epoch": 1.63, "learning_rate": 9.119584377029411e-06, - "loss": 0.1391, + "loss": 0.0262, "step": 5864 }, { "epoch": 1.63, "learning_rate": 9.117728917339271e-06, - "loss": 0.1901, + "loss": 0.1289, "step": 5865 }, { "epoch": 1.63, "learning_rate": 9.115873457649133e-06, - "loss": 0.2967, + "loss": 0.08, "step": 5866 }, { "epoch": 1.63, "learning_rate": 9.114017997958995e-06, - "loss": 0.1881, + "loss": 0.0532, "step": 5867 }, { "epoch": 1.63, "learning_rate": 9.112162538268857e-06, - "loss": 0.1929, + "loss": 0.0623, "step": 5868 }, { "epoch": 1.63, "learning_rate": 9.110307078578719e-06, - "loss": 0.1402, + "loss": 0.2867, "step": 5869 }, { "epoch": 1.63, "learning_rate": 9.10845161888858e-06, - "loss": 0.0874, + "loss": 0.0207, "step": 5870 }, { "epoch": 1.63, "learning_rate": 9.106596159198442e-06, - "loss": 0.1925, + "loss": 0.0136, "step": 5871 }, { "epoch": 1.63, "learning_rate": 9.104740699508304e-06, - "loss": 0.0893, + "loss": 0.1834, "step": 5872 }, { "epoch": 1.63, "learning_rate": 9.102885239818166e-06, - "loss": 0.0893, + "loss": 0.1401, "step": 5873 }, { "epoch": 1.63, "learning_rate": 9.101029780128028e-06, - "loss": 0.086, + "loss": 0.0256, "step": 5874 }, { "epoch": 1.64, "learning_rate": 9.09917432043789e-06, - "loss": 0.0366, + "loss": 0.0601, "step": 5875 }, { "epoch": 1.64, "learning_rate": 9.097318860747752e-06, - "loss": 0.1907, + "loss": 0.0208, "step": 5876 }, { "epoch": 1.64, "learning_rate": 9.095463401057612e-06, - "loss": 0.0364, + "loss": 0.1847, "step": 5877 }, { "epoch": 1.64, "learning_rate": 9.093607941367475e-06, - "loss": 0.0872, + "loss": 0.0374, "step": 5878 }, { "epoch": 1.64, "learning_rate": 9.091752481677336e-06, - "loss": 0.0875, + "loss": 0.0844, "step": 5879 }, { "epoch": 1.64, "learning_rate": 9.0898970219872e-06, - "loss": 0.1412, + "loss": 0.0418, "step": 5880 }, { "epoch": 1.64, "learning_rate": 9.08804156229706e-06, - "loss": 0.0348, + "loss": 0.0659, "step": 5881 }, { "epoch": 1.64, "learning_rate": 9.086186102606921e-06, - "loss": 0.1932, + "loss": 0.1212, "step": 5882 }, { "epoch": 1.64, "learning_rate": 9.084330642916783e-06, - "loss": 0.0862, + "loss": 0.0841, "step": 5883 }, { "epoch": 1.64, "learning_rate": 9.082475183226645e-06, - "loss": 0.1948, + "loss": 0.1352, "step": 5884 }, { "epoch": 1.64, "learning_rate": 9.080619723536507e-06, - "loss": 0.198, + "loss": 0.0248, "step": 5885 }, { "epoch": 1.64, "learning_rate": 9.078764263846369e-06, - "loss": 0.188, + "loss": 0.1653, "step": 5886 }, { "epoch": 1.64, "learning_rate": 9.07690880415623e-06, - "loss": 0.2475, + "loss": 0.1189, "step": 5887 }, { "epoch": 1.64, "learning_rate": 9.075053344466092e-06, - "loss": 0.1369, + "loss": 0.0631, "step": 5888 }, { "epoch": 1.64, "learning_rate": 9.073197884775954e-06, - "loss": 0.0307, + "loss": 0.098, "step": 5889 }, { "epoch": 1.64, "learning_rate": 9.071342425085816e-06, - "loss": 0.1908, + "loss": 0.0939, "step": 5890 }, { "epoch": 1.64, "learning_rate": 9.069486965395678e-06, - "loss": 0.0817, + "loss": 0.0661, "step": 5891 }, { "epoch": 1.64, "learning_rate": 9.06763150570554e-06, - "loss": 0.1402, + "loss": 0.0354, "step": 5892 }, { "epoch": 1.64, "learning_rate": 9.0657760460154e-06, - "loss": 0.138, + "loss": 0.1364, "step": 5893 }, { "epoch": 1.64, "learning_rate": 9.063920586325264e-06, - "loss": 0.0295, + "loss": 0.0538, "step": 5894 }, { "epoch": 1.64, "learning_rate": 9.062065126635124e-06, - "loss": 0.0861, + "loss": 0.0398, "step": 5895 }, { "epoch": 1.64, "learning_rate": 9.060209666944987e-06, - "loss": 0.1353, + "loss": 0.0187, "step": 5896 }, { "epoch": 1.64, "learning_rate": 9.058354207254848e-06, - "loss": 0.0857, + "loss": 0.0193, "step": 5897 }, { "epoch": 1.64, "learning_rate": 9.056498747564711e-06, - "loss": 0.1972, + "loss": 0.0244, "step": 5898 }, { "epoch": 1.64, "learning_rate": 9.054643287874571e-06, - "loss": 0.0864, + "loss": 0.0132, "step": 5899 }, { "epoch": 1.64, "learning_rate": 9.052787828184433e-06, - "loss": 0.0861, + "loss": 0.0119, "step": 5900 }, { "epoch": 1.64, "learning_rate": 9.050932368494295e-06, - "loss": 0.0853, + "loss": 0.0126, "step": 5901 }, { "epoch": 1.64, "learning_rate": 9.049076908804157e-06, - "loss": 0.1968, + "loss": 0.1234, "step": 5902 }, { "epoch": 1.64, "learning_rate": 9.047221449114019e-06, - "loss": 0.3031, + "loss": 0.1506, "step": 5903 }, { "epoch": 1.64, "learning_rate": 9.04536598942388e-06, - "loss": 0.1359, + "loss": 0.0093, "step": 5904 }, { "epoch": 1.64, "learning_rate": 9.043510529733743e-06, - "loss": 0.2529, + "loss": 0.1601, "step": 5905 }, { "epoch": 1.64, "learning_rate": 9.041655070043604e-06, - "loss": 0.1382, + "loss": 0.11, "step": 5906 }, { "epoch": 1.64, "learning_rate": 9.039799610353466e-06, - "loss": 0.08, + "loss": 0.0532, "step": 5907 }, { "epoch": 1.64, "learning_rate": 9.037944150663328e-06, - "loss": 0.1387, + "loss": 0.0486, "step": 5908 }, { "epoch": 1.64, "learning_rate": 9.03608869097319e-06, - "loss": 0.0293, + "loss": 0.0106, "step": 5909 }, { "epoch": 1.64, "learning_rate": 9.034233231283052e-06, - "loss": 0.1408, + "loss": 0.0729, "step": 5910 }, { "epoch": 1.65, "learning_rate": 9.032377771592912e-06, - "loss": 0.0825, + "loss": 0.1139, "step": 5911 }, { "epoch": 1.65, "learning_rate": 9.030522311902776e-06, - "loss": 0.085, + "loss": 0.0671, "step": 5912 }, { "epoch": 1.65, "learning_rate": 9.028666852212636e-06, - "loss": 0.1353, + "loss": 0.0194, "step": 5913 }, { "epoch": 1.65, "learning_rate": 9.0268113925225e-06, - "loss": 0.0842, + "loss": 0.1324, "step": 5914 }, { "epoch": 1.65, "learning_rate": 9.02495593283236e-06, - "loss": 0.1388, + "loss": 0.17, "step": 5915 }, { "epoch": 1.65, "learning_rate": 9.023100473142221e-06, - "loss": 0.0894, + "loss": 0.0844, "step": 5916 }, { "epoch": 1.65, "learning_rate": 9.021245013452083e-06, - "loss": 0.3073, + "loss": 0.1143, "step": 5917 }, { "epoch": 1.65, "learning_rate": 9.019389553761945e-06, - "loss": 0.029, + "loss": 0.0687, "step": 5918 }, { "epoch": 1.65, "learning_rate": 9.017534094071807e-06, - "loss": 0.2501, + "loss": 0.0862, "step": 5919 }, { "epoch": 1.65, "learning_rate": 9.015678634381669e-06, - "loss": 0.1376, + "loss": 0.0325, "step": 5920 }, { "epoch": 1.65, "learning_rate": 9.01382317469153e-06, - "loss": 0.3673, + "loss": 0.0556, "step": 5921 }, { "epoch": 1.65, "learning_rate": 9.011967715001391e-06, - "loss": 0.1943, + "loss": 0.1185, "step": 5922 }, { "epoch": 1.65, "learning_rate": 9.010112255311255e-06, - "loss": 0.1371, + "loss": 0.0159, "step": 5923 }, { "epoch": 1.65, "learning_rate": 9.008256795621115e-06, - "loss": 0.085, + "loss": 0.0456, "step": 5924 }, { "epoch": 1.65, "learning_rate": 9.006401335930978e-06, - "loss": 0.1384, + "loss": 0.0228, "step": 5925 }, { "epoch": 1.65, "learning_rate": 9.004545876240838e-06, - "loss": 0.1367, + "loss": 0.1028, "step": 5926 }, { "epoch": 1.65, "learning_rate": 9.002690416550702e-06, - "loss": 0.1941, + "loss": 0.0243, "step": 5927 }, { "epoch": 1.65, "learning_rate": 9.000834956860562e-06, - "loss": 0.0843, + "loss": 0.0186, "step": 5928 }, { "epoch": 1.65, "learning_rate": 8.998979497170424e-06, - "loss": 0.0885, + "loss": 0.0817, "step": 5929 }, { "epoch": 1.65, "learning_rate": 8.997124037480286e-06, - "loss": 0.1375, + "loss": 0.0856, "step": 5930 }, { "epoch": 1.65, "learning_rate": 8.995268577790148e-06, - "loss": 0.0826, + "loss": 0.2027, "step": 5931 }, { "epoch": 1.65, "learning_rate": 8.99341311810001e-06, - "loss": 0.2465, + "loss": 0.1134, "step": 5932 }, { "epoch": 1.65, "learning_rate": 8.991557658409872e-06, - "loss": 0.0869, + "loss": 0.0312, "step": 5933 }, { "epoch": 1.65, "learning_rate": 8.989702198719733e-06, - "loss": 0.0856, + "loss": 0.0563, "step": 5934 }, { "epoch": 1.65, "learning_rate": 8.987846739029595e-06, - "loss": 0.033, + "loss": 0.1322, "step": 5935 }, { "epoch": 1.65, "learning_rate": 8.985991279339457e-06, - "loss": 0.0791, + "loss": 0.0907, "step": 5936 }, { "epoch": 1.65, "learning_rate": 8.984135819649319e-06, - "loss": 0.0894, + "loss": 0.0556, "step": 5937 }, { "epoch": 1.65, "learning_rate": 8.982280359959181e-06, - "loss": 0.0314, + "loss": 0.0853, "step": 5938 }, { "epoch": 1.65, "learning_rate": 8.980424900269043e-06, - "loss": 0.1375, + "loss": 0.0817, "step": 5939 }, { "epoch": 1.65, "learning_rate": 8.978569440578903e-06, - "loss": 0.0861, + "loss": 0.0139, "step": 5940 }, { "epoch": 1.65, "learning_rate": 8.976713980888767e-06, - "loss": 0.0868, + "loss": 0.062, "step": 5941 }, { "epoch": 1.65, "learning_rate": 8.974858521198627e-06, - "loss": 0.0854, + "loss": 0.0808, "step": 5942 }, { "epoch": 1.65, "learning_rate": 8.97300306150849e-06, - "loss": 0.1395, + "loss": 0.0728, "step": 5943 }, { "epoch": 1.65, "learning_rate": 8.97114760181835e-06, - "loss": 0.0288, + "loss": 0.0932, "step": 5944 }, { "epoch": 1.65, "learning_rate": 8.969292142128212e-06, - "loss": 0.2972, + "loss": 0.0222, "step": 5945 }, { "epoch": 1.65, "learning_rate": 8.967436682438074e-06, - "loss": 0.1343, + "loss": 0.1377, "step": 5946 }, { "epoch": 1.66, "learning_rate": 8.965581222747936e-06, - "loss": 0.1404, + "loss": 0.0521, "step": 5947 }, { "epoch": 1.66, "learning_rate": 8.963725763057798e-06, - "loss": 0.252, + "loss": 0.1608, "step": 5948 }, { "epoch": 1.66, "learning_rate": 8.96187030336766e-06, - "loss": 0.1858, + "loss": 0.0121, "step": 5949 }, { "epoch": 1.66, "learning_rate": 8.960014843677522e-06, - "loss": 0.3078, + "loss": 0.1437, "step": 5950 }, { "epoch": 1.66, "learning_rate": 8.958159383987384e-06, - "loss": 0.1974, + "loss": 0.0195, "step": 5951 }, { "epoch": 1.66, "learning_rate": 8.956303924297245e-06, - "loss": 0.1422, + "loss": 0.1159, "step": 5952 }, { "epoch": 1.66, "learning_rate": 8.954448464607107e-06, - "loss": 0.0831, + "loss": 0.0596, "step": 5953 }, { "epoch": 1.66, "learning_rate": 8.95259300491697e-06, - "loss": 0.1416, + "loss": 0.0465, "step": 5954 }, { "epoch": 1.66, "learning_rate": 8.950737545226831e-06, - "loss": 0.0855, + "loss": 0.0323, "step": 5955 }, { "epoch": 1.66, "learning_rate": 8.948882085536691e-06, - "loss": 0.0861, + "loss": 0.0663, "step": 5956 }, { "epoch": 1.66, "learning_rate": 8.947026625846555e-06, - "loss": 0.031, + "loss": 0.0992, "step": 5957 }, { "epoch": 1.66, "learning_rate": 8.945171166156415e-06, - "loss": 0.0841, + "loss": 0.1126, "step": 5958 }, { "epoch": 1.66, "learning_rate": 8.943315706466279e-06, - "loss": 0.0314, + "loss": 0.0106, "step": 5959 }, { "epoch": 1.66, "learning_rate": 8.941460246776139e-06, - "loss": 0.1392, + "loss": 0.0203, "step": 5960 }, { "epoch": 1.66, "learning_rate": 8.939604787086002e-06, - "loss": 0.1952, + "loss": 0.0124, "step": 5961 }, { "epoch": 1.66, "learning_rate": 8.937749327395862e-06, - "loss": 0.1407, + "loss": 0.2419, "step": 5962 }, { "epoch": 1.66, "learning_rate": 8.935893867705724e-06, - "loss": 0.1935, + "loss": 0.1848, "step": 5963 }, { "epoch": 1.66, "learning_rate": 8.934038408015586e-06, - "loss": 0.1927, + "loss": 0.0431, "step": 5964 }, { "epoch": 1.66, "learning_rate": 8.932182948325448e-06, - "loss": 0.0297, + "loss": 0.2379, "step": 5965 }, { "epoch": 1.66, "learning_rate": 8.93032748863531e-06, - "loss": 0.2532, + "loss": 0.0939, "step": 5966 }, { "epoch": 1.66, "learning_rate": 8.928472028945172e-06, - "loss": 0.1945, + "loss": 0.0161, "step": 5967 }, { "epoch": 1.66, "learning_rate": 8.926616569255034e-06, - "loss": 0.0829, + "loss": 0.0681, "step": 5968 }, { "epoch": 1.66, "learning_rate": 8.924761109564896e-06, - "loss": 0.3519, + "loss": 0.0643, "step": 5969 }, { "epoch": 1.66, "learning_rate": 8.922905649874757e-06, - "loss": 0.0835, + "loss": 0.0659, "step": 5970 }, { "epoch": 1.66, "learning_rate": 8.92105019018462e-06, - "loss": 0.087, + "loss": 0.0576, "step": 5971 }, { "epoch": 1.66, "learning_rate": 8.919194730494481e-06, - "loss": 0.1899, + "loss": 0.1558, "step": 5972 }, { "epoch": 1.66, "learning_rate": 8.917339270804343e-06, - "loss": 0.139, + "loss": 0.1042, "step": 5973 }, { "epoch": 1.66, "learning_rate": 8.915483811114203e-06, - "loss": 0.193, + "loss": 0.0141, "step": 5974 }, { "epoch": 1.66, "learning_rate": 8.913628351424067e-06, - "loss": 0.0854, + "loss": 0.1754, "step": 5975 }, { "epoch": 1.66, "learning_rate": 8.911772891733927e-06, - "loss": 0.1967, + "loss": 0.1752, "step": 5976 }, { "epoch": 1.66, "learning_rate": 8.90991743204379e-06, - "loss": 0.0838, + "loss": 0.0472, "step": 5977 }, { "epoch": 1.66, "learning_rate": 8.90806197235365e-06, - "loss": 0.2453, + "loss": 0.0322, "step": 5978 }, { "epoch": 1.66, "learning_rate": 8.906206512663513e-06, - "loss": 0.0857, + "loss": 0.0217, "step": 5979 }, { "epoch": 1.66, "learning_rate": 8.904351052973374e-06, - "loss": 0.1376, + "loss": 0.0932, "step": 5980 }, { "epoch": 1.66, "learning_rate": 8.902495593283236e-06, - "loss": 0.1318, + "loss": 0.0293, "step": 5981 }, { "epoch": 1.66, "learning_rate": 8.900640133593098e-06, - "loss": 0.1421, + "loss": 0.0348, "step": 5982 }, { "epoch": 1.67, "learning_rate": 8.89878467390296e-06, - "loss": 0.194, + "loss": 0.0281, "step": 5983 }, { "epoch": 1.67, "learning_rate": 8.896929214212822e-06, - "loss": 0.2451, + "loss": 0.2377, "step": 5984 }, { "epoch": 1.67, "learning_rate": 8.895073754522684e-06, - "loss": 0.1401, + "loss": 0.0352, "step": 5985 }, { "epoch": 1.67, "learning_rate": 8.893218294832546e-06, - "loss": 0.087, + "loss": 0.085, "step": 5986 }, { "epoch": 1.67, "learning_rate": 8.891362835142407e-06, - "loss": 0.1948, + "loss": 0.0854, "step": 5987 }, { "epoch": 1.67, "learning_rate": 8.88950737545227e-06, - "loss": 0.1985, + "loss": 0.0972, "step": 5988 }, { "epoch": 1.67, "learning_rate": 8.887651915762131e-06, - "loss": 0.194, + "loss": 0.0147, "step": 5989 }, { "epoch": 1.67, "learning_rate": 8.885796456071991e-06, - "loss": 0.1957, + "loss": 0.0573, "step": 5990 }, { "epoch": 1.67, "learning_rate": 8.883940996381855e-06, - "loss": 0.0869, + "loss": 0.0711, "step": 5991 }, { "epoch": 1.67, "learning_rate": 8.882085536691715e-06, - "loss": 0.1941, + "loss": 0.0272, "step": 5992 }, { "epoch": 1.67, "learning_rate": 8.880230077001579e-06, - "loss": 0.1914, + "loss": 0.054, "step": 5993 }, { "epoch": 1.67, "learning_rate": 8.878374617311439e-06, - "loss": 0.0865, + "loss": 0.082, "step": 5994 }, { "epoch": 1.67, "learning_rate": 8.876519157621302e-06, - "loss": 0.1397, + "loss": 0.0542, "step": 5995 }, { "epoch": 1.67, "learning_rate": 8.874663697931163e-06, - "loss": 0.1416, + "loss": 0.0503, "step": 5996 }, { "epoch": 1.67, "learning_rate": 8.872808238241025e-06, - "loss": 0.1934, + "loss": 0.1146, "step": 5997 }, { "epoch": 1.67, "learning_rate": 8.870952778550886e-06, - "loss": 0.2434, + "loss": 0.1378, "step": 5998 }, { "epoch": 1.67, "learning_rate": 8.869097318860748e-06, - "loss": 0.2417, + "loss": 0.048, "step": 5999 }, { "epoch": 1.67, "learning_rate": 8.86724185917061e-06, - "loss": 0.1379, + "loss": 0.0076, "step": 6000 }, { "epoch": 1.67, "learning_rate": 8.865386399480472e-06, - "loss": 0.1908, + "loss": 0.0126, "step": 6001 }, { "epoch": 1.67, "learning_rate": 8.863530939790334e-06, - "loss": 0.1377, + "loss": 0.0456, "step": 6002 }, { "epoch": 1.67, "learning_rate": 8.861675480100196e-06, - "loss": 0.0369, + "loss": 0.048, "step": 6003 }, { "epoch": 1.67, "learning_rate": 8.859820020410058e-06, - "loss": 0.0371, + "loss": 0.0737, "step": 6004 }, { "epoch": 1.67, "learning_rate": 8.85796456071992e-06, - "loss": 0.0886, + "loss": 0.0158, "step": 6005 }, { "epoch": 1.67, "learning_rate": 8.856109101029781e-06, - "loss": 0.0887, + "loss": 0.1056, "step": 6006 }, { "epoch": 1.67, "learning_rate": 8.854253641339643e-06, - "loss": 0.1917, + "loss": 0.1436, "step": 6007 }, { "epoch": 1.67, "learning_rate": 8.852398181649503e-06, - "loss": 0.0885, + "loss": 0.1481, "step": 6008 }, { "epoch": 1.67, "learning_rate": 8.850542721959367e-06, - "loss": 0.2897, + "loss": 0.0582, "step": 6009 }, { "epoch": 1.67, "learning_rate": 8.848687262269227e-06, - "loss": 0.0887, + "loss": 0.0599, "step": 6010 }, { "epoch": 1.67, "learning_rate": 8.84683180257909e-06, - "loss": 0.1884, + "loss": 0.1248, "step": 6011 }, { "epoch": 1.67, "learning_rate": 8.844976342888951e-06, - "loss": 0.191, + "loss": 0.0119, "step": 6012 }, { "epoch": 1.67, "learning_rate": 8.843120883198813e-06, - "loss": 0.0872, + "loss": 0.0211, "step": 6013 }, { "epoch": 1.67, "learning_rate": 8.841265423508675e-06, - "loss": 0.2871, + "loss": 0.0203, "step": 6014 }, { "epoch": 1.67, "learning_rate": 8.839409963818536e-06, - "loss": 0.2434, + "loss": 0.1071, "step": 6015 }, { "epoch": 1.67, "learning_rate": 8.837554504128398e-06, - "loss": 0.0372, + "loss": 0.1286, "step": 6016 }, { "epoch": 1.67, "learning_rate": 8.83569904443826e-06, - "loss": 0.1405, + "loss": 0.111, "step": 6017 }, { "epoch": 1.67, "learning_rate": 8.833843584748122e-06, - "loss": 0.1421, + "loss": 0.111, "step": 6018 }, { "epoch": 1.68, "learning_rate": 8.831988125057984e-06, - "loss": 0.1915, + "loss": 0.0778, "step": 6019 }, { "epoch": 1.68, "learning_rate": 8.830132665367846e-06, - "loss": 0.2944, + "loss": 0.1456, "step": 6020 }, { "epoch": 1.68, "learning_rate": 8.828277205677708e-06, - "loss": 0.1908, + "loss": 0.0216, "step": 6021 }, { "epoch": 1.68, "learning_rate": 8.82642174598757e-06, - "loss": 0.1399, + "loss": 0.0556, "step": 6022 }, { "epoch": 1.68, "learning_rate": 8.824566286297431e-06, - "loss": 0.0885, + "loss": 0.0354, "step": 6023 }, { "epoch": 1.68, "learning_rate": 8.822710826607293e-06, - "loss": 0.2412, + "loss": 0.0468, "step": 6024 }, { "epoch": 1.68, "learning_rate": 8.820855366917155e-06, - "loss": 0.2425, + "loss": 0.1534, "step": 6025 }, { "epoch": 1.68, "learning_rate": 8.818999907227015e-06, - "loss": 0.0891, + "loss": 0.0257, "step": 6026 }, { "epoch": 1.68, "learning_rate": 8.817144447536879e-06, - "loss": 0.188, + "loss": 0.0791, "step": 6027 }, { "epoch": 1.68, "learning_rate": 8.815288987846739e-06, - "loss": 0.3397, + "loss": 0.0738, "step": 6028 }, { "epoch": 1.68, "learning_rate": 8.813433528156603e-06, - "loss": 0.1378, + "loss": 0.057, "step": 6029 }, { "epoch": 1.68, "learning_rate": 8.811578068466463e-06, - "loss": 0.1403, + "loss": 0.0922, "step": 6030 }, { "epoch": 1.68, "learning_rate": 8.809722608776325e-06, - "loss": 0.086, + "loss": 0.1859, "step": 6031 }, { "epoch": 1.68, "learning_rate": 8.807867149086187e-06, - "loss": 0.1444, + "loss": 0.1656, "step": 6032 }, { "epoch": 1.68, "learning_rate": 8.806011689396048e-06, - "loss": 0.1393, + "loss": 0.1292, "step": 6033 }, { "epoch": 1.68, "learning_rate": 8.80415622970591e-06, - "loss": 0.0885, + "loss": 0.1404, "step": 6034 }, { "epoch": 1.68, "learning_rate": 8.802300770015772e-06, - "loss": 0.1363, + "loss": 0.1297, "step": 6035 }, { "epoch": 1.68, "learning_rate": 8.800445310325634e-06, - "loss": 0.0888, + "loss": 0.0219, "step": 6036 }, { "epoch": 1.68, "learning_rate": 8.798589850635496e-06, - "loss": 0.197, + "loss": 0.0171, "step": 6037 }, { "epoch": 1.68, "learning_rate": 8.796734390945358e-06, - "loss": 0.3981, + "loss": 0.0149, "step": 6038 }, { "epoch": 1.68, "learning_rate": 8.79487893125522e-06, - "loss": 0.0387, + "loss": 0.0666, "step": 6039 }, { "epoch": 1.68, "learning_rate": 8.793023471565082e-06, - "loss": 0.1411, + "loss": 0.2291, "step": 6040 }, { "epoch": 1.68, "learning_rate": 8.791168011874943e-06, - "loss": 0.1933, + "loss": 0.1507, "step": 6041 }, { "epoch": 1.68, "learning_rate": 8.789312552184804e-06, - "loss": 0.0904, + "loss": 0.1048, "step": 6042 }, { "epoch": 1.68, "learning_rate": 8.787457092494667e-06, - "loss": 0.0891, + "loss": 0.0554, "step": 6043 }, { "epoch": 1.68, "learning_rate": 8.785601632804527e-06, - "loss": 0.0396, + "loss": 0.0185, "step": 6044 }, { "epoch": 1.68, "learning_rate": 8.783746173114391e-06, - "loss": 0.0382, + "loss": 0.0501, "step": 6045 }, { "epoch": 1.68, "learning_rate": 8.781890713424251e-06, - "loss": 0.1364, + "loss": 0.0173, "step": 6046 }, { "epoch": 1.68, "learning_rate": 8.780035253734115e-06, - "loss": 0.1875, + "loss": 0.0172, "step": 6047 }, { "epoch": 1.68, "learning_rate": 8.778179794043975e-06, - "loss": 0.1422, + "loss": 0.1516, "step": 6048 }, { "epoch": 1.68, "learning_rate": 8.776324334353837e-06, - "loss": 0.0859, + "loss": 0.046, "step": 6049 }, { "epoch": 1.68, "learning_rate": 8.774468874663699e-06, - "loss": 0.09, + "loss": 0.114, "step": 6050 }, { "epoch": 1.68, "learning_rate": 8.77261341497356e-06, - "loss": 0.1416, + "loss": 0.0505, "step": 6051 }, { "epoch": 1.68, "learning_rate": 8.770757955283422e-06, - "loss": 0.0861, + "loss": 0.1678, "step": 6052 }, { "epoch": 1.68, "learning_rate": 8.768902495593284e-06, - "loss": 0.1874, + "loss": 0.0136, "step": 6053 }, { "epoch": 1.68, "learning_rate": 8.767047035903146e-06, - "loss": 0.0845, + "loss": 0.0219, "step": 6054 }, { "epoch": 1.69, "learning_rate": 8.765191576213008e-06, - "loss": 0.189, + "loss": 0.054, "step": 6055 }, { "epoch": 1.69, "learning_rate": 8.76333611652287e-06, - "loss": 0.1339, + "loss": 0.0137, "step": 6056 }, { "epoch": 1.69, "learning_rate": 8.761480656832732e-06, - "loss": 0.1395, + "loss": 0.0601, "step": 6057 }, { "epoch": 1.69, "learning_rate": 8.759625197142594e-06, - "loss": 0.0326, + "loss": 0.0174, "step": 6058 }, { "epoch": 1.69, "learning_rate": 8.757769737452455e-06, - "loss": 0.0854, + "loss": 0.2369, "step": 6059 }, { "epoch": 1.69, "learning_rate": 8.755914277762316e-06, - "loss": 0.0832, + "loss": 0.1626, "step": 6060 }, { "epoch": 1.69, "learning_rate": 8.754058818072179e-06, - "loss": 0.2455, + "loss": 0.0476, "step": 6061 }, { "epoch": 1.69, "learning_rate": 8.75220335838204e-06, - "loss": 0.086, + "loss": 0.2961, "step": 6062 }, { "epoch": 1.69, "learning_rate": 8.750347898691903e-06, - "loss": 0.1375, + "loss": 0.0632, "step": 6063 }, { "epoch": 1.69, "learning_rate": 8.748492439001763e-06, - "loss": 0.0829, + "loss": 0.0553, "step": 6064 }, { "epoch": 1.69, "learning_rate": 8.746636979311625e-06, - "loss": 0.0839, + "loss": 0.0858, "step": 6065 }, { "epoch": 1.69, "learning_rate": 8.744781519621487e-06, - "loss": 0.1935, + "loss": 0.0711, "step": 6066 }, { "epoch": 1.69, "learning_rate": 8.742926059931349e-06, - "loss": 0.2449, + "loss": 0.0228, "step": 6067 }, { "epoch": 1.69, "learning_rate": 8.74107060024121e-06, - "loss": 0.2455, + "loss": 0.0235, "step": 6068 }, { "epoch": 1.69, "learning_rate": 8.739215140551072e-06, - "loss": 0.1924, + "loss": 0.0724, "step": 6069 }, { "epoch": 1.69, "learning_rate": 8.737359680860934e-06, - "loss": 0.3018, + "loss": 0.0562, "step": 6070 }, { "epoch": 1.69, "learning_rate": 8.735504221170796e-06, - "loss": 0.194, + "loss": 0.1602, "step": 6071 }, { "epoch": 1.69, "learning_rate": 8.733648761480658e-06, - "loss": 0.0866, + "loss": 0.0807, "step": 6072 }, { "epoch": 1.69, "learning_rate": 8.731793301790518e-06, - "loss": 0.4619, + "loss": 0.1736, "step": 6073 }, { "epoch": 1.69, "learning_rate": 8.729937842100382e-06, - "loss": 0.1381, + "loss": 0.0266, "step": 6074 }, { "epoch": 1.69, "learning_rate": 8.728082382410242e-06, - "loss": 0.1369, + "loss": 0.0528, "step": 6075 }, { "epoch": 1.69, "learning_rate": 8.726226922720104e-06, - "loss": 0.0856, + "loss": 0.0878, "step": 6076 }, { "epoch": 1.69, "learning_rate": 8.724371463029966e-06, - "loss": 0.0316, + "loss": 0.0785, "step": 6077 }, { "epoch": 1.69, "learning_rate": 8.722516003339828e-06, - "loss": 0.1398, + "loss": 0.1841, "step": 6078 }, { "epoch": 1.69, "learning_rate": 8.72066054364969e-06, - "loss": 0.0876, + "loss": 0.0271, "step": 6079 }, { "epoch": 1.69, "learning_rate": 8.718805083959551e-06, - "loss": 0.2933, + "loss": 0.1162, "step": 6080 }, { "epoch": 1.69, "learning_rate": 8.716949624269413e-06, - "loss": 0.1381, + "loss": 0.0983, "step": 6081 }, { "epoch": 1.69, "learning_rate": 8.715094164579275e-06, - "loss": 0.0857, + "loss": 0.1106, "step": 6082 }, { "epoch": 1.69, "learning_rate": 8.713238704889137e-06, - "loss": 0.1418, + "loss": 0.0885, "step": 6083 }, { "epoch": 1.69, "learning_rate": 8.711383245198999e-06, - "loss": 0.245, + "loss": 0.1652, "step": 6084 }, { "epoch": 1.69, "learning_rate": 8.70952778550886e-06, - "loss": 0.249, + "loss": 0.2024, "step": 6085 }, { "epoch": 1.69, "learning_rate": 8.707672325818723e-06, - "loss": 0.0359, + "loss": 0.1076, "step": 6086 }, { "epoch": 1.69, "learning_rate": 8.705816866128584e-06, - "loss": 0.0896, + "loss": 0.1154, "step": 6087 }, { "epoch": 1.69, "learning_rate": 8.703961406438446e-06, - "loss": 0.1388, + "loss": 0.103, "step": 6088 }, { "epoch": 1.69, "learning_rate": 8.702105946748306e-06, - "loss": 0.1409, + "loss": 0.0256, "step": 6089 }, { "epoch": 1.69, "learning_rate": 8.70025048705817e-06, - "loss": 0.2465, + "loss": 0.1117, "step": 6090 }, { "epoch": 1.7, "learning_rate": 8.69839502736803e-06, - "loss": 0.0844, + "loss": 0.019, "step": 6091 }, { "epoch": 1.7, "learning_rate": 8.696539567677894e-06, - "loss": 0.0855, + "loss": 0.0334, "step": 6092 }, { "epoch": 1.7, "learning_rate": 8.694684107987754e-06, - "loss": 0.1906, + "loss": 0.0857, "step": 6093 }, { "epoch": 1.7, "learning_rate": 8.692828648297616e-06, - "loss": 0.1933, + "loss": 0.1285, "step": 6094 }, { "epoch": 1.7, "learning_rate": 8.690973188607478e-06, - "loss": 0.0346, + "loss": 0.0304, "step": 6095 }, { "epoch": 1.7, "learning_rate": 8.68911772891734e-06, - "loss": 0.1392, + "loss": 0.0622, "step": 6096 }, { "epoch": 1.7, "learning_rate": 8.687262269227201e-06, - "loss": 0.1399, + "loss": 0.0245, "step": 6097 }, { "epoch": 1.7, "learning_rate": 8.685406809537063e-06, - "loss": 0.0876, + "loss": 0.0516, "step": 6098 }, { "epoch": 1.7, "learning_rate": 8.683551349846925e-06, - "loss": 0.0887, + "loss": 0.063, "step": 6099 }, { "epoch": 1.7, "learning_rate": 8.681695890156787e-06, - "loss": 0.0867, + "loss": 0.0919, "step": 6100 }, { "epoch": 1.7, "learning_rate": 8.679840430466649e-06, - "loss": 0.1376, + "loss": 0.1158, "step": 6101 }, { "epoch": 1.7, "learning_rate": 8.67798497077651e-06, - "loss": 0.0318, + "loss": 0.1089, "step": 6102 }, { "epoch": 1.7, "learning_rate": 8.676129511086373e-06, - "loss": 0.0846, + "loss": 0.0738, "step": 6103 }, { "epoch": 1.7, "learning_rate": 8.674274051396234e-06, - "loss": 0.032, + "loss": 0.0152, "step": 6104 }, { "epoch": 1.7, "learning_rate": 8.672418591706095e-06, - "loss": 0.2447, + "loss": 0.1966, "step": 6105 }, { "epoch": 1.7, "learning_rate": 8.670563132015958e-06, - "loss": 0.2419, + "loss": 0.0163, "step": 6106 }, { "epoch": 1.7, "learning_rate": 8.668707672325818e-06, - "loss": 0.0325, + "loss": 0.1096, "step": 6107 }, { "epoch": 1.7, "learning_rate": 8.666852212635682e-06, - "loss": 0.0849, + "loss": 0.0197, "step": 6108 }, { "epoch": 1.7, "learning_rate": 8.664996752945542e-06, - "loss": 0.0854, + "loss": 0.0609, "step": 6109 }, { "epoch": 1.7, "learning_rate": 8.663141293255404e-06, - "loss": 0.0845, + "loss": 0.0167, "step": 6110 }, { "epoch": 1.7, "learning_rate": 8.661285833565266e-06, - "loss": 0.1943, + "loss": 0.0151, "step": 6111 }, { "epoch": 1.7, "learning_rate": 8.659430373875128e-06, - "loss": 0.0827, + "loss": 0.0543, "step": 6112 }, { "epoch": 1.7, "learning_rate": 8.65757491418499e-06, - "loss": 0.0288, + "loss": 0.1605, "step": 6113 }, { "epoch": 1.7, "learning_rate": 8.655719454494852e-06, - "loss": 0.1344, + "loss": 0.0156, "step": 6114 }, { "epoch": 1.7, "learning_rate": 8.653863994804713e-06, - "loss": 0.0849, + "loss": 0.0521, "step": 6115 }, { "epoch": 1.7, "learning_rate": 8.652008535114575e-06, - "loss": 0.1936, + "loss": 0.0546, "step": 6116 }, { "epoch": 1.7, "learning_rate": 8.650153075424437e-06, - "loss": 0.1959, + "loss": 0.1136, "step": 6117 }, { "epoch": 1.7, "learning_rate": 8.648297615734299e-06, - "loss": 0.0284, + "loss": 0.0608, "step": 6118 }, { "epoch": 1.7, "learning_rate": 8.646442156044161e-06, - "loss": 0.1949, + "loss": 0.061, "step": 6119 }, { "epoch": 1.7, "learning_rate": 8.644586696354023e-06, - "loss": 0.1381, + "loss": 0.1414, "step": 6120 }, { "epoch": 1.7, "learning_rate": 8.642731236663885e-06, - "loss": 0.0824, + "loss": 0.0885, "step": 6121 }, { "epoch": 1.7, "learning_rate": 8.640875776973746e-06, - "loss": 0.1378, + "loss": 0.1395, "step": 6122 }, { "epoch": 1.7, "learning_rate": 8.639020317283607e-06, - "loss": 0.1398, + "loss": 0.1016, "step": 6123 }, { "epoch": 1.7, "learning_rate": 8.63716485759347e-06, - "loss": 0.0841, + "loss": 0.0718, "step": 6124 }, { "epoch": 1.7, "learning_rate": 8.63530939790333e-06, - "loss": 0.0805, + "loss": 0.069, "step": 6125 }, { "epoch": 1.7, "learning_rate": 8.633453938213194e-06, - "loss": 0.0277, + "loss": 0.079, "step": 6126 }, { "epoch": 1.71, "learning_rate": 8.631598478523054e-06, - "loss": 0.083, + "loss": 0.0803, "step": 6127 }, { "epoch": 1.71, "learning_rate": 8.629743018832916e-06, - "loss": 0.3034, + "loss": 0.0645, "step": 6128 }, { "epoch": 1.71, "learning_rate": 8.627887559142778e-06, - "loss": 0.0847, + "loss": 0.0496, "step": 6129 }, { "epoch": 1.71, "learning_rate": 8.62603209945264e-06, - "loss": 0.2491, + "loss": 0.0197, "step": 6130 }, { "epoch": 1.71, "learning_rate": 8.624176639762502e-06, - "loss": 0.0845, + "loss": 0.0749, "step": 6131 }, { "epoch": 1.71, "learning_rate": 8.622321180072363e-06, - "loss": 0.1966, + "loss": 0.1213, "step": 6132 }, { "epoch": 1.71, "learning_rate": 8.620465720382225e-06, - "loss": 0.1381, + "loss": 0.1232, "step": 6133 }, { "epoch": 1.71, "learning_rate": 8.618610260692087e-06, - "loss": 0.142, + "loss": 0.1712, "step": 6134 }, { "epoch": 1.71, "learning_rate": 8.616754801001949e-06, - "loss": 0.0269, + "loss": 0.2757, "step": 6135 }, { "epoch": 1.71, "learning_rate": 8.614899341311811e-06, - "loss": 0.1919, + "loss": 0.0304, "step": 6136 }, { "epoch": 1.71, "learning_rate": 8.613043881621673e-06, - "loss": 0.3649, + "loss": 0.1655, "step": 6137 }, { "epoch": 1.71, "learning_rate": 8.611188421931535e-06, - "loss": 0.1942, + "loss": 0.08, "step": 6138 }, { "epoch": 1.71, "learning_rate": 8.609332962241395e-06, - "loss": 0.0832, + "loss": 0.02, "step": 6139 }, { "epoch": 1.71, "learning_rate": 8.607477502551258e-06, - "loss": 0.0834, + "loss": 0.0417, "step": 6140 }, { "epoch": 1.71, "learning_rate": 8.605622042861119e-06, - "loss": 0.3023, + "loss": 0.0902, "step": 6141 }, { "epoch": 1.71, "learning_rate": 8.603766583170982e-06, - "loss": 0.1933, + "loss": 0.1947, "step": 6142 }, { "epoch": 1.71, "learning_rate": 8.601911123480842e-06, - "loss": 0.1422, + "loss": 0.0726, "step": 6143 }, { "epoch": 1.71, "learning_rate": 8.600055663790706e-06, - "loss": 0.0295, + "loss": 0.151, "step": 6144 }, { "epoch": 1.71, "learning_rate": 8.598200204100566e-06, - "loss": 0.2427, + "loss": 0.0841, "step": 6145 }, { "epoch": 1.71, "learning_rate": 8.596344744410428e-06, - "loss": 0.1945, + "loss": 0.0314, "step": 6146 }, { "epoch": 1.71, "learning_rate": 8.59448928472029e-06, - "loss": 0.0864, + "loss": 0.0248, "step": 6147 }, { "epoch": 1.71, "learning_rate": 8.592633825030152e-06, - "loss": 0.1378, + "loss": 0.123, "step": 6148 }, { "epoch": 1.71, "learning_rate": 8.590778365340014e-06, - "loss": 0.3562, + "loss": 0.0491, "step": 6149 }, { "epoch": 1.71, "learning_rate": 8.588922905649875e-06, - "loss": 0.1404, + "loss": 0.0846, "step": 6150 }, { "epoch": 1.71, "learning_rate": 8.587067445959737e-06, - "loss": 0.1905, + "loss": 0.0705, "step": 6151 }, { "epoch": 1.71, "learning_rate": 8.5852119862696e-06, - "loss": 0.0822, + "loss": 0.0664, "step": 6152 }, { "epoch": 1.71, "learning_rate": 8.583356526579461e-06, - "loss": 0.0337, + "loss": 0.0571, "step": 6153 }, { "epoch": 1.71, "learning_rate": 8.581501066889323e-06, - "loss": 0.1929, + "loss": 0.0299, "step": 6154 }, { "epoch": 1.71, "learning_rate": 8.579645607199185e-06, - "loss": 0.1363, + "loss": 0.136, "step": 6155 }, { "epoch": 1.71, "learning_rate": 8.577790147509047e-06, - "loss": 0.1942, + "loss": 0.061, "step": 6156 }, { "epoch": 1.71, "learning_rate": 8.575934687818907e-06, - "loss": 0.0338, + "loss": 0.0464, "step": 6157 }, { "epoch": 1.71, "learning_rate": 8.57407922812877e-06, - "loss": 0.2937, + "loss": 0.0391, "step": 6158 }, { "epoch": 1.71, "learning_rate": 8.57222376843863e-06, - "loss": 0.141, + "loss": 0.0221, "step": 6159 }, { "epoch": 1.71, "learning_rate": 8.570368308748494e-06, - "loss": 0.2417, + "loss": 0.0302, "step": 6160 }, { "epoch": 1.71, "learning_rate": 8.568512849058354e-06, - "loss": 0.0341, + "loss": 0.0676, "step": 6161 }, { "epoch": 1.72, "learning_rate": 8.566657389368216e-06, - "loss": 0.1414, + "loss": 0.0592, "step": 6162 }, { "epoch": 1.72, "learning_rate": 8.564801929678078e-06, - "loss": 0.0902, + "loss": 0.0149, "step": 6163 }, { "epoch": 1.72, "learning_rate": 8.56294646998794e-06, - "loss": 0.1433, + "loss": 0.0937, "step": 6164 }, { "epoch": 1.72, "learning_rate": 8.561091010297802e-06, - "loss": 0.1381, + "loss": 0.024, "step": 6165 }, { "epoch": 1.72, "learning_rate": 8.559235550607664e-06, - "loss": 0.2428, + "loss": 0.0715, "step": 6166 }, { "epoch": 1.72, "learning_rate": 8.557380090917526e-06, - "loss": 0.088, + "loss": 0.1185, "step": 6167 }, { "epoch": 1.72, "learning_rate": 8.555524631227387e-06, - "loss": 0.1392, + "loss": 0.016, "step": 6168 }, { "epoch": 1.72, "learning_rate": 8.55366917153725e-06, - "loss": 0.0876, + "loss": 0.0179, "step": 6169 }, { "epoch": 1.72, "learning_rate": 8.551813711847111e-06, - "loss": 0.0354, + "loss": 0.0117, "step": 6170 }, { "epoch": 1.72, "learning_rate": 8.549958252156973e-06, - "loss": 0.1386, + "loss": 0.1293, "step": 6171 }, { "epoch": 1.72, "learning_rate": 8.548102792466835e-06, - "loss": 0.2424, + "loss": 0.0524, "step": 6172 }, { "epoch": 1.72, "learning_rate": 8.546247332776695e-06, - "loss": 0.193, + "loss": 0.169, "step": 6173 }, { "epoch": 1.72, "learning_rate": 8.544391873086559e-06, - "loss": 0.0344, + "loss": 0.0183, "step": 6174 }, { "epoch": 1.72, "learning_rate": 8.542536413396419e-06, - "loss": 0.0348, + "loss": 0.0585, "step": 6175 }, { "epoch": 1.72, "learning_rate": 8.540680953706282e-06, - "loss": 0.0337, + "loss": 0.0638, "step": 6176 }, { "epoch": 1.72, "learning_rate": 8.538825494016143e-06, - "loss": 0.2956, + "loss": 0.0501, "step": 6177 }, { "epoch": 1.72, "learning_rate": 8.536970034326006e-06, - "loss": 0.0854, + "loss": 0.0093, "step": 6178 }, { "epoch": 1.72, "learning_rate": 8.535114574635866e-06, - "loss": 0.1353, + "loss": 0.2466, "step": 6179 }, { "epoch": 1.72, "learning_rate": 8.533259114945728e-06, - "loss": 0.0859, + "loss": 0.1259, "step": 6180 }, { "epoch": 1.72, "learning_rate": 8.53140365525559e-06, - "loss": 0.1947, + "loss": 0.1255, "step": 6181 }, { "epoch": 1.72, "learning_rate": 8.529548195565452e-06, - "loss": 0.1346, + "loss": 0.0697, "step": 6182 }, { "epoch": 1.72, "learning_rate": 8.527692735875314e-06, - "loss": 0.1398, + "loss": 0.063, "step": 6183 }, { "epoch": 1.72, "learning_rate": 8.525837276185176e-06, - "loss": 0.0807, + "loss": 0.0201, "step": 6184 }, { "epoch": 1.72, "learning_rate": 8.523981816495038e-06, - "loss": 0.0334, + "loss": 0.0268, "step": 6185 }, { "epoch": 1.72, "learning_rate": 8.5221263568049e-06, - "loss": 0.087, + "loss": 0.0521, "step": 6186 }, { "epoch": 1.72, "learning_rate": 8.520270897114761e-06, - "loss": 0.2427, + "loss": 0.0678, "step": 6187 }, { "epoch": 1.72, "learning_rate": 8.518415437424623e-06, - "loss": 0.031, + "loss": 0.0173, "step": 6188 }, { "epoch": 1.72, "learning_rate": 8.516559977734485e-06, - "loss": 0.0312, + "loss": 0.117, "step": 6189 }, { "epoch": 1.72, "learning_rate": 8.514704518044347e-06, - "loss": 0.1396, + "loss": 0.0211, "step": 6190 }, { "epoch": 1.72, "learning_rate": 8.512849058354207e-06, - "loss": 0.2533, + "loss": 0.0667, "step": 6191 }, { "epoch": 1.72, "learning_rate": 8.51099359866407e-06, - "loss": 0.3501, + "loss": 0.1244, "step": 6192 }, { "epoch": 1.72, "learning_rate": 8.50913813897393e-06, - "loss": 0.086, + "loss": 0.0919, "step": 6193 }, { "epoch": 1.72, "learning_rate": 8.507282679283794e-06, - "loss": 0.0305, + "loss": 0.0816, "step": 6194 }, { "epoch": 1.72, "learning_rate": 8.505427219593655e-06, - "loss": 0.1937, + "loss": 0.0432, "step": 6195 }, { "epoch": 1.72, "learning_rate": 8.503571759903516e-06, - "loss": 0.2481, + "loss": 0.1163, "step": 6196 }, { "epoch": 1.72, "learning_rate": 8.501716300213378e-06, - "loss": 0.1957, + "loss": 0.2656, "step": 6197 }, { "epoch": 1.73, "learning_rate": 8.49986084052324e-06, - "loss": 0.1372, + "loss": 0.1276, "step": 6198 }, { "epoch": 1.73, "learning_rate": 8.498005380833102e-06, - "loss": 0.3087, + "loss": 0.0522, "step": 6199 }, { "epoch": 1.73, "learning_rate": 8.496149921142964e-06, - "loss": 0.0872, + "loss": 0.1066, "step": 6200 }, { "epoch": 1.73, "learning_rate": 8.494294461452826e-06, - "loss": 0.031, + "loss": 0.0641, "step": 6201 }, { "epoch": 1.73, "learning_rate": 8.492439001762688e-06, - "loss": 0.143, + "loss": 0.028, "step": 6202 }, { "epoch": 1.73, "learning_rate": 8.49058354207255e-06, - "loss": 0.1399, + "loss": 0.0232, "step": 6203 }, { "epoch": 1.73, "learning_rate": 8.488728082382411e-06, - "loss": 0.1455, + "loss": 0.1566, "step": 6204 }, { "epoch": 1.73, "learning_rate": 8.486872622692273e-06, - "loss": 0.1926, + "loss": 0.0536, "step": 6205 }, { "epoch": 1.73, "learning_rate": 8.485017163002135e-06, - "loss": 0.1968, + "loss": 0.097, "step": 6206 }, { "epoch": 1.73, "learning_rate": 8.483161703311997e-06, - "loss": 0.1919, + "loss": 0.1033, "step": 6207 }, { "epoch": 1.73, "learning_rate": 8.481306243621859e-06, - "loss": 0.139, + "loss": 0.0507, "step": 6208 }, { "epoch": 1.73, "learning_rate": 8.479450783931719e-06, - "loss": 0.1919, + "loss": 0.0201, "step": 6209 }, { "epoch": 1.73, "learning_rate": 8.477595324241583e-06, - "loss": 0.1388, + "loss": 0.0911, "step": 6210 }, { "epoch": 1.73, "learning_rate": 8.475739864551443e-06, - "loss": 0.2477, + "loss": 0.0162, "step": 6211 }, { "epoch": 1.73, "learning_rate": 8.473884404861306e-06, - "loss": 0.1425, + "loss": 0.054, "step": 6212 }, { "epoch": 1.73, "learning_rate": 8.472028945171167e-06, - "loss": 0.0346, + "loss": 0.0714, "step": 6213 }, { "epoch": 1.73, "learning_rate": 8.470173485481028e-06, - "loss": 0.1384, + "loss": 0.0871, "step": 6214 }, { "epoch": 1.73, "learning_rate": 8.46831802579089e-06, - "loss": 0.0328, + "loss": 0.0453, "step": 6215 }, { "epoch": 1.73, "learning_rate": 8.466462566100752e-06, - "loss": 0.0871, + "loss": 0.2225, "step": 6216 }, { "epoch": 1.73, "learning_rate": 8.464607106410614e-06, - "loss": 0.2034, + "loss": 0.0951, "step": 6217 }, { "epoch": 1.73, "learning_rate": 8.462751646720476e-06, - "loss": 0.0869, + "loss": 0.101, "step": 6218 }, { "epoch": 1.73, "learning_rate": 8.460896187030338e-06, - "loss": 0.2974, + "loss": 0.0536, "step": 6219 }, { "epoch": 1.73, "learning_rate": 8.4590407273402e-06, - "loss": 0.0854, + "loss": 0.1055, "step": 6220 }, { "epoch": 1.73, "learning_rate": 8.457185267650061e-06, - "loss": 0.1432, + "loss": 0.0593, "step": 6221 }, { "epoch": 1.73, "learning_rate": 8.455329807959923e-06, - "loss": 0.1955, + "loss": 0.0679, "step": 6222 }, { "epoch": 1.73, "learning_rate": 8.453474348269785e-06, - "loss": 0.0332, + "loss": 0.1422, "step": 6223 }, { "epoch": 1.73, "learning_rate": 8.451618888579647e-06, - "loss": 0.0324, + "loss": 0.1121, "step": 6224 }, { "epoch": 1.73, "learning_rate": 8.449763428889507e-06, - "loss": 0.0332, + "loss": 0.0818, "step": 6225 }, { "epoch": 1.73, "learning_rate": 8.44790796919937e-06, - "loss": 0.1391, + "loss": 0.0465, "step": 6226 }, { "epoch": 1.73, "learning_rate": 8.446052509509231e-06, - "loss": 0.1907, + "loss": 0.1212, "step": 6227 }, { "epoch": 1.73, "learning_rate": 8.444197049819093e-06, - "loss": 0.1911, + "loss": 0.1959, "step": 6228 }, { "epoch": 1.73, "learning_rate": 8.442341590128955e-06, - "loss": 0.0336, + "loss": 0.1051, "step": 6229 }, { "epoch": 1.73, "learning_rate": 8.440486130438817e-06, - "loss": 0.1421, + "loss": 0.0939, "step": 6230 }, { "epoch": 1.73, "learning_rate": 8.438630670748679e-06, - "loss": 0.139, + "loss": 0.0253, "step": 6231 }, { "epoch": 1.73, "learning_rate": 8.43677521105854e-06, - "loss": 0.0846, + "loss": 0.0989, "step": 6232 }, { "epoch": 1.73, "learning_rate": 8.434919751368402e-06, - "loss": 0.1416, + "loss": 0.0629, "step": 6233 }, { "epoch": 1.74, "learning_rate": 8.433064291678264e-06, - "loss": 0.0307, + "loss": 0.0845, "step": 6234 }, { "epoch": 1.74, "learning_rate": 8.431208831988126e-06, - "loss": 0.1367, + "loss": 0.1516, "step": 6235 }, { "epoch": 1.74, "learning_rate": 8.429353372297986e-06, - "loss": 0.1447, + "loss": 0.0232, "step": 6236 }, { "epoch": 1.74, "learning_rate": 8.42749791260785e-06, - "loss": 0.0825, + "loss": 0.058, "step": 6237 }, { "epoch": 1.74, "learning_rate": 8.42564245291771e-06, - "loss": 0.0859, + "loss": 0.1766, "step": 6238 }, { "epoch": 1.74, "learning_rate": 8.423786993227573e-06, - "loss": 0.2496, + "loss": 0.0235, "step": 6239 }, { "epoch": 1.74, "learning_rate": 8.421931533537434e-06, - "loss": 0.0848, + "loss": 0.0233, "step": 6240 }, { "epoch": 1.74, "learning_rate": 8.420076073847297e-06, - "loss": 0.1373, + "loss": 0.0505, "step": 6241 }, { "epoch": 1.74, "learning_rate": 8.418220614157157e-06, - "loss": 0.029, + "loss": 0.0924, "step": 6242 }, { "epoch": 1.74, "learning_rate": 8.41636515446702e-06, - "loss": 0.1954, + "loss": 0.1726, "step": 6243 }, { "epoch": 1.74, "learning_rate": 8.414509694776881e-06, - "loss": 0.0293, + "loss": 0.0143, "step": 6244 }, { "epoch": 1.74, "learning_rate": 8.412654235086743e-06, - "loss": 0.0859, + "loss": 0.1607, "step": 6245 }, { "epoch": 1.74, "learning_rate": 8.410798775396605e-06, - "loss": 0.0814, + "loss": 0.1491, "step": 6246 }, { "epoch": 1.74, "learning_rate": 8.408943315706467e-06, - "loss": 0.2479, + "loss": 0.1008, "step": 6247 }, { "epoch": 1.74, "learning_rate": 8.407087856016329e-06, - "loss": 0.1957, + "loss": 0.1207, "step": 6248 }, { "epoch": 1.74, "learning_rate": 8.40523239632619e-06, - "loss": 0.1374, + "loss": 0.0215, "step": 6249 }, { "epoch": 1.74, "learning_rate": 8.403376936636052e-06, - "loss": 0.085, + "loss": 0.0744, "step": 6250 }, { "epoch": 1.74, "learning_rate": 8.401521476945914e-06, - "loss": 0.0281, + "loss": 0.0613, "step": 6251 }, { "epoch": 1.74, "learning_rate": 8.399666017255776e-06, - "loss": 0.0818, + "loss": 0.0197, "step": 6252 }, { "epoch": 1.74, "learning_rate": 8.397810557565638e-06, - "loss": 0.0277, + "loss": 0.0463, "step": 6253 }, { "epoch": 1.74, "learning_rate": 8.395955097875498e-06, - "loss": 0.1373, + "loss": 0.1207, "step": 6254 }, { "epoch": 1.74, "learning_rate": 8.394099638185362e-06, - "loss": 0.0819, + "loss": 0.1115, "step": 6255 }, { "epoch": 1.74, "learning_rate": 8.392244178495222e-06, - "loss": 0.2002, + "loss": 0.0185, "step": 6256 }, { "epoch": 1.74, "learning_rate": 8.390388718805085e-06, - "loss": 0.0859, + "loss": 0.0222, "step": 6257 }, { "epoch": 1.74, "learning_rate": 8.388533259114946e-06, - "loss": 0.1404, + "loss": 0.1049, "step": 6258 }, { "epoch": 1.74, "learning_rate": 8.386677799424807e-06, - "loss": 0.0842, + "loss": 0.0354, "step": 6259 }, { "epoch": 1.74, "learning_rate": 8.38482233973467e-06, - "loss": 0.0267, + "loss": 0.1417, "step": 6260 }, { "epoch": 1.74, "learning_rate": 8.382966880044531e-06, - "loss": 0.1388, + "loss": 0.1269, "step": 6261 }, { "epoch": 1.74, "learning_rate": 8.381111420354393e-06, - "loss": 0.2, + "loss": 0.0912, "step": 6262 }, { "epoch": 1.74, "learning_rate": 8.379255960664255e-06, - "loss": 0.2508, + "loss": 0.1262, "step": 6263 }, { "epoch": 1.74, "learning_rate": 8.377400500974117e-06, - "loss": 0.249, + "loss": 0.1716, "step": 6264 }, { "epoch": 1.74, "learning_rate": 8.375545041283979e-06, - "loss": 0.0838, + "loss": 0.0161, "step": 6265 }, { "epoch": 1.74, "learning_rate": 8.37368958159384e-06, - "loss": 0.1389, + "loss": 0.1953, "step": 6266 }, { "epoch": 1.74, "learning_rate": 8.371834121903702e-06, - "loss": 0.3139, + "loss": 0.0425, "step": 6267 }, { "epoch": 1.74, "learning_rate": 8.369978662213564e-06, - "loss": 0.1976, + "loss": 0.1064, "step": 6268 }, { "epoch": 1.74, "learning_rate": 8.368123202523426e-06, - "loss": 0.1989, + "loss": 0.0645, "step": 6269 }, { "epoch": 1.75, "learning_rate": 8.366267742833288e-06, - "loss": 0.1963, + "loss": 0.0829, "step": 6270 }, { "epoch": 1.75, "learning_rate": 8.36441228314315e-06, - "loss": 0.1369, + "loss": 0.0143, "step": 6271 }, { "epoch": 1.75, "learning_rate": 8.36255682345301e-06, - "loss": 0.2457, + "loss": 0.061, "step": 6272 }, { "epoch": 1.75, "learning_rate": 8.360701363762874e-06, - "loss": 0.1404, + "loss": 0.1415, "step": 6273 }, { "epoch": 1.75, "learning_rate": 8.358845904072734e-06, - "loss": 0.0862, + "loss": 0.1718, "step": 6274 }, { "epoch": 1.75, "learning_rate": 8.356990444382597e-06, - "loss": 0.141, + "loss": 0.021, "step": 6275 }, { "epoch": 1.75, "learning_rate": 8.355134984692458e-06, - "loss": 0.141, + "loss": 0.0238, "step": 6276 }, { "epoch": 1.75, "learning_rate": 8.35327952500232e-06, - "loss": 0.1992, + "loss": 0.0299, "step": 6277 }, { "epoch": 1.75, "learning_rate": 8.351424065312181e-06, - "loss": 0.0296, + "loss": 0.0314, "step": 6278 }, { "epoch": 1.75, "learning_rate": 8.349568605622043e-06, - "loss": 0.1884, + "loss": 0.0991, "step": 6279 }, { "epoch": 1.75, "learning_rate": 8.347713145931905e-06, - "loss": 0.1428, + "loss": 0.0495, "step": 6280 }, { "epoch": 1.75, "learning_rate": 8.345857686241767e-06, - "loss": 0.0877, + "loss": 0.11, "step": 6281 }, { "epoch": 1.75, "learning_rate": 8.344002226551629e-06, - "loss": 0.0313, + "loss": 0.0539, "step": 6282 }, { "epoch": 1.75, "learning_rate": 8.34214676686149e-06, - "loss": 0.1417, + "loss": 0.1591, "step": 6283 }, { "epoch": 1.75, "learning_rate": 8.340291307171353e-06, - "loss": 0.142, + "loss": 0.0654, "step": 6284 }, { "epoch": 1.75, "learning_rate": 8.338435847481214e-06, - "loss": 0.0845, + "loss": 0.0273, "step": 6285 }, { "epoch": 1.75, "learning_rate": 8.336580387791076e-06, - "loss": 0.1408, + "loss": 0.0205, "step": 6286 }, { "epoch": 1.75, "learning_rate": 8.334724928100938e-06, - "loss": 0.2479, + "loss": 0.0316, "step": 6287 }, { "epoch": 1.75, "learning_rate": 8.332869468410798e-06, - "loss": 0.0865, + "loss": 0.1286, "step": 6288 }, { "epoch": 1.75, "learning_rate": 8.331014008720662e-06, - "loss": 0.354, + "loss": 0.0786, "step": 6289 }, { "epoch": 1.75, "learning_rate": 8.329158549030522e-06, - "loss": 0.2449, + "loss": 0.0108, "step": 6290 }, { "epoch": 1.75, "learning_rate": 8.327303089340386e-06, - "loss": 0.2474, + "loss": 0.0638, "step": 6291 }, { "epoch": 1.75, "learning_rate": 8.325447629650246e-06, - "loss": 0.4056, + "loss": 0.1192, "step": 6292 }, { "epoch": 1.75, "learning_rate": 8.323592169960108e-06, - "loss": 0.0332, + "loss": 0.0127, "step": 6293 }, { "epoch": 1.75, "learning_rate": 8.32173671026997e-06, - "loss": 0.1948, + "loss": 0.0958, "step": 6294 }, { "epoch": 1.75, "learning_rate": 8.319881250579831e-06, - "loss": 0.1946, + "loss": 0.1022, "step": 6295 }, { "epoch": 1.75, "learning_rate": 8.318025790889693e-06, - "loss": 0.1424, + "loss": 0.1281, "step": 6296 }, { "epoch": 1.75, "learning_rate": 8.316170331199555e-06, - "loss": 0.0358, + "loss": 0.2301, "step": 6297 }, { "epoch": 1.75, "learning_rate": 8.314314871509417e-06, - "loss": 0.1922, + "loss": 0.012, "step": 6298 }, { "epoch": 1.75, "learning_rate": 8.312459411819279e-06, - "loss": 0.1895, + "loss": 0.0596, "step": 6299 }, { "epoch": 1.75, "learning_rate": 8.31060395212914e-06, - "loss": 0.14, + "loss": 0.0139, "step": 6300 }, { "epoch": 1.75, "learning_rate": 8.308748492439003e-06, - "loss": 0.1955, + "loss": 0.0925, "step": 6301 }, { "epoch": 1.75, "learning_rate": 8.306893032748865e-06, - "loss": 0.0883, + "loss": 0.0131, "step": 6302 }, { "epoch": 1.75, "learning_rate": 8.305037573058726e-06, - "loss": 0.2907, + "loss": 0.0326, "step": 6303 }, { "epoch": 1.75, "learning_rate": 8.303182113368588e-06, - "loss": 0.0371, + "loss": 0.0434, "step": 6304 }, { "epoch": 1.75, "learning_rate": 8.30132665367845e-06, - "loss": 0.142, + "loss": 0.0535, "step": 6305 }, { "epoch": 1.76, "learning_rate": 8.29947119398831e-06, - "loss": 0.0878, + "loss": 0.0828, "step": 6306 }, { "epoch": 1.76, "learning_rate": 8.297615734298174e-06, - "loss": 0.297, + "loss": 0.1033, "step": 6307 }, { "epoch": 1.76, "learning_rate": 8.295760274608034e-06, - "loss": 0.1897, + "loss": 0.0274, "step": 6308 }, { "epoch": 1.76, "learning_rate": 8.293904814917898e-06, - "loss": 0.1361, + "loss": 0.0596, "step": 6309 }, { "epoch": 1.76, "learning_rate": 8.292049355227758e-06, - "loss": 0.0383, + "loss": 0.0658, "step": 6310 }, { "epoch": 1.76, "learning_rate": 8.29019389553762e-06, - "loss": 0.2393, + "loss": 0.1465, "step": 6311 }, { "epoch": 1.76, "learning_rate": 8.288338435847482e-06, - "loss": 0.2382, + "loss": 0.072, "step": 6312 }, { "epoch": 1.76, "learning_rate": 8.286482976157343e-06, - "loss": 0.038, + "loss": 0.0307, "step": 6313 }, { "epoch": 1.76, "learning_rate": 8.284627516467205e-06, - "loss": 0.1392, + "loss": 0.1308, "step": 6314 }, { "epoch": 1.76, "learning_rate": 8.282772056777067e-06, - "loss": 0.0379, + "loss": 0.1719, "step": 6315 }, { "epoch": 1.76, "learning_rate": 8.280916597086929e-06, - "loss": 0.1387, + "loss": 0.1092, "step": 6316 }, { "epoch": 1.76, "learning_rate": 8.279061137396791e-06, - "loss": 0.0372, + "loss": 0.0729, "step": 6317 }, { "epoch": 1.76, "learning_rate": 8.277205677706653e-06, - "loss": 0.1882, + "loss": 0.056, "step": 6318 }, { "epoch": 1.76, "learning_rate": 8.275350218016515e-06, - "loss": 0.1394, + "loss": 0.2216, "step": 6319 }, { "epoch": 1.76, "learning_rate": 8.273494758326377e-06, - "loss": 0.1927, + "loss": 0.1152, "step": 6320 }, { "epoch": 1.76, "learning_rate": 8.271639298636238e-06, - "loss": 0.0363, + "loss": 0.1222, "step": 6321 }, { "epoch": 1.76, "learning_rate": 8.269783838946099e-06, - "loss": 0.0918, + "loss": 0.1881, "step": 6322 }, { "epoch": 1.76, "learning_rate": 8.267928379255962e-06, - "loss": 0.0359, + "loss": 0.0702, "step": 6323 }, { "epoch": 1.76, "learning_rate": 8.266072919565822e-06, - "loss": 0.1924, + "loss": 0.083, "step": 6324 }, { "epoch": 1.76, "learning_rate": 8.264217459875686e-06, - "loss": 0.1388, + "loss": 0.0534, "step": 6325 }, { "epoch": 1.76, "learning_rate": 8.262362000185546e-06, - "loss": 0.0343, + "loss": 0.1172, "step": 6326 }, { "epoch": 1.76, "learning_rate": 8.26050654049541e-06, - "loss": 0.0856, + "loss": 0.0453, "step": 6327 }, { "epoch": 1.76, "learning_rate": 8.25865108080527e-06, - "loss": 0.197, + "loss": 0.0839, "step": 6328 }, { "epoch": 1.76, "learning_rate": 8.256795621115132e-06, - "loss": 0.1939, + "loss": 0.1782, "step": 6329 }, { "epoch": 1.76, "learning_rate": 8.254940161424994e-06, - "loss": 0.1386, + "loss": 0.0494, "step": 6330 }, { "epoch": 1.76, "learning_rate": 8.253084701734855e-06, - "loss": 0.1437, + "loss": 0.0692, "step": 6331 }, { "epoch": 1.76, "learning_rate": 8.251229242044717e-06, - "loss": 0.0854, + "loss": 0.0588, "step": 6332 }, { "epoch": 1.76, "learning_rate": 8.249373782354579e-06, - "loss": 0.138, + "loss": 0.0238, "step": 6333 }, { "epoch": 1.76, "learning_rate": 8.247518322664441e-06, - "loss": 0.1389, + "loss": 0.0771, "step": 6334 }, { "epoch": 1.76, "learning_rate": 8.245662862974303e-06, - "loss": 0.0311, + "loss": 0.1004, "step": 6335 }, { "epoch": 1.76, "learning_rate": 8.243807403284165e-06, - "loss": 0.1955, + "loss": 0.0313, "step": 6336 }, { "epoch": 1.76, "learning_rate": 8.241951943594027e-06, - "loss": 0.0855, + "loss": 0.1212, "step": 6337 }, { "epoch": 1.76, "learning_rate": 8.240096483903888e-06, - "loss": 0.0317, + "loss": 0.1413, "step": 6338 }, { "epoch": 1.76, "learning_rate": 8.23824102421375e-06, - "loss": 0.1947, + "loss": 0.0613, "step": 6339 }, { "epoch": 1.76, "learning_rate": 8.23638556452361e-06, - "loss": 0.4115, + "loss": 0.0637, "step": 6340 }, { "epoch": 1.76, "learning_rate": 8.234530104833474e-06, - "loss": 0.3535, + "loss": 0.1145, "step": 6341 }, { "epoch": 1.77, "learning_rate": 8.232674645143334e-06, - "loss": 0.1931, + "loss": 0.1964, "step": 6342 }, { "epoch": 1.77, "learning_rate": 8.230819185453198e-06, - "loss": 0.0852, + "loss": 0.0781, "step": 6343 }, { "epoch": 1.77, "learning_rate": 8.228963725763058e-06, - "loss": 0.0846, + "loss": 0.0563, "step": 6344 }, { "epoch": 1.77, "learning_rate": 8.22710826607292e-06, - "loss": 0.0314, + "loss": 0.0869, "step": 6345 }, { "epoch": 1.77, "learning_rate": 8.225252806382782e-06, - "loss": 0.0849, + "loss": 0.2002, "step": 6346 }, { "epoch": 1.77, "learning_rate": 8.223397346692644e-06, - "loss": 0.0312, + "loss": 0.0339, "step": 6347 }, { "epoch": 1.77, "learning_rate": 8.221541887002506e-06, - "loss": 0.1946, + "loss": 0.1768, "step": 6348 }, { "epoch": 1.77, "learning_rate": 8.219686427312367e-06, - "loss": 0.1374, + "loss": 0.05, "step": 6349 }, { "epoch": 1.77, "learning_rate": 8.21783096762223e-06, - "loss": 0.0876, + "loss": 0.0249, "step": 6350 }, { "epoch": 1.77, "learning_rate": 8.215975507932091e-06, - "loss": 0.2478, + "loss": 0.0383, "step": 6351 }, { "epoch": 1.77, "learning_rate": 8.214120048241953e-06, - "loss": 0.1912, + "loss": 0.0205, "step": 6352 }, { "epoch": 1.77, "learning_rate": 8.212264588551815e-06, - "loss": 0.2436, + "loss": 0.1214, "step": 6353 }, { "epoch": 1.77, "learning_rate": 8.210409128861677e-06, - "loss": 0.0309, + "loss": 0.128, "step": 6354 }, { "epoch": 1.77, "learning_rate": 8.208553669171539e-06, - "loss": 0.1943, + "loss": 0.1043, "step": 6355 }, { "epoch": 1.77, "learning_rate": 8.206698209481399e-06, - "loss": 0.1378, + "loss": 0.0881, "step": 6356 }, { "epoch": 1.77, "learning_rate": 8.204842749791262e-06, - "loss": 0.306, + "loss": 0.0624, "step": 6357 }, { "epoch": 1.77, "learning_rate": 8.202987290101123e-06, - "loss": 0.2492, + "loss": 0.1011, "step": 6358 }, { "epoch": 1.77, "learning_rate": 8.201131830410986e-06, - "loss": 0.1385, + "loss": 0.0313, "step": 6359 }, { "epoch": 1.77, "learning_rate": 8.199276370720846e-06, - "loss": 0.2448, + "loss": 0.0737, "step": 6360 }, { "epoch": 1.77, "learning_rate": 8.19742091103071e-06, - "loss": 0.0859, + "loss": 0.0226, "step": 6361 }, { "epoch": 1.77, "learning_rate": 8.19556545134057e-06, - "loss": 0.1423, + "loss": 0.0544, "step": 6362 }, { "epoch": 1.77, "learning_rate": 8.193709991650432e-06, - "loss": 0.0342, + "loss": 0.0203, "step": 6363 }, { "epoch": 1.77, "learning_rate": 8.191854531960294e-06, - "loss": 0.1934, + "loss": 0.0825, "step": 6364 }, { "epoch": 1.77, "learning_rate": 8.189999072270156e-06, - "loss": 0.0854, + "loss": 0.0498, "step": 6365 }, { "epoch": 1.77, "learning_rate": 8.188143612580017e-06, - "loss": 0.0806, + "loss": 0.0174, "step": 6366 }, { "epoch": 1.77, "learning_rate": 8.18628815288988e-06, - "loss": 0.0836, + "loss": 0.0427, "step": 6367 }, { "epoch": 1.77, "learning_rate": 8.184432693199741e-06, - "loss": 0.1402, + "loss": 0.0112, "step": 6368 }, { "epoch": 1.77, "learning_rate": 8.182577233509603e-06, - "loss": 0.2487, + "loss": 0.1525, "step": 6369 }, { "epoch": 1.77, "learning_rate": 8.180721773819465e-06, - "loss": 0.2989, + "loss": 0.0201, "step": 6370 }, { "epoch": 1.77, "learning_rate": 8.178866314129327e-06, - "loss": 0.1957, + "loss": 0.1777, "step": 6371 }, { "epoch": 1.77, "learning_rate": 8.177010854439189e-06, - "loss": 0.1375, + "loss": 0.1017, "step": 6372 }, { "epoch": 1.77, "learning_rate": 8.17515539474905e-06, - "loss": 0.1888, + "loss": 0.0962, "step": 6373 }, { "epoch": 1.77, "learning_rate": 8.17329993505891e-06, - "loss": 0.0847, + "loss": 0.0292, "step": 6374 }, { "epoch": 1.77, "learning_rate": 8.171444475368774e-06, - "loss": 0.0863, + "loss": 0.0655, "step": 6375 }, { "epoch": 1.77, "learning_rate": 8.169589015678634e-06, - "loss": 0.1933, + "loss": 0.0801, "step": 6376 }, { "epoch": 1.77, "learning_rate": 8.167733555988496e-06, - "loss": 0.0884, + "loss": 0.1785, "step": 6377 }, { "epoch": 1.78, "learning_rate": 8.165878096298358e-06, - "loss": 0.2423, + "loss": 0.1499, "step": 6378 }, { "epoch": 1.78, "learning_rate": 8.16402263660822e-06, - "loss": 0.1413, + "loss": 0.0334, "step": 6379 }, { "epoch": 1.78, "learning_rate": 8.162167176918082e-06, - "loss": 0.1402, + "loss": 0.0103, "step": 6380 }, { "epoch": 1.78, "learning_rate": 8.160311717227944e-06, - "loss": 0.2494, + "loss": 0.0615, "step": 6381 }, { "epoch": 1.78, "learning_rate": 8.158456257537806e-06, - "loss": 0.089, + "loss": 0.019, "step": 6382 }, { "epoch": 1.78, "learning_rate": 8.156600797847668e-06, - "loss": 0.1425, + "loss": 0.1839, "step": 6383 }, { "epoch": 1.78, "learning_rate": 8.15474533815753e-06, - "loss": 0.1373, + "loss": 0.1024, "step": 6384 }, { "epoch": 1.78, "learning_rate": 8.15288987846739e-06, - "loss": 0.0877, + "loss": 0.196, "step": 6385 }, { "epoch": 1.78, "learning_rate": 8.151034418777253e-06, - "loss": 0.1399, + "loss": 0.0181, "step": 6386 }, { "epoch": 1.78, "learning_rate": 8.149178959087113e-06, - "loss": 0.1433, + "loss": 0.1239, "step": 6387 }, { "epoch": 1.78, "learning_rate": 8.147323499396977e-06, - "loss": 0.3461, + "loss": 0.0546, "step": 6388 }, { "epoch": 1.78, "learning_rate": 8.145468039706837e-06, - "loss": 0.0852, + "loss": 0.0762, "step": 6389 }, { "epoch": 1.78, "learning_rate": 8.1436125800167e-06, - "loss": 0.1383, + "loss": 0.0647, "step": 6390 }, { "epoch": 1.78, "learning_rate": 8.141757120326561e-06, - "loss": 0.0919, + "loss": 0.0584, "step": 6391 }, { "epoch": 1.78, "learning_rate": 8.139901660636423e-06, - "loss": 0.0367, + "loss": 0.1083, "step": 6392 }, { "epoch": 1.78, "learning_rate": 8.138046200946285e-06, - "loss": 0.0894, + "loss": 0.0513, "step": 6393 }, { "epoch": 1.78, "learning_rate": 8.136190741256146e-06, - "loss": 0.1433, + "loss": 0.0959, "step": 6394 }, { "epoch": 1.78, "learning_rate": 8.134335281566008e-06, - "loss": 0.2413, + "loss": 0.0277, "step": 6395 }, { "epoch": 1.78, "learning_rate": 8.13247982187587e-06, - "loss": 0.1372, + "loss": 0.0216, "step": 6396 }, { "epoch": 1.78, "learning_rate": 8.130624362185732e-06, - "loss": 0.0867, + "loss": 0.0456, "step": 6397 }, { "epoch": 1.78, "learning_rate": 8.128768902495594e-06, - "loss": 0.0351, + "loss": 0.115, "step": 6398 }, { "epoch": 1.78, "learning_rate": 8.126913442805456e-06, - "loss": 0.1902, + "loss": 0.0996, "step": 6399 }, { "epoch": 1.78, "learning_rate": 8.125057983115318e-06, - "loss": 0.1921, + "loss": 0.0676, "step": 6400 }, { "epoch": 1.78, "learning_rate": 8.12320252342518e-06, - "loss": 0.244, + "loss": 0.0181, "step": 6401 }, { "epoch": 1.78, "learning_rate": 8.121347063735041e-06, - "loss": 0.0854, + "loss": 0.0896, "step": 6402 }, { "epoch": 1.78, "learning_rate": 8.119491604044902e-06, - "loss": 0.0871, + "loss": 0.0636, "step": 6403 }, { "epoch": 1.78, "learning_rate": 8.117636144354765e-06, - "loss": 0.1368, + "loss": 0.1085, "step": 6404 }, { "epoch": 1.78, "learning_rate": 8.115780684664625e-06, - "loss": 0.0848, + "loss": 0.0168, "step": 6405 }, { "epoch": 1.78, "learning_rate": 8.113925224974489e-06, - "loss": 0.1424, + "loss": 0.0257, "step": 6406 }, { "epoch": 1.78, "learning_rate": 8.112069765284349e-06, - "loss": 0.0851, + "loss": 0.1049, "step": 6407 }, { "epoch": 1.78, "learning_rate": 8.110214305594211e-06, - "loss": 0.0843, + "loss": 0.0469, "step": 6408 }, { "epoch": 1.78, "learning_rate": 8.108358845904073e-06, - "loss": 0.1938, + "loss": 0.0205, "step": 6409 }, { "epoch": 1.78, "learning_rate": 8.106503386213935e-06, - "loss": 0.25, + "loss": 0.0835, "step": 6410 }, { "epoch": 1.78, "learning_rate": 8.104647926523797e-06, - "loss": 0.0856, + "loss": 0.0094, "step": 6411 }, { "epoch": 1.78, "learning_rate": 8.102792466833658e-06, - "loss": 0.0865, + "loss": 0.0192, "step": 6412 }, { "epoch": 1.78, "learning_rate": 8.10093700714352e-06, - "loss": 0.2482, + "loss": 0.1074, "step": 6413 }, { "epoch": 1.79, "learning_rate": 8.099081547453382e-06, - "loss": 0.0866, + "loss": 0.0626, "step": 6414 }, { "epoch": 1.79, "learning_rate": 8.097226087763244e-06, - "loss": 0.0875, + "loss": 0.1246, "step": 6415 }, { "epoch": 1.79, "learning_rate": 8.095370628073106e-06, - "loss": 0.0868, + "loss": 0.1425, "step": 6416 }, { "epoch": 1.79, "learning_rate": 8.093515168382968e-06, - "loss": 0.0862, + "loss": 0.0098, "step": 6417 }, { "epoch": 1.79, "learning_rate": 8.09165970869283e-06, - "loss": 0.0854, + "loss": 0.2359, "step": 6418 }, { "epoch": 1.79, "learning_rate": 8.08980424900269e-06, - "loss": 0.2445, + "loss": 0.0171, "step": 6419 }, { "epoch": 1.79, "learning_rate": 8.087948789312553e-06, - "loss": 0.1954, + "loss": 0.0622, "step": 6420 }, { "epoch": 1.79, "learning_rate": 8.086093329622414e-06, - "loss": 0.1356, + "loss": 0.094, "step": 6421 }, { "epoch": 1.79, "learning_rate": 8.084237869932277e-06, - "loss": 0.139, + "loss": 0.0631, "step": 6422 }, { "epoch": 1.79, "learning_rate": 8.082382410242137e-06, - "loss": 0.0829, + "loss": 0.0267, "step": 6423 }, { "epoch": 1.79, "learning_rate": 8.080526950552001e-06, - "loss": 0.1352, + "loss": 0.0139, "step": 6424 }, { "epoch": 1.79, "learning_rate": 8.078671490861861e-06, - "loss": 0.0843, + "loss": 0.0534, "step": 6425 }, { "epoch": 1.79, "learning_rate": 8.076816031171723e-06, - "loss": 0.1963, + "loss": 0.0487, "step": 6426 }, { "epoch": 1.79, "learning_rate": 8.074960571481585e-06, - "loss": 0.1873, + "loss": 0.0166, "step": 6427 }, { "epoch": 1.79, "learning_rate": 8.073105111791447e-06, - "loss": 0.2981, + "loss": 0.04, "step": 6428 }, { "epoch": 1.79, "learning_rate": 8.071249652101309e-06, - "loss": 0.141, + "loss": 0.1822, "step": 6429 }, { "epoch": 1.79, "learning_rate": 8.06939419241117e-06, - "loss": 0.2999, + "loss": 0.0367, "step": 6430 }, { "epoch": 1.79, "learning_rate": 8.067538732721032e-06, - "loss": 0.0858, + "loss": 0.0603, "step": 6431 }, { "epoch": 1.79, "learning_rate": 8.065683273030894e-06, - "loss": 0.084, + "loss": 0.0561, "step": 6432 }, { "epoch": 1.79, "learning_rate": 8.063827813340756e-06, - "loss": 0.031, + "loss": 0.0192, "step": 6433 }, { "epoch": 1.79, "learning_rate": 8.061972353650618e-06, - "loss": 0.1386, + "loss": 0.1937, "step": 6434 }, { "epoch": 1.79, "learning_rate": 8.06011689396048e-06, - "loss": 0.0327, + "loss": 0.1759, "step": 6435 }, { "epoch": 1.79, "learning_rate": 8.058261434270342e-06, - "loss": 0.3016, + "loss": 0.0145, "step": 6436 }, { "epoch": 1.79, "learning_rate": 8.056405974580202e-06, - "loss": 0.1367, + "loss": 0.0848, "step": 6437 }, { "epoch": 1.79, "learning_rate": 8.054550514890065e-06, - "loss": 0.1358, + "loss": 0.0227, "step": 6438 }, { "epoch": 1.79, "learning_rate": 8.052695055199926e-06, - "loss": 0.0866, + "loss": 0.0845, "step": 6439 }, { "epoch": 1.79, "learning_rate": 8.050839595509789e-06, - "loss": 0.2457, + "loss": 0.0754, "step": 6440 }, { "epoch": 1.79, "learning_rate": 8.04898413581965e-06, - "loss": 0.1892, + "loss": 0.0191, "step": 6441 }, { "epoch": 1.79, "learning_rate": 8.047128676129511e-06, - "loss": 0.1393, + "loss": 0.019, "step": 6442 }, { "epoch": 1.79, "learning_rate": 8.045273216439373e-06, - "loss": 0.0865, + "loss": 0.0179, "step": 6443 }, { "epoch": 1.79, "learning_rate": 8.043417756749235e-06, - "loss": 0.1374, + "loss": 0.0804, "step": 6444 }, { "epoch": 1.79, "learning_rate": 8.041562297059097e-06, - "loss": 0.4516, + "loss": 0.0572, "step": 6445 }, { "epoch": 1.79, "learning_rate": 8.039706837368959e-06, - "loss": 0.189, + "loss": 0.1696, "step": 6446 }, { "epoch": 1.79, "learning_rate": 8.03785137767882e-06, - "loss": 0.1438, + "loss": 0.0244, "step": 6447 }, { "epoch": 1.79, "learning_rate": 8.035995917988682e-06, - "loss": 0.191, + "loss": 0.1089, "step": 6448 }, { "epoch": 1.79, "learning_rate": 8.034140458298544e-06, - "loss": 0.1405, + "loss": 0.0912, "step": 6449 }, { "epoch": 1.8, "learning_rate": 8.032284998608406e-06, - "loss": 0.135, + "loss": 0.041, "step": 6450 }, { "epoch": 1.8, "learning_rate": 8.030429538918268e-06, - "loss": 0.1927, + "loss": 0.0245, "step": 6451 }, { "epoch": 1.8, "learning_rate": 8.02857407922813e-06, - "loss": 0.0852, + "loss": 0.0208, "step": 6452 }, { "epoch": 1.8, "learning_rate": 8.026718619537992e-06, - "loss": 0.1946, + "loss": 0.0596, "step": 6453 }, { "epoch": 1.8, "learning_rate": 8.024863159847854e-06, - "loss": 0.0883, + "loss": 0.0536, "step": 6454 }, { "epoch": 1.8, "learning_rate": 8.023007700157714e-06, - "loss": 0.0863, + "loss": 0.0831, "step": 6455 }, { "epoch": 1.8, "learning_rate": 8.021152240467577e-06, - "loss": 0.1892, + "loss": 0.0948, "step": 6456 }, { "epoch": 1.8, "learning_rate": 8.019296780777438e-06, - "loss": 0.0889, + "loss": 0.0097, "step": 6457 }, { "epoch": 1.8, "learning_rate": 8.017441321087301e-06, - "loss": 0.139, + "loss": 0.2536, "step": 6458 }, { "epoch": 1.8, "learning_rate": 8.015585861397161e-06, - "loss": 0.0849, + "loss": 0.0683, "step": 6459 }, { "epoch": 1.8, "learning_rate": 8.013730401707023e-06, - "loss": 0.1387, + "loss": 0.0758, "step": 6460 }, { "epoch": 1.8, "learning_rate": 8.011874942016885e-06, - "loss": 0.352, + "loss": 0.0073, "step": 6461 }, { "epoch": 1.8, "learning_rate": 8.010019482326747e-06, - "loss": 0.2374, + "loss": 0.1212, "step": 6462 }, { "epoch": 1.8, "learning_rate": 8.008164022636609e-06, - "loss": 0.1884, + "loss": 0.1088, "step": 6463 }, { "epoch": 1.8, "learning_rate": 8.00630856294647e-06, - "loss": 0.1822, + "loss": 0.1219, "step": 6464 }, { "epoch": 1.8, "learning_rate": 8.004453103256333e-06, - "loss": 0.2403, + "loss": 0.0587, "step": 6465 }, { "epoch": 1.8, "learning_rate": 8.002597643566194e-06, - "loss": 0.0864, + "loss": 0.0983, "step": 6466 }, { "epoch": 1.8, "learning_rate": 8.000742183876056e-06, - "loss": 0.0375, + "loss": 0.0082, "step": 6467 }, { "epoch": 1.8, "learning_rate": 7.998886724185918e-06, - "loss": 0.0876, + "loss": 0.1787, "step": 6468 }, { "epoch": 1.8, "learning_rate": 7.99703126449578e-06, - "loss": 0.1446, + "loss": 0.0973, "step": 6469 }, { "epoch": 1.8, "learning_rate": 7.995175804805642e-06, - "loss": 0.14, + "loss": 0.0085, "step": 6470 }, { "epoch": 1.8, "learning_rate": 7.993320345115502e-06, - "loss": 0.0905, + "loss": 0.1693, "step": 6471 }, { "epoch": 1.8, "learning_rate": 7.991464885425366e-06, - "loss": 0.0865, + "loss": 0.1635, "step": 6472 }, { "epoch": 1.8, "learning_rate": 7.989609425735226e-06, - "loss": 0.0362, + "loss": 0.1741, "step": 6473 }, { "epoch": 1.8, "learning_rate": 7.98775396604509e-06, - "loss": 0.1449, + "loss": 0.0542, "step": 6474 }, { "epoch": 1.8, "learning_rate": 7.98589850635495e-06, - "loss": 0.1409, + "loss": 0.1175, "step": 6475 }, { "epoch": 1.8, "learning_rate": 7.984043046664811e-06, - "loss": 0.137, + "loss": 0.1195, "step": 6476 }, { "epoch": 1.8, "learning_rate": 7.982187586974673e-06, - "loss": 0.1404, + "loss": 0.0241, "step": 6477 }, { "epoch": 1.8, "learning_rate": 7.980332127284535e-06, - "loss": 0.0352, + "loss": 0.1084, "step": 6478 }, { "epoch": 1.8, "learning_rate": 7.978476667594397e-06, - "loss": 0.0881, + "loss": 0.051, "step": 6479 }, { "epoch": 1.8, "learning_rate": 7.976621207904259e-06, - "loss": 0.0895, + "loss": 0.0344, "step": 6480 }, { "epoch": 1.8, "learning_rate": 7.97476574821412e-06, - "loss": 0.0849, + "loss": 0.065, "step": 6481 }, { "epoch": 1.8, "learning_rate": 7.972910288523983e-06, - "loss": 0.0329, + "loss": 0.0411, "step": 6482 }, { "epoch": 1.8, "learning_rate": 7.971054828833844e-06, - "loss": 0.192, + "loss": 0.0611, "step": 6483 }, { "epoch": 1.8, "learning_rate": 7.969199369143706e-06, - "loss": 0.0859, + "loss": 0.0856, "step": 6484 }, { "epoch": 1.8, "learning_rate": 7.967343909453568e-06, - "loss": 0.1927, + "loss": 0.0285, "step": 6485 }, { "epoch": 1.81, "learning_rate": 7.96548844976343e-06, - "loss": 0.0849, + "loss": 0.0763, "step": 6486 }, { "epoch": 1.81, "learning_rate": 7.963632990073292e-06, - "loss": 0.1416, + "loss": 0.0601, "step": 6487 }, { "epoch": 1.81, "learning_rate": 7.961777530383154e-06, - "loss": 0.1396, + "loss": 0.053, "step": 6488 }, { "epoch": 1.81, "learning_rate": 7.959922070693014e-06, - "loss": 0.1361, + "loss": 0.0904, "step": 6489 }, { "epoch": 1.81, "learning_rate": 7.958066611002878e-06, - "loss": 0.2974, + "loss": 0.0241, "step": 6490 }, { "epoch": 1.81, "learning_rate": 7.956211151312738e-06, - "loss": 0.0824, + "loss": 0.0387, "step": 6491 }, { "epoch": 1.81, "learning_rate": 7.954355691622601e-06, - "loss": 0.193, + "loss": 0.1194, "step": 6492 }, { "epoch": 1.81, "learning_rate": 7.952500231932461e-06, - "loss": 0.1378, + "loss": 0.1266, "step": 6493 }, { "epoch": 1.81, "learning_rate": 7.950644772242323e-06, - "loss": 0.0837, + "loss": 0.015, "step": 6494 }, { "epoch": 1.81, "learning_rate": 7.948789312552185e-06, - "loss": 0.1907, + "loss": 0.072, "step": 6495 }, { "epoch": 1.81, "learning_rate": 7.946933852862047e-06, - "loss": 0.0812, + "loss": 0.1128, "step": 6496 }, { "epoch": 1.81, "learning_rate": 7.945078393171909e-06, - "loss": 0.1366, + "loss": 0.024, "step": 6497 }, { "epoch": 1.81, "learning_rate": 7.94322293348177e-06, - "loss": 0.1989, + "loss": 0.0279, "step": 6498 }, { "epoch": 1.81, "learning_rate": 7.941367473791633e-06, - "loss": 0.0859, + "loss": 0.2464, "step": 6499 }, { "epoch": 1.81, "learning_rate": 7.939512014101495e-06, - "loss": 0.1327, + "loss": 0.0213, "step": 6500 }, { "epoch": 1.81, "learning_rate": 7.937656554411356e-06, - "loss": 0.1426, + "loss": 0.06, "step": 6501 }, { "epoch": 1.81, "learning_rate": 7.935801094721218e-06, - "loss": 0.1413, + "loss": 0.0175, "step": 6502 }, { "epoch": 1.81, "learning_rate": 7.93394563503108e-06, - "loss": 0.1375, + "loss": 0.0935, "step": 6503 }, { "epoch": 1.81, "learning_rate": 7.932090175340942e-06, - "loss": 0.2501, + "loss": 0.1575, "step": 6504 }, { "epoch": 1.81, "learning_rate": 7.930234715650802e-06, - "loss": 0.305, + "loss": 0.0124, "step": 6505 }, { "epoch": 1.81, "learning_rate": 7.928379255960666e-06, - "loss": 0.0822, + "loss": 0.0427, "step": 6506 }, { "epoch": 1.81, "learning_rate": 7.926523796270526e-06, - "loss": 0.1387, + "loss": 0.1696, "step": 6507 }, { "epoch": 1.81, "learning_rate": 7.92466833658039e-06, - "loss": 0.142, + "loss": 0.0158, "step": 6508 }, { "epoch": 1.81, "learning_rate": 7.92281287689025e-06, - "loss": 0.2939, + "loss": 0.0175, "step": 6509 }, { "epoch": 1.81, "learning_rate": 7.920957417200113e-06, - "loss": 0.0889, + "loss": 0.0142, "step": 6510 }, { "epoch": 1.81, "learning_rate": 7.919101957509973e-06, - "loss": 0.031, + "loss": 0.2279, "step": 6511 }, { "epoch": 1.81, "learning_rate": 7.917246497819835e-06, - "loss": 0.1955, + "loss": 0.1629, "step": 6512 }, { "epoch": 1.81, "learning_rate": 7.915391038129697e-06, - "loss": 0.2984, + "loss": 0.0408, "step": 6513 }, { "epoch": 1.81, "learning_rate": 7.913535578439559e-06, - "loss": 0.3524, + "loss": 0.0594, "step": 6514 }, { "epoch": 1.81, "learning_rate": 7.911680118749421e-06, - "loss": 0.1398, + "loss": 0.0613, "step": 6515 }, { "epoch": 1.81, "learning_rate": 7.909824659059283e-06, - "loss": 0.1959, + "loss": 0.0542, "step": 6516 }, { "epoch": 1.81, "learning_rate": 7.907969199369145e-06, - "loss": 0.1378, + "loss": 0.0142, "step": 6517 }, { "epoch": 1.81, "learning_rate": 7.906113739679007e-06, - "loss": 0.0856, + "loss": 0.2222, "step": 6518 }, { "epoch": 1.81, "learning_rate": 7.904258279988868e-06, - "loss": 0.2482, + "loss": 0.154, "step": 6519 }, { "epoch": 1.81, "learning_rate": 7.90240282029873e-06, - "loss": 0.1446, + "loss": 0.1044, "step": 6520 }, { "epoch": 1.81, "learning_rate": 7.900547360608592e-06, - "loss": 0.2355, + "loss": 0.146, "step": 6521 }, { "epoch": 1.82, "learning_rate": 7.898691900918454e-06, - "loss": 0.0866, + "loss": 0.0613, "step": 6522 }, { "epoch": 1.82, "learning_rate": 7.896836441228314e-06, - "loss": 0.1423, + "loss": 0.0625, "step": 6523 }, { "epoch": 1.82, "learning_rate": 7.894980981538178e-06, - "loss": 0.0905, + "loss": 0.1265, "step": 6524 }, { "epoch": 1.82, "learning_rate": 7.893125521848038e-06, - "loss": 0.0356, + "loss": 0.1213, "step": 6525 }, { "epoch": 1.82, "learning_rate": 7.891270062157902e-06, - "loss": 0.0884, + "loss": 0.0613, "step": 6526 }, { "epoch": 1.82, "learning_rate": 7.889414602467762e-06, - "loss": 0.1955, + "loss": 0.0503, "step": 6527 }, { "epoch": 1.82, "learning_rate": 7.887559142777624e-06, - "loss": 0.1881, + "loss": 0.0476, "step": 6528 }, { "epoch": 1.82, "learning_rate": 7.885703683087485e-06, - "loss": 0.1902, + "loss": 0.1075, "step": 6529 }, { "epoch": 1.82, "learning_rate": 7.883848223397347e-06, - "loss": 0.4391, + "loss": 0.025, "step": 6530 }, { "epoch": 1.82, "learning_rate": 7.88199276370721e-06, - "loss": 0.1931, + "loss": 0.1376, "step": 6531 }, { "epoch": 1.82, "learning_rate": 7.880137304017071e-06, - "loss": 0.0383, + "loss": 0.0334, "step": 6532 }, { "epoch": 1.82, "learning_rate": 7.878281844326933e-06, - "loss": 0.0893, + "loss": 0.0201, "step": 6533 }, { "epoch": 1.82, "learning_rate": 7.876426384636793e-06, - "loss": 0.1405, + "loss": 0.1766, "step": 6534 }, { "epoch": 1.82, "learning_rate": 7.874570924946657e-06, - "loss": 0.0375, + "loss": 0.0331, "step": 6535 }, { "epoch": 1.82, "learning_rate": 7.872715465256517e-06, - "loss": 0.0911, + "loss": 0.0315, "step": 6536 }, { "epoch": 1.82, "learning_rate": 7.87086000556638e-06, - "loss": 0.0367, + "loss": 0.0187, "step": 6537 }, { "epoch": 1.82, "learning_rate": 7.86900454587624e-06, - "loss": 0.0373, + "loss": 0.1294, "step": 6538 }, { "epoch": 1.82, "learning_rate": 7.867149086186102e-06, - "loss": 0.0877, + "loss": 0.1224, "step": 6539 }, { "epoch": 1.82, "learning_rate": 7.865293626495964e-06, - "loss": 0.1938, + "loss": 0.0862, "step": 6540 }, { "epoch": 1.82, "learning_rate": 7.863438166805826e-06, - "loss": 0.1869, + "loss": 0.1208, "step": 6541 }, { "epoch": 1.82, "learning_rate": 7.861582707115688e-06, - "loss": 0.2472, + "loss": 0.0726, "step": 6542 }, { "epoch": 1.82, "learning_rate": 7.85972724742555e-06, - "loss": 0.0884, + "loss": 0.0214, "step": 6543 }, { "epoch": 1.82, "learning_rate": 7.857871787735412e-06, - "loss": 0.1407, + "loss": 0.023, "step": 6544 }, { "epoch": 1.82, "learning_rate": 7.856016328045274e-06, - "loss": 0.1893, + "loss": 0.063, "step": 6545 }, { "epoch": 1.82, "learning_rate": 7.854160868355136e-06, - "loss": 0.1912, + "loss": 0.0511, "step": 6546 }, { "epoch": 1.82, "learning_rate": 7.852305408664997e-06, - "loss": 0.1394, + "loss": 0.0855, "step": 6547 }, { "epoch": 1.82, "learning_rate": 7.85044994897486e-06, - "loss": 0.1917, + "loss": 0.0176, "step": 6548 }, { "epoch": 1.82, "learning_rate": 7.848594489284721e-06, - "loss": 0.1369, + "loss": 0.1376, "step": 6549 }, { "epoch": 1.82, "learning_rate": 7.846739029594583e-06, - "loss": 0.0841, + "loss": 0.0476, "step": 6550 }, { "epoch": 1.82, "learning_rate": 7.844883569904445e-06, - "loss": 0.0881, + "loss": 0.1995, "step": 6551 }, { "epoch": 1.82, "learning_rate": 7.843028110214305e-06, - "loss": 0.0337, + "loss": 0.0694, "step": 6552 }, { "epoch": 1.82, "learning_rate": 7.841172650524169e-06, - "loss": 0.1357, + "loss": 0.0189, "step": 6553 }, { "epoch": 1.82, "learning_rate": 7.839317190834029e-06, - "loss": 0.0885, + "loss": 0.1731, "step": 6554 }, { "epoch": 1.82, "learning_rate": 7.837461731143892e-06, - "loss": 0.0837, + "loss": 0.1034, "step": 6555 }, { "epoch": 1.82, "learning_rate": 7.835606271453753e-06, - "loss": 0.1956, + "loss": 0.1493, "step": 6556 }, { "epoch": 1.82, "learning_rate": 7.833750811763614e-06, - "loss": 0.1388, + "loss": 0.0314, "step": 6557 }, { "epoch": 1.83, "learning_rate": 7.831895352073476e-06, - "loss": 0.0854, + "loss": 0.062, "step": 6558 }, { "epoch": 1.83, "learning_rate": 7.830039892383338e-06, - "loss": 0.0885, + "loss": 0.0674, "step": 6559 }, { "epoch": 1.83, "learning_rate": 7.8281844326932e-06, - "loss": 0.0852, + "loss": 0.0995, "step": 6560 }, { "epoch": 1.83, "learning_rate": 7.826328973003062e-06, - "loss": 0.0868, + "loss": 0.0613, "step": 6561 }, { "epoch": 1.83, "learning_rate": 7.824473513312924e-06, - "loss": 0.1925, + "loss": 0.0567, "step": 6562 }, { "epoch": 1.83, "learning_rate": 7.822618053622786e-06, - "loss": 0.1354, + "loss": 0.0195, "step": 6563 }, { "epoch": 1.83, "learning_rate": 7.820762593932648e-06, - "loss": 0.1942, + "loss": 0.0368, "step": 6564 }, { "epoch": 1.83, "learning_rate": 7.81890713424251e-06, - "loss": 0.0838, + "loss": 0.0183, "step": 6565 }, { "epoch": 1.83, "learning_rate": 7.817051674552371e-06, - "loss": 0.4723, + "loss": 0.0665, "step": 6566 }, { "epoch": 1.83, "learning_rate": 7.815196214862233e-06, - "loss": 0.0838, + "loss": 0.1093, "step": 6567 }, { "epoch": 1.83, "learning_rate": 7.813340755172093e-06, - "loss": 0.0305, + "loss": 0.0469, "step": 6568 }, { "epoch": 1.83, "learning_rate": 7.811485295481957e-06, - "loss": 0.253, + "loss": 0.2039, "step": 6569 }, { "epoch": 1.83, "learning_rate": 7.809629835791817e-06, - "loss": 0.0296, + "loss": 0.0205, "step": 6570 }, { "epoch": 1.83, "learning_rate": 7.80777437610168e-06, - "loss": 0.1913, + "loss": 0.021, "step": 6571 }, { "epoch": 1.83, "learning_rate": 7.80591891641154e-06, - "loss": 0.0848, + "loss": 0.0275, "step": 6572 }, { "epoch": 1.83, "learning_rate": 7.804063456721404e-06, - "loss": 0.2463, + "loss": 0.0222, "step": 6573 }, { "epoch": 1.83, "learning_rate": 7.802207997031265e-06, - "loss": 0.0831, + "loss": 0.0213, "step": 6574 }, { "epoch": 1.83, "learning_rate": 7.800352537341126e-06, - "loss": 0.0844, + "loss": 0.1022, "step": 6575 }, { "epoch": 1.83, "learning_rate": 7.798497077650988e-06, - "loss": 0.2498, + "loss": 0.0533, "step": 6576 }, { "epoch": 1.83, "learning_rate": 7.79664161796085e-06, - "loss": 0.0855, + "loss": 0.0517, "step": 6577 }, { "epoch": 1.83, "learning_rate": 7.794786158270712e-06, - "loss": 0.0834, + "loss": 0.1829, "step": 6578 }, { "epoch": 1.83, "learning_rate": 7.792930698580574e-06, - "loss": 0.1377, + "loss": 0.0724, "step": 6579 }, { "epoch": 1.83, "learning_rate": 7.791075238890436e-06, - "loss": 0.1375, + "loss": 0.019, "step": 6580 }, { "epoch": 1.83, "learning_rate": 7.789219779200298e-06, - "loss": 0.0855, + "loss": 0.0102, "step": 6581 }, { "epoch": 1.83, "learning_rate": 7.78736431951016e-06, - "loss": 0.0824, + "loss": 0.141, "step": 6582 }, { "epoch": 1.83, "learning_rate": 7.785508859820021e-06, - "loss": 0.256, + "loss": 0.0777, "step": 6583 }, { "epoch": 1.83, "learning_rate": 7.783653400129883e-06, - "loss": 0.1939, + "loss": 0.037, "step": 6584 }, { "epoch": 1.83, "learning_rate": 7.781797940439745e-06, - "loss": 0.0819, + "loss": 0.1314, "step": 6585 }, { "epoch": 1.83, "learning_rate": 7.779942480749605e-06, - "loss": 0.0866, + "loss": 0.075, "step": 6586 }, { "epoch": 1.83, "learning_rate": 7.778087021059469e-06, - "loss": 0.0877, + "loss": 0.1301, "step": 6587 }, { "epoch": 1.83, "learning_rate": 7.776231561369329e-06, - "loss": 0.1925, + "loss": 0.0512, "step": 6588 }, { "epoch": 1.83, "learning_rate": 7.774376101679193e-06, - "loss": 0.0826, + "loss": 0.1129, "step": 6589 }, { "epoch": 1.83, "learning_rate": 7.772520641989053e-06, - "loss": 0.0856, + "loss": 0.0429, "step": 6590 }, { "epoch": 1.83, "learning_rate": 7.770665182298915e-06, - "loss": 0.1918, + "loss": 0.1537, "step": 6591 }, { "epoch": 1.83, "learning_rate": 7.768809722608777e-06, - "loss": 0.194, + "loss": 0.0094, "step": 6592 }, { "epoch": 1.83, "learning_rate": 7.766954262918638e-06, - "loss": 0.188, + "loss": 0.1761, "step": 6593 }, { "epoch": 1.84, "learning_rate": 7.7650988032285e-06, - "loss": 0.1375, + "loss": 0.0613, "step": 6594 }, { "epoch": 1.84, "learning_rate": 7.763243343538362e-06, - "loss": 0.0303, + "loss": 0.0764, "step": 6595 }, { "epoch": 1.84, "learning_rate": 7.761387883848224e-06, - "loss": 0.0853, + "loss": 0.0509, "step": 6596 }, { "epoch": 1.84, "learning_rate": 7.759532424158086e-06, - "loss": 0.1977, + "loss": 0.1102, "step": 6597 }, { "epoch": 1.84, "learning_rate": 7.757676964467948e-06, - "loss": 0.1398, + "loss": 0.0647, "step": 6598 }, { "epoch": 1.84, "learning_rate": 7.75582150477781e-06, - "loss": 0.0302, + "loss": 0.0113, "step": 6599 }, { "epoch": 1.84, "learning_rate": 7.753966045087671e-06, - "loss": 0.193, + "loss": 0.0812, "step": 6600 }, { "epoch": 1.84, "learning_rate": 7.752110585397533e-06, - "loss": 0.3567, + "loss": 0.0223, "step": 6601 }, { "epoch": 1.84, "learning_rate": 7.750255125707394e-06, - "loss": 0.0305, + "loss": 0.0512, "step": 6602 }, { "epoch": 1.84, "learning_rate": 7.748399666017257e-06, - "loss": 0.1388, + "loss": 0.0147, "step": 6603 }, { "epoch": 1.84, "learning_rate": 7.746544206327117e-06, - "loss": 0.1916, + "loss": 0.0159, "step": 6604 }, { "epoch": 1.84, "learning_rate": 7.74468874663698e-06, - "loss": 0.1415, + "loss": 0.1801, "step": 6605 }, { "epoch": 1.84, "learning_rate": 7.742833286946841e-06, - "loss": 0.1354, + "loss": 0.0785, "step": 6606 }, { "epoch": 1.84, "learning_rate": 7.740977827256705e-06, - "loss": 0.0308, + "loss": 0.0769, "step": 6607 }, { "epoch": 1.84, "learning_rate": 7.739122367566565e-06, - "loss": 0.1379, + "loss": 0.0556, "step": 6608 }, { "epoch": 1.84, "learning_rate": 7.737266907876427e-06, - "loss": 0.1393, + "loss": 0.0205, "step": 6609 }, { "epoch": 1.84, "learning_rate": 7.735411448186288e-06, - "loss": 0.1946, + "loss": 0.0247, "step": 6610 }, { "epoch": 1.84, "learning_rate": 7.73355598849615e-06, - "loss": 0.1395, + "loss": 0.0456, "step": 6611 }, { "epoch": 1.84, "learning_rate": 7.731700528806012e-06, - "loss": 0.1985, + "loss": 0.0948, "step": 6612 }, { "epoch": 1.84, "learning_rate": 7.729845069115874e-06, - "loss": 0.0832, + "loss": 0.2907, "step": 6613 }, { "epoch": 1.84, "learning_rate": 7.727989609425736e-06, - "loss": 0.1428, + "loss": 0.0702, "step": 6614 }, { "epoch": 1.84, "learning_rate": 7.726134149735598e-06, - "loss": 0.0847, + "loss": 0.0467, "step": 6615 }, { "epoch": 1.84, "learning_rate": 7.72427869004546e-06, - "loss": 0.1386, + "loss": 0.0132, "step": 6616 }, { "epoch": 1.84, "learning_rate": 7.722423230355322e-06, - "loss": 0.2527, + "loss": 0.0572, "step": 6617 }, { "epoch": 1.84, "learning_rate": 7.720567770665183e-06, - "loss": 0.0847, + "loss": 0.0162, "step": 6618 }, { "epoch": 1.84, "learning_rate": 7.718712310975045e-06, - "loss": 0.1978, + "loss": 0.1163, "step": 6619 }, { "epoch": 1.84, "learning_rate": 7.716856851284906e-06, - "loss": 0.2497, + "loss": 0.1446, "step": 6620 }, { "epoch": 1.84, "learning_rate": 7.715001391594769e-06, - "loss": 0.0327, + "loss": 0.0891, "step": 6621 }, { "epoch": 1.84, "learning_rate": 7.71314593190463e-06, - "loss": 0.2389, + "loss": 0.0971, "step": 6622 }, { "epoch": 1.84, "learning_rate": 7.711290472214493e-06, - "loss": 0.0856, + "loss": 0.0845, "step": 6623 }, { "epoch": 1.84, "learning_rate": 7.709435012524353e-06, - "loss": 0.1851, + "loss": 0.1042, "step": 6624 }, { "epoch": 1.84, "learning_rate": 7.707579552834215e-06, - "loss": 0.1442, + "loss": 0.1445, "step": 6625 }, { "epoch": 1.84, "learning_rate": 7.705724093144077e-06, - "loss": 0.0856, + "loss": 0.0783, "step": 6626 }, { "epoch": 1.84, "learning_rate": 7.703868633453939e-06, - "loss": 0.0325, + "loss": 0.0534, "step": 6627 }, { "epoch": 1.84, "learning_rate": 7.7020131737638e-06, - "loss": 0.0882, + "loss": 0.1025, "step": 6628 }, { "epoch": 1.84, "learning_rate": 7.700157714073662e-06, - "loss": 0.0864, + "loss": 0.1953, "step": 6629 }, { "epoch": 1.85, "learning_rate": 7.698302254383524e-06, - "loss": 0.1401, + "loss": 0.0508, "step": 6630 }, { "epoch": 1.85, "learning_rate": 7.696446794693386e-06, - "loss": 0.0857, + "loss": 0.0571, "step": 6631 }, { "epoch": 1.85, "learning_rate": 7.694591335003248e-06, - "loss": 0.1911, + "loss": 0.0242, "step": 6632 }, { "epoch": 1.85, "learning_rate": 7.69273587531311e-06, - "loss": 0.1393, + "loss": 0.0419, "step": 6633 }, { "epoch": 1.85, "learning_rate": 7.690880415622972e-06, - "loss": 0.0831, + "loss": 0.0542, "step": 6634 }, { "epoch": 1.85, "learning_rate": 7.689024955932834e-06, - "loss": 0.1953, + "loss": 0.0513, "step": 6635 }, { "epoch": 1.85, "learning_rate": 7.687169496242695e-06, - "loss": 0.0317, + "loss": 0.038, "step": 6636 }, { "epoch": 1.85, "learning_rate": 7.685314036552557e-06, - "loss": 0.0833, + "loss": 0.0302, "step": 6637 }, { "epoch": 1.85, "learning_rate": 7.683458576862417e-06, - "loss": 0.1409, + "loss": 0.0596, "step": 6638 }, { "epoch": 1.85, "learning_rate": 7.681603117172281e-06, - "loss": 0.1399, + "loss": 0.026, "step": 6639 }, { "epoch": 1.85, "learning_rate": 7.679747657482141e-06, - "loss": 0.1372, + "loss": 0.0178, "step": 6640 }, { "epoch": 1.85, "learning_rate": 7.677892197792005e-06, - "loss": 0.1426, + "loss": 0.0431, "step": 6641 }, { "epoch": 1.85, "learning_rate": 7.676036738101865e-06, - "loss": 0.0835, + "loss": 0.156, "step": 6642 }, { "epoch": 1.85, "learning_rate": 7.674181278411727e-06, - "loss": 0.14, + "loss": 0.1384, "step": 6643 }, { "epoch": 1.85, "learning_rate": 7.672325818721589e-06, - "loss": 0.0302, + "loss": 0.0903, "step": 6644 }, { "epoch": 1.85, "learning_rate": 7.67047035903145e-06, - "loss": 0.0843, + "loss": 0.0129, "step": 6645 }, { "epoch": 1.85, "learning_rate": 7.668614899341312e-06, - "loss": 0.1396, + "loss": 0.0608, "step": 6646 }, { "epoch": 1.85, "learning_rate": 7.666759439651174e-06, - "loss": 0.1381, + "loss": 0.1133, "step": 6647 }, { "epoch": 1.85, "learning_rate": 7.664903979961036e-06, - "loss": 0.0851, + "loss": 0.0099, "step": 6648 }, { "epoch": 1.85, "learning_rate": 7.663048520270898e-06, - "loss": 0.1362, + "loss": 0.0538, "step": 6649 }, { "epoch": 1.85, "learning_rate": 7.66119306058076e-06, - "loss": 0.1398, + "loss": 0.086, "step": 6650 }, { "epoch": 1.85, "learning_rate": 7.659337600890622e-06, - "loss": 0.1437, + "loss": 0.0184, "step": 6651 }, { "epoch": 1.85, "learning_rate": 7.657482141200484e-06, - "loss": 0.1971, + "loss": 0.0572, "step": 6652 }, { "epoch": 1.85, "learning_rate": 7.655626681510346e-06, - "loss": 0.0276, + "loss": 0.0403, "step": 6653 }, { "epoch": 1.85, "learning_rate": 7.653771221820206e-06, - "loss": 0.1917, + "loss": 0.0933, "step": 6654 }, { "epoch": 1.85, "learning_rate": 7.65191576213007e-06, - "loss": 0.1399, + "loss": 0.0134, "step": 6655 }, { "epoch": 1.85, "learning_rate": 7.65006030243993e-06, - "loss": 0.1992, + "loss": 0.0465, "step": 6656 }, { "epoch": 1.85, "learning_rate": 7.648204842749793e-06, - "loss": 0.3604, + "loss": 0.0724, "step": 6657 }, { "epoch": 1.85, "learning_rate": 7.646349383059653e-06, - "loss": 0.0833, + "loss": 0.0261, "step": 6658 }, { "epoch": 1.85, "learning_rate": 7.644493923369515e-06, - "loss": 0.2522, + "loss": 0.0201, "step": 6659 }, { "epoch": 1.85, "learning_rate": 7.642638463679377e-06, - "loss": 0.136, + "loss": 0.018, "step": 6660 }, { "epoch": 1.85, "learning_rate": 7.640783003989239e-06, - "loss": 0.086, + "loss": 0.0495, "step": 6661 }, { "epoch": 1.85, "learning_rate": 7.6389275442991e-06, - "loss": 0.0844, + "loss": 0.0526, "step": 6662 }, { "epoch": 1.85, "learning_rate": 7.637072084608963e-06, - "loss": 0.1416, + "loss": 0.0074, "step": 6663 }, { "epoch": 1.85, "learning_rate": 7.635216624918824e-06, - "loss": 0.0299, + "loss": 0.0602, "step": 6664 }, { "epoch": 1.85, "learning_rate": 7.633361165228686e-06, - "loss": 0.1413, + "loss": 0.0767, "step": 6665 }, { "epoch": 1.86, "learning_rate": 7.631505705538548e-06, - "loss": 0.0859, + "loss": 0.0471, "step": 6666 }, { "epoch": 1.86, "learning_rate": 7.62965024584841e-06, - "loss": 0.1404, + "loss": 0.0132, "step": 6667 }, { "epoch": 1.86, "learning_rate": 7.627794786158271e-06, - "loss": 0.0838, + "loss": 0.0432, "step": 6668 }, { "epoch": 1.86, "learning_rate": 7.625939326468134e-06, - "loss": 0.2484, + "loss": 0.1041, "step": 6669 }, { "epoch": 1.86, "learning_rate": 7.624083866777995e-06, - "loss": 0.2472, + "loss": 0.0261, "step": 6670 }, { "epoch": 1.86, "learning_rate": 7.6222284070878575e-06, - "loss": 0.0294, + "loss": 0.0447, "step": 6671 }, { "epoch": 1.86, "learning_rate": 7.6203729473977185e-06, - "loss": 0.1927, + "loss": 0.018, "step": 6672 }, { "epoch": 1.86, "learning_rate": 7.61851748770758e-06, - "loss": 0.1367, + "loss": 0.0104, "step": 6673 }, { "epoch": 1.86, "learning_rate": 7.616662028017442e-06, - "loss": 0.1398, + "loss": 0.0685, "step": 6674 }, { "epoch": 1.86, "learning_rate": 7.614806568327304e-06, - "loss": 0.144, + "loss": 0.0865, "step": 6675 }, { "epoch": 1.86, "learning_rate": 7.612951108637165e-06, - "loss": 0.1923, + "loss": 0.2324, "step": 6676 }, { "epoch": 1.86, "learning_rate": 7.611095648947028e-06, - "loss": 0.1397, + "loss": 0.0701, "step": 6677 }, { "epoch": 1.86, "learning_rate": 7.609240189256889e-06, - "loss": 0.0832, + "loss": 0.1287, "step": 6678 }, { "epoch": 1.86, "learning_rate": 7.607384729566752e-06, - "loss": 0.1391, + "loss": 0.0684, "step": 6679 }, { "epoch": 1.86, "learning_rate": 7.605529269876613e-06, - "loss": 0.0873, + "loss": 0.0694, "step": 6680 }, { "epoch": 1.86, "learning_rate": 7.603673810186474e-06, - "loss": 0.0868, + "loss": 0.0782, "step": 6681 }, { "epoch": 1.86, "learning_rate": 7.601818350496336e-06, - "loss": 0.1371, + "loss": 0.1078, "step": 6682 }, { "epoch": 1.86, "learning_rate": 7.599962890806197e-06, - "loss": 0.1454, + "loss": 0.2107, "step": 6683 }, { "epoch": 1.86, "learning_rate": 7.598107431116059e-06, - "loss": 0.0882, + "loss": 0.0509, "step": 6684 }, { "epoch": 1.86, "learning_rate": 7.596251971425921e-06, - "loss": 0.0848, + "loss": 0.083, "step": 6685 }, { "epoch": 1.86, "learning_rate": 7.594396511735783e-06, - "loss": 0.0308, + "loss": 0.0922, "step": 6686 }, { "epoch": 1.86, "learning_rate": 7.592541052045644e-06, - "loss": 0.1347, + "loss": 0.0259, "step": 6687 }, { "epoch": 1.86, "learning_rate": 7.590685592355507e-06, - "loss": 0.084, + "loss": 0.0508, "step": 6688 }, { "epoch": 1.86, "learning_rate": 7.588830132665368e-06, - "loss": 0.0293, + "loss": 0.1157, "step": 6689 }, { "epoch": 1.86, "learning_rate": 7.5869746729752305e-06, - "loss": 0.3055, + "loss": 0.091, "step": 6690 }, { "epoch": 1.86, "learning_rate": 7.5851192132850915e-06, - "loss": 0.1429, + "loss": 0.0993, "step": 6691 }, { "epoch": 1.86, "learning_rate": 7.583263753594953e-06, - "loss": 0.1421, + "loss": 0.1707, "step": 6692 }, { "epoch": 1.86, "learning_rate": 7.581408293904815e-06, - "loss": 0.029, + "loss": 0.0533, "step": 6693 }, { "epoch": 1.86, "learning_rate": 7.579552834214677e-06, - "loss": 0.0837, + "loss": 0.1614, "step": 6694 }, { "epoch": 1.86, "learning_rate": 7.577697374524538e-06, - "loss": 0.1928, + "loss": 0.0229, "step": 6695 }, { "epoch": 1.86, "learning_rate": 7.575841914834401e-06, - "loss": 0.2489, + "loss": 0.0576, "step": 6696 }, { "epoch": 1.86, "learning_rate": 7.573986455144262e-06, - "loss": 0.084, + "loss": 0.1486, "step": 6697 }, { "epoch": 1.86, "learning_rate": 7.572130995454125e-06, - "loss": 0.1413, + "loss": 0.0281, "step": 6698 }, { "epoch": 1.86, "learning_rate": 7.570275535763986e-06, - "loss": 0.0885, + "loss": 0.0228, "step": 6699 }, { "epoch": 1.86, "learning_rate": 7.568420076073848e-06, - "loss": 0.0271, + "loss": 0.0259, "step": 6700 }, { "epoch": 1.87, "learning_rate": 7.566564616383709e-06, - "loss": 0.1409, + "loss": 0.0404, "step": 6701 }, { "epoch": 1.87, "learning_rate": 7.564709156693571e-06, - "loss": 0.1359, + "loss": 0.1218, "step": 6702 }, { "epoch": 1.87, "learning_rate": 7.562853697003433e-06, - "loss": 0.2513, + "loss": 0.0291, "step": 6703 }, { "epoch": 1.87, "learning_rate": 7.560998237313295e-06, - "loss": 0.1359, + "loss": 0.0329, "step": 6704 }, { "epoch": 1.87, "learning_rate": 7.559142777623156e-06, - "loss": 0.1898, + "loss": 0.0592, "step": 6705 }, { "epoch": 1.87, "learning_rate": 7.557287317933019e-06, - "loss": 0.1957, + "loss": 0.0258, "step": 6706 }, { "epoch": 1.87, "learning_rate": 7.55543185824288e-06, - "loss": 0.0834, + "loss": 0.058, "step": 6707 }, { "epoch": 1.87, "learning_rate": 7.5535763985527425e-06, - "loss": 0.0821, + "loss": 0.0694, "step": 6708 }, { "epoch": 1.87, "learning_rate": 7.5517209388626035e-06, - "loss": 0.14, + "loss": 0.0947, "step": 6709 }, { "epoch": 1.87, "learning_rate": 7.549865479172465e-06, - "loss": 0.0822, + "loss": 0.0903, "step": 6710 }, { "epoch": 1.87, "learning_rate": 7.548010019482327e-06, - "loss": 0.1975, + "loss": 0.0415, "step": 6711 }, { "epoch": 1.87, "learning_rate": 7.546154559792189e-06, - "loss": 0.3575, + "loss": 0.0156, "step": 6712 }, { "epoch": 1.87, "learning_rate": 7.54429910010205e-06, - "loss": 0.2524, + "loss": 0.2835, "step": 6713 }, { "epoch": 1.87, "learning_rate": 7.542443640411913e-06, - "loss": 0.0807, + "loss": 0.0423, "step": 6714 }, { "epoch": 1.87, "learning_rate": 7.540588180721774e-06, - "loss": 0.0293, + "loss": 0.1079, "step": 6715 }, { "epoch": 1.87, "learning_rate": 7.538732721031637e-06, - "loss": 0.0312, + "loss": 0.065, "step": 6716 }, { "epoch": 1.87, "learning_rate": 7.536877261341498e-06, - "loss": 0.3057, + "loss": 0.0687, "step": 6717 }, { "epoch": 1.87, "learning_rate": 7.5350218016513595e-06, - "loss": 0.1955, + "loss": 0.1151, "step": 6718 }, { "epoch": 1.87, "learning_rate": 7.533166341961221e-06, - "loss": 0.088, + "loss": 0.0092, "step": 6719 }, { "epoch": 1.87, "learning_rate": 7.531310882271083e-06, - "loss": 0.0843, + "loss": 0.0557, "step": 6720 }, { "epoch": 1.87, "learning_rate": 7.529455422580944e-06, - "loss": 0.0837, + "loss": 0.0195, "step": 6721 }, { "epoch": 1.87, "learning_rate": 7.527599962890807e-06, - "loss": 0.1418, + "loss": 0.0852, "step": 6722 }, { "epoch": 1.87, "learning_rate": 7.525744503200668e-06, - "loss": 0.0869, + "loss": 0.0196, "step": 6723 }, { "epoch": 1.87, "learning_rate": 7.523889043510531e-06, - "loss": 0.0853, + "loss": 0.0798, "step": 6724 }, { "epoch": 1.87, "learning_rate": 7.522033583820392e-06, - "loss": 0.1884, + "loss": 0.0866, "step": 6725 }, { "epoch": 1.87, "learning_rate": 7.5201781241302545e-06, - "loss": 0.2493, + "loss": 0.0916, "step": 6726 }, { "epoch": 1.87, "learning_rate": 7.5183226644401155e-06, - "loss": 0.0311, + "loss": 0.0155, "step": 6727 }, { "epoch": 1.87, "learning_rate": 7.516467204749977e-06, - "loss": 0.0845, + "loss": 0.078, "step": 6728 }, { "epoch": 1.87, "learning_rate": 7.514611745059839e-06, - "loss": 0.247, + "loss": 0.0247, "step": 6729 }, { "epoch": 1.87, "learning_rate": 7.512756285369701e-06, - "loss": 0.1422, + "loss": 0.0495, "step": 6730 }, { "epoch": 1.87, "learning_rate": 7.510900825679562e-06, - "loss": 0.1366, + "loss": 0.1126, "step": 6731 }, { "epoch": 1.87, "learning_rate": 7.509045365989425e-06, - "loss": 0.0849, + "loss": 0.0386, "step": 6732 }, { "epoch": 1.87, "learning_rate": 7.507189906299286e-06, - "loss": 0.0318, + "loss": 0.0763, "step": 6733 }, { "epoch": 1.87, "learning_rate": 7.505334446609149e-06, - "loss": 0.3515, + "loss": 0.1134, "step": 6734 }, { "epoch": 1.87, "learning_rate": 7.50347898691901e-06, - "loss": 0.0857, + "loss": 0.0654, "step": 6735 }, { "epoch": 1.87, "learning_rate": 7.5016235272288715e-06, - "loss": 0.087, + "loss": 0.0691, "step": 6736 }, { "epoch": 1.88, "learning_rate": 7.499768067538733e-06, - "loss": 0.0314, + "loss": 0.0112, "step": 6737 }, { "epoch": 1.88, "learning_rate": 7.497912607848595e-06, - "loss": 0.1387, + "loss": 0.0096, "step": 6738 }, { "epoch": 1.88, "learning_rate": 7.496057148158456e-06, - "loss": 0.1403, + "loss": 0.0201, "step": 6739 }, { "epoch": 1.88, "learning_rate": 7.494201688468319e-06, - "loss": 0.1402, + "loss": 0.1281, "step": 6740 }, { "epoch": 1.88, "learning_rate": 7.49234622877818e-06, - "loss": 0.0314, + "loss": 0.102, "step": 6741 }, { "epoch": 1.88, "learning_rate": 7.490490769088043e-06, - "loss": 0.0859, + "loss": 0.1403, "step": 6742 }, { "epoch": 1.88, "learning_rate": 7.488635309397904e-06, - "loss": 0.0832, + "loss": 0.15, "step": 6743 }, { "epoch": 1.88, "learning_rate": 7.486779849707766e-06, - "loss": 0.1412, + "loss": 0.108, "step": 6744 }, { "epoch": 1.88, "learning_rate": 7.4849243900176275e-06, - "loss": 0.0852, + "loss": 0.015, "step": 6745 }, { "epoch": 1.88, "learning_rate": 7.483068930327489e-06, - "loss": 0.082, + "loss": 0.2128, "step": 6746 }, { "epoch": 1.88, "learning_rate": 7.48121347063735e-06, - "loss": 0.1408, + "loss": 0.0197, "step": 6747 }, { "epoch": 1.88, "learning_rate": 7.479358010947213e-06, - "loss": 0.1962, + "loss": 0.1466, "step": 6748 }, { "epoch": 1.88, "learning_rate": 7.477502551257074e-06, - "loss": 0.0292, + "loss": 0.1267, "step": 6749 }, { "epoch": 1.88, "learning_rate": 7.475647091566937e-06, - "loss": 0.2485, + "loss": 0.0525, "step": 6750 }, { "epoch": 1.88, "learning_rate": 7.473791631876798e-06, - "loss": 0.0828, + "loss": 0.1051, "step": 6751 }, { "epoch": 1.88, "learning_rate": 7.47193617218666e-06, - "loss": 0.1903, + "loss": 0.1159, "step": 6752 }, { "epoch": 1.88, "learning_rate": 7.470080712496522e-06, - "loss": 0.1354, + "loss": 0.0547, "step": 6753 }, { "epoch": 1.88, "learning_rate": 7.4682252528063835e-06, - "loss": 0.0279, + "loss": 0.1092, "step": 6754 }, { "epoch": 1.88, "learning_rate": 7.4663697931162445e-06, - "loss": 0.087, + "loss": 0.1086, "step": 6755 }, { "epoch": 1.88, "learning_rate": 7.464514333426107e-06, - "loss": 0.1972, + "loss": 0.0194, "step": 6756 }, { "epoch": 1.88, "learning_rate": 7.462658873735968e-06, - "loss": 0.1375, + "loss": 0.0296, "step": 6757 }, { "epoch": 1.88, "learning_rate": 7.460803414045831e-06, - "loss": 0.1994, + "loss": 0.1093, "step": 6758 }, { "epoch": 1.88, "learning_rate": 7.458947954355692e-06, - "loss": 0.0843, + "loss": 0.181, "step": 6759 }, { "epoch": 1.88, "learning_rate": 7.457092494665555e-06, - "loss": 0.1413, + "loss": 0.0242, "step": 6760 }, { "epoch": 1.88, "learning_rate": 7.455237034975416e-06, - "loss": 0.0824, + "loss": 0.1154, "step": 6761 }, { "epoch": 1.88, "learning_rate": 7.4533815752852776e-06, - "loss": 0.0276, + "loss": 0.0707, "step": 6762 }, { "epoch": 1.88, "learning_rate": 7.4515261155951394e-06, - "loss": 0.2565, + "loss": 0.0927, "step": 6763 }, { "epoch": 1.88, "learning_rate": 7.449670655905001e-06, - "loss": 0.1359, + "loss": 0.0541, "step": 6764 }, { "epoch": 1.88, "learning_rate": 7.447815196214862e-06, - "loss": 0.19, + "loss": 0.058, "step": 6765 }, { "epoch": 1.88, "learning_rate": 7.445959736524725e-06, - "loss": 0.1398, + "loss": 0.1224, "step": 6766 }, { "epoch": 1.88, "learning_rate": 7.444104276834586e-06, - "loss": 0.087, + "loss": 0.032, "step": 6767 }, { "epoch": 1.88, "learning_rate": 7.442248817144449e-06, - "loss": 0.1388, + "loss": 0.0538, "step": 6768 }, { "epoch": 1.88, "learning_rate": 7.44039335745431e-06, - "loss": 0.1397, + "loss": 0.0901, "step": 6769 }, { "epoch": 1.88, "learning_rate": 7.438537897764172e-06, - "loss": 0.3071, + "loss": 0.1056, "step": 6770 }, { "epoch": 1.88, "learning_rate": 7.4366824380740336e-06, - "loss": 0.2494, + "loss": 0.0232, "step": 6771 }, { "epoch": 1.88, "learning_rate": 7.4348269783838954e-06, - "loss": 0.1345, + "loss": 0.0272, "step": 6772 }, { "epoch": 1.89, "learning_rate": 7.4329715186937565e-06, - "loss": 0.0856, + "loss": 0.0202, "step": 6773 }, { "epoch": 1.89, "learning_rate": 7.431116059003619e-06, - "loss": 0.1913, + "loss": 0.1556, "step": 6774 }, { "epoch": 1.89, "learning_rate": 7.42926059931348e-06, - "loss": 0.1359, + "loss": 0.2659, "step": 6775 }, { "epoch": 1.89, "learning_rate": 7.427405139623343e-06, - "loss": 0.1916, + "loss": 0.1091, "step": 6776 }, { "epoch": 1.89, "learning_rate": 7.425549679933204e-06, - "loss": 0.0865, + "loss": 0.168, "step": 6777 }, { "epoch": 1.89, "learning_rate": 7.423694220243066e-06, - "loss": 0.031, + "loss": 0.0668, "step": 6778 }, { "epoch": 1.89, "learning_rate": 7.421838760552928e-06, - "loss": 0.25, + "loss": 0.0493, "step": 6779 }, { "epoch": 1.89, "learning_rate": 7.4199833008627895e-06, - "loss": 0.03, + "loss": 0.1063, "step": 6780 }, { "epoch": 1.89, "learning_rate": 7.4181278411726506e-06, - "loss": 0.1358, + "loss": 0.088, "step": 6781 }, { "epoch": 1.89, "learning_rate": 7.416272381482513e-06, - "loss": 0.1898, + "loss": 0.1362, "step": 6782 }, { "epoch": 1.89, "learning_rate": 7.414416921792374e-06, - "loss": 0.0869, + "loss": 0.0521, "step": 6783 }, { "epoch": 1.89, "learning_rate": 7.412561462102237e-06, - "loss": 0.0855, + "loss": 0.0201, "step": 6784 }, { "epoch": 1.89, "learning_rate": 7.410706002412098e-06, - "loss": 0.1425, + "loss": 0.0176, "step": 6785 }, { "epoch": 1.89, "learning_rate": 7.408850542721961e-06, - "loss": 0.0843, + "loss": 0.1629, "step": 6786 }, { "epoch": 1.89, "learning_rate": 7.406995083031822e-06, - "loss": 0.141, + "loss": 0.0699, "step": 6787 }, { "epoch": 1.89, "learning_rate": 7.405139623341684e-06, - "loss": 0.0856, + "loss": 0.0935, "step": 6788 }, { "epoch": 1.89, "learning_rate": 7.4032841636515455e-06, - "loss": 0.1373, + "loss": 0.1016, "step": 6789 }, { "epoch": 1.89, "learning_rate": 7.401428703961407e-06, - "loss": 0.0846, + "loss": 0.0501, "step": 6790 }, { "epoch": 1.89, "learning_rate": 7.3995732442712684e-06, - "loss": 0.0865, + "loss": 0.2092, "step": 6791 }, { "epoch": 1.89, "learning_rate": 7.397717784581131e-06, - "loss": 0.0857, + "loss": 0.0757, "step": 6792 }, { "epoch": 1.89, "learning_rate": 7.395862324890992e-06, - "loss": 0.2476, + "loss": 0.0581, "step": 6793 }, { "epoch": 1.89, "learning_rate": 7.394006865200855e-06, - "loss": 0.1392, + "loss": 0.0867, "step": 6794 }, { "epoch": 1.89, "learning_rate": 7.392151405510716e-06, - "loss": 0.0839, + "loss": 0.0287, "step": 6795 }, { "epoch": 1.89, "learning_rate": 7.390295945820578e-06, - "loss": 0.1396, + "loss": 0.095, "step": 6796 }, { "epoch": 1.89, "learning_rate": 7.38844048613044e-06, - "loss": 0.0847, + "loss": 0.0231, "step": 6797 }, { "epoch": 1.89, "learning_rate": 7.3865850264403015e-06, - "loss": 0.1929, + "loss": 0.09, "step": 6798 }, { "epoch": 1.89, "learning_rate": 7.3847295667501625e-06, - "loss": 0.2458, + "loss": 0.068, "step": 6799 }, { "epoch": 1.89, "learning_rate": 7.382874107060025e-06, - "loss": 0.2466, + "loss": 0.0795, "step": 6800 }, { "epoch": 1.89, "learning_rate": 7.381018647369886e-06, - "loss": 0.0297, + "loss": 0.0679, "step": 6801 }, { "epoch": 1.89, "learning_rate": 7.379163187679749e-06, - "loss": 0.137, + "loss": 0.1196, "step": 6802 }, { "epoch": 1.89, "learning_rate": 7.37730772798961e-06, - "loss": 0.1416, + "loss": 0.1831, "step": 6803 }, { "epoch": 1.89, "learning_rate": 7.375452268299472e-06, - "loss": 0.2457, + "loss": 0.0247, "step": 6804 }, { "epoch": 1.89, "learning_rate": 7.373596808609334e-06, - "loss": 0.1952, + "loss": 0.0532, "step": 6805 }, { "epoch": 1.89, "learning_rate": 7.371741348919196e-06, - "loss": 0.0849, + "loss": 0.0171, "step": 6806 }, { "epoch": 1.89, "learning_rate": 7.369885889229057e-06, - "loss": 0.1393, + "loss": 0.0856, "step": 6807 }, { "epoch": 1.89, "learning_rate": 7.368030429538919e-06, - "loss": 0.3011, + "loss": 0.031, "step": 6808 }, { "epoch": 1.9, "learning_rate": 7.36617496984878e-06, - "loss": 0.0873, + "loss": 0.1244, "step": 6809 }, { "epoch": 1.9, "learning_rate": 7.364319510158643e-06, - "loss": 0.1928, + "loss": 0.0695, "step": 6810 }, { "epoch": 1.9, "learning_rate": 7.362464050468504e-06, - "loss": 0.1303, + "loss": 0.1594, "step": 6811 }, { "epoch": 1.9, "learning_rate": 7.360608590778366e-06, - "loss": 0.0303, + "loss": 0.0642, "step": 6812 }, { "epoch": 1.9, "learning_rate": 7.358753131088228e-06, - "loss": 0.0838, + "loss": 0.148, "step": 6813 }, { "epoch": 1.9, "learning_rate": 7.35689767139809e-06, - "loss": 0.087, + "loss": 0.0485, "step": 6814 }, { "epoch": 1.9, "learning_rate": 7.355042211707951e-06, - "loss": 0.086, + "loss": 0.0754, "step": 6815 }, { "epoch": 1.9, "learning_rate": 7.3531867520178135e-06, - "loss": 0.0873, + "loss": 0.0812, "step": 6816 }, { "epoch": 1.9, "learning_rate": 7.3513312923276745e-06, - "loss": 0.0325, + "loss": 0.0526, "step": 6817 }, { "epoch": 1.9, "learning_rate": 7.349475832637537e-06, - "loss": 0.1421, + "loss": 0.0933, "step": 6818 }, { "epoch": 1.9, "learning_rate": 7.347620372947398e-06, - "loss": 0.0319, + "loss": 0.2229, "step": 6819 }, { "epoch": 1.9, "learning_rate": 7.345764913257261e-06, - "loss": 0.1385, + "loss": 0.1476, "step": 6820 }, { "epoch": 1.9, "learning_rate": 7.343909453567122e-06, - "loss": 0.1385, + "loss": 0.109, "step": 6821 }, { "epoch": 1.9, "learning_rate": 7.342053993876984e-06, - "loss": 0.0814, + "loss": 0.017, "step": 6822 }, { "epoch": 1.9, "learning_rate": 7.340198534186846e-06, - "loss": 0.14, + "loss": 0.1035, "step": 6823 }, { "epoch": 1.9, "learning_rate": 7.338343074496708e-06, - "loss": 0.0294, + "loss": 0.163, "step": 6824 }, { "epoch": 1.9, "learning_rate": 7.336487614806569e-06, - "loss": 0.1935, + "loss": 0.0644, "step": 6825 }, { "epoch": 1.9, "learning_rate": 7.334632155116431e-06, - "loss": 0.2, + "loss": 0.0979, "step": 6826 }, { "epoch": 1.9, "learning_rate": 7.332776695426292e-06, - "loss": 0.0835, + "loss": 0.0758, "step": 6827 }, { "epoch": 1.9, "learning_rate": 7.330921235736155e-06, - "loss": 0.0286, + "loss": 0.0246, "step": 6828 }, { "epoch": 1.9, "learning_rate": 7.329065776046016e-06, - "loss": 0.1957, + "loss": 0.0607, "step": 6829 }, { "epoch": 1.9, "learning_rate": 7.327210316355878e-06, - "loss": 0.0802, + "loss": 0.1158, "step": 6830 }, { "epoch": 1.9, "learning_rate": 7.32535485666574e-06, - "loss": 0.1449, + "loss": 0.0131, "step": 6831 }, { "epoch": 1.9, "learning_rate": 7.323499396975601e-06, - "loss": 0.141, + "loss": 0.0275, "step": 6832 }, { "epoch": 1.9, "learning_rate": 7.321643937285463e-06, - "loss": 0.195, + "loss": 0.0151, "step": 6833 }, { "epoch": 1.9, "learning_rate": 7.319788477595325e-06, - "loss": 0.307, + "loss": 0.1627, "step": 6834 }, { "epoch": 1.9, "learning_rate": 7.3179330179051865e-06, - "loss": 0.1403, + "loss": 0.1298, "step": 6835 }, { "epoch": 1.9, "learning_rate": 7.3160775582150475e-06, - "loss": 0.14, + "loss": 0.0868, "step": 6836 }, { "epoch": 1.9, "learning_rate": 7.31422209852491e-06, - "loss": 0.2494, + "loss": 0.0211, "step": 6837 }, { "epoch": 1.9, "learning_rate": 7.312366638834771e-06, - "loss": 0.0847, + "loss": 0.0547, "step": 6838 }, { "epoch": 1.9, "learning_rate": 7.310511179144634e-06, - "loss": 0.2556, + "loss": 0.1733, "step": 6839 }, { "epoch": 1.9, "learning_rate": 7.308655719454495e-06, - "loss": 0.1934, + "loss": 0.1128, "step": 6840 }, { "epoch": 1.9, "learning_rate": 7.306800259764357e-06, - "loss": 0.0855, + "loss": 0.0506, "step": 6841 }, { "epoch": 1.9, "learning_rate": 7.304944800074219e-06, - "loss": 0.0302, + "loss": 0.1132, "step": 6842 }, { "epoch": 1.9, "learning_rate": 7.303089340384081e-06, - "loss": 0.0862, + "loss": 0.2408, "step": 6843 }, { "epoch": 1.9, "learning_rate": 7.301233880693942e-06, - "loss": 0.0879, + "loss": 0.0366, "step": 6844 }, { "epoch": 1.91, "learning_rate": 7.299378421003804e-06, - "loss": 0.3556, + "loss": 0.0185, "step": 6845 }, { "epoch": 1.91, "learning_rate": 7.297522961313665e-06, - "loss": 0.0304, + "loss": 0.0213, "step": 6846 }, { "epoch": 1.91, "learning_rate": 7.295667501623528e-06, - "loss": 0.0308, + "loss": 0.0211, "step": 6847 }, { "epoch": 1.91, "learning_rate": 7.293812041933389e-06, - "loss": 0.1372, + "loss": 0.173, "step": 6848 }, { "epoch": 1.91, "learning_rate": 7.291956582243252e-06, - "loss": 0.1978, + "loss": 0.0928, "step": 6849 }, { "epoch": 1.91, "learning_rate": 7.290101122553113e-06, - "loss": 0.1929, + "loss": 0.1208, "step": 6850 }, { "epoch": 1.91, "learning_rate": 7.288245662862975e-06, - "loss": 0.1942, + "loss": 0.136, "step": 6851 }, { "epoch": 1.91, "learning_rate": 7.286390203172837e-06, - "loss": 0.3061, + "loss": 0.0803, "step": 6852 }, { "epoch": 1.91, "learning_rate": 7.2845347434826985e-06, - "loss": 0.1411, + "loss": 0.1316, "step": 6853 }, { "epoch": 1.91, "learning_rate": 7.2826792837925595e-06, - "loss": 0.1888, + "loss": 0.1781, "step": 6854 }, { "epoch": 1.91, "learning_rate": 7.280823824102422e-06, - "loss": 0.0869, + "loss": 0.0627, "step": 6855 }, { "epoch": 1.91, "learning_rate": 7.278968364412283e-06, - "loss": 0.1942, + "loss": 0.0511, "step": 6856 }, { "epoch": 1.91, "learning_rate": 7.277112904722146e-06, - "loss": 0.0308, + "loss": 0.0632, "step": 6857 }, { "epoch": 1.91, "learning_rate": 7.275257445032007e-06, - "loss": 0.0319, + "loss": 0.0512, "step": 6858 }, { "epoch": 1.91, "learning_rate": 7.273401985341869e-06, - "loss": 0.1411, + "loss": 0.0503, "step": 6859 }, { "epoch": 1.91, "learning_rate": 7.271546525651731e-06, - "loss": 0.0855, + "loss": 0.0188, "step": 6860 }, { "epoch": 1.91, "learning_rate": 7.269691065961593e-06, - "loss": 0.0881, + "loss": 0.0204, "step": 6861 }, { "epoch": 1.91, "learning_rate": 7.267835606271454e-06, - "loss": 0.1425, + "loss": 0.0286, "step": 6862 }, { "epoch": 1.91, "learning_rate": 7.265980146581316e-06, - "loss": 0.1406, + "loss": 0.0333, "step": 6863 }, { "epoch": 1.91, "learning_rate": 7.264124686891177e-06, - "loss": 0.0847, + "loss": 0.0142, "step": 6864 }, { "epoch": 1.91, "learning_rate": 7.26226922720104e-06, - "loss": 0.1887, + "loss": 0.0804, "step": 6865 }, { "epoch": 1.91, "learning_rate": 7.260413767510901e-06, - "loss": 0.1431, + "loss": 0.0934, "step": 6866 }, { "epoch": 1.91, "learning_rate": 7.258558307820763e-06, - "loss": 0.0841, + "loss": 0.1235, "step": 6867 }, { "epoch": 1.91, "learning_rate": 7.256702848130625e-06, - "loss": 0.1356, + "loss": 0.0222, "step": 6868 }, { "epoch": 1.91, "learning_rate": 7.254847388440487e-06, - "loss": 0.196, + "loss": 0.0161, "step": 6869 }, { "epoch": 1.91, "learning_rate": 7.252991928750348e-06, - "loss": 0.0303, + "loss": 0.0144, "step": 6870 }, { "epoch": 1.91, "learning_rate": 7.2511364690602105e-06, - "loss": 0.1921, + "loss": 0.0136, "step": 6871 }, { "epoch": 1.91, "learning_rate": 7.2492810093700715e-06, - "loss": 0.0827, + "loss": 0.0122, "step": 6872 }, { "epoch": 1.91, "learning_rate": 7.247425549679934e-06, - "loss": 0.0866, + "loss": 0.1828, "step": 6873 }, { "epoch": 1.91, "learning_rate": 7.245570089989795e-06, - "loss": 0.1409, + "loss": 0.0166, "step": 6874 }, { "epoch": 1.91, "learning_rate": 7.243714630299657e-06, - "loss": 0.084, + "loss": 0.1132, "step": 6875 }, { "epoch": 1.91, "learning_rate": 7.241859170609519e-06, - "loss": 0.1391, + "loss": 0.0115, "step": 6876 }, { "epoch": 1.91, "learning_rate": 7.240003710919381e-06, - "loss": 0.1936, + "loss": 0.0099, "step": 6877 }, { "epoch": 1.91, "learning_rate": 7.238148251229242e-06, - "loss": 0.2012, + "loss": 0.0661, "step": 6878 }, { "epoch": 1.91, "learning_rate": 7.2362927915391046e-06, - "loss": 0.0839, + "loss": 0.007, "step": 6879 }, { "epoch": 1.91, "learning_rate": 7.234437331848966e-06, - "loss": 0.2488, + "loss": 0.0488, "step": 6880 }, { "epoch": 1.92, "learning_rate": 7.232581872158828e-06, - "loss": 0.1947, + "loss": 0.1517, "step": 6881 }, { "epoch": 1.92, "learning_rate": 7.230726412468689e-06, - "loss": 0.2946, + "loss": 0.11, "step": 6882 }, { "epoch": 1.92, "learning_rate": 7.228870952778552e-06, - "loss": 0.1919, + "loss": 0.0112, "step": 6883 }, { "epoch": 1.92, "learning_rate": 7.227015493088413e-06, - "loss": 0.0878, + "loss": 0.0062, "step": 6884 }, { "epoch": 1.92, "learning_rate": 7.225160033398275e-06, - "loss": 0.0302, + "loss": 0.0801, "step": 6885 }, { "epoch": 1.92, "learning_rate": 7.223304573708137e-06, - "loss": 0.1421, + "loss": 0.0486, "step": 6886 }, { "epoch": 1.92, "learning_rate": 7.221449114017999e-06, - "loss": 0.1363, + "loss": 0.0589, "step": 6887 }, { "epoch": 1.92, "learning_rate": 7.21959365432786e-06, - "loss": 0.1883, + "loss": 0.0089, "step": 6888 }, { "epoch": 1.92, "learning_rate": 7.2177381946377224e-06, - "loss": 0.1421, + "loss": 0.0106, "step": 6889 }, { "epoch": 1.92, "learning_rate": 7.2158827349475835e-06, - "loss": 0.1952, + "loss": 0.1569, "step": 6890 }, { "epoch": 1.92, "learning_rate": 7.214027275257446e-06, - "loss": 0.0843, + "loss": 0.0167, "step": 6891 }, { "epoch": 1.92, "learning_rate": 7.212171815567307e-06, - "loss": 0.1939, + "loss": 0.0702, "step": 6892 }, { "epoch": 1.92, "learning_rate": 7.210316355877169e-06, - "loss": 0.0854, + "loss": 0.0258, "step": 6893 }, { "epoch": 1.92, "learning_rate": 7.208460896187031e-06, - "loss": 0.1391, + "loss": 0.1041, "step": 6894 }, { "epoch": 1.92, "learning_rate": 7.206605436496893e-06, - "loss": 0.0877, + "loss": 0.0752, "step": 6895 }, { "epoch": 1.92, "learning_rate": 7.204749976806754e-06, - "loss": 0.2461, + "loss": 0.0966, "step": 6896 }, { "epoch": 1.92, "learning_rate": 7.2028945171166165e-06, - "loss": 0.085, + "loss": 0.0783, "step": 6897 }, { "epoch": 1.92, "learning_rate": 7.2010390574264776e-06, - "loss": 0.0854, + "loss": 0.0094, "step": 6898 }, { "epoch": 1.92, "learning_rate": 7.19918359773634e-06, - "loss": 0.1919, + "loss": 0.0598, "step": 6899 }, { "epoch": 1.92, "learning_rate": 7.197328138046201e-06, - "loss": 0.0861, + "loss": 0.098, "step": 6900 }, { "epoch": 1.92, "learning_rate": 7.195472678356063e-06, - "loss": 0.0868, + "loss": 0.2414, "step": 6901 }, { "epoch": 1.92, "learning_rate": 7.193617218665925e-06, - "loss": 0.0868, + "loss": 0.0167, "step": 6902 }, { "epoch": 1.92, "learning_rate": 7.191761758975787e-06, - "loss": 0.1426, + "loss": 0.1338, "step": 6903 }, { "epoch": 1.92, "learning_rate": 7.189906299285648e-06, - "loss": 0.0324, + "loss": 0.1043, "step": 6904 }, { "epoch": 1.92, "learning_rate": 7.188050839595511e-06, - "loss": 0.1394, + "loss": 0.1327, "step": 6905 }, { "epoch": 1.92, "learning_rate": 7.186195379905372e-06, - "loss": 0.1393, + "loss": 0.0871, "step": 6906 }, { "epoch": 1.92, "learning_rate": 7.184339920215234e-06, - "loss": 0.0855, + "loss": 0.0192, "step": 6907 }, { "epoch": 1.92, "learning_rate": 7.1824844605250954e-06, - "loss": 0.0315, + "loss": 0.0929, "step": 6908 }, { "epoch": 1.92, "learning_rate": 7.180629000834958e-06, - "loss": 0.031, + "loss": 0.0225, "step": 6909 }, { "epoch": 1.92, "learning_rate": 7.178773541144819e-06, - "loss": 0.0852, + "loss": 0.0922, "step": 6910 }, { "epoch": 1.92, "learning_rate": 7.176918081454681e-06, - "loss": 0.1391, + "loss": 0.081, "step": 6911 }, { "epoch": 1.92, "learning_rate": 7.175062621764543e-06, - "loss": 0.0838, + "loss": 0.1059, "step": 6912 }, { "epoch": 1.92, "learning_rate": 7.173207162074405e-06, - "loss": 0.1393, + "loss": 0.0295, "step": 6913 }, { "epoch": 1.92, "learning_rate": 7.171351702384266e-06, - "loss": 0.0873, + "loss": 0.0501, "step": 6914 }, { "epoch": 1.92, "learning_rate": 7.1694962426941285e-06, - "loss": 0.1422, + "loss": 0.1148, "step": 6915 }, { "epoch": 1.92, "learning_rate": 7.1676407830039895e-06, - "loss": 0.0865, + "loss": 0.0537, "step": 6916 }, { "epoch": 1.93, "learning_rate": 7.165785323313852e-06, - "loss": 0.0289, + "loss": 0.1423, "step": 6917 }, { "epoch": 1.93, "learning_rate": 7.163929863623713e-06, - "loss": 0.1383, + "loss": 0.0157, "step": 6918 }, { "epoch": 1.93, "learning_rate": 7.162074403933575e-06, - "loss": 0.2503, + "loss": 0.0173, "step": 6919 }, { "epoch": 1.93, "learning_rate": 7.160218944243437e-06, - "loss": 0.1977, + "loss": 0.0785, "step": 6920 }, { "epoch": 1.93, "learning_rate": 7.158363484553299e-06, - "loss": 0.0822, + "loss": 0.0572, "step": 6921 }, { "epoch": 1.93, "learning_rate": 7.15650802486316e-06, - "loss": 0.307, + "loss": 0.1974, "step": 6922 }, { "epoch": 1.93, "learning_rate": 7.154652565173023e-06, - "loss": 0.1415, + "loss": 0.1486, "step": 6923 }, { "epoch": 1.93, "learning_rate": 7.152797105482884e-06, - "loss": 0.0817, + "loss": 0.105, "step": 6924 }, { "epoch": 1.93, "learning_rate": 7.150941645792746e-06, - "loss": 0.2512, + "loss": 0.0209, "step": 6925 }, { "epoch": 1.93, "learning_rate": 7.149086186102607e-06, - "loss": 0.084, + "loss": 0.0553, "step": 6926 }, { "epoch": 1.93, "learning_rate": 7.147230726412469e-06, - "loss": 0.1862, + "loss": 0.0602, "step": 6927 }, { "epoch": 1.93, "learning_rate": 7.145375266722331e-06, - "loss": 0.0273, + "loss": 0.1567, "step": 6928 }, { "epoch": 1.93, "learning_rate": 7.143519807032193e-06, - "loss": 0.1959, + "loss": 0.0183, "step": 6929 }, { "epoch": 1.93, "learning_rate": 7.141664347342054e-06, - "loss": 0.3658, + "loss": 0.1417, "step": 6930 }, { "epoch": 1.93, "learning_rate": 7.139808887651917e-06, - "loss": 0.1405, + "loss": 0.0225, "step": 6931 }, { "epoch": 1.93, "learning_rate": 7.137953427961778e-06, - "loss": 0.1408, + "loss": 0.1459, "step": 6932 }, { "epoch": 1.93, "learning_rate": 7.1360979682716405e-06, - "loss": 0.1349, + "loss": 0.148, "step": 6933 }, { "epoch": 1.93, "learning_rate": 7.1342425085815015e-06, - "loss": 0.0861, + "loss": 0.0257, "step": 6934 }, { "epoch": 1.93, "learning_rate": 7.132387048891363e-06, - "loss": 0.0828, + "loss": 0.0964, "step": 6935 }, { "epoch": 1.93, "learning_rate": 7.130531589201225e-06, - "loss": 0.1923, + "loss": 0.0204, "step": 6936 }, { "epoch": 1.93, "learning_rate": 7.128676129511087e-06, - "loss": 0.0294, + "loss": 0.0327, "step": 6937 }, { "epoch": 1.93, "learning_rate": 7.126820669820948e-06, - "loss": 0.1914, + "loss": 0.1021, "step": 6938 }, { "epoch": 1.93, "learning_rate": 7.124965210130811e-06, - "loss": 0.1371, + "loss": 0.0245, "step": 6939 }, { "epoch": 1.93, "learning_rate": 7.123109750440672e-06, - "loss": 0.0838, + "loss": 0.0262, "step": 6940 }, { "epoch": 1.93, "learning_rate": 7.121254290750535e-06, - "loss": 0.1416, + "loss": 0.0312, "step": 6941 }, { "epoch": 1.93, "learning_rate": 7.119398831060396e-06, - "loss": 0.2536, + "loss": 0.054, "step": 6942 }, { "epoch": 1.93, "learning_rate": 7.117543371370258e-06, - "loss": 0.2473, + "loss": 0.1043, "step": 6943 }, { "epoch": 1.93, "learning_rate": 7.115687911680119e-06, - "loss": 0.0835, + "loss": 0.0652, "step": 6944 }, { "epoch": 1.93, "learning_rate": 7.113832451989981e-06, - "loss": 0.0868, + "loss": 0.1324, "step": 6945 }, { "epoch": 1.93, "learning_rate": 7.111976992299843e-06, - "loss": 0.1904, + "loss": 0.0473, "step": 6946 }, { "epoch": 1.93, "learning_rate": 7.110121532609705e-06, - "loss": 0.1941, + "loss": 0.2169, "step": 6947 }, { "epoch": 1.93, "learning_rate": 7.108266072919566e-06, - "loss": 0.1425, + "loss": 0.0571, "step": 6948 }, { "epoch": 1.93, "learning_rate": 7.106410613229429e-06, - "loss": 0.1359, + "loss": 0.0825, "step": 6949 }, { "epoch": 1.93, "learning_rate": 7.10455515353929e-06, - "loss": 0.1919, + "loss": 0.0566, "step": 6950 }, { "epoch": 1.93, "learning_rate": 7.1026996938491525e-06, - "loss": 0.1918, + "loss": 0.0571, "step": 6951 }, { "epoch": 1.93, "learning_rate": 7.1008442341590135e-06, - "loss": 0.1405, + "loss": 0.0709, "step": 6952 }, { "epoch": 1.94, "learning_rate": 7.098988774468875e-06, - "loss": 0.245, + "loss": 0.1486, "step": 6953 }, { "epoch": 1.94, "learning_rate": 7.097133314778737e-06, - "loss": 0.0857, + "loss": 0.049, "step": 6954 }, { "epoch": 1.94, "learning_rate": 7.095277855088599e-06, - "loss": 0.1359, + "loss": 0.1596, "step": 6955 }, { "epoch": 1.94, "learning_rate": 7.09342239539846e-06, - "loss": 0.0846, + "loss": 0.0976, "step": 6956 }, { "epoch": 1.94, "learning_rate": 7.091566935708323e-06, - "loss": 0.0331, + "loss": 0.0181, "step": 6957 }, { "epoch": 1.94, "learning_rate": 7.089711476018184e-06, - "loss": 0.1417, + "loss": 0.0799, "step": 6958 }, { "epoch": 1.94, "learning_rate": 7.087856016328047e-06, - "loss": 0.0328, + "loss": 0.0223, "step": 6959 }, { "epoch": 1.94, "learning_rate": 7.086000556637908e-06, - "loss": 0.032, + "loss": 0.0926, "step": 6960 }, { "epoch": 1.94, "learning_rate": 7.0841450969477695e-06, - "loss": 0.0813, + "loss": 0.0669, "step": 6961 }, { "epoch": 1.94, "learning_rate": 7.082289637257631e-06, - "loss": 0.0861, + "loss": 0.0275, "step": 6962 }, { "epoch": 1.94, "learning_rate": 7.080434177567493e-06, - "loss": 0.243, + "loss": 0.0966, "step": 6963 }, { "epoch": 1.94, "learning_rate": 7.078578717877354e-06, - "loss": 0.0319, + "loss": 0.1274, "step": 6964 }, { "epoch": 1.94, "learning_rate": 7.076723258187217e-06, - "loss": 0.1911, + "loss": 0.0128, "step": 6965 }, { "epoch": 1.94, "learning_rate": 7.074867798497078e-06, - "loss": 0.3037, + "loss": 0.1063, "step": 6966 }, { "epoch": 1.94, "learning_rate": 7.073012338806941e-06, - "loss": 0.088, + "loss": 0.0097, "step": 6967 }, { "epoch": 1.94, "learning_rate": 7.071156879116802e-06, - "loss": 0.3069, + "loss": 0.1924, "step": 6968 }, { "epoch": 1.94, "learning_rate": 7.0693014194266645e-06, - "loss": 0.3036, + "loss": 0.1722, "step": 6969 }, { "epoch": 1.94, "learning_rate": 7.0674459597365255e-06, - "loss": 0.1887, + "loss": 0.0563, "step": 6970 }, { "epoch": 1.94, "learning_rate": 7.065590500046387e-06, - "loss": 0.343, + "loss": 0.0573, "step": 6971 }, { "epoch": 1.94, "learning_rate": 7.063735040356249e-06, - "loss": 0.1927, + "loss": 0.213, "step": 6972 }, { "epoch": 1.94, "learning_rate": 7.061879580666111e-06, - "loss": 0.2438, + "loss": 0.0714, "step": 6973 }, { "epoch": 1.94, "learning_rate": 7.060024120975972e-06, - "loss": 0.0332, + "loss": 0.0722, "step": 6974 }, { "epoch": 1.94, "learning_rate": 7.058168661285835e-06, - "loss": 0.035, + "loss": 0.0488, "step": 6975 }, { "epoch": 1.94, "learning_rate": 7.056313201595696e-06, - "loss": 0.0857, + "loss": 0.0182, "step": 6976 }, { "epoch": 1.94, "learning_rate": 7.0544577419055586e-06, - "loss": 0.0882, + "loss": 0.0916, "step": 6977 }, { "epoch": 1.94, "learning_rate": 7.05260228221542e-06, - "loss": 0.1396, + "loss": 0.1353, "step": 6978 }, { "epoch": 1.94, "learning_rate": 7.0507468225252815e-06, - "loss": 0.0885, + "loss": 0.0234, "step": 6979 }, { "epoch": 1.94, "learning_rate": 7.048891362835143e-06, - "loss": 0.296, + "loss": 0.0563, "step": 6980 }, { "epoch": 1.94, "learning_rate": 7.047035903145005e-06, - "loss": 0.2447, + "loss": 0.0234, "step": 6981 }, { "epoch": 1.94, "learning_rate": 7.045180443454866e-06, - "loss": 0.2459, + "loss": 0.0997, "step": 6982 }, { "epoch": 1.94, "learning_rate": 7.043324983764729e-06, - "loss": 0.2932, + "loss": 0.0252, "step": 6983 }, { "epoch": 1.94, "learning_rate": 7.04146952407459e-06, - "loss": 0.0861, + "loss": 0.0751, "step": 6984 }, { "epoch": 1.94, "learning_rate": 7.039614064384451e-06, - "loss": 0.2345, + "loss": 0.0726, "step": 6985 }, { "epoch": 1.94, "learning_rate": 7.037758604694314e-06, - "loss": 0.138, + "loss": 0.0245, "step": 6986 }, { "epoch": 1.94, "learning_rate": 7.035903145004175e-06, - "loss": 0.1379, + "loss": 0.1192, "step": 6987 }, { "epoch": 1.94, "learning_rate": 7.0340476853140374e-06, - "loss": 0.0888, + "loss": 0.0627, "step": 6988 }, { "epoch": 1.95, "learning_rate": 7.0321922256238985e-06, - "loss": 0.3976, + "loss": 0.1683, "step": 6989 }, { "epoch": 1.95, "learning_rate": 7.03033676593376e-06, - "loss": 0.0873, + "loss": 0.088, "step": 6990 }, { "epoch": 1.95, "learning_rate": 7.028481306243622e-06, - "loss": 0.1408, + "loss": 0.1085, "step": 6991 }, { "epoch": 1.95, "learning_rate": 7.026625846553484e-06, - "loss": 0.2368, + "loss": 0.0923, "step": 6992 }, { "epoch": 1.95, "learning_rate": 7.024770386863345e-06, - "loss": 0.0896, + "loss": 0.1133, "step": 6993 }, { "epoch": 1.95, "learning_rate": 7.022914927173208e-06, - "loss": 0.1918, + "loss": 0.1033, "step": 6994 }, { "epoch": 1.95, "learning_rate": 7.021059467483069e-06, - "loss": 0.0913, + "loss": 0.0968, "step": 6995 }, { "epoch": 1.95, "learning_rate": 7.0192040077929316e-06, - "loss": 0.141, + "loss": 0.0556, "step": 6996 }, { "epoch": 1.95, "learning_rate": 7.017348548102793e-06, - "loss": 0.0932, + "loss": 0.1025, "step": 6997 }, { "epoch": 1.95, "learning_rate": 7.0154930884126545e-06, - "loss": 0.244, + "loss": 0.0498, "step": 6998 }, { "epoch": 1.95, "learning_rate": 7.013637628722516e-06, - "loss": 0.0935, + "loss": 0.0961, "step": 6999 }, { "epoch": 1.95, "learning_rate": 7.011782169032378e-06, - "loss": 0.0922, + "loss": 0.0577, "step": 7000 }, { "epoch": 1.95, "learning_rate": 7.009926709342239e-06, - "loss": 0.2444, + "loss": 0.0493, "step": 7001 }, { "epoch": 1.95, "learning_rate": 7.008071249652102e-06, - "loss": 0.0904, + "loss": 0.0769, "step": 7002 }, { "epoch": 1.95, "learning_rate": 7.006215789961963e-06, - "loss": 0.3348, + "loss": 0.0919, "step": 7003 }, { "epoch": 1.95, "learning_rate": 7.004360330271826e-06, - "loss": 0.1384, + "loss": 0.143, "step": 7004 }, { "epoch": 1.95, "learning_rate": 7.002504870581687e-06, - "loss": 0.091, + "loss": 0.0201, "step": 7005 }, { "epoch": 1.95, "learning_rate": 7.0006494108915494e-06, - "loss": 0.0896, + "loss": 0.1066, "step": 7006 }, { "epoch": 1.95, "learning_rate": 6.9987939512014104e-06, - "loss": 0.0914, + "loss": 0.1008, "step": 7007 }, { "epoch": 1.95, "learning_rate": 6.996938491511272e-06, - "loss": 0.1922, + "loss": 0.1413, "step": 7008 }, { "epoch": 1.95, "learning_rate": 6.995083031821134e-06, - "loss": 0.0893, + "loss": 0.0907, "step": 7009 }, { "epoch": 1.95, "learning_rate": 6.993227572130996e-06, - "loss": 0.0878, + "loss": 0.081, "step": 7010 }, { "epoch": 1.95, "learning_rate": 6.991372112440857e-06, - "loss": 0.0391, + "loss": 0.0276, "step": 7011 }, { "epoch": 1.95, "learning_rate": 6.98951665275072e-06, - "loss": 0.2386, + "loss": 0.0826, "step": 7012 }, { "epoch": 1.95, "learning_rate": 6.987661193060581e-06, - "loss": 0.1415, + "loss": 0.0901, "step": 7013 }, { "epoch": 1.95, "learning_rate": 6.9858057333704435e-06, - "loss": 0.1888, + "loss": 0.1127, "step": 7014 }, { "epoch": 1.95, "learning_rate": 6.9839502736803046e-06, - "loss": 0.2382, + "loss": 0.0168, "step": 7015 }, { "epoch": 1.95, "learning_rate": 6.9820948139901664e-06, - "loss": 0.2982, + "loss": 0.0211, "step": 7016 }, { "epoch": 1.95, "learning_rate": 6.980239354300028e-06, - "loss": 0.0374, + "loss": 0.0594, "step": 7017 }, { "epoch": 1.95, "learning_rate": 6.97838389460989e-06, - "loss": 0.1885, + "loss": 0.0548, "step": 7018 }, { "epoch": 1.95, "learning_rate": 6.976528434919751e-06, - "loss": 0.0892, + "loss": 0.0574, "step": 7019 }, { "epoch": 1.95, "learning_rate": 6.974672975229614e-06, - "loss": 0.089, + "loss": 0.0559, "step": 7020 }, { "epoch": 1.95, "learning_rate": 6.972817515539475e-06, - "loss": 0.1396, + "loss": 0.0901, "step": 7021 }, { "epoch": 1.95, "learning_rate": 6.970962055849338e-06, - "loss": 0.188, + "loss": 0.0151, "step": 7022 }, { "epoch": 1.95, "learning_rate": 6.969106596159199e-06, - "loss": 0.1865, + "loss": 0.0575, "step": 7023 }, { "epoch": 1.95, "learning_rate": 6.9672511364690606e-06, - "loss": 0.1434, + "loss": 0.0207, "step": 7024 }, { "epoch": 1.96, "learning_rate": 6.9653956767789224e-06, - "loss": 0.0362, + "loss": 0.0165, "step": 7025 }, { "epoch": 1.96, "learning_rate": 6.963540217088784e-06, - "loss": 0.139, + "loss": 0.0179, "step": 7026 }, { "epoch": 1.96, "learning_rate": 6.961684757398645e-06, - "loss": 0.1422, + "loss": 0.1608, "step": 7027 }, { "epoch": 1.96, "learning_rate": 6.959829297708508e-06, - "loss": 0.036, + "loss": 0.021, "step": 7028 }, { "epoch": 1.96, "learning_rate": 6.957973838018369e-06, - "loss": 0.1398, + "loss": 0.0135, "step": 7029 }, { "epoch": 1.96, "learning_rate": 6.956118378328232e-06, - "loss": 0.1351, + "loss": 0.0901, "step": 7030 }, { "epoch": 1.96, "learning_rate": 6.954262918638093e-06, - "loss": 0.1863, + "loss": 0.0279, "step": 7031 }, { "epoch": 1.96, "learning_rate": 6.9524074589479555e-06, - "loss": 0.1917, + "loss": 0.0393, "step": 7032 }, { "epoch": 1.96, "learning_rate": 6.9505519992578165e-06, - "loss": 0.0345, + "loss": 0.0387, "step": 7033 }, { "epoch": 1.96, "learning_rate": 6.948696539567678e-06, - "loss": 0.089, + "loss": 0.0161, "step": 7034 }, { "epoch": 1.96, "learning_rate": 6.94684107987754e-06, - "loss": 0.137, + "loss": 0.0942, "step": 7035 }, { "epoch": 1.96, "learning_rate": 6.944985620187402e-06, - "loss": 0.0872, + "loss": 0.0353, "step": 7036 }, { "epoch": 1.96, "learning_rate": 6.943130160497263e-06, - "loss": 0.1388, + "loss": 0.0119, "step": 7037 }, { "epoch": 1.96, "learning_rate": 6.941274700807126e-06, - "loss": 0.1933, + "loss": 0.0463, "step": 7038 }, { "epoch": 1.96, "learning_rate": 6.939419241116987e-06, - "loss": 0.0839, + "loss": 0.1139, "step": 7039 }, { "epoch": 1.96, "learning_rate": 6.93756378142685e-06, - "loss": 0.0869, + "loss": 0.0631, "step": 7040 }, { "epoch": 1.96, "learning_rate": 6.935708321736711e-06, - "loss": 0.1395, + "loss": 0.089, "step": 7041 }, { "epoch": 1.96, "learning_rate": 6.9338528620465725e-06, - "loss": 0.1927, + "loss": 0.013, "step": 7042 }, { "epoch": 1.96, "learning_rate": 6.931997402356434e-06, - "loss": 0.0869, + "loss": 0.0548, "step": 7043 }, { "epoch": 1.96, "learning_rate": 6.930141942666296e-06, - "loss": 0.1327, + "loss": 0.0549, "step": 7044 }, { "epoch": 1.96, "learning_rate": 6.928286482976157e-06, - "loss": 0.1386, + "loss": 0.0471, "step": 7045 }, { "epoch": 1.96, "learning_rate": 6.92643102328602e-06, - "loss": 0.0835, + "loss": 0.1921, "step": 7046 }, { "epoch": 1.96, "learning_rate": 6.924575563595881e-06, - "loss": 0.1914, + "loss": 0.0424, "step": 7047 }, { "epoch": 1.96, "learning_rate": 6.922720103905744e-06, - "loss": 0.1372, + "loss": 0.109, "step": 7048 }, { "epoch": 1.96, "learning_rate": 6.920864644215605e-06, - "loss": 0.252, + "loss": 0.0116, "step": 7049 }, { "epoch": 1.96, "learning_rate": 6.919009184525467e-06, - "loss": 0.3024, + "loss": 0.0327, "step": 7050 }, { "epoch": 1.96, "learning_rate": 6.9171537248353285e-06, - "loss": 0.1419, + "loss": 0.0063, "step": 7051 }, { "epoch": 1.96, "learning_rate": 6.91529826514519e-06, - "loss": 0.0839, + "loss": 0.0862, "step": 7052 }, { "epoch": 1.96, "learning_rate": 6.913442805455051e-06, - "loss": 0.1381, + "loss": 0.1677, "step": 7053 }, { "epoch": 1.96, "learning_rate": 6.911587345764914e-06, - "loss": 0.0842, + "loss": 0.0173, "step": 7054 }, { "epoch": 1.96, "learning_rate": 6.909731886074775e-06, - "loss": 0.1376, + "loss": 0.1614, "step": 7055 }, { "epoch": 1.96, "learning_rate": 6.907876426384638e-06, - "loss": 0.193, + "loss": 0.0502, "step": 7056 }, { "epoch": 1.96, "learning_rate": 6.906020966694499e-06, - "loss": 0.1929, + "loss": 0.1138, "step": 7057 }, { "epoch": 1.96, "learning_rate": 6.904165507004361e-06, - "loss": 0.1414, + "loss": 0.0094, "step": 7058 }, { "epoch": 1.96, "learning_rate": 6.902310047314223e-06, - "loss": 0.0824, + "loss": 0.0527, "step": 7059 }, { "epoch": 1.96, "learning_rate": 6.9004545876240845e-06, - "loss": 0.0824, + "loss": 0.1079, "step": 7060 }, { "epoch": 1.97, "learning_rate": 6.8985991279339455e-06, - "loss": 0.191, + "loss": 0.115, "step": 7061 }, { "epoch": 1.97, "learning_rate": 6.896743668243808e-06, - "loss": 0.1965, + "loss": 0.0164, "step": 7062 }, { "epoch": 1.97, "learning_rate": 6.894888208553669e-06, - "loss": 0.0323, + "loss": 0.0123, "step": 7063 }, { "epoch": 1.97, "learning_rate": 6.893032748863532e-06, - "loss": 0.2467, + "loss": 0.0343, "step": 7064 }, { "epoch": 1.97, "learning_rate": 6.891177289173393e-06, - "loss": 0.0875, + "loss": 0.0839, "step": 7065 }, { "epoch": 1.97, "learning_rate": 6.889321829483256e-06, - "loss": 0.1425, + "loss": 0.1177, "step": 7066 }, { "epoch": 1.97, "learning_rate": 6.887466369793117e-06, - "loss": 0.0868, + "loss": 0.0211, "step": 7067 }, { "epoch": 1.97, "learning_rate": 6.885610910102979e-06, - "loss": 0.0881, + "loss": 0.0474, "step": 7068 }, { "epoch": 1.97, "learning_rate": 6.8837554504128405e-06, - "loss": 0.1363, + "loss": 0.1186, "step": 7069 }, { "epoch": 1.97, "learning_rate": 6.881899990722702e-06, - "loss": 0.086, + "loss": 0.0438, "step": 7070 }, { "epoch": 1.97, "learning_rate": 6.880044531032563e-06, - "loss": 0.2963, + "loss": 0.1342, "step": 7071 }, { "epoch": 1.97, "learning_rate": 6.878189071342426e-06, - "loss": 0.0849, + "loss": 0.1293, "step": 7072 }, { "epoch": 1.97, "learning_rate": 6.876333611652287e-06, - "loss": 0.1386, + "loss": 0.0259, "step": 7073 }, { "epoch": 1.97, "learning_rate": 6.87447815196215e-06, - "loss": 0.0866, + "loss": 0.0809, "step": 7074 }, { "epoch": 1.97, "learning_rate": 6.872622692272011e-06, - "loss": 0.0842, + "loss": 0.1486, "step": 7075 }, { "epoch": 1.97, "learning_rate": 6.870767232581873e-06, - "loss": 0.2968, + "loss": 0.153, "step": 7076 }, { "epoch": 1.97, "learning_rate": 6.868911772891735e-06, - "loss": 0.1371, + "loss": 0.0891, "step": 7077 }, { "epoch": 1.97, "learning_rate": 6.8670563132015965e-06, - "loss": 0.2461, + "loss": 0.1077, "step": 7078 }, { "epoch": 1.97, "learning_rate": 6.8652008535114575e-06, - "loss": 0.1955, + "loss": 0.099, "step": 7079 }, { "epoch": 1.97, "learning_rate": 6.86334539382132e-06, - "loss": 0.1397, + "loss": 0.0759, "step": 7080 }, { "epoch": 1.97, "learning_rate": 6.861489934131181e-06, - "loss": 0.1386, + "loss": 0.0466, "step": 7081 }, { "epoch": 1.97, "learning_rate": 6.859634474441044e-06, - "loss": 0.3023, + "loss": 0.067, "step": 7082 }, { "epoch": 1.97, "learning_rate": 6.857779014750905e-06, - "loss": 0.2474, + "loss": 0.0697, "step": 7083 }, { "epoch": 1.97, "learning_rate": 6.855923555060767e-06, - "loss": 0.1921, + "loss": 0.0676, "step": 7084 }, { "epoch": 1.97, "learning_rate": 6.854068095370629e-06, - "loss": 0.1949, + "loss": 0.0625, "step": 7085 }, { "epoch": 1.97, "learning_rate": 6.852212635680491e-06, - "loss": 0.2965, + "loss": 0.0808, "step": 7086 }, { "epoch": 1.97, "learning_rate": 6.850357175990352e-06, - "loss": 0.1364, + "loss": 0.015, "step": 7087 }, { "epoch": 1.97, "learning_rate": 6.848501716300214e-06, - "loss": 0.1427, + "loss": 0.028, "step": 7088 }, { "epoch": 1.97, "learning_rate": 6.846646256610075e-06, - "loss": 0.1392, + "loss": 0.0786, "step": 7089 }, { "epoch": 1.97, "learning_rate": 6.844790796919938e-06, - "loss": 0.1378, + "loss": 0.1014, "step": 7090 }, { "epoch": 1.97, "learning_rate": 6.842935337229799e-06, - "loss": 0.1423, + "loss": 0.0907, "step": 7091 }, { "epoch": 1.97, "learning_rate": 6.841079877539662e-06, - "loss": 0.1906, + "loss": 0.0194, "step": 7092 }, { "epoch": 1.97, "learning_rate": 6.839224417849523e-06, - "loss": 0.1918, + "loss": 0.0537, "step": 7093 }, { "epoch": 1.97, "learning_rate": 6.837368958159385e-06, - "loss": 0.0912, + "loss": 0.1664, "step": 7094 }, { "epoch": 1.97, "learning_rate": 6.835513498469247e-06, - "loss": 0.0895, + "loss": 0.0156, "step": 7095 }, { "epoch": 1.97, "learning_rate": 6.8336580387791085e-06, - "loss": 0.0376, + "loss": 0.0842, "step": 7096 }, { "epoch": 1.98, "learning_rate": 6.8318025790889695e-06, - "loss": 0.1367, + "loss": 0.09, "step": 7097 }, { "epoch": 1.98, "learning_rate": 6.829947119398832e-06, - "loss": 0.1423, + "loss": 0.022, "step": 7098 }, { "epoch": 1.98, "learning_rate": 6.828091659708693e-06, - "loss": 0.1353, + "loss": 0.0287, "step": 7099 }, { "epoch": 1.98, "learning_rate": 6.826236200018556e-06, - "loss": 0.2396, + "loss": 0.0581, "step": 7100 }, { "epoch": 1.98, "learning_rate": 6.824380740328417e-06, - "loss": 0.0875, + "loss": 0.0156, "step": 7101 }, { "epoch": 1.98, "learning_rate": 6.822525280638279e-06, - "loss": 0.1912, + "loss": 0.248, "step": 7102 }, { "epoch": 1.98, "learning_rate": 6.820669820948141e-06, - "loss": 0.0367, + "loss": 0.0455, "step": 7103 }, { "epoch": 1.98, "learning_rate": 6.818814361258003e-06, - "loss": 0.1357, + "loss": 0.0964, "step": 7104 }, { "epoch": 1.98, "learning_rate": 6.816958901567864e-06, - "loss": 0.1393, + "loss": 0.0659, "step": 7105 }, { "epoch": 1.98, "learning_rate": 6.815103441877726e-06, - "loss": 0.1382, + "loss": 0.0875, "step": 7106 }, { "epoch": 1.98, "learning_rate": 6.813247982187587e-06, - "loss": 0.0888, + "loss": 0.0595, "step": 7107 }, { "epoch": 1.98, "learning_rate": 6.81139252249745e-06, - "loss": 0.0358, + "loss": 0.0738, "step": 7108 }, { "epoch": 1.98, "learning_rate": 6.809537062807311e-06, - "loss": 0.1355, + "loss": 0.0204, "step": 7109 }, { "epoch": 1.98, "learning_rate": 6.807681603117173e-06, - "loss": 0.0341, + "loss": 0.0607, "step": 7110 }, { "epoch": 1.98, "learning_rate": 6.805826143427035e-06, - "loss": 0.0345, + "loss": 0.1002, "step": 7111 }, { "epoch": 1.98, "learning_rate": 6.803970683736897e-06, - "loss": 0.1917, + "loss": 0.0874, "step": 7112 }, { "epoch": 1.98, "learning_rate": 6.802115224046758e-06, - "loss": 0.0322, + "loss": 0.0178, "step": 7113 }, { "epoch": 1.98, "learning_rate": 6.8002597643566204e-06, - "loss": 0.0332, + "loss": 0.024, "step": 7114 }, { "epoch": 1.98, "learning_rate": 6.7984043046664815e-06, - "loss": 0.0313, + "loss": 0.0444, "step": 7115 }, { "epoch": 1.98, "learning_rate": 6.796548844976344e-06, - "loss": 0.1385, + "loss": 0.0293, "step": 7116 }, { "epoch": 1.98, "learning_rate": 6.794693385286205e-06, - "loss": 0.2455, + "loss": 0.0492, "step": 7117 }, { "epoch": 1.98, "learning_rate": 6.792837925596067e-06, - "loss": 0.4086, + "loss": 0.0482, "step": 7118 }, { "epoch": 1.98, "learning_rate": 6.790982465905929e-06, - "loss": 0.1899, + "loss": 0.0156, "step": 7119 }, { "epoch": 1.98, "learning_rate": 6.789127006215791e-06, - "loss": 0.1382, + "loss": 0.0813, "step": 7120 }, { "epoch": 1.98, "learning_rate": 6.787271546525652e-06, - "loss": 0.0848, + "loss": 0.0633, "step": 7121 }, { "epoch": 1.98, "learning_rate": 6.7854160868355146e-06, - "loss": 0.138, + "loss": 0.0123, "step": 7122 }, { "epoch": 1.98, "learning_rate": 6.783560627145376e-06, - "loss": 0.1937, + "loss": 0.0959, "step": 7123 }, { "epoch": 1.98, "learning_rate": 6.781705167455238e-06, - "loss": 0.0319, + "loss": 0.0108, "step": 7124 }, { "epoch": 1.98, "learning_rate": 6.779849707765099e-06, - "loss": 0.0815, + "loss": 0.0108, "step": 7125 }, { "epoch": 1.98, "learning_rate": 6.777994248074962e-06, - "loss": 0.2465, + "loss": 0.1496, "step": 7126 }, { "epoch": 1.98, "learning_rate": 6.776138788384823e-06, - "loss": 0.1923, + "loss": 0.0136, "step": 7127 }, { "epoch": 1.98, "learning_rate": 6.774283328694685e-06, - "loss": 0.192, + "loss": 0.0103, "step": 7128 }, { "epoch": 1.98, "learning_rate": 6.772427869004547e-06, - "loss": 0.03, + "loss": 0.0085, "step": 7129 }, { "epoch": 1.98, "learning_rate": 6.770572409314409e-06, - "loss": 0.201, + "loss": 0.1029, "step": 7130 }, { "epoch": 1.98, "learning_rate": 6.76871694962427e-06, - "loss": 0.1352, + "loss": 0.0114, "step": 7131 }, { "epoch": 1.98, "learning_rate": 6.766861489934132e-06, - "loss": 0.1404, + "loss": 0.0942, "step": 7132 }, { "epoch": 1.99, "learning_rate": 6.7650060302439934e-06, - "loss": 0.0825, + "loss": 0.0873, "step": 7133 }, { "epoch": 1.99, "learning_rate": 6.763150570553856e-06, - "loss": 0.1935, + "loss": 0.0081, "step": 7134 }, { "epoch": 1.99, "learning_rate": 6.761295110863717e-06, - "loss": 0.1873, + "loss": 0.0122, "step": 7135 }, { "epoch": 1.99, "learning_rate": 6.759439651173578e-06, - "loss": 0.2456, + "loss": 0.139, "step": 7136 }, { "epoch": 1.99, "learning_rate": 6.757584191483441e-06, - "loss": 0.0306, + "loss": 0.0875, "step": 7137 }, { "epoch": 1.99, "learning_rate": 6.755728731793302e-06, - "loss": 0.0848, + "loss": 0.1437, "step": 7138 }, { "epoch": 1.99, "learning_rate": 6.753873272103164e-06, - "loss": 0.1936, + "loss": 0.1574, "step": 7139 }, { "epoch": 1.99, "learning_rate": 6.752017812413026e-06, - "loss": 0.2473, + "loss": 0.0491, "step": 7140 }, { "epoch": 1.99, "learning_rate": 6.7501623527228876e-06, - "loss": 0.0849, + "loss": 0.0123, "step": 7141 }, { "epoch": 1.99, "learning_rate": 6.748306893032749e-06, - "loss": 0.1398, + "loss": 0.1197, "step": 7142 }, { "epoch": 1.99, "learning_rate": 6.746451433342611e-06, - "loss": 0.0309, + "loss": 0.012, "step": 7143 }, { "epoch": 1.99, "learning_rate": 6.744595973652472e-06, - "loss": 0.0849, + "loss": 0.0656, "step": 7144 }, { "epoch": 1.99, "learning_rate": 6.742740513962335e-06, - "loss": 0.0859, + "loss": 0.0895, "step": 7145 }, { "epoch": 1.99, "learning_rate": 6.740885054272196e-06, - "loss": 0.0316, + "loss": 0.1431, "step": 7146 }, { "epoch": 1.99, "learning_rate": 6.739029594582058e-06, - "loss": 0.1354, + "loss": 0.0719, "step": 7147 }, { "epoch": 1.99, "learning_rate": 6.73717413489192e-06, - "loss": 0.2925, + "loss": 0.0283, "step": 7148 }, { "epoch": 1.99, "learning_rate": 6.735318675201782e-06, - "loss": 0.1428, + "loss": 0.0534, "step": 7149 }, { "epoch": 1.99, "learning_rate": 6.733463215511643e-06, - "loss": 0.1388, + "loss": 0.0196, "step": 7150 }, { "epoch": 1.99, "learning_rate": 6.731607755821505e-06, - "loss": 0.1349, + "loss": 0.0631, "step": 7151 }, { "epoch": 1.99, "learning_rate": 6.7297522961313664e-06, - "loss": 0.1378, + "loss": 0.0494, "step": 7152 }, { "epoch": 1.99, "learning_rate": 6.727896836441229e-06, - "loss": 0.2434, + "loss": 0.0952, "step": 7153 }, { "epoch": 1.99, "learning_rate": 6.72604137675109e-06, - "loss": 0.1401, + "loss": 0.0608, "step": 7154 }, { "epoch": 1.99, "learning_rate": 6.724185917060953e-06, - "loss": 0.0859, + "loss": 0.0131, "step": 7155 }, { "epoch": 1.99, "learning_rate": 6.722330457370814e-06, - "loss": 0.0839, + "loss": 0.0661, "step": 7156 }, { "epoch": 1.99, "learning_rate": 6.720474997680676e-06, - "loss": 0.0319, + "loss": 0.0504, "step": 7157 }, { "epoch": 1.99, "learning_rate": 6.718619537990537e-06, - "loss": 0.1363, + "loss": 0.0623, "step": 7158 }, { "epoch": 1.99, "learning_rate": 6.7167640783003995e-06, - "loss": 0.2515, + "loss": 0.3217, "step": 7159 }, { "epoch": 1.99, "learning_rate": 6.7149086186102606e-06, - "loss": 0.1405, + "loss": 0.0714, "step": 7160 }, { "epoch": 1.99, "learning_rate": 6.713053158920123e-06, - "loss": 0.1959, + "loss": 0.0645, "step": 7161 }, { "epoch": 1.99, "learning_rate": 6.711197699229984e-06, - "loss": 0.3524, + "loss": 0.1013, "step": 7162 }, { "epoch": 1.99, "learning_rate": 6.709342239539847e-06, - "loss": 0.0828, + "loss": 0.0907, "step": 7163 }, { "epoch": 1.99, "learning_rate": 6.707486779849708e-06, - "loss": 0.1381, + "loss": 0.1682, "step": 7164 }, { "epoch": 1.99, "learning_rate": 6.70563132015957e-06, - "loss": 0.1931, + "loss": 0.069, "step": 7165 }, { "epoch": 1.99, "learning_rate": 6.703775860469432e-06, - "loss": 0.1949, + "loss": 0.14, "step": 7166 }, { "epoch": 1.99, "learning_rate": 6.701920400779294e-06, - "loss": 0.1404, + "loss": 0.0202, "step": 7167 }, { "epoch": 1.99, "learning_rate": 6.700064941089155e-06, - "loss": 0.2447, + "loss": 0.0445, "step": 7168 }, { "epoch": 2.0, "learning_rate": 6.698209481399017e-06, - "loss": 0.1392, + "loss": 0.0964, "step": 7169 }, { "epoch": 2.0, "learning_rate": 6.696354021708878e-06, - "loss": 0.0324, + "loss": 0.0954, "step": 7170 }, { "epoch": 2.0, "learning_rate": 6.694498562018741e-06, - "loss": 0.1465, + "loss": 0.0327, "step": 7171 }, { "epoch": 2.0, "learning_rate": 6.692643102328602e-06, - "loss": 0.1394, + "loss": 0.1476, "step": 7172 }, { "epoch": 2.0, "learning_rate": 6.690787642638464e-06, - "loss": 0.1879, + "loss": 0.0252, "step": 7173 }, { "epoch": 2.0, "learning_rate": 6.688932182948326e-06, - "loss": 0.1387, + "loss": 0.0221, "step": 7174 }, { "epoch": 2.0, "learning_rate": 6.687076723258188e-06, - "loss": 0.0846, + "loss": 0.0572, "step": 7175 }, { "epoch": 2.0, "learning_rate": 6.685221263568049e-06, - "loss": 0.0864, + "loss": 0.0571, "step": 7176 }, { "epoch": 2.0, "learning_rate": 6.6833658038779115e-06, - "loss": 0.191, + "loss": 0.0822, "step": 7177 }, { "epoch": 2.0, "learning_rate": 6.6815103441877725e-06, - "loss": 0.1416, + "loss": 0.1058, "step": 7178 }, { "epoch": 2.0, "learning_rate": 6.679654884497635e-06, - "loss": 0.2421, + "loss": 0.0241, "step": 7179 }, { "epoch": 2.0, "learning_rate": 6.677799424807496e-06, - "loss": 0.1379, + "loss": 0.0528, "step": 7180 }, { "epoch": 2.0, "learning_rate": 6.675943965117358e-06, - "loss": 0.086, + "loss": 0.0192, "step": 7181 }, { "epoch": 2.0, "learning_rate": 6.67408850542722e-06, - "loss": 0.2972, + "loss": 0.0279, "step": 7182 }, { "epoch": 2.0, "learning_rate": 6.672233045737082e-06, - "loss": 0.1902, + "loss": 0.0194, "step": 7183 }, { "epoch": 2.0, "learning_rate": 6.670377586046943e-06, - "loss": 0.0876, + "loss": 0.0612, "step": 7184 }, { "epoch": 2.0, "learning_rate": 6.668522126356806e-06, - "loss": 0.0339, + "loss": 0.1429, "step": 7185 }, { "epoch": 2.0, "learning_rate": 6.666666666666667e-06, - "loss": 0.0341, + "loss": 0.151, "step": 7186 }, { "epoch": 2.0, "learning_rate": 6.664811206976529e-06, - "loss": 0.0333, + "loss": 0.1564, "step": 7187 }, { "epoch": 2.0, "learning_rate": 6.66295574728639e-06, - "loss": 0.1916, + "loss": 0.0286, "step": 7188 }, { "epoch": 2.0, "learning_rate": 6.661100287596253e-06, - "loss": 0.1392, + "loss": 0.0162, "step": 7189 }, { "epoch": 2.0, "learning_rate": 6.659244827906114e-06, - "loss": 0.1376, + "loss": 0.0838, "step": 7190 }, { "epoch": 2.0, "learning_rate": 6.657389368215976e-06, - "loss": 0.0343, + "loss": 0.0414, "step": 7191 }, { "epoch": 2.0, "learning_rate": 6.655533908525838e-06, - "loss": 0.0337, + "loss": 0.0645, "step": 7192 }, { "epoch": 2.0, "learning_rate": 6.6536784488357e-06, - "loss": 0.1929, + "loss": 0.0971, "step": 7193 }, { "epoch": 2.0, "learning_rate": 6.651822989145561e-06, - "loss": 0.1352, + "loss": 0.0145, "step": 7194 }, { "epoch": 2.0, "learning_rate": 6.6499675294554235e-06, - "loss": 0.1414, + "loss": 0.09, "step": 7195 }, { "epoch": 2.0, "learning_rate": 6.6481120697652845e-06, - "loss": 0.085, + "loss": 0.0506, "step": 7196 }, { "epoch": 2.0, "learning_rate": 6.646256610075147e-06, - "loss": 0.0855, + "loss": 0.1503, "step": 7197 }, { "epoch": 2.0, "learning_rate": 6.644401150385008e-06, - "loss": 0.0853, + "loss": 0.063, "step": 7198 }, { "epoch": 2.0, "learning_rate": 6.64254569069487e-06, - "loss": 0.0873, + "loss": 0.0129, "step": 7199 }, { "epoch": 2.0, "learning_rate": 6.640690231004732e-06, - "loss": 0.2509, + "loss": 0.0159, "step": 7200 }, { "epoch": 2.0, "learning_rate": 6.638834771314594e-06, - "loss": 0.0309, + "loss": 0.066, "step": 7201 }, { "epoch": 2.0, "learning_rate": 6.636979311624455e-06, - "loss": 0.1981, + "loss": 0.0069, "step": 7202 }, { "epoch": 2.0, "learning_rate": 6.635123851934318e-06, - "loss": 0.3026, + "loss": 0.0462, "step": 7203 }, { "epoch": 2.01, "learning_rate": 6.633268392244179e-06, - "loss": 0.1398, + "loss": 0.0882, "step": 7204 }, { "epoch": 2.01, "learning_rate": 6.631412932554041e-06, - "loss": 0.0853, + "loss": 0.0435, "step": 7205 }, { "epoch": 2.01, "learning_rate": 6.629557472863902e-06, - "loss": 0.0869, + "loss": 0.105, "step": 7206 }, { "epoch": 2.01, "learning_rate": 6.627702013173764e-06, - "loss": 0.0826, + "loss": 0.0135, "step": 7207 }, { "epoch": 2.01, "learning_rate": 6.625846553483626e-06, - "loss": 0.2457, + "loss": 0.0572, "step": 7208 }, { "epoch": 2.01, "learning_rate": 6.623991093793488e-06, - "loss": 0.1396, + "loss": 0.0181, "step": 7209 }, { "epoch": 2.01, "learning_rate": 6.622135634103349e-06, - "loss": 0.0846, + "loss": 0.0678, "step": 7210 }, { "epoch": 2.01, "learning_rate": 6.620280174413212e-06, - "loss": 0.3025, + "loss": 0.1533, "step": 7211 }, { "epoch": 2.01, "learning_rate": 6.618424714723073e-06, - "loss": 0.249, + "loss": 0.1748, "step": 7212 }, { "epoch": 2.01, "learning_rate": 6.6165692550329355e-06, - "loss": 0.1359, + "loss": 0.0478, "step": 7213 }, { "epoch": 2.01, "learning_rate": 6.6147137953427965e-06, - "loss": 0.3055, + "loss": 0.0213, "step": 7214 }, { "epoch": 2.01, "learning_rate": 6.612858335652659e-06, - "loss": 0.0317, + "loss": 0.0137, "step": 7215 }, { "epoch": 2.01, "learning_rate": 6.61100287596252e-06, - "loss": 0.1906, + "loss": 0.0596, "step": 7216 }, { "epoch": 2.01, "learning_rate": 6.609147416272382e-06, - "loss": 0.1844, + "loss": 0.1735, "step": 7217 }, { "epoch": 2.01, "learning_rate": 6.607291956582243e-06, - "loss": 0.2443, + "loss": 0.0988, "step": 7218 }, { "epoch": 2.01, "learning_rate": 6.605436496892106e-06, - "loss": 0.1427, + "loss": 0.0604, "step": 7219 }, { "epoch": 2.01, "learning_rate": 6.603581037201967e-06, - "loss": 0.0844, + "loss": 0.0184, "step": 7220 }, { "epoch": 2.01, "learning_rate": 6.6017255775118296e-06, - "loss": 0.1399, + "loss": 0.0484, "step": 7221 }, { "epoch": 2.01, "learning_rate": 6.599870117821691e-06, - "loss": 0.1397, + "loss": 0.2232, "step": 7222 }, { "epoch": 2.01, "learning_rate": 6.598014658131553e-06, - "loss": 0.0861, + "loss": 0.0137, "step": 7223 }, { "epoch": 2.01, "learning_rate": 6.596159198441414e-06, - "loss": 0.1437, + "loss": 0.039, "step": 7224 }, { "epoch": 2.01, "learning_rate": 6.594303738751276e-06, - "loss": 0.1364, + "loss": 0.0513, "step": 7225 }, { "epoch": 2.01, "learning_rate": 6.592448279061138e-06, - "loss": 0.0868, + "loss": 0.1222, "step": 7226 }, { "epoch": 2.01, "learning_rate": 6.590592819371e-06, - "loss": 0.0332, + "loss": 0.206, "step": 7227 }, { "epoch": 2.01, "learning_rate": 6.588737359680861e-06, - "loss": 0.3507, + "loss": 0.0953, "step": 7228 }, { "epoch": 2.01, "learning_rate": 6.586881899990724e-06, - "loss": 0.2422, + "loss": 0.0983, "step": 7229 }, { "epoch": 2.01, "learning_rate": 6.585026440300585e-06, - "loss": 0.1377, + "loss": 0.0637, "step": 7230 }, { "epoch": 2.01, "learning_rate": 6.5831709806104474e-06, - "loss": 0.1903, + "loss": 0.0699, "step": 7231 }, { "epoch": 2.01, "learning_rate": 6.5813155209203085e-06, - "loss": 0.2982, + "loss": 0.0705, "step": 7232 }, { "epoch": 2.01, "learning_rate": 6.57946006123017e-06, - "loss": 0.2935, + "loss": 0.0692, "step": 7233 }, { "epoch": 2.01, "learning_rate": 6.577604601540032e-06, - "loss": 0.1442, + "loss": 0.0223, "step": 7234 }, { "epoch": 2.01, "learning_rate": 6.575749141849894e-06, - "loss": 0.0877, + "loss": 0.0597, "step": 7235 }, { "epoch": 2.01, "learning_rate": 6.573893682159755e-06, - "loss": 0.1874, + "loss": 0.0493, "step": 7236 }, { "epoch": 2.01, "learning_rate": 6.572038222469618e-06, - "loss": 0.0358, + "loss": 0.0914, "step": 7237 }, { "epoch": 2.01, "learning_rate": 6.570182762779479e-06, - "loss": 0.238, + "loss": 0.075, "step": 7238 }, { "epoch": 2.01, "learning_rate": 6.5683273030893416e-06, - "loss": 0.0883, + "loss": 0.0495, "step": 7239 }, { "epoch": 2.02, "learning_rate": 6.5664718433992026e-06, - "loss": 0.2446, + "loss": 0.0594, "step": 7240 }, { "epoch": 2.02, "learning_rate": 6.5646163837090644e-06, - "loss": 0.1388, + "loss": 0.1017, "step": 7241 }, { "epoch": 2.02, "learning_rate": 6.562760924018926e-06, - "loss": 0.0363, + "loss": 0.0515, "step": 7242 }, { "epoch": 2.02, "learning_rate": 6.560905464328788e-06, - "loss": 0.0898, + "loss": 0.1516, "step": 7243 }, { "epoch": 2.02, "learning_rate": 6.559050004638649e-06, - "loss": 0.2372, + "loss": 0.0832, "step": 7244 }, { "epoch": 2.02, "learning_rate": 6.557194544948512e-06, - "loss": 0.0876, + "loss": 0.0471, "step": 7245 }, { "epoch": 2.02, "learning_rate": 6.555339085258373e-06, - "loss": 0.0388, + "loss": 0.1114, "step": 7246 }, { "epoch": 2.02, "learning_rate": 6.553483625568236e-06, - "loss": 0.1888, + "loss": 0.1062, "step": 7247 }, { "epoch": 2.02, "learning_rate": 6.551628165878097e-06, - "loss": 0.2444, + "loss": 0.1047, "step": 7248 }, { "epoch": 2.02, "learning_rate": 6.549772706187959e-06, - "loss": 0.1411, + "loss": 0.1935, "step": 7249 }, { "epoch": 2.02, "learning_rate": 6.5479172464978204e-06, - "loss": 0.191, + "loss": 0.016, "step": 7250 }, { "epoch": 2.02, "learning_rate": 6.546061786807682e-06, - "loss": 0.1957, + "loss": 0.0756, "step": 7251 }, { "epoch": 2.02, "learning_rate": 6.544206327117544e-06, - "loss": 0.2448, + "loss": 0.0286, "step": 7252 }, { "epoch": 2.02, "learning_rate": 6.542350867427406e-06, - "loss": 0.1389, + "loss": 0.0182, "step": 7253 }, { "epoch": 2.02, "learning_rate": 6.540495407737267e-06, - "loss": 0.192, + "loss": 0.1106, "step": 7254 }, { "epoch": 2.02, "learning_rate": 6.53863994804713e-06, - "loss": 0.0882, + "loss": 0.0469, "step": 7255 }, { "epoch": 2.02, "learning_rate": 6.536784488356991e-06, - "loss": 0.1872, + "loss": 0.0223, "step": 7256 }, { "epoch": 2.02, "learning_rate": 6.5349290286668535e-06, - "loss": 0.0393, + "loss": 0.0182, "step": 7257 }, { "epoch": 2.02, "learning_rate": 6.5330735689767146e-06, - "loss": 0.0384, + "loss": 0.1813, "step": 7258 }, { "epoch": 2.02, "learning_rate": 6.531218109286576e-06, - "loss": 0.0375, + "loss": 0.2278, "step": 7259 }, { "epoch": 2.02, "learning_rate": 6.529362649596438e-06, - "loss": 0.2368, + "loss": 0.1227, "step": 7260 }, { "epoch": 2.02, "learning_rate": 6.5275071899063e-06, - "loss": 0.1408, + "loss": 0.0507, "step": 7261 }, { "epoch": 2.02, "learning_rate": 6.525651730216161e-06, - "loss": 0.1395, + "loss": 0.0485, "step": 7262 }, { "epoch": 2.02, "learning_rate": 6.523796270526024e-06, - "loss": 0.09, + "loss": 0.0185, "step": 7263 }, { "epoch": 2.02, "learning_rate": 6.521940810835885e-06, - "loss": 0.1416, + "loss": 0.1039, "step": 7264 }, { "epoch": 2.02, "learning_rate": 6.520085351145748e-06, - "loss": 0.143, + "loss": 0.0253, "step": 7265 }, { "epoch": 2.02, "learning_rate": 6.518229891455609e-06, - "loss": 0.0368, + "loss": 0.1284, "step": 7266 }, { "epoch": 2.02, "learning_rate": 6.5163744317654705e-06, - "loss": 0.0892, + "loss": 0.0559, "step": 7267 }, { "epoch": 2.02, "learning_rate": 6.514518972075332e-06, - "loss": 0.0349, + "loss": 0.0471, "step": 7268 }, { "epoch": 2.02, "learning_rate": 6.512663512385194e-06, - "loss": 0.1396, + "loss": 0.0488, "step": 7269 }, { "epoch": 2.02, "learning_rate": 6.510808052695055e-06, - "loss": 0.0878, + "loss": 0.0659, "step": 7270 }, { "epoch": 2.02, "learning_rate": 6.508952593004918e-06, - "loss": 0.1908, + "loss": 0.1349, "step": 7271 }, { "epoch": 2.02, "learning_rate": 6.507097133314779e-06, - "loss": 0.1372, + "loss": 0.1472, "step": 7272 }, { "epoch": 2.02, "learning_rate": 6.505241673624642e-06, - "loss": 0.0842, + "loss": 0.0207, "step": 7273 }, { "epoch": 2.02, "learning_rate": 6.503386213934503e-06, - "loss": 0.1952, + "loss": 0.0514, "step": 7274 }, { "epoch": 2.02, "learning_rate": 6.5015307542443655e-06, - "loss": 0.3027, + "loss": 0.0548, "step": 7275 }, { "epoch": 2.03, "learning_rate": 6.4996752945542265e-06, - "loss": 0.1946, + "loss": 0.1043, "step": 7276 }, { "epoch": 2.03, "learning_rate": 6.497819834864088e-06, - "loss": 0.0327, + "loss": 0.0275, "step": 7277 }, { "epoch": 2.03, "learning_rate": 6.495964375173949e-06, - "loss": 0.2481, + "loss": 0.1328, "step": 7278 }, { "epoch": 2.03, "learning_rate": 6.494108915483812e-06, - "loss": 0.0831, + "loss": 0.0161, "step": 7279 }, { "epoch": 2.03, "learning_rate": 6.492253455793673e-06, - "loss": 0.0857, + "loss": 0.0764, "step": 7280 }, { "epoch": 2.03, "learning_rate": 6.490397996103536e-06, - "loss": 0.0819, + "loss": 0.0784, "step": 7281 }, { "epoch": 2.03, "learning_rate": 6.488542536413397e-06, - "loss": 0.1969, + "loss": 0.0167, "step": 7282 }, { "epoch": 2.03, "learning_rate": 6.48668707672326e-06, - "loss": 0.0319, + "loss": 0.0563, "step": 7283 }, { "epoch": 2.03, "learning_rate": 6.484831617033121e-06, - "loss": 0.0854, + "loss": 0.1368, "step": 7284 }, { "epoch": 2.03, "learning_rate": 6.4829761573429825e-06, - "loss": 0.1424, + "loss": 0.0375, "step": 7285 }, { "epoch": 2.03, "learning_rate": 6.481120697652844e-06, - "loss": 0.3022, + "loss": 0.1058, "step": 7286 }, { "epoch": 2.03, "learning_rate": 6.479265237962706e-06, - "loss": 0.1379, + "loss": 0.1601, "step": 7287 }, { "epoch": 2.03, "learning_rate": 6.477409778272567e-06, - "loss": 0.0846, + "loss": 0.0237, "step": 7288 }, { "epoch": 2.03, "learning_rate": 6.475554318582429e-06, - "loss": 0.0867, + "loss": 0.0482, "step": 7289 }, { "epoch": 2.03, "learning_rate": 6.473698858892291e-06, - "loss": 0.1967, + "loss": 0.0725, "step": 7290 }, { "epoch": 2.03, "learning_rate": 6.471843399202152e-06, - "loss": 0.1856, + "loss": 0.047, "step": 7291 }, { "epoch": 2.03, "learning_rate": 6.469987939512015e-06, - "loss": 0.1958, + "loss": 0.0742, "step": 7292 }, { "epoch": 2.03, "learning_rate": 6.468132479821876e-06, - "loss": 0.0304, + "loss": 0.1652, "step": 7293 }, { "epoch": 2.03, "learning_rate": 6.4662770201317385e-06, - "loss": 0.0314, + "loss": 0.0103, "step": 7294 }, { "epoch": 2.03, "learning_rate": 6.4644215604415995e-06, - "loss": 0.0306, + "loss": 0.1296, "step": 7295 }, { "epoch": 2.03, "learning_rate": 6.462566100751461e-06, - "loss": 0.1962, + "loss": 0.028, "step": 7296 }, { "epoch": 2.03, "learning_rate": 6.460710641061323e-06, - "loss": 0.2444, + "loss": 0.0721, "step": 7297 }, { "epoch": 2.03, "learning_rate": 6.458855181371185e-06, - "loss": 0.0292, + "loss": 0.0191, "step": 7298 }, { "epoch": 2.03, "learning_rate": 6.456999721681046e-06, - "loss": 0.0309, + "loss": 0.01, "step": 7299 }, { "epoch": 2.03, "learning_rate": 6.455144261990909e-06, - "loss": 0.2512, + "loss": 0.0331, "step": 7300 }, { "epoch": 2.03, "learning_rate": 6.45328880230077e-06, - "loss": 0.0859, + "loss": 0.0147, "step": 7301 }, { "epoch": 2.03, "learning_rate": 6.451433342610633e-06, - "loss": 0.1929, + "loss": 0.1454, "step": 7302 }, { "epoch": 2.03, "learning_rate": 6.449577882920494e-06, - "loss": 0.1357, + "loss": 0.0077, "step": 7303 }, { "epoch": 2.03, "learning_rate": 6.4477224232303555e-06, - "loss": 0.1376, + "loss": 0.0676, "step": 7304 }, { "epoch": 2.03, "learning_rate": 6.445866963540217e-06, - "loss": 0.0872, + "loss": 0.1161, "step": 7305 }, { "epoch": 2.03, "learning_rate": 6.444011503850079e-06, - "loss": 0.0842, + "loss": 0.3127, "step": 7306 }, { "epoch": 2.03, "learning_rate": 6.44215604415994e-06, - "loss": 0.1376, + "loss": 0.0211, "step": 7307 }, { "epoch": 2.03, "learning_rate": 6.440300584469803e-06, - "loss": 0.0305, + "loss": 0.0865, "step": 7308 }, { "epoch": 2.03, "learning_rate": 6.438445124779664e-06, - "loss": 0.0295, + "loss": 0.0935, "step": 7309 }, { "epoch": 2.03, "learning_rate": 6.436589665089527e-06, - "loss": 0.1383, + "loss": 0.1692, "step": 7310 }, { "epoch": 2.03, "learning_rate": 6.434734205399388e-06, - "loss": 0.1973, + "loss": 0.0856, "step": 7311 }, { "epoch": 2.04, "learning_rate": 6.4328787457092505e-06, - "loss": 0.2473, + "loss": 0.1439, "step": 7312 }, { "epoch": 2.04, "learning_rate": 6.4310232860191115e-06, - "loss": 0.1972, + "loss": 0.0278, "step": 7313 }, { "epoch": 2.04, "learning_rate": 6.429167826328973e-06, - "loss": 0.1945, + "loss": 0.1135, "step": 7314 }, { "epoch": 2.04, "learning_rate": 6.427312366638835e-06, - "loss": 0.1384, + "loss": 0.1859, "step": 7315 }, { "epoch": 2.04, "learning_rate": 6.425456906948697e-06, - "loss": 0.0821, + "loss": 0.0661, "step": 7316 }, { "epoch": 2.04, "learning_rate": 6.423601447258558e-06, - "loss": 0.083, + "loss": 0.0969, "step": 7317 }, { "epoch": 2.04, "learning_rate": 6.421745987568421e-06, - "loss": 0.1389, + "loss": 0.1079, "step": 7318 }, { "epoch": 2.04, "learning_rate": 6.419890527878282e-06, - "loss": 0.2453, + "loss": 0.0339, "step": 7319 }, { "epoch": 2.04, "learning_rate": 6.418035068188145e-06, - "loss": 0.137, + "loss": 0.0224, "step": 7320 }, { "epoch": 2.04, "learning_rate": 6.416179608498006e-06, - "loss": 0.0295, + "loss": 0.0559, "step": 7321 }, { "epoch": 2.04, "learning_rate": 6.4143241488078675e-06, - "loss": 0.1962, + "loss": 0.1072, "step": 7322 }, { "epoch": 2.04, "learning_rate": 6.412468689117729e-06, - "loss": 0.1387, + "loss": 0.1114, "step": 7323 }, { "epoch": 2.04, "learning_rate": 6.410613229427591e-06, - "loss": 0.4162, + "loss": 0.0308, "step": 7324 }, { "epoch": 2.04, "learning_rate": 6.408757769737452e-06, - "loss": 0.1412, + "loss": 0.0829, "step": 7325 }, { "epoch": 2.04, "learning_rate": 6.406902310047315e-06, - "loss": 0.1355, + "loss": 0.0702, "step": 7326 }, { "epoch": 2.04, "learning_rate": 6.405046850357176e-06, - "loss": 0.1952, + "loss": 0.0491, "step": 7327 }, { "epoch": 2.04, "learning_rate": 6.403191390667039e-06, - "loss": 0.3451, + "loss": 0.0501, "step": 7328 }, { "epoch": 2.04, "learning_rate": 6.4013359309769e-06, - "loss": 0.2489, + "loss": 0.0375, "step": 7329 }, { "epoch": 2.04, "learning_rate": 6.399480471286762e-06, - "loss": 0.0311, + "loss": 0.0946, "step": 7330 }, { "epoch": 2.04, "learning_rate": 6.3976250115966235e-06, - "loss": 0.1356, + "loss": 0.2773, "step": 7331 }, { "epoch": 2.04, "learning_rate": 6.395769551906485e-06, - "loss": 0.0855, + "loss": 0.096, "step": 7332 }, { "epoch": 2.04, "learning_rate": 6.393914092216346e-06, - "loss": 0.1959, + "loss": 0.0755, "step": 7333 }, { "epoch": 2.04, "learning_rate": 6.392058632526209e-06, - "loss": 0.0326, + "loss": 0.1286, "step": 7334 }, { "epoch": 2.04, "learning_rate": 6.39020317283607e-06, - "loss": 0.1343, + "loss": 0.0078, "step": 7335 }, { "epoch": 2.04, "learning_rate": 6.388347713145933e-06, - "loss": 0.2419, + "loss": 0.0128, "step": 7336 }, { "epoch": 2.04, "learning_rate": 6.386492253455794e-06, - "loss": 0.139, + "loss": 0.1265, "step": 7337 }, { "epoch": 2.04, "learning_rate": 6.384636793765656e-06, - "loss": 0.1398, + "loss": 0.0215, "step": 7338 }, { "epoch": 2.04, "learning_rate": 6.382781334075518e-06, - "loss": 0.1336, + "loss": 0.0224, "step": 7339 }, { "epoch": 2.04, "learning_rate": 6.3809258743853795e-06, - "loss": 0.2965, + "loss": 0.109, "step": 7340 }, { "epoch": 2.04, "learning_rate": 6.3790704146952405e-06, - "loss": 0.1892, + "loss": 0.2325, "step": 7341 }, { "epoch": 2.04, "learning_rate": 6.377214955005103e-06, - "loss": 0.0866, + "loss": 0.0186, "step": 7342 }, { "epoch": 2.04, "learning_rate": 6.375359495314964e-06, - "loss": 0.2449, + "loss": 0.0155, "step": 7343 }, { "epoch": 2.04, "learning_rate": 6.373504035624827e-06, - "loss": 0.0347, + "loss": 0.146, "step": 7344 }, { "epoch": 2.04, "learning_rate": 6.371648575934688e-06, - "loss": 0.2985, + "loss": 0.1458, "step": 7345 }, { "epoch": 2.04, "learning_rate": 6.369793116244551e-06, - "loss": 0.0854, + "loss": 0.0516, "step": 7346 }, { "epoch": 2.04, "learning_rate": 6.367937656554412e-06, - "loss": 0.2462, + "loss": 0.1957, "step": 7347 }, { "epoch": 2.05, "learning_rate": 6.366082196864274e-06, - "loss": 0.0876, + "loss": 0.0736, "step": 7348 }, { "epoch": 2.05, "learning_rate": 6.3642267371741355e-06, - "loss": 0.0876, + "loss": 0.0609, "step": 7349 }, { "epoch": 2.05, "learning_rate": 6.362371277483997e-06, - "loss": 0.1955, + "loss": 0.0206, "step": 7350 }, { "epoch": 2.05, "learning_rate": 6.360515817793858e-06, - "loss": 0.0874, + "loss": 0.055, "step": 7351 }, { "epoch": 2.05, "learning_rate": 6.358660358103721e-06, - "loss": 0.0875, + "loss": 0.1092, "step": 7352 }, { "epoch": 2.05, "learning_rate": 6.356804898413582e-06, - "loss": 0.0893, + "loss": 0.0247, "step": 7353 }, { "epoch": 2.05, "learning_rate": 6.354949438723445e-06, - "loss": 0.0362, + "loss": 0.0967, "step": 7354 }, { "epoch": 2.05, "learning_rate": 6.353093979033306e-06, - "loss": 0.0873, + "loss": 0.0808, "step": 7355 }, { "epoch": 2.05, "learning_rate": 6.351238519343168e-06, - "loss": 0.0357, + "loss": 0.1226, "step": 7356 }, { "epoch": 2.05, "learning_rate": 6.3493830596530296e-06, - "loss": 0.1371, + "loss": 0.165, "step": 7357 }, { "epoch": 2.05, "learning_rate": 6.3475275999628914e-06, - "loss": 0.0861, + "loss": 0.0644, "step": 7358 }, { "epoch": 2.05, "learning_rate": 6.3456721402727525e-06, - "loss": 0.1912, + "loss": 0.0291, "step": 7359 }, { "epoch": 2.05, "learning_rate": 6.343816680582615e-06, - "loss": 0.1399, + "loss": 0.078, "step": 7360 }, { "epoch": 2.05, "learning_rate": 6.341961220892476e-06, - "loss": 0.0341, + "loss": 0.0727, "step": 7361 }, { "epoch": 2.05, "learning_rate": 6.340105761202339e-06, - "loss": 0.0342, + "loss": 0.0197, "step": 7362 }, { "epoch": 2.05, "learning_rate": 6.3382503015122e-06, - "loss": 0.1391, + "loss": 0.0786, "step": 7363 }, { "epoch": 2.05, "learning_rate": 6.336394841822062e-06, - "loss": 0.1376, + "loss": 0.0398, "step": 7364 }, { "epoch": 2.05, "learning_rate": 6.334539382131924e-06, - "loss": 0.1371, + "loss": 0.0618, "step": 7365 }, { "epoch": 2.05, "learning_rate": 6.3326839224417856e-06, - "loss": 0.0316, + "loss": 0.0862, "step": 7366 }, { "epoch": 2.05, "learning_rate": 6.330828462751647e-06, - "loss": 0.0859, + "loss": 0.0168, "step": 7367 }, { "epoch": 2.05, "learning_rate": 6.328973003061509e-06, - "loss": 0.0865, + "loss": 0.2383, "step": 7368 }, { "epoch": 2.05, "learning_rate": 6.32711754337137e-06, - "loss": 0.2532, + "loss": 0.1025, "step": 7369 }, { "epoch": 2.05, "learning_rate": 6.325262083681233e-06, - "loss": 0.1431, + "loss": 0.0273, "step": 7370 }, { "epoch": 2.05, "learning_rate": 6.323406623991094e-06, - "loss": 0.2434, + "loss": 0.0359, "step": 7371 }, { "epoch": 2.05, "learning_rate": 6.321551164300957e-06, - "loss": 0.0845, + "loss": 0.017, "step": 7372 }, { "epoch": 2.05, "learning_rate": 6.319695704610818e-06, - "loss": 0.0842, + "loss": 0.1077, "step": 7373 }, { "epoch": 2.05, "learning_rate": 6.31784024492068e-06, - "loss": 0.0861, + "loss": 0.0165, "step": 7374 }, { "epoch": 2.05, "learning_rate": 6.3159847852305416e-06, - "loss": 0.2548, + "loss": 0.0134, "step": 7375 }, { "epoch": 2.05, "learning_rate": 6.314129325540403e-06, - "loss": 0.1389, + "loss": 0.0106, "step": 7376 }, { "epoch": 2.05, "learning_rate": 6.3122738658502644e-06, - "loss": 0.5784, + "loss": 0.0611, "step": 7377 }, { "epoch": 2.05, "learning_rate": 6.310418406160127e-06, - "loss": 0.1957, + "loss": 0.1381, "step": 7378 }, { "epoch": 2.05, "learning_rate": 6.308562946469988e-06, - "loss": 0.1358, + "loss": 0.0176, "step": 7379 }, { "epoch": 2.05, "learning_rate": 6.306707486779851e-06, - "loss": 0.1931, + "loss": 0.0084, "step": 7380 }, { "epoch": 2.05, "learning_rate": 6.304852027089712e-06, - "loss": 0.2537, + "loss": 0.0498, "step": 7381 }, { "epoch": 2.05, "learning_rate": 6.302996567399574e-06, - "loss": 0.1411, + "loss": 0.0131, "step": 7382 }, { "epoch": 2.05, "learning_rate": 6.301141107709436e-06, - "loss": 0.1909, + "loss": 0.0155, "step": 7383 }, { "epoch": 2.06, "learning_rate": 6.2992856480192975e-06, - "loss": 0.195, + "loss": 0.1189, "step": 7384 }, { "epoch": 2.06, "learning_rate": 6.2974301883291586e-06, - "loss": 0.2993, + "loss": 0.076, "step": 7385 }, { "epoch": 2.06, "learning_rate": 6.295574728639021e-06, - "loss": 0.0846, + "loss": 0.0492, "step": 7386 }, { "epoch": 2.06, "learning_rate": 6.293719268948882e-06, - "loss": 0.1383, + "loss": 0.0118, "step": 7387 }, { "epoch": 2.06, "learning_rate": 6.291863809258745e-06, - "loss": 0.302, + "loss": 0.0138, "step": 7388 }, { "epoch": 2.06, "learning_rate": 6.290008349568606e-06, - "loss": 0.1414, + "loss": 0.0516, "step": 7389 }, { "epoch": 2.06, "learning_rate": 6.288152889878468e-06, - "loss": 0.2469, + "loss": 0.1025, "step": 7390 }, { "epoch": 2.06, "learning_rate": 6.28629743018833e-06, - "loss": 0.0847, + "loss": 0.0077, "step": 7391 }, { "epoch": 2.06, "learning_rate": 6.284441970498192e-06, - "loss": 0.1363, + "loss": 0.0118, "step": 7392 }, { "epoch": 2.06, "learning_rate": 6.282586510808053e-06, - "loss": 0.1964, + "loss": 0.007, "step": 7393 }, { "epoch": 2.06, "learning_rate": 6.280731051117915e-06, - "loss": 0.2404, + "loss": 0.0135, "step": 7394 }, { "epoch": 2.06, "learning_rate": 6.278875591427776e-06, - "loss": 0.2423, + "loss": 0.0683, "step": 7395 }, { "epoch": 2.06, "learning_rate": 6.277020131737639e-06, - "loss": 0.1938, + "loss": 0.0619, "step": 7396 }, { "epoch": 2.06, "learning_rate": 6.2751646720475e-06, - "loss": 0.088, + "loss": 0.0117, "step": 7397 }, { "epoch": 2.06, "learning_rate": 6.273309212357362e-06, - "loss": 0.1933, + "loss": 0.0068, "step": 7398 }, { "epoch": 2.06, "learning_rate": 6.271453752667224e-06, - "loss": 0.0874, + "loss": 0.0957, "step": 7399 }, { "epoch": 2.06, "learning_rate": 6.269598292977086e-06, - "loss": 0.0885, + "loss": 0.048, "step": 7400 }, { "epoch": 2.06, "learning_rate": 6.267742833286947e-06, - "loss": 0.1928, + "loss": 0.2041, "step": 7401 }, { "epoch": 2.06, "learning_rate": 6.2658873735968095e-06, - "loss": 0.1389, + "loss": 0.0069, "step": 7402 }, { "epoch": 2.06, "learning_rate": 6.2640319139066705e-06, - "loss": 0.2421, + "loss": 0.0088, "step": 7403 }, { "epoch": 2.06, "learning_rate": 6.262176454216533e-06, - "loss": 0.0876, + "loss": 0.0588, "step": 7404 }, { "epoch": 2.06, "learning_rate": 6.260320994526394e-06, - "loss": 0.0903, + "loss": 0.0097, "step": 7405 }, { "epoch": 2.06, "learning_rate": 6.258465534836257e-06, - "loss": 0.1942, + "loss": 0.1397, "step": 7406 }, { "epoch": 2.06, "learning_rate": 6.256610075146118e-06, - "loss": 0.0901, + "loss": 0.0118, "step": 7407 }, { "epoch": 2.06, "learning_rate": 6.25475461545598e-06, - "loss": 0.1381, + "loss": 0.1144, "step": 7408 }, { "epoch": 2.06, "learning_rate": 6.252899155765842e-06, - "loss": 0.1887, + "loss": 0.0649, "step": 7409 }, { "epoch": 2.06, "learning_rate": 6.251043696075704e-06, - "loss": 0.0899, + "loss": 0.0714, "step": 7410 }, { "epoch": 2.06, "learning_rate": 6.249188236385565e-06, - "loss": 0.0366, + "loss": 0.1026, "step": 7411 }, { "epoch": 2.06, "learning_rate": 6.247332776695427e-06, - "loss": 0.139, + "loss": 0.1154, "step": 7412 }, { "epoch": 2.06, "learning_rate": 6.245477317005288e-06, - "loss": 0.0901, + "loss": 0.0101, "step": 7413 }, { "epoch": 2.06, "learning_rate": 6.243621857315151e-06, - "loss": 0.0901, + "loss": 0.0225, "step": 7414 }, { "epoch": 2.06, "learning_rate": 6.241766397625012e-06, - "loss": 0.0865, + "loss": 0.097, "step": 7415 }, { "epoch": 2.06, "learning_rate": 6.239910937934874e-06, - "loss": 0.1941, + "loss": 0.0273, "step": 7416 }, { "epoch": 2.06, "learning_rate": 6.238055478244736e-06, - "loss": 0.0363, + "loss": 0.0513, "step": 7417 }, { "epoch": 2.06, "learning_rate": 6.236200018554598e-06, - "loss": 0.2412, + "loss": 0.0444, "step": 7418 }, { "epoch": 2.06, "learning_rate": 6.234344558864459e-06, - "loss": 0.0364, + "loss": 0.0161, "step": 7419 }, { "epoch": 2.07, "learning_rate": 6.2324890991743215e-06, - "loss": 0.1374, + "loss": 0.0895, "step": 7420 }, { "epoch": 2.07, "learning_rate": 6.2306336394841825e-06, - "loss": 0.0868, + "loss": 0.0155, "step": 7421 }, { "epoch": 2.07, "learning_rate": 6.228778179794045e-06, - "loss": 0.0881, + "loss": 0.0955, "step": 7422 }, { "epoch": 2.07, "learning_rate": 6.226922720103906e-06, - "loss": 0.0866, + "loss": 0.0792, "step": 7423 }, { "epoch": 2.07, "learning_rate": 6.225067260413768e-06, - "loss": 0.2465, + "loss": 0.112, "step": 7424 }, { "epoch": 2.07, "learning_rate": 6.22321180072363e-06, - "loss": 0.1937, + "loss": 0.0397, "step": 7425 }, { "epoch": 2.07, "learning_rate": 6.221356341033492e-06, - "loss": 0.1394, + "loss": 0.059, "step": 7426 }, { "epoch": 2.07, "learning_rate": 6.219500881343353e-06, - "loss": 0.0332, + "loss": 0.064, "step": 7427 }, { "epoch": 2.07, "learning_rate": 6.217645421653216e-06, - "loss": 0.0873, + "loss": 0.1172, "step": 7428 }, { "epoch": 2.07, "learning_rate": 6.215789961963077e-06, - "loss": 0.2425, + "loss": 0.0143, "step": 7429 }, { "epoch": 2.07, "learning_rate": 6.213934502272939e-06, - "loss": 0.3025, + "loss": 0.072, "step": 7430 }, { "epoch": 2.07, "learning_rate": 6.2120790425828e-06, - "loss": 0.0844, + "loss": 0.0198, "step": 7431 }, { "epoch": 2.07, "learning_rate": 6.210223582892663e-06, - "loss": 0.1366, + "loss": 0.0472, "step": 7432 }, { "epoch": 2.07, "learning_rate": 6.208368123202524e-06, - "loss": 0.1931, + "loss": 0.0546, "step": 7433 }, { "epoch": 2.07, "learning_rate": 6.206512663512386e-06, - "loss": 0.1906, + "loss": 0.1158, "step": 7434 }, { "epoch": 2.07, "learning_rate": 6.204657203822248e-06, - "loss": 0.1385, + "loss": 0.1599, "step": 7435 }, { "epoch": 2.07, "learning_rate": 6.20280174413211e-06, - "loss": 0.0853, + "loss": 0.0519, "step": 7436 }, { "epoch": 2.07, "learning_rate": 6.200946284441971e-06, - "loss": 0.0322, + "loss": 0.0107, "step": 7437 }, { "epoch": 2.07, "learning_rate": 6.1990908247518335e-06, - "loss": 0.2454, + "loss": 0.0611, "step": 7438 }, { "epoch": 2.07, "learning_rate": 6.1972353650616945e-06, - "loss": 0.0853, + "loss": 0.0596, "step": 7439 }, { "epoch": 2.07, "learning_rate": 6.1953799053715555e-06, - "loss": 0.1932, + "loss": 0.1596, "step": 7440 }, { "epoch": 2.07, "learning_rate": 6.193524445681418e-06, - "loss": 0.1395, + "loss": 0.043, "step": 7441 }, { "epoch": 2.07, "learning_rate": 6.191668985991279e-06, - "loss": 0.1857, + "loss": 0.0485, "step": 7442 }, { "epoch": 2.07, "learning_rate": 6.189813526301142e-06, - "loss": 0.1954, + "loss": 0.1218, "step": 7443 }, { "epoch": 2.07, "learning_rate": 6.187958066611003e-06, - "loss": 0.1384, + "loss": 0.0149, "step": 7444 }, { "epoch": 2.07, "learning_rate": 6.186102606920865e-06, - "loss": 0.19, + "loss": 0.1267, "step": 7445 }, { "epoch": 2.07, "learning_rate": 6.184247147230727e-06, - "loss": 0.0335, + "loss": 0.0342, "step": 7446 }, { "epoch": 2.07, "learning_rate": 6.182391687540589e-06, - "loss": 0.0878, + "loss": 0.0497, "step": 7447 }, { "epoch": 2.07, "learning_rate": 6.18053622785045e-06, - "loss": 0.1363, + "loss": 0.059, "step": 7448 }, { "epoch": 2.07, "learning_rate": 6.178680768160312e-06, - "loss": 0.0842, + "loss": 0.0652, "step": 7449 }, { "epoch": 2.07, "learning_rate": 6.176825308470173e-06, - "loss": 0.1891, + "loss": 0.0144, "step": 7450 }, { "epoch": 2.07, "learning_rate": 6.174969848780036e-06, - "loss": 0.2465, + "loss": 0.0128, "step": 7451 }, { "epoch": 2.07, "learning_rate": 6.173114389089897e-06, - "loss": 0.0883, + "loss": 0.0215, "step": 7452 }, { "epoch": 2.07, "learning_rate": 6.171258929399759e-06, - "loss": 0.0865, + "loss": 0.021, "step": 7453 }, { "epoch": 2.07, "learning_rate": 6.169403469709621e-06, - "loss": 0.0317, + "loss": 0.0185, "step": 7454 }, { "epoch": 2.07, "learning_rate": 6.167548010019483e-06, - "loss": 0.1911, + "loss": 0.0485, "step": 7455 }, { "epoch": 2.08, "learning_rate": 6.165692550329344e-06, - "loss": 0.0858, + "loss": 0.0099, "step": 7456 }, { "epoch": 2.08, "learning_rate": 6.1638370906392065e-06, - "loss": 0.1959, + "loss": 0.0165, "step": 7457 }, { "epoch": 2.08, "learning_rate": 6.1619816309490675e-06, - "loss": 0.0829, + "loss": 0.018, "step": 7458 }, { "epoch": 2.08, "learning_rate": 6.16012617125893e-06, - "loss": 0.0329, + "loss": 0.0085, "step": 7459 }, { "epoch": 2.08, "learning_rate": 6.158270711568791e-06, - "loss": 0.1388, + "loss": 0.1494, "step": 7460 }, { "epoch": 2.08, "learning_rate": 6.156415251878653e-06, - "loss": 0.1385, + "loss": 0.0435, "step": 7461 }, { "epoch": 2.08, "learning_rate": 6.154559792188515e-06, - "loss": 0.2476, + "loss": 0.1022, "step": 7462 }, { "epoch": 2.08, "learning_rate": 6.152704332498377e-06, - "loss": 0.1941, + "loss": 0.0092, "step": 7463 }, { "epoch": 2.08, "learning_rate": 6.150848872808238e-06, - "loss": 0.0867, + "loss": 0.1341, "step": 7464 }, { "epoch": 2.08, "learning_rate": 6.148993413118101e-06, - "loss": 0.0853, + "loss": 0.203, "step": 7465 }, { "epoch": 2.08, "learning_rate": 6.147137953427962e-06, - "loss": 0.1898, + "loss": 0.2062, "step": 7466 }, { "epoch": 2.08, "learning_rate": 6.145282493737824e-06, - "loss": 0.0855, + "loss": 0.0678, "step": 7467 }, { "epoch": 2.08, "learning_rate": 6.143427034047685e-06, - "loss": 0.0839, + "loss": 0.0555, "step": 7468 }, { "epoch": 2.08, "learning_rate": 6.141571574357548e-06, - "loss": 0.1919, + "loss": 0.1189, "step": 7469 }, { "epoch": 2.08, "learning_rate": 6.139716114667409e-06, - "loss": 0.1972, + "loss": 0.1256, "step": 7470 }, { "epoch": 2.08, "learning_rate": 6.137860654977271e-06, - "loss": 0.191, + "loss": 0.0606, "step": 7471 }, { "epoch": 2.08, "learning_rate": 6.136005195287133e-06, - "loss": 0.031, + "loss": 0.1702, "step": 7472 }, { "epoch": 2.08, "learning_rate": 6.134149735596995e-06, - "loss": 0.0316, + "loss": 0.1014, "step": 7473 }, { "epoch": 2.08, "learning_rate": 6.132294275906856e-06, - "loss": 0.1413, + "loss": 0.0516, "step": 7474 }, { "epoch": 2.08, "learning_rate": 6.1304388162167184e-06, - "loss": 0.2964, + "loss": 0.0515, "step": 7475 }, { "epoch": 2.08, "learning_rate": 6.1285833565265795e-06, - "loss": 0.2442, + "loss": 0.0816, "step": 7476 }, { "epoch": 2.08, "learning_rate": 6.126727896836442e-06, - "loss": 0.139, + "loss": 0.0835, "step": 7477 }, { "epoch": 2.08, "learning_rate": 6.124872437146303e-06, - "loss": 0.1398, + "loss": 0.0236, "step": 7478 }, { "epoch": 2.08, "learning_rate": 6.123016977456165e-06, - "loss": 0.1371, + "loss": 0.1193, "step": 7479 }, { "epoch": 2.08, "learning_rate": 6.121161517766027e-06, - "loss": 0.248, + "loss": 0.0905, "step": 7480 }, { "epoch": 2.08, "learning_rate": 6.119306058075889e-06, - "loss": 0.1398, + "loss": 0.0207, "step": 7481 }, { "epoch": 2.08, "learning_rate": 6.11745059838575e-06, - "loss": 0.1906, + "loss": 0.0979, "step": 7482 }, { "epoch": 2.08, "learning_rate": 6.1155951386956126e-06, - "loss": 0.1334, + "loss": 0.208, "step": 7483 }, { "epoch": 2.08, "learning_rate": 6.113739679005474e-06, - "loss": 0.0845, + "loss": 0.0252, "step": 7484 }, { "epoch": 2.08, "learning_rate": 6.111884219315336e-06, - "loss": 0.1959, + "loss": 0.0581, "step": 7485 }, { "epoch": 2.08, "learning_rate": 6.110028759625197e-06, - "loss": 0.0331, + "loss": 0.1254, "step": 7486 }, { "epoch": 2.08, "learning_rate": 6.108173299935059e-06, - "loss": 0.0886, + "loss": 0.1092, "step": 7487 }, { "epoch": 2.08, "learning_rate": 6.106317840244921e-06, - "loss": 0.2417, + "loss": 0.0622, "step": 7488 }, { "epoch": 2.08, "learning_rate": 6.104462380554783e-06, - "loss": 0.2927, + "loss": 0.0654, "step": 7489 }, { "epoch": 2.08, "learning_rate": 6.102606920864644e-06, - "loss": 0.1935, + "loss": 0.1714, "step": 7490 }, { "epoch": 2.08, "learning_rate": 6.100751461174507e-06, - "loss": 0.0847, + "loss": 0.129, "step": 7491 }, { "epoch": 2.09, "learning_rate": 6.098896001484368e-06, - "loss": 0.0879, + "loss": 0.1237, "step": 7492 }, { "epoch": 2.09, "learning_rate": 6.09704054179423e-06, - "loss": 0.1391, + "loss": 0.0225, "step": 7493 }, { "epoch": 2.09, "learning_rate": 6.0951850821040914e-06, - "loss": 0.3002, + "loss": 0.0746, "step": 7494 }, { "epoch": 2.09, "learning_rate": 6.093329622413954e-06, - "loss": 0.0882, + "loss": 0.0899, "step": 7495 }, { "epoch": 2.09, "learning_rate": 6.091474162723815e-06, - "loss": 0.1918, + "loss": 0.0373, "step": 7496 }, { "epoch": 2.09, "learning_rate": 6.089618703033677e-06, - "loss": 0.1392, + "loss": 0.0989, "step": 7497 }, { "epoch": 2.09, "learning_rate": 6.087763243343539e-06, - "loss": 0.0853, + "loss": 0.1186, "step": 7498 }, { "epoch": 2.09, "learning_rate": 6.085907783653401e-06, - "loss": 0.0343, + "loss": 0.0577, "step": 7499 }, { "epoch": 2.09, "learning_rate": 6.084052323963262e-06, - "loss": 0.1352, + "loss": 0.0949, "step": 7500 }, { "epoch": 2.09, "learning_rate": 6.0821968642731245e-06, - "loss": 0.2946, + "loss": 0.0549, "step": 7501 }, { "epoch": 2.09, "learning_rate": 6.0803414045829856e-06, - "loss": 0.1418, + "loss": 0.0211, "step": 7502 }, { "epoch": 2.09, "learning_rate": 6.078485944892848e-06, - "loss": 0.1889, + "loss": 0.1039, "step": 7503 }, { "epoch": 2.09, "learning_rate": 6.076630485202709e-06, - "loss": 0.1386, + "loss": 0.1255, "step": 7504 }, { "epoch": 2.09, "learning_rate": 6.074775025512571e-06, - "loss": 0.2364, + "loss": 0.1229, "step": 7505 }, { "epoch": 2.09, "learning_rate": 6.072919565822433e-06, - "loss": 0.0347, + "loss": 0.1135, "step": 7506 }, { "epoch": 2.09, "learning_rate": 6.071064106132295e-06, - "loss": 0.1428, + "loss": 0.0542, "step": 7507 }, { "epoch": 2.09, "learning_rate": 6.069208646442156e-06, - "loss": 0.0874, + "loss": 0.0231, "step": 7508 }, { "epoch": 2.09, "learning_rate": 6.067353186752019e-06, - "loss": 0.1369, + "loss": 0.0729, "step": 7509 }, { "epoch": 2.09, "learning_rate": 6.06549772706188e-06, - "loss": 0.1411, + "loss": 0.0851, "step": 7510 }, { "epoch": 2.09, "learning_rate": 6.063642267371742e-06, - "loss": 0.1925, + "loss": 0.1615, "step": 7511 }, { "epoch": 2.09, "learning_rate": 6.061786807681603e-06, - "loss": 0.1967, + "loss": 0.0217, "step": 7512 }, { "epoch": 2.09, "learning_rate": 6.059931347991465e-06, - "loss": 0.1389, + "loss": 0.084, "step": 7513 }, { "epoch": 2.09, "learning_rate": 6.058075888301327e-06, - "loss": 0.0346, + "loss": 0.0658, "step": 7514 }, { "epoch": 2.09, "learning_rate": 6.056220428611189e-06, - "loss": 0.0851, + "loss": 0.013, "step": 7515 }, { "epoch": 2.09, "learning_rate": 6.05436496892105e-06, - "loss": 0.0859, + "loss": 0.071, "step": 7516 }, { "epoch": 2.09, "learning_rate": 6.052509509230913e-06, - "loss": 0.2499, + "loss": 0.0747, "step": 7517 }, { "epoch": 2.09, "learning_rate": 6.050654049540774e-06, - "loss": 0.2417, + "loss": 0.1424, "step": 7518 }, { "epoch": 2.09, "learning_rate": 6.0487985898506365e-06, - "loss": 0.0869, + "loss": 0.0247, "step": 7519 }, { "epoch": 2.09, "learning_rate": 6.0469431301604975e-06, - "loss": 0.2428, + "loss": 0.0251, "step": 7520 }, { "epoch": 2.09, "learning_rate": 6.045087670470359e-06, - "loss": 0.2497, + "loss": 0.1345, "step": 7521 }, { "epoch": 2.09, "learning_rate": 6.043232210780221e-06, - "loss": 0.0884, + "loss": 0.0241, "step": 7522 }, { "epoch": 2.09, "learning_rate": 6.041376751090083e-06, - "loss": 0.1891, + "loss": 0.0563, "step": 7523 }, { "epoch": 2.09, "learning_rate": 6.039521291399944e-06, - "loss": 0.034, + "loss": 0.0472, "step": 7524 }, { "epoch": 2.09, "learning_rate": 6.037665831709807e-06, - "loss": 0.1419, + "loss": 0.1332, "step": 7525 }, { "epoch": 2.09, "learning_rate": 6.035810372019668e-06, - "loss": 0.1876, + "loss": 0.0588, "step": 7526 }, { "epoch": 2.09, "learning_rate": 6.033954912329531e-06, - "loss": 0.0851, + "loss": 0.0509, "step": 7527 }, { "epoch": 2.1, "learning_rate": 6.032099452639392e-06, - "loss": 0.2474, + "loss": 0.0476, "step": 7528 }, { "epoch": 2.1, "learning_rate": 6.030243992949254e-06, - "loss": 0.2366, + "loss": 0.0552, "step": 7529 }, { "epoch": 2.1, "learning_rate": 6.028388533259115e-06, - "loss": 0.2432, + "loss": 0.0475, "step": 7530 }, { "epoch": 2.1, "learning_rate": 6.026533073568977e-06, - "loss": 0.1917, + "loss": 0.1975, "step": 7531 }, { "epoch": 2.1, "learning_rate": 6.024677613878839e-06, - "loss": 0.2472, + "loss": 0.0185, "step": 7532 }, { "epoch": 2.1, "learning_rate": 6.022822154188701e-06, - "loss": 0.0863, + "loss": 0.0349, "step": 7533 }, { "epoch": 2.1, "learning_rate": 6.020966694498562e-06, - "loss": 0.1368, + "loss": 0.0729, "step": 7534 }, { "epoch": 2.1, "learning_rate": 6.019111234808425e-06, - "loss": 0.0868, + "loss": 0.0733, "step": 7535 }, { "epoch": 2.1, "learning_rate": 6.017255775118286e-06, - "loss": 0.1399, + "loss": 0.0115, "step": 7536 }, { "epoch": 2.1, "learning_rate": 6.0154003154281485e-06, - "loss": 0.0861, + "loss": 0.015, "step": 7537 }, { "epoch": 2.1, "learning_rate": 6.0135448557380095e-06, - "loss": 0.2407, + "loss": 0.077, "step": 7538 }, { "epoch": 2.1, "learning_rate": 6.011689396047871e-06, - "loss": 0.1919, + "loss": 0.1113, "step": 7539 }, { "epoch": 2.1, "learning_rate": 6.009833936357733e-06, - "loss": 0.088, + "loss": 0.0169, "step": 7540 }, { "epoch": 2.1, "learning_rate": 6.007978476667595e-06, - "loss": 0.1392, + "loss": 0.0089, "step": 7541 }, { "epoch": 2.1, "learning_rate": 6.006123016977456e-06, - "loss": 0.1937, + "loss": 0.2588, "step": 7542 }, { "epoch": 2.1, "learning_rate": 6.004267557287319e-06, - "loss": 0.1904, + "loss": 0.1946, "step": 7543 }, { "epoch": 2.1, "learning_rate": 6.00241209759718e-06, - "loss": 0.1896, + "loss": 0.0616, "step": 7544 }, { "epoch": 2.1, "learning_rate": 6.000556637907043e-06, - "loss": 0.1878, + "loss": 0.0483, "step": 7545 }, { "epoch": 2.1, "learning_rate": 5.998701178216904e-06, - "loss": 0.1907, + "loss": 0.0188, "step": 7546 }, { "epoch": 2.1, "learning_rate": 5.9968457185267655e-06, - "loss": 0.1387, + "loss": 0.0088, "step": 7547 }, { "epoch": 2.1, "learning_rate": 5.994990258836627e-06, - "loss": 0.1387, + "loss": 0.1047, "step": 7548 }, { "epoch": 2.1, "learning_rate": 5.993134799146489e-06, - "loss": 0.2348, + "loss": 0.0102, "step": 7549 }, { "epoch": 2.1, "learning_rate": 5.99127933945635e-06, - "loss": 0.1408, + "loss": 0.0926, "step": 7550 }, { "epoch": 2.1, "learning_rate": 5.989423879766213e-06, - "loss": 0.1895, + "loss": 0.1161, "step": 7551 }, { "epoch": 2.1, "learning_rate": 5.987568420076074e-06, - "loss": 0.0891, + "loss": 0.1685, "step": 7552 }, { "epoch": 2.1, "learning_rate": 5.985712960385937e-06, - "loss": 0.2436, + "loss": 0.0191, "step": 7553 }, { "epoch": 2.1, "learning_rate": 5.983857500695798e-06, - "loss": 0.1404, + "loss": 0.0169, "step": 7554 }, { "epoch": 2.1, "learning_rate": 5.9820020410056605e-06, - "loss": 0.2429, + "loss": 0.0513, "step": 7555 }, { "epoch": 2.1, "learning_rate": 5.9801465813155215e-06, - "loss": 0.137, + "loss": 0.1059, "step": 7556 }, { "epoch": 2.1, "learning_rate": 5.978291121625383e-06, - "loss": 0.1905, + "loss": 0.1309, "step": 7557 }, { "epoch": 2.1, "learning_rate": 5.976435661935245e-06, - "loss": 0.1385, + "loss": 0.058, "step": 7558 }, { "epoch": 2.1, "learning_rate": 5.974580202245107e-06, - "loss": 0.1441, + "loss": 0.1001, "step": 7559 }, { "epoch": 2.1, "learning_rate": 5.972724742554968e-06, - "loss": 0.1917, + "loss": 0.0194, "step": 7560 }, { "epoch": 2.1, "learning_rate": 5.970869282864831e-06, - "loss": 0.0895, + "loss": 0.0937, "step": 7561 }, { "epoch": 2.1, "learning_rate": 5.969013823174692e-06, - "loss": 0.1373, + "loss": 0.0102, "step": 7562 }, { "epoch": 2.1, "learning_rate": 5.967158363484555e-06, - "loss": 0.1894, + "loss": 0.0264, "step": 7563 }, { "epoch": 2.11, "learning_rate": 5.965302903794416e-06, - "loss": 0.242, + "loss": 0.0554, "step": 7564 }, { "epoch": 2.11, "learning_rate": 5.9634474441042775e-06, - "loss": 0.1901, + "loss": 0.1338, "step": 7565 }, { "epoch": 2.11, "learning_rate": 5.961591984414139e-06, - "loss": 0.2384, + "loss": 0.0491, "step": 7566 }, { "epoch": 2.11, "learning_rate": 5.959736524724001e-06, - "loss": 0.0408, + "loss": 0.1536, "step": 7567 }, { "epoch": 2.11, "learning_rate": 5.957881065033862e-06, - "loss": 0.0901, + "loss": 0.1554, "step": 7568 }, { "epoch": 2.11, "learning_rate": 5.956025605343725e-06, - "loss": 0.1381, + "loss": 0.1638, "step": 7569 }, { "epoch": 2.11, "learning_rate": 5.954170145653586e-06, - "loss": 0.1337, + "loss": 0.061, "step": 7570 }, { "epoch": 2.11, "learning_rate": 5.952314685963449e-06, - "loss": 0.0387, + "loss": 0.1386, "step": 7571 }, { "epoch": 2.11, "learning_rate": 5.95045922627331e-06, - "loss": 0.0908, + "loss": 0.0627, "step": 7572 }, { "epoch": 2.11, "learning_rate": 5.948603766583172e-06, - "loss": 0.0895, + "loss": 0.0172, "step": 7573 }, { "epoch": 2.11, "learning_rate": 5.9467483068930335e-06, - "loss": 0.0892, + "loss": 0.0667, "step": 7574 }, { "epoch": 2.11, "learning_rate": 5.944892847202895e-06, - "loss": 0.0897, + "loss": 0.0231, "step": 7575 }, { "epoch": 2.11, "learning_rate": 5.943037387512756e-06, - "loss": 0.2381, + "loss": 0.13, "step": 7576 }, { "epoch": 2.11, "learning_rate": 5.941181927822619e-06, - "loss": 0.1424, + "loss": 0.0234, "step": 7577 }, { "epoch": 2.11, "learning_rate": 5.93932646813248e-06, - "loss": 0.0871, + "loss": 0.0553, "step": 7578 }, { "epoch": 2.11, "learning_rate": 5.937471008442343e-06, - "loss": 0.1904, + "loss": 0.052, "step": 7579 }, { "epoch": 2.11, "learning_rate": 5.935615548752204e-06, - "loss": 0.2413, + "loss": 0.0488, "step": 7580 }, { "epoch": 2.11, "learning_rate": 5.933760089062066e-06, - "loss": 0.2941, + "loss": 0.0206, "step": 7581 }, { "epoch": 2.11, "learning_rate": 5.931904629371928e-06, - "loss": 0.1386, + "loss": 0.0958, "step": 7582 }, { "epoch": 2.11, "learning_rate": 5.9300491696817895e-06, - "loss": 0.1434, + "loss": 0.0188, "step": 7583 }, { "epoch": 2.11, "learning_rate": 5.9281937099916505e-06, - "loss": 0.0862, + "loss": 0.1122, "step": 7584 }, { "epoch": 2.11, "learning_rate": 5.926338250301513e-06, - "loss": 0.1404, + "loss": 0.0211, "step": 7585 }, { "epoch": 2.11, "learning_rate": 5.924482790611374e-06, - "loss": 0.0358, + "loss": 0.0837, "step": 7586 }, { "epoch": 2.11, "learning_rate": 5.922627330921237e-06, - "loss": 0.1396, + "loss": 0.1035, "step": 7587 }, { "epoch": 2.11, "learning_rate": 5.920771871231098e-06, - "loss": 0.0865, + "loss": 0.2022, "step": 7588 }, { "epoch": 2.11, "learning_rate": 5.918916411540961e-06, - "loss": 0.0883, + "loss": 0.0374, "step": 7589 }, { "epoch": 2.11, "learning_rate": 5.917060951850822e-06, - "loss": 0.2385, + "loss": 0.0126, "step": 7590 }, { "epoch": 2.11, "learning_rate": 5.915205492160683e-06, - "loss": 0.0893, + "loss": 0.0109, "step": 7591 }, { "epoch": 2.11, "learning_rate": 5.9133500324705454e-06, - "loss": 0.1387, + "loss": 0.1413, "step": 7592 }, { "epoch": 2.11, "learning_rate": 5.9114945727804065e-06, - "loss": 0.1418, + "loss": 0.0631, "step": 7593 }, { "epoch": 2.11, "learning_rate": 5.909639113090268e-06, - "loss": 0.0878, + "loss": 0.0848, "step": 7594 }, { "epoch": 2.11, "learning_rate": 5.90778365340013e-06, - "loss": 0.089, + "loss": 0.1477, "step": 7595 }, { "epoch": 2.11, "learning_rate": 5.905928193709992e-06, - "loss": 0.035, + "loss": 0.0507, "step": 7596 }, { "epoch": 2.11, "learning_rate": 5.904072734019853e-06, - "loss": 0.2981, + "loss": 0.086, "step": 7597 }, { "epoch": 2.11, "learning_rate": 5.902217274329716e-06, - "loss": 0.1399, + "loss": 0.0168, "step": 7598 }, { "epoch": 2.11, "learning_rate": 5.900361814639577e-06, - "loss": 0.2943, + "loss": 0.2083, "step": 7599 }, { "epoch": 2.12, "learning_rate": 5.8985063549494396e-06, - "loss": 0.0864, + "loss": 0.094, "step": 7600 }, { "epoch": 2.12, "learning_rate": 5.896650895259301e-06, - "loss": 0.0335, + "loss": 0.1193, "step": 7601 }, { "epoch": 2.12, "learning_rate": 5.8947954355691625e-06, - "loss": 0.1892, + "loss": 0.0815, "step": 7602 }, { "epoch": 2.12, "learning_rate": 5.892939975879024e-06, - "loss": 0.0904, + "loss": 0.1415, "step": 7603 }, { "epoch": 2.12, "learning_rate": 5.891084516188886e-06, - "loss": 0.1962, + "loss": 0.0162, "step": 7604 }, { "epoch": 2.12, "learning_rate": 5.889229056498747e-06, - "loss": 0.1407, + "loss": 0.1805, "step": 7605 }, { "epoch": 2.12, "learning_rate": 5.88737359680861e-06, - "loss": 0.3017, + "loss": 0.0799, "step": 7606 }, { "epoch": 2.12, "learning_rate": 5.885518137118471e-06, - "loss": 0.1357, + "loss": 0.166, "step": 7607 }, { "epoch": 2.12, "learning_rate": 5.883662677428334e-06, - "loss": 0.0855, + "loss": 0.019, "step": 7608 }, { "epoch": 2.12, "learning_rate": 5.881807217738195e-06, - "loss": 0.0322, + "loss": 0.0207, "step": 7609 }, { "epoch": 2.12, "learning_rate": 5.8799517580480566e-06, - "loss": 0.1394, + "loss": 0.11, "step": 7610 }, { "epoch": 2.12, "learning_rate": 5.8780962983579184e-06, - "loss": 0.1421, + "loss": 0.0566, "step": 7611 }, { "epoch": 2.12, "learning_rate": 5.87624083866778e-06, - "loss": 0.1414, + "loss": 0.0252, "step": 7612 }, { "epoch": 2.12, "learning_rate": 5.874385378977641e-06, - "loss": 0.0842, + "loss": 0.0493, "step": 7613 }, { "epoch": 2.12, "learning_rate": 5.872529919287504e-06, - "loss": 0.1393, + "loss": 0.0783, "step": 7614 }, { "epoch": 2.12, "learning_rate": 5.870674459597365e-06, - "loss": 0.2477, + "loss": 0.0704, "step": 7615 }, { "epoch": 2.12, "learning_rate": 5.868818999907228e-06, - "loss": 0.1921, + "loss": 0.0462, "step": 7616 }, { "epoch": 2.12, "learning_rate": 5.866963540217089e-06, - "loss": 0.2462, + "loss": 0.023, "step": 7617 }, { "epoch": 2.12, "learning_rate": 5.8651080805269515e-06, - "loss": 0.1386, + "loss": 0.067, "step": 7618 }, { "epoch": 2.12, "learning_rate": 5.8632526208368126e-06, - "loss": 0.1919, + "loss": 0.0294, "step": 7619 }, { "epoch": 2.12, "learning_rate": 5.8613971611466744e-06, - "loss": 0.1374, + "loss": 0.0683, "step": 7620 }, { "epoch": 2.12, "learning_rate": 5.859541701456536e-06, - "loss": 0.0842, + "loss": 0.0486, "step": 7621 }, { "epoch": 2.12, "learning_rate": 5.857686241766398e-06, - "loss": 0.0335, + "loss": 0.0913, "step": 7622 }, { "epoch": 2.12, "learning_rate": 5.855830782076259e-06, - "loss": 0.1383, + "loss": 0.0882, "step": 7623 }, { "epoch": 2.12, "learning_rate": 5.853975322386122e-06, - "loss": 0.0847, + "loss": 0.0471, "step": 7624 }, { "epoch": 2.12, "learning_rate": 5.852119862695983e-06, - "loss": 0.0325, + "loss": 0.0147, "step": 7625 }, { "epoch": 2.12, "learning_rate": 5.850264403005846e-06, - "loss": 0.1406, + "loss": 0.0501, "step": 7626 }, { "epoch": 2.12, "learning_rate": 5.848408943315707e-06, - "loss": 0.0886, + "loss": 0.0891, "step": 7627 }, { "epoch": 2.12, "learning_rate": 5.8465534836255685e-06, - "loss": 0.2455, + "loss": 0.0673, "step": 7628 }, { "epoch": 2.12, "learning_rate": 5.84469802393543e-06, - "loss": 0.1399, + "loss": 0.0235, "step": 7629 }, { "epoch": 2.12, "learning_rate": 5.842842564245292e-06, - "loss": 0.0318, + "loss": 0.1679, "step": 7630 }, { "epoch": 2.12, "learning_rate": 5.840987104555153e-06, - "loss": 0.0857, + "loss": 0.0832, "step": 7631 }, { "epoch": 2.12, "learning_rate": 5.839131644865016e-06, - "loss": 0.1404, + "loss": 0.12, "step": 7632 }, { "epoch": 2.12, "learning_rate": 5.837276185174877e-06, - "loss": 0.1939, + "loss": 0.0447, "step": 7633 }, { "epoch": 2.12, "learning_rate": 5.83542072548474e-06, - "loss": 0.3525, + "loss": 0.0486, "step": 7634 }, { "epoch": 2.12, "learning_rate": 5.833565265794601e-06, - "loss": 0.1366, + "loss": 0.1563, "step": 7635 }, { "epoch": 2.13, "learning_rate": 5.831709806104463e-06, - "loss": 0.0318, + "loss": 0.1305, "step": 7636 }, { "epoch": 2.13, "learning_rate": 5.8298543464143245e-06, - "loss": 0.0882, + "loss": 0.1004, "step": 7637 }, { "epoch": 2.13, "learning_rate": 5.827998886724186e-06, - "loss": 0.138, + "loss": 0.0533, "step": 7638 }, { "epoch": 2.13, "learning_rate": 5.8261434270340474e-06, - "loss": 0.1384, + "loss": 0.0935, "step": 7639 }, { "epoch": 2.13, "learning_rate": 5.82428796734391e-06, - "loss": 0.0846, + "loss": 0.0647, "step": 7640 }, { "epoch": 2.13, "learning_rate": 5.822432507653771e-06, - "loss": 0.0317, + "loss": 0.1755, "step": 7641 }, { "epoch": 2.13, "learning_rate": 5.820577047963634e-06, - "loss": 0.2431, + "loss": 0.0381, "step": 7642 }, { "epoch": 2.13, "learning_rate": 5.818721588273495e-06, - "loss": 0.1427, + "loss": 0.0613, "step": 7643 }, { "epoch": 2.13, "learning_rate": 5.816866128583357e-06, - "loss": 0.2494, + "loss": 0.1657, "step": 7644 }, { "epoch": 2.13, "learning_rate": 5.815010668893219e-06, - "loss": 0.0833, + "loss": 0.0671, "step": 7645 }, { "epoch": 2.13, "learning_rate": 5.8131552092030805e-06, - "loss": 0.1375, + "loss": 0.0718, "step": 7646 }, { "epoch": 2.13, "learning_rate": 5.8112997495129415e-06, - "loss": 0.0309, + "loss": 0.0797, "step": 7647 }, { "epoch": 2.13, "learning_rate": 5.809444289822804e-06, - "loss": 0.1376, + "loss": 0.0294, "step": 7648 }, { "epoch": 2.13, "learning_rate": 5.807588830132665e-06, - "loss": 0.0307, + "loss": 0.0187, "step": 7649 }, { "epoch": 2.13, "learning_rate": 5.805733370442528e-06, - "loss": 0.242, + "loss": 0.0292, "step": 7650 }, { "epoch": 2.13, "learning_rate": 5.803877910752389e-06, - "loss": 0.1402, + "loss": 0.0487, "step": 7651 }, { "epoch": 2.13, "learning_rate": 5.802022451062252e-06, - "loss": 0.1414, + "loss": 0.0428, "step": 7652 }, { "epoch": 2.13, "learning_rate": 5.800166991372113e-06, - "loss": 0.1997, + "loss": 0.0569, "step": 7653 }, { "epoch": 2.13, "learning_rate": 5.798311531681975e-06, - "loss": 0.1929, + "loss": 0.2109, "step": 7654 }, { "epoch": 2.13, "learning_rate": 5.7964560719918365e-06, - "loss": 0.14, + "loss": 0.087, "step": 7655 }, { "epoch": 2.13, "learning_rate": 5.794600612301698e-06, - "loss": 0.0311, + "loss": 0.0218, "step": 7656 }, { "epoch": 2.13, "learning_rate": 5.792745152611559e-06, - "loss": 0.0875, + "loss": 0.0607, "step": 7657 }, { "epoch": 2.13, "learning_rate": 5.790889692921422e-06, - "loss": 0.2467, + "loss": 0.1016, "step": 7658 }, { "epoch": 2.13, "learning_rate": 5.789034233231283e-06, - "loss": 0.1381, + "loss": 0.1111, "step": 7659 }, { "epoch": 2.13, "learning_rate": 5.787178773541146e-06, - "loss": 0.2459, + "loss": 0.0264, "step": 7660 }, { "epoch": 2.13, "learning_rate": 5.785323313851007e-06, - "loss": 0.084, + "loss": 0.0329, "step": 7661 }, { "epoch": 2.13, "learning_rate": 5.783467854160869e-06, - "loss": 0.0855, + "loss": 0.0178, "step": 7662 }, { "epoch": 2.13, "learning_rate": 5.781612394470731e-06, - "loss": 0.1394, + "loss": 0.0596, "step": 7663 }, { "epoch": 2.13, "learning_rate": 5.7797569347805925e-06, - "loss": 0.2468, + "loss": 0.0398, "step": 7664 }, { "epoch": 2.13, "learning_rate": 5.7779014750904535e-06, - "loss": 0.0858, + "loss": 0.1004, "step": 7665 }, { "epoch": 2.13, "learning_rate": 5.776046015400316e-06, - "loss": 0.0852, + "loss": 0.1016, "step": 7666 }, { "epoch": 2.13, "learning_rate": 5.774190555710177e-06, - "loss": 0.1902, + "loss": 0.0093, "step": 7667 }, { "epoch": 2.13, "learning_rate": 5.77233509602004e-06, - "loss": 0.14, + "loss": 0.1186, "step": 7668 }, { "epoch": 2.13, "learning_rate": 5.770479636329901e-06, - "loss": 0.1891, + "loss": 0.1313, "step": 7669 }, { "epoch": 2.13, "learning_rate": 5.768624176639763e-06, - "loss": 0.1417, + "loss": 0.0976, "step": 7670 }, { "epoch": 2.13, "learning_rate": 5.766768716949625e-06, - "loss": 0.1379, + "loss": 0.1905, "step": 7671 }, { "epoch": 2.14, "learning_rate": 5.764913257259487e-06, - "loss": 0.2402, + "loss": 0.0514, "step": 7672 }, { "epoch": 2.14, "learning_rate": 5.763057797569348e-06, - "loss": 0.1963, + "loss": 0.0194, "step": 7673 }, { "epoch": 2.14, "learning_rate": 5.76120233787921e-06, - "loss": 0.0842, + "loss": 0.0239, "step": 7674 }, { "epoch": 2.14, "learning_rate": 5.759346878189071e-06, - "loss": 0.1403, + "loss": 0.0471, "step": 7675 }, { "epoch": 2.14, "learning_rate": 5.757491418498934e-06, - "loss": 0.1419, + "loss": 0.05, "step": 7676 }, { "epoch": 2.14, "learning_rate": 5.755635958808795e-06, - "loss": 0.1887, + "loss": 0.1443, "step": 7677 }, { "epoch": 2.14, "learning_rate": 5.753780499118658e-06, - "loss": 0.1984, + "loss": 0.0266, "step": 7678 }, { "epoch": 2.14, "learning_rate": 5.751925039428519e-06, - "loss": 0.1924, + "loss": 0.0491, "step": 7679 }, { "epoch": 2.14, "learning_rate": 5.750069579738381e-06, - "loss": 0.1425, + "loss": 0.1076, "step": 7680 }, { "epoch": 2.14, "learning_rate": 5.748214120048243e-06, - "loss": 0.1399, + "loss": 0.0925, "step": 7681 }, { "epoch": 2.14, "learning_rate": 5.7463586603581045e-06, - "loss": 0.0856, + "loss": 0.0272, "step": 7682 }, { "epoch": 2.14, "learning_rate": 5.7445032006679655e-06, - "loss": 0.1397, + "loss": 0.0505, "step": 7683 }, { "epoch": 2.14, "learning_rate": 5.742647740977828e-06, - "loss": 0.0862, + "loss": 0.0353, "step": 7684 }, { "epoch": 2.14, "learning_rate": 5.740792281287689e-06, - "loss": 0.1894, + "loss": 0.1228, "step": 7685 }, { "epoch": 2.14, "learning_rate": 5.738936821597552e-06, - "loss": 0.1392, + "loss": 0.0186, "step": 7686 }, { "epoch": 2.14, "learning_rate": 5.737081361907413e-06, - "loss": 0.1399, + "loss": 0.0345, "step": 7687 }, { "epoch": 2.14, "learning_rate": 5.735225902217275e-06, - "loss": 0.033, + "loss": 0.0172, "step": 7688 }, { "epoch": 2.14, "learning_rate": 5.733370442527137e-06, - "loss": 0.0343, + "loss": 0.1092, "step": 7689 }, { "epoch": 2.14, "learning_rate": 5.731514982836999e-06, - "loss": 0.034, + "loss": 0.0216, "step": 7690 }, { "epoch": 2.14, "learning_rate": 5.72965952314686e-06, - "loss": 0.086, + "loss": 0.016, "step": 7691 }, { "epoch": 2.14, "learning_rate": 5.727804063456722e-06, - "loss": 0.0861, + "loss": 0.0835, "step": 7692 }, { "epoch": 2.14, "learning_rate": 5.725948603766583e-06, - "loss": 0.1936, + "loss": 0.1531, "step": 7693 }, { "epoch": 2.14, "learning_rate": 5.724093144076446e-06, - "loss": 0.0321, + "loss": 0.1018, "step": 7694 }, { "epoch": 2.14, "learning_rate": 5.722237684386307e-06, - "loss": 0.0864, + "loss": 0.0122, "step": 7695 }, { "epoch": 2.14, "learning_rate": 5.720382224696169e-06, - "loss": 0.0865, + "loss": 0.0169, "step": 7696 }, { "epoch": 2.14, "learning_rate": 5.718526765006031e-06, - "loss": 0.245, + "loss": 0.1255, "step": 7697 }, { "epoch": 2.14, "learning_rate": 5.716671305315893e-06, - "loss": 0.0866, + "loss": 0.0128, "step": 7698 }, { "epoch": 2.14, "learning_rate": 5.714815845625754e-06, - "loss": 0.2427, + "loss": 0.1681, "step": 7699 }, { "epoch": 2.14, "learning_rate": 5.7129603859356165e-06, - "loss": 0.0846, + "loss": 0.1626, "step": 7700 }, { "epoch": 2.14, "learning_rate": 5.7111049262454775e-06, - "loss": 0.1401, + "loss": 0.1295, "step": 7701 }, { "epoch": 2.14, "learning_rate": 5.70924946655534e-06, - "loss": 0.1398, + "loss": 0.0421, "step": 7702 }, { "epoch": 2.14, "learning_rate": 5.707394006865201e-06, - "loss": 0.1961, + "loss": 0.0089, "step": 7703 }, { "epoch": 2.14, "learning_rate": 5.705538547175063e-06, - "loss": 0.195, + "loss": 0.1728, "step": 7704 }, { "epoch": 2.14, "learning_rate": 5.703683087484925e-06, - "loss": 0.0833, + "loss": 0.0501, "step": 7705 }, { "epoch": 2.14, "learning_rate": 5.701827627794787e-06, - "loss": 0.1382, + "loss": 0.0681, "step": 7706 }, { "epoch": 2.15, "learning_rate": 5.699972168104648e-06, - "loss": 0.1395, + "loss": 0.165, "step": 7707 }, { "epoch": 2.15, "learning_rate": 5.6981167084145106e-06, - "loss": 0.1387, + "loss": 0.0889, "step": 7708 }, { "epoch": 2.15, "learning_rate": 5.696261248724372e-06, - "loss": 0.0867, + "loss": 0.0116, "step": 7709 }, { "epoch": 2.15, "learning_rate": 5.694405789034234e-06, - "loss": 0.1412, + "loss": 0.15, "step": 7710 }, { "epoch": 2.15, "learning_rate": 5.692550329344095e-06, - "loss": 0.0305, + "loss": 0.0647, "step": 7711 }, { "epoch": 2.15, "learning_rate": 5.690694869653958e-06, - "loss": 0.14, + "loss": 0.1062, "step": 7712 }, { "epoch": 2.15, "learning_rate": 5.688839409963819e-06, - "loss": 0.1386, + "loss": 0.022, "step": 7713 }, { "epoch": 2.15, "learning_rate": 5.686983950273681e-06, - "loss": 0.0302, + "loss": 0.1263, "step": 7714 }, { "epoch": 2.15, "learning_rate": 5.685128490583543e-06, - "loss": 0.1926, + "loss": 0.0612, "step": 7715 }, { "epoch": 2.15, "learning_rate": 5.683273030893405e-06, - "loss": 0.0863, + "loss": 0.0763, "step": 7716 }, { "epoch": 2.15, "learning_rate": 5.681417571203266e-06, - "loss": 0.1408, + "loss": 0.0521, "step": 7717 }, { "epoch": 2.15, "learning_rate": 5.6795621115131284e-06, - "loss": 0.1416, + "loss": 0.0713, "step": 7718 }, { "epoch": 2.15, "learning_rate": 5.6777066518229895e-06, - "loss": 0.0297, + "loss": 0.067, "step": 7719 }, { "epoch": 2.15, "learning_rate": 5.675851192132852e-06, - "loss": 0.0293, + "loss": 0.0229, "step": 7720 }, { "epoch": 2.15, "learning_rate": 5.673995732442713e-06, - "loss": 0.1407, + "loss": 0.1268, "step": 7721 }, { "epoch": 2.15, "learning_rate": 5.672140272752575e-06, - "loss": 0.2483, + "loss": 0.13, "step": 7722 }, { "epoch": 2.15, "learning_rate": 5.670284813062437e-06, - "loss": 0.3072, + "loss": 0.0349, "step": 7723 }, { "epoch": 2.15, "learning_rate": 5.668429353372299e-06, - "loss": 0.0849, + "loss": 0.0975, "step": 7724 }, { "epoch": 2.15, "learning_rate": 5.66657389368216e-06, - "loss": 0.1386, + "loss": 0.1491, "step": 7725 }, { "epoch": 2.15, "learning_rate": 5.6647184339920225e-06, - "loss": 0.0811, + "loss": 0.1306, "step": 7726 }, { "epoch": 2.15, "learning_rate": 5.6628629743018836e-06, - "loss": 0.0825, + "loss": 0.1485, "step": 7727 }, { "epoch": 2.15, "learning_rate": 5.661007514611746e-06, - "loss": 0.1947, + "loss": 0.0499, "step": 7728 }, { "epoch": 2.15, "learning_rate": 5.659152054921607e-06, - "loss": 0.1376, + "loss": 0.0249, "step": 7729 }, { "epoch": 2.15, "learning_rate": 5.657296595231469e-06, - "loss": 0.1938, + "loss": 0.1742, "step": 7730 }, { "epoch": 2.15, "learning_rate": 5.655441135541331e-06, - "loss": 0.0837, + "loss": 0.0285, "step": 7731 }, { "epoch": 2.15, "learning_rate": 5.653585675851193e-06, - "loss": 0.0823, + "loss": 0.0506, "step": 7732 }, { "epoch": 2.15, "learning_rate": 5.651730216161054e-06, - "loss": 0.1431, + "loss": 0.0242, "step": 7733 }, { "epoch": 2.15, "learning_rate": 5.649874756470917e-06, - "loss": 0.1931, + "loss": 0.0822, "step": 7734 }, { "epoch": 2.15, "learning_rate": 5.648019296780778e-06, - "loss": 0.0289, + "loss": 0.0259, "step": 7735 }, { "epoch": 2.15, "learning_rate": 5.64616383709064e-06, - "loss": 0.0823, + "loss": 0.0667, "step": 7736 }, { "epoch": 2.15, "learning_rate": 5.6443083774005014e-06, - "loss": 0.1927, + "loss": 0.0169, "step": 7737 }, { "epoch": 2.15, "learning_rate": 5.642452917710364e-06, - "loss": 0.2497, + "loss": 0.0788, "step": 7738 }, { "epoch": 2.15, "learning_rate": 5.640597458020225e-06, - "loss": 0.2502, + "loss": 0.0145, "step": 7739 }, { "epoch": 2.15, "learning_rate": 5.638741998330087e-06, - "loss": 0.0827, + "loss": 0.0222, "step": 7740 }, { "epoch": 2.15, "learning_rate": 5.636886538639949e-06, - "loss": 0.0849, + "loss": 0.0505, "step": 7741 }, { "epoch": 2.15, "learning_rate": 5.635031078949811e-06, - "loss": 0.084, + "loss": 0.0648, "step": 7742 }, { "epoch": 2.16, "learning_rate": 5.633175619259672e-06, - "loss": 0.029, + "loss": 0.1813, "step": 7743 }, { "epoch": 2.16, "learning_rate": 5.631320159569534e-06, - "loss": 0.1372, + "loss": 0.0471, "step": 7744 }, { "epoch": 2.16, "learning_rate": 5.6294646998793955e-06, - "loss": 0.1947, + "loss": 0.0131, "step": 7745 }, { "epoch": 2.16, "learning_rate": 5.6276092401892566e-06, - "loss": 0.0845, + "loss": 0.0701, "step": 7746 }, { "epoch": 2.16, "learning_rate": 5.625753780499119e-06, - "loss": 0.0832, + "loss": 0.0122, "step": 7747 }, { "epoch": 2.16, "learning_rate": 5.62389832080898e-06, - "loss": 0.1392, + "loss": 0.1261, "step": 7748 }, { "epoch": 2.16, "learning_rate": 5.622042861118843e-06, - "loss": 0.1375, + "loss": 0.0929, "step": 7749 }, { "epoch": 2.16, "learning_rate": 5.620187401428704e-06, - "loss": 0.1872, + "loss": 0.0583, "step": 7750 }, { "epoch": 2.16, "learning_rate": 5.618331941738566e-06, - "loss": 0.0846, + "loss": 0.0341, "step": 7751 }, { "epoch": 2.16, "learning_rate": 5.616476482048428e-06, - "loss": 0.0869, + "loss": 0.0633, "step": 7752 }, { "epoch": 2.16, "learning_rate": 5.61462102235829e-06, - "loss": 0.1938, + "loss": 0.0469, "step": 7753 }, { "epoch": 2.16, "learning_rate": 5.612765562668151e-06, - "loss": 0.0847, + "loss": 0.0169, "step": 7754 }, { "epoch": 2.16, "learning_rate": 5.610910102978013e-06, - "loss": 0.1966, + "loss": 0.0659, "step": 7755 }, { "epoch": 2.16, "learning_rate": 5.6090546432878744e-06, - "loss": 0.1956, + "loss": 0.0988, "step": 7756 }, { "epoch": 2.16, "learning_rate": 5.607199183597737e-06, - "loss": 0.0856, + "loss": 0.018, "step": 7757 }, { "epoch": 2.16, "learning_rate": 5.605343723907598e-06, - "loss": 0.083, + "loss": 0.0087, "step": 7758 }, { "epoch": 2.16, "learning_rate": 5.60348826421746e-06, - "loss": 0.0831, + "loss": 0.034, "step": 7759 }, { "epoch": 2.16, "learning_rate": 5.601632804527322e-06, - "loss": 0.0829, + "loss": 0.0671, "step": 7760 }, { "epoch": 2.16, "learning_rate": 5.599777344837184e-06, - "loss": 0.0851, + "loss": 0.0515, "step": 7761 }, { "epoch": 2.16, "learning_rate": 5.597921885147045e-06, - "loss": 0.1396, + "loss": 0.0111, "step": 7762 }, { "epoch": 2.16, "learning_rate": 5.5960664254569075e-06, - "loss": 0.4116, + "loss": 0.0835, "step": 7763 }, { "epoch": 2.16, "learning_rate": 5.5942109657667685e-06, - "loss": 0.0857, + "loss": 0.1849, "step": 7764 }, { "epoch": 2.16, "learning_rate": 5.592355506076631e-06, - "loss": 0.2505, + "loss": 0.2435, "step": 7765 }, { "epoch": 2.16, "learning_rate": 5.590500046386492e-06, - "loss": 0.0303, + "loss": 0.0049, "step": 7766 }, { "epoch": 2.16, "learning_rate": 5.588644586696354e-06, - "loss": 0.2441, + "loss": 0.0809, "step": 7767 }, { "epoch": 2.16, "learning_rate": 5.586789127006216e-06, - "loss": 0.1976, + "loss": 0.0626, "step": 7768 }, { "epoch": 2.16, "learning_rate": 5.584933667316078e-06, - "loss": 0.2495, + "loss": 0.1003, "step": 7769 }, { "epoch": 2.16, "learning_rate": 5.583078207625939e-06, - "loss": 0.1921, + "loss": 0.1134, "step": 7770 }, { "epoch": 2.16, "learning_rate": 5.581222747935802e-06, - "loss": 0.0313, + "loss": 0.0515, "step": 7771 }, { "epoch": 2.16, "learning_rate": 5.579367288245663e-06, - "loss": 0.1388, + "loss": 0.0108, "step": 7772 }, { "epoch": 2.16, "learning_rate": 5.577511828555525e-06, - "loss": 0.1948, + "loss": 0.0204, "step": 7773 }, { "epoch": 2.16, "learning_rate": 5.575656368865386e-06, - "loss": 0.1924, + "loss": 0.0574, "step": 7774 }, { "epoch": 2.16, "learning_rate": 5.573800909175249e-06, - "loss": 0.4076, + "loss": 0.0677, "step": 7775 }, { "epoch": 2.16, "learning_rate": 5.57194544948511e-06, - "loss": 0.0856, + "loss": 0.0181, "step": 7776 }, { "epoch": 2.16, "learning_rate": 5.570089989794972e-06, - "loss": 0.2463, + "loss": 0.1004, "step": 7777 }, { "epoch": 2.16, "learning_rate": 5.568234530104834e-06, - "loss": 0.0834, + "loss": 0.0791, "step": 7778 }, { "epoch": 2.17, "learning_rate": 5.566379070414696e-06, - "loss": 0.0884, + "loss": 0.0183, "step": 7779 }, { "epoch": 2.17, "learning_rate": 5.564523610724557e-06, - "loss": 0.1414, + "loss": 0.0655, "step": 7780 }, { "epoch": 2.17, "learning_rate": 5.5626681510344195e-06, - "loss": 0.0318, + "loss": 0.0158, "step": 7781 }, { "epoch": 2.17, "learning_rate": 5.5608126913442805e-06, - "loss": 0.0843, + "loss": 0.0134, "step": 7782 }, { "epoch": 2.17, "learning_rate": 5.558957231654143e-06, - "loss": 0.0814, + "loss": 0.0175, "step": 7783 }, { "epoch": 2.17, "learning_rate": 5.557101771964004e-06, - "loss": 0.1921, + "loss": 0.0249, "step": 7784 }, { "epoch": 2.17, "learning_rate": 5.555246312273866e-06, - "loss": 0.2496, + "loss": 0.0156, "step": 7785 }, { "epoch": 2.17, "learning_rate": 5.553390852583728e-06, - "loss": 0.0864, + "loss": 0.0173, "step": 7786 }, { "epoch": 2.17, "learning_rate": 5.55153539289359e-06, - "loss": 0.0321, + "loss": 0.1873, "step": 7787 }, { "epoch": 2.17, "learning_rate": 5.549679933203451e-06, - "loss": 0.0854, + "loss": 0.1751, "step": 7788 }, { "epoch": 2.17, "learning_rate": 5.547824473513314e-06, - "loss": 0.1865, + "loss": 0.0536, "step": 7789 }, { "epoch": 2.17, "learning_rate": 5.545969013823175e-06, - "loss": 0.1392, + "loss": 0.01, "step": 7790 }, { "epoch": 2.17, "learning_rate": 5.544113554133037e-06, - "loss": 0.091, + "loss": 0.1368, "step": 7791 }, { "epoch": 2.17, "learning_rate": 5.542258094442898e-06, - "loss": 0.1959, + "loss": 0.0844, "step": 7792 }, { "epoch": 2.17, "learning_rate": 5.54040263475276e-06, - "loss": 0.1422, + "loss": 0.0719, "step": 7793 }, { "epoch": 2.17, "learning_rate": 5.538547175062622e-06, - "loss": 0.1878, + "loss": 0.0539, "step": 7794 }, { "epoch": 2.17, "learning_rate": 5.536691715372484e-06, - "loss": 0.1896, + "loss": 0.0586, "step": 7795 }, { "epoch": 2.17, "learning_rate": 5.534836255682345e-06, - "loss": 0.0857, + "loss": 0.077, "step": 7796 }, { "epoch": 2.17, "learning_rate": 5.532980795992208e-06, - "loss": 0.1953, + "loss": 0.0747, "step": 7797 }, { "epoch": 2.17, "learning_rate": 5.531125336302069e-06, - "loss": 0.1384, + "loss": 0.0476, "step": 7798 }, { "epoch": 2.17, "learning_rate": 5.5292698766119315e-06, - "loss": 0.1362, + "loss": 0.0109, "step": 7799 }, { "epoch": 2.17, "learning_rate": 5.5274144169217925e-06, - "loss": 0.1911, + "loss": 0.016, "step": 7800 }, { "epoch": 2.17, "learning_rate": 5.525558957231655e-06, - "loss": 0.1417, + "loss": 0.0217, "step": 7801 }, { "epoch": 2.17, "learning_rate": 5.523703497541516e-06, - "loss": 0.0321, + "loss": 0.103, "step": 7802 }, { "epoch": 2.17, "learning_rate": 5.521848037851378e-06, - "loss": 0.1389, + "loss": 0.1464, "step": 7803 }, { "epoch": 2.17, "learning_rate": 5.51999257816124e-06, - "loss": 0.1392, + "loss": 0.0079, "step": 7804 }, { "epoch": 2.17, "learning_rate": 5.518137118471102e-06, - "loss": 0.0838, + "loss": 0.0672, "step": 7805 }, { "epoch": 2.17, "learning_rate": 5.516281658780963e-06, - "loss": 0.1372, + "loss": 0.1102, "step": 7806 }, { "epoch": 2.17, "learning_rate": 5.514426199090826e-06, - "loss": 0.0323, + "loss": 0.1262, "step": 7807 }, { "epoch": 2.17, "learning_rate": 5.512570739400687e-06, - "loss": 0.1403, + "loss": 0.0108, "step": 7808 }, { "epoch": 2.17, "learning_rate": 5.510715279710549e-06, - "loss": 0.0853, + "loss": 0.0523, "step": 7809 }, { "epoch": 2.17, "learning_rate": 5.50885982002041e-06, - "loss": 0.0866, + "loss": 0.0771, "step": 7810 }, { "epoch": 2.17, "learning_rate": 5.507004360330272e-06, - "loss": 0.1389, + "loss": 0.0791, "step": 7811 }, { "epoch": 2.17, "learning_rate": 5.505148900640134e-06, - "loss": 0.1903, + "loss": 0.0452, "step": 7812 }, { "epoch": 2.17, "learning_rate": 5.503293440949996e-06, - "loss": 0.0865, + "loss": 0.0132, "step": 7813 }, { "epoch": 2.17, "learning_rate": 5.501437981259857e-06, - "loss": 0.0317, + "loss": 0.0524, "step": 7814 }, { "epoch": 2.18, "learning_rate": 5.49958252156972e-06, - "loss": 0.1425, + "loss": 0.1758, "step": 7815 }, { "epoch": 2.18, "learning_rate": 5.497727061879581e-06, - "loss": 0.0875, + "loss": 0.0935, "step": 7816 }, { "epoch": 2.18, "learning_rate": 5.4958716021894435e-06, - "loss": 0.0856, + "loss": 0.0759, "step": 7817 }, { "epoch": 2.18, "learning_rate": 5.4940161424993045e-06, - "loss": 0.0817, + "loss": 0.0298, "step": 7818 }, { "epoch": 2.18, "learning_rate": 5.492160682809166e-06, - "loss": 0.2521, + "loss": 0.0546, "step": 7819 }, { "epoch": 2.18, "learning_rate": 5.490305223119028e-06, - "loss": 0.1929, + "loss": 0.0749, "step": 7820 }, { "epoch": 2.18, "learning_rate": 5.48844976342889e-06, - "loss": 0.1943, + "loss": 0.0259, "step": 7821 }, { "epoch": 2.18, "learning_rate": 5.486594303738751e-06, - "loss": 0.0834, + "loss": 0.0538, "step": 7822 }, { "epoch": 2.18, "learning_rate": 5.484738844048614e-06, - "loss": 0.0866, + "loss": 0.0189, "step": 7823 }, { "epoch": 2.18, "learning_rate": 5.482883384358475e-06, - "loss": 0.1961, + "loss": 0.0592, "step": 7824 }, { "epoch": 2.18, "learning_rate": 5.4810279246683376e-06, - "loss": 0.1971, + "loss": 0.0118, "step": 7825 }, { "epoch": 2.18, "learning_rate": 5.479172464978199e-06, - "loss": 0.0299, + "loss": 0.0956, "step": 7826 }, { "epoch": 2.18, "learning_rate": 5.4773170052880605e-06, - "loss": 0.2476, + "loss": 0.0589, "step": 7827 }, { "epoch": 2.18, "learning_rate": 5.475461545597922e-06, - "loss": 0.0889, + "loss": 0.0157, "step": 7828 }, { "epoch": 2.18, "learning_rate": 5.473606085907784e-06, - "loss": 0.0844, + "loss": 0.1762, "step": 7829 }, { "epoch": 2.18, "learning_rate": 5.471750626217645e-06, - "loss": 0.1366, + "loss": 0.0572, "step": 7830 }, { "epoch": 2.18, "learning_rate": 5.469895166527508e-06, - "loss": 0.1404, + "loss": 0.0178, "step": 7831 }, { "epoch": 2.18, "learning_rate": 5.468039706837369e-06, - "loss": 0.0836, + "loss": 0.0597, "step": 7832 }, { "epoch": 2.18, "learning_rate": 5.466184247147232e-06, - "loss": 0.1946, + "loss": 0.1743, "step": 7833 }, { "epoch": 2.18, "learning_rate": 5.464328787457093e-06, - "loss": 0.2007, + "loss": 0.1054, "step": 7834 }, { "epoch": 2.18, "learning_rate": 5.4624733277669554e-06, - "loss": 0.1384, + "loss": 0.0487, "step": 7835 }, { "epoch": 2.18, "learning_rate": 5.4606178680768165e-06, - "loss": 0.0299, + "loss": 0.0654, "step": 7836 }, { "epoch": 2.18, "learning_rate": 5.458762408386678e-06, - "loss": 0.1385, + "loss": 0.0872, "step": 7837 }, { "epoch": 2.18, "learning_rate": 5.45690694869654e-06, - "loss": 0.1445, + "loss": 0.2308, "step": 7838 }, { "epoch": 2.18, "learning_rate": 5.455051489006402e-06, - "loss": 0.2582, + "loss": 0.0171, "step": 7839 }, { "epoch": 2.18, "learning_rate": 5.453196029316263e-06, - "loss": 0.0845, + "loss": 0.0104, "step": 7840 }, { "epoch": 2.18, "learning_rate": 5.451340569626126e-06, - "loss": 0.1919, + "loss": 0.1918, "step": 7841 }, { "epoch": 2.18, "learning_rate": 5.449485109935987e-06, - "loss": 0.0303, + "loss": 0.0494, "step": 7842 }, { "epoch": 2.18, "learning_rate": 5.4476296502458495e-06, - "loss": 0.0829, + "loss": 0.0809, "step": 7843 }, { "epoch": 2.18, "learning_rate": 5.4457741905557106e-06, - "loss": 0.1377, + "loss": 0.1072, "step": 7844 }, { "epoch": 2.18, "learning_rate": 5.4439187308655724e-06, - "loss": 0.0297, + "loss": 0.0493, "step": 7845 }, { "epoch": 2.18, "learning_rate": 5.442063271175434e-06, - "loss": 0.0841, + "loss": 0.0717, "step": 7846 }, { "epoch": 2.18, "learning_rate": 5.440207811485296e-06, - "loss": 0.0303, + "loss": 0.0464, "step": 7847 }, { "epoch": 2.18, "learning_rate": 5.438352351795157e-06, - "loss": 0.1939, + "loss": 0.2631, "step": 7848 }, { "epoch": 2.18, "learning_rate": 5.43649689210502e-06, - "loss": 0.1914, + "loss": 0.051, "step": 7849 }, { "epoch": 2.18, "learning_rate": 5.434641432414881e-06, - "loss": 0.0834, + "loss": 0.1131, "step": 7850 }, { "epoch": 2.19, "learning_rate": 5.432785972724744e-06, - "loss": 0.1404, + "loss": 0.0595, "step": 7851 }, { "epoch": 2.19, "learning_rate": 5.430930513034605e-06, - "loss": 0.1406, + "loss": 0.0635, "step": 7852 }, { "epoch": 2.19, "learning_rate": 5.4290750533444666e-06, - "loss": 0.0845, + "loss": 0.1296, "step": 7853 }, { "epoch": 2.19, "learning_rate": 5.4272195936543284e-06, - "loss": 0.0287, + "loss": 0.0632, "step": 7854 }, { "epoch": 2.19, "learning_rate": 5.42536413396419e-06, - "loss": 0.0857, + "loss": 0.0132, "step": 7855 }, { "epoch": 2.19, "learning_rate": 5.423508674274051e-06, - "loss": 0.0835, + "loss": 0.0589, "step": 7856 }, { "epoch": 2.19, "learning_rate": 5.421653214583914e-06, - "loss": 0.0803, + "loss": 0.0273, "step": 7857 }, { "epoch": 2.19, "learning_rate": 5.419797754893775e-06, - "loss": 0.3129, + "loss": 0.1585, "step": 7858 }, { "epoch": 2.19, "learning_rate": 5.417942295203638e-06, - "loss": 0.0283, + "loss": 0.1233, "step": 7859 }, { "epoch": 2.19, "learning_rate": 5.416086835513499e-06, - "loss": 0.4201, + "loss": 0.0172, "step": 7860 }, { "epoch": 2.19, "learning_rate": 5.4142313758233615e-06, - "loss": 0.3615, + "loss": 0.0147, "step": 7861 }, { "epoch": 2.19, "learning_rate": 5.4123759161332225e-06, - "loss": 0.1402, + "loss": 0.1543, "step": 7862 }, { "epoch": 2.19, "learning_rate": 5.410520456443084e-06, - "loss": 0.0843, + "loss": 0.0568, "step": 7863 }, { "epoch": 2.19, "learning_rate": 5.408664996752946e-06, - "loss": 0.0833, + "loss": 0.0795, "step": 7864 }, { "epoch": 2.19, "learning_rate": 5.406809537062808e-06, - "loss": 0.0289, + "loss": 0.0437, "step": 7865 }, { "epoch": 2.19, "learning_rate": 5.404954077372669e-06, - "loss": 0.1375, + "loss": 0.0153, "step": 7866 }, { "epoch": 2.19, "learning_rate": 5.403098617682532e-06, - "loss": 0.0297, + "loss": 0.0886, "step": 7867 }, { "epoch": 2.19, "learning_rate": 5.401243157992393e-06, - "loss": 0.1412, + "loss": 0.063, "step": 7868 }, { "epoch": 2.19, "learning_rate": 5.399387698302256e-06, - "loss": 0.0286, + "loss": 0.0182, "step": 7869 }, { "epoch": 2.19, "learning_rate": 5.397532238612117e-06, - "loss": 0.134, + "loss": 0.0155, "step": 7870 }, { "epoch": 2.19, "learning_rate": 5.3956767789219785e-06, - "loss": 0.0842, + "loss": 0.0498, "step": 7871 }, { "epoch": 2.19, "learning_rate": 5.39382131923184e-06, - "loss": 0.3078, + "loss": 0.0711, "step": 7872 }, { "epoch": 2.19, "learning_rate": 5.391965859541702e-06, - "loss": 0.1382, + "loss": 0.0652, "step": 7873 }, { "epoch": 2.19, "learning_rate": 5.390110399851563e-06, - "loss": 0.0835, + "loss": 0.0531, "step": 7874 }, { "epoch": 2.19, "learning_rate": 5.388254940161426e-06, - "loss": 0.0813, + "loss": 0.0493, "step": 7875 }, { "epoch": 2.19, "learning_rate": 5.386399480471287e-06, - "loss": 0.0801, + "loss": 0.0935, "step": 7876 }, { "epoch": 2.19, "learning_rate": 5.38454402078115e-06, - "loss": 0.0828, + "loss": 0.0313, "step": 7877 }, { "epoch": 2.19, "learning_rate": 5.382688561091011e-06, - "loss": 0.1975, + "loss": 0.1282, "step": 7878 }, { "epoch": 2.19, "learning_rate": 5.380833101400873e-06, - "loss": 0.029, + "loss": 0.0562, "step": 7879 }, { "epoch": 2.19, "learning_rate": 5.3789776417107345e-06, - "loss": 0.1969, + "loss": 0.1041, "step": 7880 }, { "epoch": 2.19, "learning_rate": 5.377122182020596e-06, - "loss": 0.0285, + "loss": 0.0747, "step": 7881 }, { "epoch": 2.19, "learning_rate": 5.375266722330457e-06, - "loss": 0.0815, + "loss": 0.0583, "step": 7882 }, { "epoch": 2.19, "learning_rate": 5.37341126264032e-06, - "loss": 0.1435, + "loss": 0.1585, "step": 7883 }, { "epoch": 2.19, "learning_rate": 5.371555802950181e-06, - "loss": 0.0825, + "loss": 0.0505, "step": 7884 }, { "epoch": 2.19, "learning_rate": 5.369700343260044e-06, - "loss": 0.1396, + "loss": 0.1353, "step": 7885 }, { "epoch": 2.19, "learning_rate": 5.367844883569905e-06, - "loss": 0.0276, + "loss": 0.0141, "step": 7886 }, { "epoch": 2.2, "learning_rate": 5.365989423879767e-06, - "loss": 0.1377, + "loss": 0.1059, "step": 7887 }, { "epoch": 2.2, "learning_rate": 5.364133964189629e-06, - "loss": 0.2552, + "loss": 0.0879, "step": 7888 }, { "epoch": 2.2, "learning_rate": 5.3622785044994905e-06, - "loss": 0.0823, + "loss": 0.1448, "step": 7889 }, { "epoch": 2.2, "learning_rate": 5.3604230448093515e-06, - "loss": 0.082, + "loss": 0.1361, "step": 7890 }, { "epoch": 2.2, "learning_rate": 5.358567585119214e-06, - "loss": 0.3052, + "loss": 0.0213, "step": 7891 }, { "epoch": 2.2, "learning_rate": 5.356712125429075e-06, - "loss": 0.1363, + "loss": 0.0974, "step": 7892 }, { "epoch": 2.2, "learning_rate": 5.354856665738938e-06, - "loss": 0.1406, + "loss": 0.0356, "step": 7893 }, { "epoch": 2.2, "learning_rate": 5.353001206048799e-06, - "loss": 0.1424, + "loss": 0.0151, "step": 7894 }, { "epoch": 2.2, "learning_rate": 5.35114574635866e-06, - "loss": 0.0845, + "loss": 0.081, "step": 7895 }, { "epoch": 2.2, "learning_rate": 5.349290286668523e-06, - "loss": 0.1431, + "loss": 0.1386, "step": 7896 }, { "epoch": 2.2, "learning_rate": 5.347434826978384e-06, - "loss": 0.1414, + "loss": 0.1102, "step": 7897 }, { "epoch": 2.2, "learning_rate": 5.3455793672882465e-06, - "loss": 0.083, + "loss": 0.1838, "step": 7898 }, { "epoch": 2.2, "learning_rate": 5.3437239075981075e-06, - "loss": 0.1935, + "loss": 0.0906, "step": 7899 }, { "epoch": 2.2, "learning_rate": 5.341868447907969e-06, - "loss": 0.0863, + "loss": 0.0625, "step": 7900 }, { "epoch": 2.2, "learning_rate": 5.340012988217831e-06, - "loss": 0.1959, + "loss": 0.0302, "step": 7901 }, { "epoch": 2.2, "learning_rate": 5.338157528527693e-06, - "loss": 0.086, + "loss": 0.0786, "step": 7902 }, { "epoch": 2.2, "learning_rate": 5.336302068837554e-06, - "loss": 0.2026, + "loss": 0.1243, "step": 7903 }, { "epoch": 2.2, "learning_rate": 5.334446609147417e-06, - "loss": 0.0846, + "loss": 0.0219, "step": 7904 }, { "epoch": 2.2, "learning_rate": 5.332591149457278e-06, - "loss": 0.3633, + "loss": 0.0463, "step": 7905 }, { "epoch": 2.2, "learning_rate": 5.330735689767141e-06, - "loss": 0.141, + "loss": 0.036, "step": 7906 }, { "epoch": 2.2, "learning_rate": 5.328880230077002e-06, - "loss": 0.1943, + "loss": 0.0215, "step": 7907 }, { "epoch": 2.2, "learning_rate": 5.3270247703868635e-06, - "loss": 0.1398, + "loss": 0.0259, "step": 7908 }, { "epoch": 2.2, "learning_rate": 5.325169310696725e-06, - "loss": 0.0832, + "loss": 0.1394, "step": 7909 }, { "epoch": 2.2, "learning_rate": 5.323313851006587e-06, - "loss": 0.1992, + "loss": 0.1714, "step": 7910 }, { "epoch": 2.2, "learning_rate": 5.321458391316448e-06, - "loss": 0.0888, + "loss": 0.064, "step": 7911 }, { "epoch": 2.2, "learning_rate": 5.319602931626311e-06, - "loss": 0.0803, + "loss": 0.0167, "step": 7912 }, { "epoch": 2.2, "learning_rate": 5.317747471936172e-06, - "loss": 0.1417, + "loss": 0.0575, "step": 7913 }, { "epoch": 2.2, "learning_rate": 5.315892012246035e-06, - "loss": 0.0843, + "loss": 0.045, "step": 7914 }, { "epoch": 2.2, "learning_rate": 5.314036552555896e-06, - "loss": 0.198, + "loss": 0.0209, "step": 7915 }, { "epoch": 2.2, "learning_rate": 5.312181092865758e-06, - "loss": 0.3579, + "loss": 0.0188, "step": 7916 }, { "epoch": 2.2, "learning_rate": 5.3103256331756195e-06, - "loss": 0.0299, + "loss": 0.0725, "step": 7917 }, { "epoch": 2.2, "learning_rate": 5.308470173485481e-06, - "loss": 0.0835, + "loss": 0.0829, "step": 7918 }, { "epoch": 2.2, "learning_rate": 5.306614713795342e-06, - "loss": 0.3049, + "loss": 0.0509, "step": 7919 }, { "epoch": 2.2, "learning_rate": 5.304759254105205e-06, - "loss": 0.1926, + "loss": 0.011, "step": 7920 }, { "epoch": 2.2, "learning_rate": 5.302903794415066e-06, - "loss": 0.1385, + "loss": 0.0227, "step": 7921 }, { "epoch": 2.2, "learning_rate": 5.301048334724929e-06, - "loss": 0.0855, + "loss": 0.0441, "step": 7922 }, { "epoch": 2.21, "learning_rate": 5.29919287503479e-06, - "loss": 0.1939, + "loss": 0.0566, "step": 7923 }, { "epoch": 2.21, "learning_rate": 5.297337415344653e-06, - "loss": 0.1933, + "loss": 0.0644, "step": 7924 }, { "epoch": 2.21, "learning_rate": 5.295481955654514e-06, - "loss": 0.248, + "loss": 0.1557, "step": 7925 }, { "epoch": 2.21, "learning_rate": 5.2936264959643755e-06, - "loss": 0.2478, + "loss": 0.1003, "step": 7926 }, { "epoch": 2.21, "learning_rate": 5.291771036274237e-06, - "loss": 0.0874, + "loss": 0.0965, "step": 7927 }, { "epoch": 2.21, "learning_rate": 5.289915576584099e-06, - "loss": 0.0856, + "loss": 0.1467, "step": 7928 }, { "epoch": 2.21, "learning_rate": 5.28806011689396e-06, - "loss": 0.0853, + "loss": 0.1258, "step": 7929 }, { "epoch": 2.21, "learning_rate": 5.286204657203823e-06, - "loss": 0.1396, + "loss": 0.0478, "step": 7930 }, { "epoch": 2.21, "learning_rate": 5.284349197513684e-06, - "loss": 0.0838, + "loss": 0.0087, "step": 7931 }, { "epoch": 2.21, "learning_rate": 5.282493737823547e-06, - "loss": 0.1873, + "loss": 0.0558, "step": 7932 }, { "epoch": 2.21, "learning_rate": 5.280638278133408e-06, - "loss": 0.1343, + "loss": 0.044, "step": 7933 }, { "epoch": 2.21, "learning_rate": 5.27878281844327e-06, - "loss": 0.0334, + "loss": 0.0534, "step": 7934 }, { "epoch": 2.21, "learning_rate": 5.2769273587531315e-06, - "loss": 0.0849, + "loss": 0.0173, "step": 7935 }, { "epoch": 2.21, "learning_rate": 5.275071899062993e-06, - "loss": 0.1379, + "loss": 0.015, "step": 7936 }, { "epoch": 2.21, "learning_rate": 5.273216439372854e-06, - "loss": 0.032, + "loss": 0.1233, "step": 7937 }, { "epoch": 2.21, "learning_rate": 5.271360979682717e-06, - "loss": 0.0831, + "loss": 0.0916, "step": 7938 }, { "epoch": 2.21, "learning_rate": 5.269505519992578e-06, - "loss": 0.1441, + "loss": 0.0216, "step": 7939 }, { "epoch": 2.21, "learning_rate": 5.267650060302441e-06, - "loss": 0.0875, + "loss": 0.0168, "step": 7940 }, { "epoch": 2.21, "learning_rate": 5.265794600612302e-06, - "loss": 0.1421, + "loss": 0.0608, "step": 7941 }, { "epoch": 2.21, "learning_rate": 5.263939140922164e-06, - "loss": 0.0313, + "loss": 0.2016, "step": 7942 }, { "epoch": 2.21, "learning_rate": 5.262083681232026e-06, - "loss": 0.1346, + "loss": 0.0812, "step": 7943 }, { "epoch": 2.21, "learning_rate": 5.2602282215418875e-06, - "loss": 0.1405, + "loss": 0.0465, "step": 7944 }, { "epoch": 2.21, "learning_rate": 5.2583727618517485e-06, - "loss": 0.1939, + "loss": 0.0519, "step": 7945 }, { "epoch": 2.21, "learning_rate": 5.256517302161611e-06, - "loss": 0.0848, + "loss": 0.1847, "step": 7946 }, { "epoch": 2.21, "learning_rate": 5.254661842471472e-06, - "loss": 0.1398, + "loss": 0.0653, "step": 7947 }, { "epoch": 2.21, "learning_rate": 5.252806382781335e-06, - "loss": 0.1388, + "loss": 0.0666, "step": 7948 }, { "epoch": 2.21, "learning_rate": 5.250950923091196e-06, - "loss": 0.356, + "loss": 0.0945, "step": 7949 }, { "epoch": 2.21, "learning_rate": 5.249095463401058e-06, - "loss": 0.0314, + "loss": 0.0994, "step": 7950 }, { "epoch": 2.21, "learning_rate": 5.24724000371092e-06, - "loss": 0.1401, + "loss": 0.0435, "step": 7951 }, { "epoch": 2.21, "learning_rate": 5.245384544020782e-06, - "loss": 0.0295, + "loss": 0.1106, "step": 7952 }, { "epoch": 2.21, "learning_rate": 5.243529084330643e-06, - "loss": 0.253, + "loss": 0.057, "step": 7953 }, { "epoch": 2.21, "learning_rate": 5.241673624640505e-06, - "loss": 0.0852, + "loss": 0.1351, "step": 7954 }, { "epoch": 2.21, "learning_rate": 5.239818164950366e-06, - "loss": 0.1395, + "loss": 0.0195, "step": 7955 }, { "epoch": 2.21, "learning_rate": 5.237962705260229e-06, - "loss": 0.2463, + "loss": 0.0566, "step": 7956 }, { "epoch": 2.21, "learning_rate": 5.23610724557009e-06, - "loss": 0.0867, + "loss": 0.1069, "step": 7957 }, { "epoch": 2.21, "learning_rate": 5.234251785879953e-06, - "loss": 0.1406, + "loss": 0.0181, "step": 7958 }, { "epoch": 2.22, "learning_rate": 5.232396326189814e-06, - "loss": 0.1919, + "loss": 0.069, "step": 7959 }, { "epoch": 2.22, "learning_rate": 5.230540866499676e-06, - "loss": 0.1991, + "loss": 0.0257, "step": 7960 }, { "epoch": 2.22, "learning_rate": 5.2286854068095376e-06, - "loss": 0.0307, + "loss": 0.1411, "step": 7961 }, { "epoch": 2.22, "learning_rate": 5.2268299471193994e-06, - "loss": 0.0847, + "loss": 0.0886, "step": 7962 }, { "epoch": 2.22, "learning_rate": 5.2249744874292605e-06, - "loss": 0.14, + "loss": 0.0217, "step": 7963 }, { "epoch": 2.22, "learning_rate": 5.223119027739123e-06, - "loss": 0.1384, + "loss": 0.0457, "step": 7964 }, { "epoch": 2.22, "learning_rate": 5.221263568048984e-06, - "loss": 0.2469, + "loss": 0.0148, "step": 7965 }, { "epoch": 2.22, "learning_rate": 5.219408108358847e-06, - "loss": 0.1377, + "loss": 0.0733, "step": 7966 }, { "epoch": 2.22, "learning_rate": 5.217552648668708e-06, - "loss": 0.1429, + "loss": 0.0779, "step": 7967 }, { "epoch": 2.22, "learning_rate": 5.21569718897857e-06, - "loss": 0.0311, + "loss": 0.1144, "step": 7968 }, { "epoch": 2.22, "learning_rate": 5.213841729288432e-06, - "loss": 0.0866, + "loss": 0.049, "step": 7969 }, { "epoch": 2.22, "learning_rate": 5.2119862695982936e-06, - "loss": 0.19, + "loss": 0.028, "step": 7970 }, { "epoch": 2.22, "learning_rate": 5.210130809908155e-06, - "loss": 0.1383, + "loss": 0.1009, "step": 7971 }, { "epoch": 2.22, "learning_rate": 5.208275350218017e-06, - "loss": 0.1399, + "loss": 0.0708, "step": 7972 }, { "epoch": 2.22, "learning_rate": 5.206419890527878e-06, - "loss": 0.1954, + "loss": 0.1358, "step": 7973 }, { "epoch": 2.22, "learning_rate": 5.204564430837741e-06, - "loss": 0.0312, + "loss": 0.1277, "step": 7974 }, { "epoch": 2.22, "learning_rate": 5.202708971147602e-06, - "loss": 0.1944, + "loss": 0.01, "step": 7975 }, { "epoch": 2.22, "learning_rate": 5.200853511457464e-06, - "loss": 0.1352, + "loss": 0.151, "step": 7976 }, { "epoch": 2.22, "learning_rate": 5.198998051767326e-06, - "loss": 0.189, + "loss": 0.052, "step": 7977 }, { "epoch": 2.22, "learning_rate": 5.197142592077188e-06, - "loss": 0.0295, + "loss": 0.0238, "step": 7978 }, { "epoch": 2.22, "learning_rate": 5.195287132387049e-06, - "loss": 0.1931, + "loss": 0.1094, "step": 7979 }, { "epoch": 2.22, "learning_rate": 5.193431672696911e-06, - "loss": 0.1369, + "loss": 0.1259, "step": 7980 }, { "epoch": 2.22, "learning_rate": 5.1915762130067724e-06, - "loss": 0.0303, + "loss": 0.0233, "step": 7981 }, { "epoch": 2.22, "learning_rate": 5.189720753316635e-06, - "loss": 0.0831, + "loss": 0.0215, "step": 7982 }, { "epoch": 2.22, "learning_rate": 5.187865293626496e-06, - "loss": 0.1407, + "loss": 0.019, "step": 7983 }, { "epoch": 2.22, "learning_rate": 5.186009833936359e-06, - "loss": 0.0852, + "loss": 0.0592, "step": 7984 }, { "epoch": 2.22, "learning_rate": 5.18415437424622e-06, - "loss": 0.0294, + "loss": 0.0679, "step": 7985 }, { "epoch": 2.22, "learning_rate": 5.182298914556082e-06, - "loss": 0.3015, + "loss": 0.0202, "step": 7986 }, { "epoch": 2.22, "learning_rate": 5.180443454865944e-06, - "loss": 0.1367, + "loss": 0.0718, "step": 7987 }, { "epoch": 2.22, "learning_rate": 5.1785879951758055e-06, - "loss": 0.0852, + "loss": 0.1037, "step": 7988 }, { "epoch": 2.22, "learning_rate": 5.1767325354856666e-06, - "loss": 0.141, + "loss": 0.0631, "step": 7989 }, { "epoch": 2.22, "learning_rate": 5.174877075795529e-06, - "loss": 0.1383, + "loss": 0.0622, "step": 7990 }, { "epoch": 2.22, "learning_rate": 5.17302161610539e-06, - "loss": 0.1376, + "loss": 0.0755, "step": 7991 }, { "epoch": 2.22, "learning_rate": 5.171166156415253e-06, - "loss": 0.1414, + "loss": 0.0462, "step": 7992 }, { "epoch": 2.22, "learning_rate": 5.169310696725114e-06, - "loss": 0.0836, + "loss": 0.1065, "step": 7993 }, { "epoch": 2.22, "learning_rate": 5.167455237034976e-06, - "loss": 0.0848, + "loss": 0.1331, "step": 7994 }, { "epoch": 2.23, "learning_rate": 5.165599777344838e-06, - "loss": 0.0866, + "loss": 0.1186, "step": 7995 }, { "epoch": 2.23, "learning_rate": 5.1637443176547e-06, - "loss": 0.196, + "loss": 0.0448, "step": 7996 }, { "epoch": 2.23, "learning_rate": 5.161888857964561e-06, - "loss": 0.1917, + "loss": 0.0133, "step": 7997 }, { "epoch": 2.23, "learning_rate": 5.160033398274423e-06, - "loss": 0.1392, + "loss": 0.1591, "step": 7998 }, { "epoch": 2.23, "learning_rate": 5.158177938584284e-06, - "loss": 0.2421, + "loss": 0.1006, "step": 7999 }, { "epoch": 2.23, "learning_rate": 5.156322478894147e-06, - "loss": 0.3616, + "loss": 0.0965, "step": 8000 }, { "epoch": 2.23, "learning_rate": 5.154467019204008e-06, - "loss": 0.0862, + "loss": 0.0348, "step": 8001 }, { "epoch": 2.23, "learning_rate": 5.15261155951387e-06, - "loss": 0.1947, + "loss": 0.0615, "step": 8002 }, { "epoch": 2.23, "learning_rate": 5.150756099823732e-06, - "loss": 0.086, + "loss": 0.0125, "step": 8003 }, { "epoch": 2.23, "learning_rate": 5.148900640133594e-06, - "loss": 0.2493, + "loss": 0.1131, "step": 8004 }, { "epoch": 2.23, "learning_rate": 5.147045180443455e-06, - "loss": 0.3517, + "loss": 0.143, "step": 8005 }, { "epoch": 2.23, "learning_rate": 5.1451897207533175e-06, - "loss": 0.0862, + "loss": 0.0915, "step": 8006 }, { "epoch": 2.23, "learning_rate": 5.1433342610631785e-06, - "loss": 0.2504, + "loss": 0.0636, "step": 8007 }, { "epoch": 2.23, "learning_rate": 5.141478801373041e-06, - "loss": 0.0882, + "loss": 0.0369, "step": 8008 }, { "epoch": 2.23, "learning_rate": 5.139623341682902e-06, - "loss": 0.1845, + "loss": 0.228, "step": 8009 }, { "epoch": 2.23, "learning_rate": 5.137767881992764e-06, - "loss": 0.1379, + "loss": 0.1714, "step": 8010 }, { "epoch": 2.23, "learning_rate": 5.135912422302626e-06, - "loss": 0.0887, + "loss": 0.0533, "step": 8011 }, { "epoch": 2.23, "learning_rate": 5.134056962612488e-06, - "loss": 0.1987, + "loss": 0.0169, "step": 8012 }, { "epoch": 2.23, "learning_rate": 5.132201502922349e-06, - "loss": 0.0852, + "loss": 0.0225, "step": 8013 }, { "epoch": 2.23, "learning_rate": 5.130346043232212e-06, - "loss": 0.2512, + "loss": 0.116, "step": 8014 }, { "epoch": 2.23, "learning_rate": 5.128490583542073e-06, - "loss": 0.0347, + "loss": 0.099, "step": 8015 }, { "epoch": 2.23, "learning_rate": 5.126635123851935e-06, - "loss": 0.1955, + "loss": 0.1355, "step": 8016 }, { "epoch": 2.23, "learning_rate": 5.124779664161796e-06, - "loss": 0.1942, + "loss": 0.1239, "step": 8017 }, { "epoch": 2.23, "learning_rate": 5.122924204471659e-06, - "loss": 0.2395, + "loss": 0.0686, "step": 8018 }, { "epoch": 2.23, "learning_rate": 5.12106874478152e-06, - "loss": 0.1359, + "loss": 0.0276, "step": 8019 }, { "epoch": 2.23, "learning_rate": 5.119213285091382e-06, - "loss": 0.0875, + "loss": 0.0657, "step": 8020 }, { "epoch": 2.23, "learning_rate": 5.117357825401244e-06, - "loss": 0.0354, + "loss": 0.1227, "step": 8021 }, { "epoch": 2.23, "learning_rate": 5.115502365711106e-06, - "loss": 0.1901, + "loss": 0.1457, "step": 8022 }, { "epoch": 2.23, "learning_rate": 5.113646906020967e-06, - "loss": 0.0898, + "loss": 0.0591, "step": 8023 }, { "epoch": 2.23, "learning_rate": 5.1117914463308295e-06, - "loss": 0.2423, + "loss": 0.1092, "step": 8024 }, { "epoch": 2.23, "learning_rate": 5.1099359866406905e-06, - "loss": 0.1378, + "loss": 0.0767, "step": 8025 }, { "epoch": 2.23, "learning_rate": 5.108080526950553e-06, - "loss": 0.2453, + "loss": 0.1015, "step": 8026 }, { "epoch": 2.23, "learning_rate": 5.106225067260414e-06, - "loss": 0.1388, + "loss": 0.079, "step": 8027 }, { "epoch": 2.23, "learning_rate": 5.104369607570276e-06, - "loss": 0.2432, + "loss": 0.0191, "step": 8028 }, { "epoch": 2.23, "learning_rate": 5.102514147880138e-06, - "loss": 0.143, + "loss": 0.1379, "step": 8029 }, { "epoch": 2.23, "learning_rate": 5.10065868819e-06, - "loss": 0.1445, + "loss": 0.1337, "step": 8030 }, { "epoch": 2.24, "learning_rate": 5.098803228499861e-06, - "loss": 0.1388, + "loss": 0.082, "step": 8031 }, { "epoch": 2.24, "learning_rate": 5.096947768809724e-06, - "loss": 0.0372, + "loss": 0.1002, "step": 8032 }, { "epoch": 2.24, "learning_rate": 5.095092309119585e-06, - "loss": 0.2437, + "loss": 0.0218, "step": 8033 }, { "epoch": 2.24, "learning_rate": 5.093236849429447e-06, - "loss": 0.1373, + "loss": 0.032, "step": 8034 }, { "epoch": 2.24, "learning_rate": 5.091381389739308e-06, - "loss": 0.0866, + "loss": 0.0703, "step": 8035 }, { "epoch": 2.24, "learning_rate": 5.08952593004917e-06, - "loss": 0.0369, + "loss": 0.0879, "step": 8036 }, { "epoch": 2.24, "learning_rate": 5.087670470359032e-06, - "loss": 0.0363, + "loss": 0.0559, "step": 8037 }, { "epoch": 2.24, "learning_rate": 5.085815010668894e-06, - "loss": 0.1895, + "loss": 0.0498, "step": 8038 }, { "epoch": 2.24, "learning_rate": 5.083959550978755e-06, - "loss": 0.0347, + "loss": 0.0964, "step": 8039 }, { "epoch": 2.24, "learning_rate": 5.082104091288618e-06, - "loss": 0.1935, + "loss": 0.0307, "step": 8040 }, { "epoch": 2.24, "learning_rate": 5.080248631598479e-06, - "loss": 0.1433, + "loss": 0.0182, "step": 8041 }, { "epoch": 2.24, "learning_rate": 5.0783931719083415e-06, - "loss": 0.0891, + "loss": 0.0757, "step": 8042 }, { "epoch": 2.24, "learning_rate": 5.0765377122182025e-06, - "loss": 0.1389, + "loss": 0.0972, "step": 8043 }, { "epoch": 2.24, "learning_rate": 5.074682252528065e-06, - "loss": 0.1441, + "loss": 0.0212, "step": 8044 }, { "epoch": 2.24, "learning_rate": 5.072826792837926e-06, - "loss": 0.2461, + "loss": 0.0472, "step": 8045 }, { "epoch": 2.24, "learning_rate": 5.070971333147788e-06, - "loss": 0.0346, + "loss": 0.0518, "step": 8046 }, { "epoch": 2.24, "learning_rate": 5.06911587345765e-06, - "loss": 0.1902, + "loss": 0.0797, "step": 8047 }, { "epoch": 2.24, "learning_rate": 5.067260413767511e-06, - "loss": 0.142, + "loss": 0.0687, "step": 8048 }, { "epoch": 2.24, "learning_rate": 5.065404954077373e-06, - "loss": 0.1903, + "loss": 0.0591, "step": 8049 }, { "epoch": 2.24, "learning_rate": 5.063549494387235e-06, - "loss": 0.1402, + "loss": 0.0134, "step": 8050 }, { "epoch": 2.24, "learning_rate": 5.061694034697097e-06, - "loss": 0.0843, + "loss": 0.0179, "step": 8051 }, { "epoch": 2.24, "learning_rate": 5.059838575006958e-06, - "loss": 0.1319, + "loss": 0.0284, "step": 8052 }, { "epoch": 2.24, "learning_rate": 5.05798311531682e-06, - "loss": 0.0333, + "loss": 0.0652, "step": 8053 }, { "epoch": 2.24, "learning_rate": 5.056127655626681e-06, - "loss": 0.2371, + "loss": 0.1776, "step": 8054 }, { "epoch": 2.24, "learning_rate": 5.054272195936544e-06, - "loss": 0.1937, + "loss": 0.1211, "step": 8055 }, { "epoch": 2.24, "learning_rate": 5.052416736246405e-06, - "loss": 0.4052, + "loss": 0.0789, "step": 8056 }, { "epoch": 2.24, "learning_rate": 5.050561276556267e-06, - "loss": 0.137, + "loss": 0.1459, "step": 8057 }, { "epoch": 2.24, "learning_rate": 5.048705816866129e-06, - "loss": 0.0337, + "loss": 0.0482, "step": 8058 }, { "epoch": 2.24, "learning_rate": 5.046850357175991e-06, - "loss": 0.1407, + "loss": 0.0646, "step": 8059 }, { "epoch": 2.24, "learning_rate": 5.044994897485852e-06, - "loss": 0.0337, + "loss": 0.0504, "step": 8060 }, { "epoch": 2.24, "learning_rate": 5.0431394377957145e-06, - "loss": 0.0884, + "loss": 0.0129, "step": 8061 }, { "epoch": 2.24, "learning_rate": 5.0412839781055755e-06, - "loss": 0.0871, + "loss": 0.0546, "step": 8062 }, { "epoch": 2.24, "learning_rate": 5.039428518415438e-06, - "loss": 0.1385, + "loss": 0.1219, "step": 8063 }, { "epoch": 2.24, "learning_rate": 5.037573058725299e-06, - "loss": 0.2495, + "loss": 0.0248, "step": 8064 }, { "epoch": 2.24, "learning_rate": 5.035717599035161e-06, - "loss": 0.2434, + "loss": 0.0109, "step": 8065 }, { "epoch": 2.24, "learning_rate": 5.033862139345023e-06, - "loss": 0.1905, + "loss": 0.1165, "step": 8066 }, { "epoch": 2.25, "learning_rate": 5.032006679654885e-06, - "loss": 0.0337, + "loss": 0.0913, "step": 8067 }, { "epoch": 2.25, "learning_rate": 5.030151219964746e-06, - "loss": 0.0343, + "loss": 0.0205, "step": 8068 }, { "epoch": 2.25, "learning_rate": 5.028295760274609e-06, - "loss": 0.1924, + "loss": 0.0919, "step": 8069 }, { "epoch": 2.25, "learning_rate": 5.02644030058447e-06, - "loss": 0.3503, + "loss": 0.0506, "step": 8070 }, { "epoch": 2.25, "learning_rate": 5.024584840894332e-06, - "loss": 0.244, + "loss": 0.1657, "step": 8071 }, { "epoch": 2.25, "learning_rate": 5.022729381204193e-06, - "loss": 0.0855, + "loss": 0.0512, "step": 8072 }, { "epoch": 2.25, "learning_rate": 5.020873921514055e-06, - "loss": 0.1387, + "loss": 0.0638, "step": 8073 }, { "epoch": 2.25, "learning_rate": 5.019018461823917e-06, - "loss": 0.0862, + "loss": 0.1427, "step": 8074 }, { "epoch": 2.25, "learning_rate": 5.017163002133779e-06, - "loss": 0.1359, + "loss": 0.1413, "step": 8075 }, { "epoch": 2.25, "learning_rate": 5.01530754244364e-06, - "loss": 0.1362, + "loss": 0.0122, "step": 8076 }, { "epoch": 2.25, "learning_rate": 5.013452082753503e-06, - "loss": 0.1896, + "loss": 0.039, "step": 8077 }, { "epoch": 2.25, "learning_rate": 5.011596623063364e-06, - "loss": 0.241, + "loss": 0.1344, "step": 8078 }, { "epoch": 2.25, "learning_rate": 5.0097411633732264e-06, - "loss": 0.1388, + "loss": 0.1436, "step": 8079 }, { "epoch": 2.25, "learning_rate": 5.0078857036830875e-06, - "loss": 0.0861, + "loss": 0.1317, "step": 8080 }, { "epoch": 2.25, "learning_rate": 5.00603024399295e-06, - "loss": 0.2488, + "loss": 0.1008, "step": 8081 }, { "epoch": 2.25, "learning_rate": 5.004174784302811e-06, - "loss": 0.0844, + "loss": 0.1359, "step": 8082 }, { "epoch": 2.25, "learning_rate": 5.002319324612673e-06, - "loss": 0.2899, + "loss": 0.0153, "step": 8083 }, { "epoch": 2.25, "learning_rate": 5.000463864922535e-06, - "loss": 0.1423, + "loss": 0.0555, "step": 8084 }, { "epoch": 2.25, "learning_rate": 4.998608405232397e-06, - "loss": 0.0886, + "loss": 0.0888, "step": 8085 }, { "epoch": 2.25, "learning_rate": 4.996752945542259e-06, - "loss": 0.2938, + "loss": 0.1275, "step": 8086 }, { "epoch": 2.25, "learning_rate": 4.9948974858521206e-06, - "loss": 0.1396, + "loss": 0.1428, "step": 8087 }, { "epoch": 2.25, "learning_rate": 4.9930420261619824e-06, - "loss": 0.1416, + "loss": 0.0301, "step": 8088 }, { "epoch": 2.25, "learning_rate": 4.991186566471844e-06, - "loss": 0.1414, + "loss": 0.1142, "step": 8089 }, { "epoch": 2.25, "learning_rate": 4.989331106781705e-06, - "loss": 0.2439, + "loss": 0.1317, "step": 8090 }, { "epoch": 2.25, "learning_rate": 4.987475647091567e-06, - "loss": 0.1404, + "loss": 0.0388, "step": 8091 }, { "epoch": 2.25, "learning_rate": 4.985620187401429e-06, - "loss": 0.2432, + "loss": 0.0996, "step": 8092 }, { "epoch": 2.25, "learning_rate": 4.983764727711291e-06, - "loss": 0.0896, + "loss": 0.0537, "step": 8093 }, { "epoch": 2.25, "learning_rate": 4.981909268021153e-06, - "loss": 0.0379, + "loss": 0.0389, "step": 8094 }, { "epoch": 2.25, "learning_rate": 4.980053808331015e-06, - "loss": 0.0366, + "loss": 0.0319, "step": 8095 }, { "epoch": 2.25, "learning_rate": 4.9781983486408765e-06, - "loss": 0.0363, + "loss": 0.0967, "step": 8096 }, { "epoch": 2.25, "learning_rate": 4.976342888950738e-06, - "loss": 0.1901, + "loss": 0.0526, "step": 8097 }, { "epoch": 2.25, "learning_rate": 4.9744874292606e-06, - "loss": 0.0875, + "loss": 0.0245, "step": 8098 }, { "epoch": 2.25, "learning_rate": 4.972631969570461e-06, - "loss": 0.0874, + "loss": 0.0337, "step": 8099 }, { "epoch": 2.25, "learning_rate": 4.970776509880323e-06, - "loss": 0.0902, + "loss": 0.0312, "step": 8100 }, { "epoch": 2.25, "learning_rate": 4.968921050190185e-06, - "loss": 0.1921, + "loss": 0.0933, "step": 8101 }, { "epoch": 2.25, "learning_rate": 4.967065590500047e-06, - "loss": 0.0345, + "loss": 0.0584, "step": 8102 }, { "epoch": 2.26, "learning_rate": 4.965210130809909e-06, - "loss": 0.1912, + "loss": 0.0528, "step": 8103 }, { "epoch": 2.26, "learning_rate": 4.963354671119771e-06, - "loss": 0.1372, + "loss": 0.0705, "step": 8104 }, { "epoch": 2.26, "learning_rate": 4.9614992114296325e-06, - "loss": 0.1353, + "loss": 0.0934, "step": 8105 }, { "epoch": 2.26, "learning_rate": 4.959643751739494e-06, - "loss": 0.0839, + "loss": 0.0915, "step": 8106 }, { "epoch": 2.26, "learning_rate": 4.957788292049356e-06, - "loss": 0.0346, + "loss": 0.0159, "step": 8107 }, { "epoch": 2.26, "learning_rate": 4.955932832359217e-06, - "loss": 0.1926, + "loss": 0.0986, "step": 8108 }, { "epoch": 2.26, "learning_rate": 4.954077372669079e-06, - "loss": 0.2459, + "loss": 0.0212, "step": 8109 }, { "epoch": 2.26, "learning_rate": 4.952221912978941e-06, - "loss": 0.0321, + "loss": 0.0188, "step": 8110 }, { "epoch": 2.26, "learning_rate": 4.950366453288803e-06, - "loss": 0.0852, + "loss": 0.0173, "step": 8111 }, { "epoch": 2.26, "learning_rate": 4.948510993598665e-06, - "loss": 0.1433, + "loss": 0.1321, "step": 8112 }, { "epoch": 2.26, "learning_rate": 4.946655533908527e-06, - "loss": 0.1432, + "loss": 0.0955, "step": 8113 }, { "epoch": 2.26, "learning_rate": 4.9448000742183885e-06, - "loss": 0.5123, + "loss": 0.0126, "step": 8114 }, { "epoch": 2.26, "learning_rate": 4.94294461452825e-06, - "loss": 0.2444, + "loss": 0.0961, "step": 8115 }, { "epoch": 2.26, "learning_rate": 4.941089154838111e-06, - "loss": 0.0324, + "loss": 0.0199, "step": 8116 }, { "epoch": 2.26, "learning_rate": 4.939233695147973e-06, - "loss": 0.1975, + "loss": 0.0431, "step": 8117 }, { "epoch": 2.26, "learning_rate": 4.937378235457835e-06, - "loss": 0.1384, + "loss": 0.087, "step": 8118 }, { "epoch": 2.26, "learning_rate": 4.935522775767697e-06, - "loss": 0.1961, + "loss": 0.0536, "step": 8119 }, { "epoch": 2.26, "learning_rate": 4.933667316077559e-06, - "loss": 0.0844, + "loss": 0.0175, "step": 8120 }, { "epoch": 2.26, "learning_rate": 4.931811856387421e-06, - "loss": 0.19, + "loss": 0.0158, "step": 8121 }, { "epoch": 2.26, "learning_rate": 4.929956396697283e-06, - "loss": 0.1894, + "loss": 0.0619, "step": 8122 }, { "epoch": 2.26, "learning_rate": 4.928100937007144e-06, - "loss": 0.1951, + "loss": 0.1057, "step": 8123 }, { "epoch": 2.26, "learning_rate": 4.9262454773170055e-06, - "loss": 0.1393, + "loss": 0.1155, "step": 8124 }, { "epoch": 2.26, "learning_rate": 4.924390017626867e-06, - "loss": 0.1864, + "loss": 0.0511, "step": 8125 }, { "epoch": 2.26, "learning_rate": 4.922534557936729e-06, - "loss": 0.0859, + "loss": 0.0384, "step": 8126 }, { "epoch": 2.26, "learning_rate": 4.920679098246591e-06, - "loss": 0.1368, + "loss": 0.0072, "step": 8127 }, { "epoch": 2.26, "learning_rate": 4.918823638556452e-06, - "loss": 0.1406, + "loss": 0.0193, "step": 8128 }, { "epoch": 2.26, "learning_rate": 4.916968178866314e-06, - "loss": 0.1872, + "loss": 0.044, "step": 8129 }, { "epoch": 2.26, "learning_rate": 4.915112719176176e-06, - "loss": 0.1927, + "loss": 0.1141, "step": 8130 }, { "epoch": 2.26, "learning_rate": 4.913257259486038e-06, - "loss": 0.142, + "loss": 0.0081, "step": 8131 }, { "epoch": 2.26, "learning_rate": 4.9114017997959e-06, - "loss": 0.0339, + "loss": 0.122, "step": 8132 }, { "epoch": 2.26, "learning_rate": 4.9095463401057615e-06, - "loss": 0.1878, + "loss": 0.0107, "step": 8133 }, { "epoch": 2.26, "learning_rate": 4.907690880415623e-06, - "loss": 0.034, + "loss": 0.092, "step": 8134 }, { "epoch": 2.26, "learning_rate": 4.905835420725485e-06, - "loss": 0.1405, + "loss": 0.1007, "step": 8135 }, { "epoch": 2.26, "learning_rate": 4.903979961035346e-06, - "loss": 0.0849, + "loss": 0.0185, "step": 8136 }, { "epoch": 2.26, "learning_rate": 4.902124501345208e-06, - "loss": 0.0864, + "loss": 0.0634, "step": 8137 }, { "epoch": 2.26, "learning_rate": 4.90026904165507e-06, - "loss": 0.0339, + "loss": 0.0643, "step": 8138 }, { "epoch": 2.27, "learning_rate": 4.898413581964932e-06, - "loss": 0.1363, + "loss": 0.0339, "step": 8139 }, { "epoch": 2.27, "learning_rate": 4.896558122274794e-06, - "loss": 0.1369, + "loss": 0.0316, "step": 8140 }, { "epoch": 2.27, "learning_rate": 4.894702662584656e-06, - "loss": 0.138, + "loss": 0.0143, "step": 8141 }, { "epoch": 2.27, "learning_rate": 4.8928472028945175e-06, - "loss": 0.0333, + "loss": 0.1164, "step": 8142 }, { "epoch": 2.27, "learning_rate": 4.890991743204379e-06, - "loss": 0.1349, + "loss": 0.043, "step": 8143 }, { "epoch": 2.27, "learning_rate": 4.889136283514241e-06, - "loss": 0.1356, + "loss": 0.0437, "step": 8144 }, { "epoch": 2.27, "learning_rate": 4.887280823824102e-06, - "loss": 0.1369, + "loss": 0.1581, "step": 8145 }, { "epoch": 2.27, "learning_rate": 4.885425364133964e-06, - "loss": 0.3028, + "loss": 0.1056, "step": 8146 }, { "epoch": 2.27, "learning_rate": 4.883569904443826e-06, - "loss": 0.2986, + "loss": 0.0538, "step": 8147 }, { "epoch": 2.27, "learning_rate": 4.881714444753688e-06, - "loss": 0.1945, + "loss": 0.0681, "step": 8148 }, { "epoch": 2.27, "learning_rate": 4.87985898506355e-06, - "loss": 0.1374, + "loss": 0.0699, "step": 8149 }, { "epoch": 2.27, "learning_rate": 4.878003525373412e-06, - "loss": 0.2505, + "loss": 0.0298, "step": 8150 }, { "epoch": 2.27, "learning_rate": 4.8761480656832735e-06, - "loss": 0.1899, + "loss": 0.0802, "step": 8151 }, { "epoch": 2.27, "learning_rate": 4.874292605993135e-06, - "loss": 0.0327, + "loss": 0.136, "step": 8152 }, { "epoch": 2.27, "learning_rate": 4.872437146302996e-06, - "loss": 0.2466, + "loss": 0.07, "step": 8153 }, { "epoch": 2.27, "learning_rate": 4.870581686612858e-06, - "loss": 0.1397, + "loss": 0.0208, "step": 8154 }, { "epoch": 2.27, "learning_rate": 4.86872622692272e-06, - "loss": 0.0861, + "loss": 0.0836, "step": 8155 }, { "epoch": 2.27, "learning_rate": 4.866870767232582e-06, - "loss": 0.1394, + "loss": 0.0211, "step": 8156 }, { "epoch": 2.27, "learning_rate": 4.865015307542444e-06, - "loss": 0.1906, + "loss": 0.1365, "step": 8157 }, { "epoch": 2.27, "learning_rate": 4.863159847852306e-06, - "loss": 0.0335, + "loss": 0.0498, "step": 8158 }, { "epoch": 2.27, "learning_rate": 4.861304388162168e-06, - "loss": 0.1916, + "loss": 0.1333, "step": 8159 }, { "epoch": 2.27, "learning_rate": 4.8594489284720295e-06, - "loss": 0.2454, + "loss": 0.0384, "step": 8160 }, { "epoch": 2.27, "learning_rate": 4.857593468781891e-06, - "loss": 0.1401, + "loss": 0.0213, "step": 8161 }, { "epoch": 2.27, "learning_rate": 4.855738009091752e-06, - "loss": 0.2437, + "loss": 0.0206, "step": 8162 }, { "epoch": 2.27, "learning_rate": 4.853882549401614e-06, - "loss": 0.1952, + "loss": 0.1091, "step": 8163 }, { "epoch": 2.27, "learning_rate": 4.852027089711476e-06, - "loss": 0.134, + "loss": 0.0115, "step": 8164 }, { "epoch": 2.27, "learning_rate": 4.850171630021338e-06, - "loss": 0.0349, + "loss": 0.067, "step": 8165 }, { "epoch": 2.27, "learning_rate": 4.8483161703312e-06, - "loss": 0.1369, + "loss": 0.0521, "step": 8166 }, { "epoch": 2.27, "learning_rate": 4.846460710641062e-06, - "loss": 0.0331, + "loss": 0.0138, "step": 8167 }, { "epoch": 2.27, "learning_rate": 4.844605250950924e-06, - "loss": 0.1927, + "loss": 0.0177, "step": 8168 }, { "epoch": 2.27, "learning_rate": 4.8427497912607855e-06, - "loss": 0.3543, + "loss": 0.0878, "step": 8169 }, { "epoch": 2.27, "learning_rate": 4.840894331570647e-06, - "loss": 0.1401, + "loss": 0.1506, "step": 8170 }, { "epoch": 2.27, "learning_rate": 4.839038871880508e-06, - "loss": 0.1393, + "loss": 0.0191, "step": 8171 }, { "epoch": 2.27, "learning_rate": 4.83718341219037e-06, - "loss": 0.0343, + "loss": 0.0558, "step": 8172 }, { "epoch": 2.27, "learning_rate": 4.835327952500232e-06, - "loss": 0.1929, + "loss": 0.0109, "step": 8173 }, { "epoch": 2.27, "learning_rate": 4.833472492810094e-06, - "loss": 0.0879, + "loss": 0.1271, "step": 8174 }, { "epoch": 2.28, "learning_rate": 4.831617033119956e-06, - "loss": 0.0842, + "loss": 0.1217, "step": 8175 }, { "epoch": 2.28, "learning_rate": 4.829761573429818e-06, - "loss": 0.1872, + "loss": 0.1065, "step": 8176 }, { "epoch": 2.28, "learning_rate": 4.82790611373968e-06, - "loss": 0.0849, + "loss": 0.074, "step": 8177 }, { "epoch": 2.28, "learning_rate": 4.8260506540495415e-06, - "loss": 0.1917, + "loss": 0.0251, "step": 8178 }, { "epoch": 2.28, "learning_rate": 4.8241951943594025e-06, - "loss": 0.1387, + "loss": 0.1245, "step": 8179 }, { "epoch": 2.28, "learning_rate": 4.822339734669264e-06, - "loss": 0.0881, + "loss": 0.1154, "step": 8180 }, { "epoch": 2.28, "learning_rate": 4.820484274979126e-06, - "loss": 0.0878, + "loss": 0.1035, "step": 8181 }, { "epoch": 2.28, "learning_rate": 4.818628815288988e-06, - "loss": 0.0344, + "loss": 0.0748, "step": 8182 }, { "epoch": 2.28, "learning_rate": 4.81677335559885e-06, - "loss": 0.141, + "loss": 0.1299, "step": 8183 }, { "epoch": 2.28, "learning_rate": 4.814917895908712e-06, - "loss": 0.0346, + "loss": 0.0324, "step": 8184 }, { "epoch": 2.28, "learning_rate": 4.813062436218574e-06, - "loss": 0.1896, + "loss": 0.0353, "step": 8185 }, { "epoch": 2.28, "learning_rate": 4.8112069765284356e-06, - "loss": 0.1918, + "loss": 0.0158, "step": 8186 }, { "epoch": 2.28, "learning_rate": 4.8093515168382974e-06, - "loss": 0.1911, + "loss": 0.1371, "step": 8187 }, { "epoch": 2.28, "learning_rate": 4.8074960571481585e-06, - "loss": 0.1374, + "loss": 0.046, "step": 8188 }, { "epoch": 2.28, "learning_rate": 4.80564059745802e-06, - "loss": 0.2974, + "loss": 0.1146, "step": 8189 }, { "epoch": 2.28, "learning_rate": 4.803785137767882e-06, - "loss": 0.2444, + "loss": 0.0166, "step": 8190 }, { "epoch": 2.28, "learning_rate": 4.801929678077744e-06, - "loss": 0.2439, + "loss": 0.0094, "step": 8191 }, { "epoch": 2.28, "learning_rate": 4.800074218387606e-06, - "loss": 0.0859, + "loss": 0.1153, "step": 8192 }, { "epoch": 2.28, "learning_rate": 4.798218758697468e-06, - "loss": 0.1897, + "loss": 0.0737, "step": 8193 }, { "epoch": 2.28, "learning_rate": 4.79636329900733e-06, - "loss": 0.0874, + "loss": 0.1239, "step": 8194 }, { "epoch": 2.28, "learning_rate": 4.7945078393171916e-06, - "loss": 0.192, + "loss": 0.0806, "step": 8195 }, { "epoch": 2.28, "learning_rate": 4.792652379627053e-06, - "loss": 0.0847, + "loss": 0.0195, "step": 8196 }, { "epoch": 2.28, "learning_rate": 4.7907969199369145e-06, - "loss": 0.0875, + "loss": 0.1066, "step": 8197 }, { "epoch": 2.28, "learning_rate": 4.788941460246776e-06, - "loss": 0.1398, + "loss": 0.128, "step": 8198 }, { "epoch": 2.28, "learning_rate": 4.787086000556638e-06, - "loss": 0.1904, + "loss": 0.1504, "step": 8199 }, { "epoch": 2.28, "learning_rate": 4.7852305408665e-06, - "loss": 0.1396, + "loss": 0.1606, "step": 8200 }, { "epoch": 2.28, "learning_rate": 4.783375081176362e-06, - "loss": 0.1388, + "loss": 0.019, "step": 8201 }, { "epoch": 2.28, "learning_rate": 4.781519621486224e-06, - "loss": 0.0851, + "loss": 0.0141, "step": 8202 }, { "epoch": 2.28, "learning_rate": 4.779664161796086e-06, - "loss": 0.1384, + "loss": 0.0927, "step": 8203 }, { "epoch": 2.28, "learning_rate": 4.7778087021059476e-06, - "loss": 0.0336, + "loss": 0.0688, "step": 8204 }, { "epoch": 2.28, "learning_rate": 4.7759532424158086e-06, - "loss": 0.0876, + "loss": 0.1024, "step": 8205 }, { "epoch": 2.28, "learning_rate": 4.7740977827256704e-06, - "loss": 0.1357, + "loss": 0.1315, "step": 8206 }, { "epoch": 2.28, "learning_rate": 4.772242323035532e-06, - "loss": 0.1383, + "loss": 0.0706, "step": 8207 }, { "epoch": 2.28, "learning_rate": 4.770386863345394e-06, - "loss": 0.0863, + "loss": 0.0425, "step": 8208 }, { "epoch": 2.28, "learning_rate": 4.768531403655256e-06, - "loss": 0.1911, + "loss": 0.0198, "step": 8209 }, { "epoch": 2.28, "learning_rate": 4.766675943965118e-06, - "loss": 0.1354, + "loss": 0.0224, "step": 8210 }, { "epoch": 2.29, "learning_rate": 4.76482048427498e-06, - "loss": 0.2428, + "loss": 0.0148, "step": 8211 }, { "epoch": 2.29, "learning_rate": 4.762965024584842e-06, - "loss": 0.1406, + "loss": 0.0349, "step": 8212 }, { "epoch": 2.29, "learning_rate": 4.761109564894703e-06, - "loss": 0.0858, + "loss": 0.0128, "step": 8213 }, { "epoch": 2.29, "learning_rate": 4.7592541052045646e-06, - "loss": 0.0325, + "loss": 0.1036, "step": 8214 }, { "epoch": 2.29, "learning_rate": 4.7573986455144264e-06, - "loss": 0.0835, + "loss": 0.0425, "step": 8215 }, { "epoch": 2.29, "learning_rate": 4.755543185824288e-06, - "loss": 0.1401, + "loss": 0.1082, "step": 8216 }, { "epoch": 2.29, "learning_rate": 4.75368772613415e-06, - "loss": 0.1382, + "loss": 0.108, "step": 8217 }, { "epoch": 2.29, "learning_rate": 4.751832266444012e-06, - "loss": 0.3622, + "loss": 0.0612, "step": 8218 }, { "epoch": 2.29, "learning_rate": 4.749976806753874e-06, - "loss": 0.0872, + "loss": 0.126, "step": 8219 }, { "epoch": 2.29, "learning_rate": 4.748121347063736e-06, - "loss": 0.0843, + "loss": 0.0177, "step": 8220 }, { "epoch": 2.29, "learning_rate": 4.746265887373598e-06, - "loss": 0.0861, + "loss": 0.0452, "step": 8221 }, { "epoch": 2.29, "learning_rate": 4.744410427683459e-06, - "loss": 0.139, + "loss": 0.0468, "step": 8222 }, { "epoch": 2.29, "learning_rate": 4.7425549679933206e-06, - "loss": 0.4072, + "loss": 0.0585, "step": 8223 }, { "epoch": 2.29, "learning_rate": 4.740699508303182e-06, - "loss": 0.2986, + "loss": 0.0675, "step": 8224 }, { "epoch": 2.29, "learning_rate": 4.738844048613044e-06, - "loss": 0.0892, + "loss": 0.0487, "step": 8225 }, { "epoch": 2.29, "learning_rate": 4.736988588922906e-06, - "loss": 0.0861, + "loss": 0.0416, "step": 8226 }, { "epoch": 2.29, "learning_rate": 4.735133129232768e-06, - "loss": 0.2989, + "loss": 0.0184, "step": 8227 }, { "epoch": 2.29, "learning_rate": 4.73327766954263e-06, - "loss": 0.1916, + "loss": 0.0214, "step": 8228 }, { "epoch": 2.29, "learning_rate": 4.731422209852492e-06, - "loss": 0.1396, + "loss": 0.0612, "step": 8229 }, { "epoch": 2.29, "learning_rate": 4.729566750162354e-06, - "loss": 0.0875, + "loss": 0.051, "step": 8230 }, { "epoch": 2.29, "learning_rate": 4.727711290472215e-06, - "loss": 0.1372, + "loss": 0.0483, "step": 8231 }, { "epoch": 2.29, "learning_rate": 4.7258558307820765e-06, - "loss": 0.0885, + "loss": 0.1374, "step": 8232 }, { "epoch": 2.29, "learning_rate": 4.724000371091938e-06, - "loss": 0.1404, + "loss": 0.1292, "step": 8233 }, { "epoch": 2.29, "learning_rate": 4.7221449114018e-06, - "loss": 0.086, + "loss": 0.1085, "step": 8234 }, { "epoch": 2.29, "learning_rate": 4.720289451711662e-06, - "loss": 0.1934, + "loss": 0.0156, "step": 8235 }, { "epoch": 2.29, "learning_rate": 4.718433992021524e-06, - "loss": 0.0877, + "loss": 0.0138, "step": 8236 }, { "epoch": 2.29, "learning_rate": 4.716578532331386e-06, - "loss": 0.0854, + "loss": 0.0651, "step": 8237 }, { "epoch": 2.29, "learning_rate": 4.714723072641248e-06, - "loss": 0.1958, + "loss": 0.0474, "step": 8238 }, { "epoch": 2.29, "learning_rate": 4.712867612951109e-06, - "loss": 0.2443, + "loss": 0.0193, "step": 8239 }, { "epoch": 2.29, "learning_rate": 4.711012153260971e-06, - "loss": 0.1953, + "loss": 0.126, "step": 8240 }, { "epoch": 2.29, "learning_rate": 4.7091566935708325e-06, - "loss": 0.1373, + "loss": 0.0528, "step": 8241 }, { "epoch": 2.29, "learning_rate": 4.707301233880694e-06, - "loss": 0.0338, + "loss": 0.0579, "step": 8242 }, { "epoch": 2.29, "learning_rate": 4.705445774190556e-06, - "loss": 0.1936, + "loss": 0.1224, "step": 8243 }, { "epoch": 2.29, "learning_rate": 4.703590314500418e-06, - "loss": 0.0856, + "loss": 0.0155, "step": 8244 }, { "epoch": 2.29, "learning_rate": 4.70173485481028e-06, - "loss": 0.1399, + "loss": 0.0432, "step": 8245 }, { "epoch": 2.3, "learning_rate": 4.699879395120142e-06, - "loss": 0.0851, + "loss": 0.0649, "step": 8246 }, { "epoch": 2.3, "learning_rate": 4.698023935430004e-06, - "loss": 0.0338, + "loss": 0.0791, "step": 8247 }, { "epoch": 2.3, "learning_rate": 4.696168475739865e-06, - "loss": 0.0343, + "loss": 0.0484, "step": 8248 }, { "epoch": 2.3, "learning_rate": 4.694313016049727e-06, - "loss": 0.0876, + "loss": 0.1473, "step": 8249 }, { "epoch": 2.3, "learning_rate": 4.6924575563595885e-06, - "loss": 0.0862, + "loss": 0.1319, "step": 8250 }, { "epoch": 2.3, "learning_rate": 4.69060209666945e-06, - "loss": 0.0886, + "loss": 0.0682, "step": 8251 }, { "epoch": 2.3, "learning_rate": 4.688746636979312e-06, - "loss": 0.1898, + "loss": 0.0063, "step": 8252 }, { "epoch": 2.3, "learning_rate": 4.686891177289174e-06, - "loss": 0.1399, + "loss": 0.0581, "step": 8253 }, { "epoch": 2.3, "learning_rate": 4.685035717599036e-06, - "loss": 0.2461, + "loss": 0.0759, "step": 8254 }, { "epoch": 2.3, "learning_rate": 4.683180257908898e-06, - "loss": 0.1915, + "loss": 0.0753, "step": 8255 }, { "epoch": 2.3, "learning_rate": 4.681324798218759e-06, - "loss": 0.0317, + "loss": 0.0251, "step": 8256 }, { "epoch": 2.3, "learning_rate": 4.679469338528621e-06, - "loss": 0.1913, + "loss": 0.0827, "step": 8257 }, { "epoch": 2.3, "learning_rate": 4.677613878838483e-06, - "loss": 0.032, + "loss": 0.0228, "step": 8258 }, { "epoch": 2.3, "learning_rate": 4.6757584191483445e-06, - "loss": 0.1411, + "loss": 0.0559, "step": 8259 }, { "epoch": 2.3, "learning_rate": 4.673902959458206e-06, - "loss": 0.2383, + "loss": 0.1047, "step": 8260 }, { "epoch": 2.3, "learning_rate": 4.672047499768068e-06, - "loss": 0.0841, + "loss": 0.0487, "step": 8261 }, { "epoch": 2.3, "learning_rate": 4.67019204007793e-06, - "loss": 0.1445, + "loss": 0.1314, "step": 8262 }, { "epoch": 2.3, "learning_rate": 4.668336580387792e-06, - "loss": 0.084, + "loss": 0.0473, "step": 8263 }, { "epoch": 2.3, "learning_rate": 4.666481120697654e-06, - "loss": 0.1418, + "loss": 0.1395, "step": 8264 }, { "epoch": 2.3, "learning_rate": 4.664625661007515e-06, - "loss": 0.1418, + "loss": 0.1428, "step": 8265 }, { "epoch": 2.3, "learning_rate": 4.662770201317377e-06, - "loss": 0.0298, + "loss": 0.1422, "step": 8266 }, { "epoch": 2.3, "learning_rate": 4.660914741627239e-06, - "loss": 0.0872, + "loss": 0.0427, "step": 8267 }, { "epoch": 2.3, "learning_rate": 4.6590592819371005e-06, - "loss": 0.1904, + "loss": 0.0902, "step": 8268 }, { "epoch": 2.3, "learning_rate": 4.657203822246962e-06, - "loss": 0.137, + "loss": 0.0948, "step": 8269 }, { "epoch": 2.3, "learning_rate": 4.655348362556824e-06, - "loss": 0.1395, + "loss": 0.118, "step": 8270 }, { "epoch": 2.3, "learning_rate": 4.653492902866686e-06, - "loss": 0.1391, + "loss": 0.0152, "step": 8271 }, { "epoch": 2.3, "learning_rate": 4.651637443176548e-06, - "loss": 0.086, + "loss": 0.1766, "step": 8272 }, { "epoch": 2.3, "learning_rate": 4.649781983486409e-06, - "loss": 0.2482, + "loss": 0.0182, "step": 8273 }, { "epoch": 2.3, "learning_rate": 4.647926523796271e-06, - "loss": 0.1416, + "loss": 0.0465, "step": 8274 }, { "epoch": 2.3, "learning_rate": 4.646071064106133e-06, - "loss": 0.1906, + "loss": 0.0543, "step": 8275 }, { "epoch": 2.3, "learning_rate": 4.644215604415994e-06, - "loss": 0.0295, + "loss": 0.1241, "step": 8276 }, { "epoch": 2.3, "learning_rate": 4.642360144725856e-06, - "loss": 0.1915, + "loss": 0.06, "step": 8277 }, { "epoch": 2.3, "learning_rate": 4.6405046850357175e-06, - "loss": 0.1981, + "loss": 0.1401, "step": 8278 }, { "epoch": 2.3, "learning_rate": 4.638649225345579e-06, - "loss": 0.1424, + "loss": 0.0219, "step": 8279 }, { "epoch": 2.3, "learning_rate": 4.636793765655441e-06, - "loss": 0.1882, + "loss": 0.0956, "step": 8280 }, { "epoch": 2.3, "learning_rate": 4.634938305965303e-06, - "loss": 0.1935, + "loss": 0.0583, "step": 8281 }, { "epoch": 2.31, "learning_rate": 4.633082846275165e-06, - "loss": 0.0855, + "loss": 0.1663, "step": 8282 }, { "epoch": 2.31, "learning_rate": 4.631227386585027e-06, - "loss": 0.2489, + "loss": 0.0964, "step": 8283 }, { "epoch": 2.31, "learning_rate": 4.629371926894889e-06, - "loss": 0.0826, + "loss": 0.0857, "step": 8284 }, { "epoch": 2.31, "learning_rate": 4.62751646720475e-06, - "loss": 0.1394, + "loss": 0.0513, "step": 8285 }, { "epoch": 2.31, "learning_rate": 4.625661007514612e-06, - "loss": 0.2967, + "loss": 0.0522, "step": 8286 }, { "epoch": 2.31, "learning_rate": 4.6238055478244735e-06, - "loss": 0.1375, + "loss": 0.1154, "step": 8287 }, { "epoch": 2.31, "learning_rate": 4.621950088134335e-06, - "loss": 0.1924, + "loss": 0.0296, "step": 8288 }, { "epoch": 2.31, "learning_rate": 4.620094628444197e-06, - "loss": 0.304, + "loss": 0.0815, "step": 8289 }, { "epoch": 2.31, "learning_rate": 4.618239168754059e-06, - "loss": 0.1419, + "loss": 0.0911, "step": 8290 }, { "epoch": 2.31, "learning_rate": 4.616383709063921e-06, - "loss": 0.0872, + "loss": 0.0234, "step": 8291 }, { "epoch": 2.31, "learning_rate": 4.614528249373783e-06, - "loss": 0.2401, + "loss": 0.169, "step": 8292 }, { "epoch": 2.31, "learning_rate": 4.612672789683645e-06, - "loss": 0.1404, + "loss": 0.0632, "step": 8293 }, { "epoch": 2.31, "learning_rate": 4.610817329993506e-06, - "loss": 0.1416, + "loss": 0.0587, "step": 8294 }, { "epoch": 2.31, "learning_rate": 4.608961870303368e-06, - "loss": 0.3035, + "loss": 0.027, "step": 8295 }, { "epoch": 2.31, "learning_rate": 4.6071064106132295e-06, - "loss": 0.1908, + "loss": 0.0989, "step": 8296 }, { "epoch": 2.31, "learning_rate": 4.605250950923091e-06, - "loss": 0.1406, + "loss": 0.1468, "step": 8297 }, { "epoch": 2.31, "learning_rate": 4.603395491232953e-06, - "loss": 0.0879, + "loss": 0.0628, "step": 8298 }, { "epoch": 2.31, "learning_rate": 4.601540031542815e-06, - "loss": 0.192, + "loss": 0.0166, "step": 8299 }, { "epoch": 2.31, "learning_rate": 4.599684571852677e-06, - "loss": 0.0347, + "loss": 0.1272, "step": 8300 }, { "epoch": 2.31, "learning_rate": 4.597829112162539e-06, - "loss": 0.0869, + "loss": 0.0522, "step": 8301 }, { "epoch": 2.31, "learning_rate": 4.5959736524724e-06, - "loss": 0.0343, + "loss": 0.124, "step": 8302 }, { "epoch": 2.31, "learning_rate": 4.594118192782262e-06, - "loss": 0.0865, + "loss": 0.0132, "step": 8303 }, { "epoch": 2.31, "learning_rate": 4.592262733092124e-06, - "loss": 0.0868, + "loss": 0.0248, "step": 8304 }, { "epoch": 2.31, "learning_rate": 4.5904072734019855e-06, - "loss": 0.1385, + "loss": 0.1198, "step": 8305 }, { "epoch": 2.31, "learning_rate": 4.588551813711847e-06, - "loss": 0.1919, + "loss": 0.0666, "step": 8306 }, { "epoch": 2.31, "learning_rate": 4.586696354021709e-06, - "loss": 0.0887, + "loss": 0.0303, "step": 8307 }, { "epoch": 2.31, "learning_rate": 4.584840894331571e-06, - "loss": 0.0885, + "loss": 0.0677, "step": 8308 }, { "epoch": 2.31, "learning_rate": 4.582985434641433e-06, - "loss": 0.1933, + "loss": 0.0589, "step": 8309 }, { "epoch": 2.31, "learning_rate": 4.581129974951295e-06, - "loss": 0.0335, + "loss": 0.054, "step": 8310 }, { "epoch": 2.31, "learning_rate": 4.579274515261156e-06, - "loss": 0.188, + "loss": 0.0628, "step": 8311 }, { "epoch": 2.31, "learning_rate": 4.577419055571018e-06, - "loss": 0.1431, + "loss": 0.0803, "step": 8312 }, { "epoch": 2.31, "learning_rate": 4.57556359588088e-06, - "loss": 0.0853, + "loss": 0.1485, "step": 8313 }, { "epoch": 2.31, "learning_rate": 4.5737081361907415e-06, - "loss": 0.0332, + "loss": 0.0495, "step": 8314 }, { "epoch": 2.31, "learning_rate": 4.571852676500603e-06, - "loss": 0.1382, + "loss": 0.0744, "step": 8315 }, { "epoch": 2.31, "learning_rate": 4.569997216810465e-06, - "loss": 0.0321, + "loss": 0.0127, "step": 8316 }, { "epoch": 2.31, "learning_rate": 4.568141757120327e-06, - "loss": 0.1421, + "loss": 0.1495, "step": 8317 }, { "epoch": 2.32, "learning_rate": 4.566286297430189e-06, - "loss": 0.1899, + "loss": 0.0168, "step": 8318 }, { "epoch": 2.32, "learning_rate": 4.56443083774005e-06, - "loss": 0.0899, + "loss": 0.1149, "step": 8319 }, { "epoch": 2.32, "learning_rate": 4.562575378049912e-06, - "loss": 0.0328, + "loss": 0.1719, "step": 8320 }, { "epoch": 2.32, "learning_rate": 4.560719918359774e-06, - "loss": 0.0844, + "loss": 0.1146, "step": 8321 }, { "epoch": 2.32, "learning_rate": 4.5588644586696356e-06, - "loss": 0.1348, + "loss": 0.1101, "step": 8322 }, { "epoch": 2.32, "learning_rate": 4.5570089989794974e-06, - "loss": 0.0845, + "loss": 0.0151, "step": 8323 }, { "epoch": 2.32, "learning_rate": 4.555153539289359e-06, - "loss": 0.1924, + "loss": 0.1282, "step": 8324 }, { "epoch": 2.32, "learning_rate": 4.553298079599221e-06, - "loss": 0.0865, + "loss": 0.1019, "step": 8325 }, { "epoch": 2.32, "learning_rate": 4.551442619909083e-06, - "loss": 0.2552, + "loss": 0.0307, "step": 8326 }, { "epoch": 2.32, "learning_rate": 4.549587160218945e-06, - "loss": 0.3077, + "loss": 0.0708, "step": 8327 }, { "epoch": 2.32, "learning_rate": 4.547731700528806e-06, - "loss": 0.0836, + "loss": 0.0341, "step": 8328 }, { "epoch": 2.32, "learning_rate": 4.545876240838668e-06, - "loss": 0.1379, + "loss": 0.0674, "step": 8329 }, { "epoch": 2.32, "learning_rate": 4.54402078114853e-06, - "loss": 0.0869, + "loss": 0.0443, "step": 8330 }, { "epoch": 2.32, "learning_rate": 4.5421653214583916e-06, - "loss": 0.2483, + "loss": 0.0829, "step": 8331 }, { "epoch": 2.32, "learning_rate": 4.5403098617682534e-06, - "loss": 0.1335, + "loss": 0.0434, "step": 8332 }, { "epoch": 2.32, "learning_rate": 4.538454402078115e-06, - "loss": 0.1407, + "loss": 0.0155, "step": 8333 }, { "epoch": 2.32, "learning_rate": 4.536598942387977e-06, - "loss": 0.1918, + "loss": 0.119, "step": 8334 }, { "epoch": 2.32, "learning_rate": 4.534743482697839e-06, - "loss": 0.1407, + "loss": 0.0195, "step": 8335 }, { "epoch": 2.32, "learning_rate": 4.5328880230077e-06, - "loss": 0.1386, + "loss": 0.0903, "step": 8336 }, { "epoch": 2.32, "learning_rate": 4.531032563317562e-06, - "loss": 0.1915, + "loss": 0.0195, "step": 8337 }, { "epoch": 2.32, "learning_rate": 4.529177103627424e-06, - "loss": 0.1406, + "loss": 0.0915, "step": 8338 }, { "epoch": 2.32, "learning_rate": 4.527321643937286e-06, - "loss": 0.0314, + "loss": 0.0675, "step": 8339 }, { "epoch": 2.32, "learning_rate": 4.5254661842471475e-06, - "loss": 0.0833, + "loss": 0.1746, "step": 8340 }, { "epoch": 2.32, "learning_rate": 4.523610724557009e-06, - "loss": 0.3647, + "loss": 0.0121, "step": 8341 }, { "epoch": 2.32, "learning_rate": 4.521755264866871e-06, - "loss": 0.0853, + "loss": 0.0657, "step": 8342 }, { "epoch": 2.32, "learning_rate": 4.519899805176733e-06, - "loss": 0.0845, + "loss": 0.017, "step": 8343 }, { "epoch": 2.32, "learning_rate": 4.518044345486595e-06, - "loss": 0.0318, + "loss": 0.108, "step": 8344 }, { "epoch": 2.32, "learning_rate": 4.516188885796456e-06, - "loss": 0.0867, + "loss": 0.0119, "step": 8345 }, { "epoch": 2.32, "learning_rate": 4.514333426106318e-06, - "loss": 0.0842, + "loss": 0.0199, "step": 8346 }, { "epoch": 2.32, "learning_rate": 4.51247796641618e-06, - "loss": 0.0838, + "loss": 0.0876, "step": 8347 }, { "epoch": 2.32, "learning_rate": 4.510622506726042e-06, - "loss": 0.1399, + "loss": 0.0782, "step": 8348 }, { "epoch": 2.32, "learning_rate": 4.5087670470359035e-06, - "loss": 0.0828, + "loss": 0.0824, "step": 8349 }, { "epoch": 2.32, "learning_rate": 4.506911587345765e-06, - "loss": 0.2521, + "loss": 0.0858, "step": 8350 }, { "epoch": 2.32, "learning_rate": 4.505056127655627e-06, - "loss": 0.1365, + "loss": 0.1009, "step": 8351 }, { "epoch": 2.32, "learning_rate": 4.503200667965489e-06, - "loss": 0.0854, + "loss": 0.0561, "step": 8352 }, { "epoch": 2.32, "learning_rate": 4.501345208275351e-06, - "loss": 0.3061, + "loss": 0.0191, "step": 8353 }, { "epoch": 2.33, "learning_rate": 4.499489748585212e-06, - "loss": 0.0305, + "loss": 0.0156, "step": 8354 }, { "epoch": 2.33, "learning_rate": 4.497634288895074e-06, - "loss": 0.1385, + "loss": 0.1611, "step": 8355 }, { "epoch": 2.33, "learning_rate": 4.495778829204936e-06, - "loss": 0.2458, + "loss": 0.1973, "step": 8356 }, { "epoch": 2.33, "learning_rate": 4.493923369514798e-06, - "loss": 0.0835, + "loss": 0.1564, "step": 8357 }, { "epoch": 2.33, "learning_rate": 4.4920679098246595e-06, - "loss": 0.136, + "loss": 0.1578, "step": 8358 }, { "epoch": 2.33, "learning_rate": 4.490212450134521e-06, - "loss": 0.1384, + "loss": 0.0558, "step": 8359 }, { "epoch": 2.33, "learning_rate": 4.488356990444383e-06, - "loss": 0.0302, + "loss": 0.0595, "step": 8360 }, { "epoch": 2.33, "learning_rate": 4.486501530754245e-06, - "loss": 0.0304, + "loss": 0.071, "step": 8361 }, { "epoch": 2.33, "learning_rate": 4.484646071064106e-06, - "loss": 0.031, + "loss": 0.0634, "step": 8362 }, { "epoch": 2.33, "learning_rate": 4.482790611373968e-06, - "loss": 0.031, + "loss": 0.0897, "step": 8363 }, { "epoch": 2.33, "learning_rate": 4.48093515168383e-06, - "loss": 0.1373, + "loss": 0.0143, "step": 8364 }, { "epoch": 2.33, "learning_rate": 4.479079691993692e-06, - "loss": 0.0289, + "loss": 0.0781, "step": 8365 }, { "epoch": 2.33, "learning_rate": 4.477224232303554e-06, - "loss": 0.084, + "loss": 0.0626, "step": 8366 }, { "epoch": 2.33, "learning_rate": 4.4753687726134155e-06, - "loss": 0.195, + "loss": 0.106, "step": 8367 }, { "epoch": 2.33, "learning_rate": 4.473513312923277e-06, - "loss": 0.0816, + "loss": 0.2554, "step": 8368 }, { "epoch": 2.33, "learning_rate": 4.471657853233139e-06, - "loss": 0.135, + "loss": 0.044, "step": 8369 }, { "epoch": 2.33, "learning_rate": 4.469802393543001e-06, - "loss": 0.1407, + "loss": 0.1661, "step": 8370 }, { "epoch": 2.33, "learning_rate": 4.467946933852862e-06, - "loss": 0.138, + "loss": 0.1169, "step": 8371 }, { "epoch": 2.33, "learning_rate": 4.466091474162724e-06, - "loss": 0.2443, + "loss": 0.1235, "step": 8372 }, { "epoch": 2.33, "learning_rate": 4.464236014472586e-06, - "loss": 0.0283, + "loss": 0.0185, "step": 8373 }, { "epoch": 2.33, "learning_rate": 4.462380554782448e-06, - "loss": 0.0819, + "loss": 0.0558, "step": 8374 }, { "epoch": 2.33, "learning_rate": 4.46052509509231e-06, - "loss": 0.2472, + "loss": 0.0523, "step": 8375 }, { "epoch": 2.33, "learning_rate": 4.4586696354021715e-06, - "loss": 0.0863, + "loss": 0.1429, "step": 8376 }, { "epoch": 2.33, "learning_rate": 4.456814175712033e-06, - "loss": 0.0286, + "loss": 0.0539, "step": 8377 }, { "epoch": 2.33, "learning_rate": 4.454958716021895e-06, - "loss": 0.1954, + "loss": 0.0324, "step": 8378 }, { "epoch": 2.33, "learning_rate": 4.453103256331756e-06, - "loss": 0.0277, + "loss": 0.0271, "step": 8379 }, { "epoch": 2.33, "learning_rate": 4.451247796641618e-06, - "loss": 0.2509, + "loss": 0.0795, "step": 8380 }, { "epoch": 2.33, "learning_rate": 4.44939233695148e-06, - "loss": 0.1932, + "loss": 0.0269, "step": 8381 }, { "epoch": 2.33, "learning_rate": 4.447536877261342e-06, - "loss": 0.0846, + "loss": 0.0606, "step": 8382 }, { "epoch": 2.33, "learning_rate": 4.445681417571204e-06, - "loss": 0.1946, + "loss": 0.0713, "step": 8383 }, { "epoch": 2.33, "learning_rate": 4.443825957881066e-06, - "loss": 0.1931, + "loss": 0.0586, "step": 8384 }, { "epoch": 2.33, "learning_rate": 4.4419704981909275e-06, - "loss": 0.0846, + "loss": 0.0501, "step": 8385 }, { "epoch": 2.33, "learning_rate": 4.440115038500789e-06, - "loss": 0.1956, + "loss": 0.0181, "step": 8386 }, { "epoch": 2.33, "learning_rate": 4.438259578810651e-06, - "loss": 0.1917, + "loss": 0.0784, "step": 8387 }, { "epoch": 2.33, "learning_rate": 4.436404119120512e-06, - "loss": 0.1925, + "loss": 0.0458, "step": 8388 }, { "epoch": 2.33, "learning_rate": 4.434548659430374e-06, - "loss": 0.0291, + "loss": 0.0114, "step": 8389 }, { "epoch": 2.34, "learning_rate": 4.432693199740236e-06, - "loss": 0.1415, + "loss": 0.0172, "step": 8390 }, { "epoch": 2.34, "learning_rate": 4.430837740050098e-06, - "loss": 0.1409, + "loss": 0.0151, "step": 8391 }, { "epoch": 2.34, "learning_rate": 4.42898228035996e-06, - "loss": 0.0806, + "loss": 0.0114, "step": 8392 }, { "epoch": 2.34, "learning_rate": 4.427126820669822e-06, - "loss": 0.0823, + "loss": 0.1428, "step": 8393 }, { "epoch": 2.34, "learning_rate": 4.4252713609796835e-06, - "loss": 0.1918, + "loss": 0.2414, "step": 8394 }, { "epoch": 2.34, "learning_rate": 4.423415901289545e-06, - "loss": 0.1386, + "loss": 0.1461, "step": 8395 }, { "epoch": 2.34, "learning_rate": 4.421560441599406e-06, - "loss": 0.1954, + "loss": 0.0136, "step": 8396 }, { "epoch": 2.34, "learning_rate": 4.419704981909268e-06, - "loss": 0.1387, + "loss": 0.0059, "step": 8397 }, { "epoch": 2.34, "learning_rate": 4.41784952221913e-06, - "loss": 0.1964, + "loss": 0.0162, "step": 8398 }, { "epoch": 2.34, "learning_rate": 4.415994062528992e-06, - "loss": 0.2511, + "loss": 0.0191, "step": 8399 }, { "epoch": 2.34, "learning_rate": 4.414138602838854e-06, - "loss": 0.0842, + "loss": 0.1202, "step": 8400 }, { "epoch": 2.34, "learning_rate": 4.412283143148716e-06, - "loss": 0.1395, + "loss": 0.0085, "step": 8401 }, { "epoch": 2.34, "learning_rate": 4.410427683458578e-06, - "loss": 0.249, + "loss": 0.0545, "step": 8402 }, { "epoch": 2.34, "learning_rate": 4.4085722237684395e-06, - "loss": 0.1387, + "loss": 0.0677, "step": 8403 }, { "epoch": 2.34, "learning_rate": 4.406716764078301e-06, - "loss": 0.1403, + "loss": 0.0181, "step": 8404 }, { "epoch": 2.34, "learning_rate": 4.404861304388162e-06, - "loss": 0.296, + "loss": 0.0133, "step": 8405 }, { "epoch": 2.34, "learning_rate": 4.403005844698024e-06, - "loss": 0.0297, + "loss": 0.0214, "step": 8406 }, { "epoch": 2.34, "learning_rate": 4.401150385007886e-06, - "loss": 0.2976, + "loss": 0.1637, "step": 8407 }, { "epoch": 2.34, "learning_rate": 4.399294925317748e-06, - "loss": 0.0834, + "loss": 0.0123, "step": 8408 }, { "epoch": 2.34, "learning_rate": 4.39743946562761e-06, - "loss": 0.1918, + "loss": 0.052, "step": 8409 }, { "epoch": 2.34, "learning_rate": 4.395584005937472e-06, - "loss": 0.0849, + "loss": 0.0533, "step": 8410 }, { "epoch": 2.34, "learning_rate": 4.393728546247334e-06, - "loss": 0.1936, + "loss": 0.0217, "step": 8411 }, { "epoch": 2.34, "learning_rate": 4.3918730865571955e-06, - "loss": 0.1964, + "loss": 0.0193, "step": 8412 }, { "epoch": 2.34, "learning_rate": 4.390017626867057e-06, - "loss": 0.1411, + "loss": 0.0748, "step": 8413 }, { "epoch": 2.34, "learning_rate": 4.388162167176918e-06, - "loss": 0.1388, + "loss": 0.0093, "step": 8414 }, { "epoch": 2.34, "learning_rate": 4.38630670748678e-06, - "loss": 0.0321, + "loss": 0.0619, "step": 8415 }, { "epoch": 2.34, "learning_rate": 4.384451247796642e-06, - "loss": 0.1405, + "loss": 0.0155, "step": 8416 }, { "epoch": 2.34, "learning_rate": 4.382595788106504e-06, - "loss": 0.14, + "loss": 0.0656, "step": 8417 }, { "epoch": 2.34, "learning_rate": 4.380740328416366e-06, - "loss": 0.0843, + "loss": 0.0056, "step": 8418 }, { "epoch": 2.34, "learning_rate": 4.378884868726228e-06, - "loss": 0.1929, + "loss": 0.1353, "step": 8419 }, { "epoch": 2.34, "learning_rate": 4.3770294090360896e-06, - "loss": 0.0328, + "loss": 0.046, "step": 8420 }, { "epoch": 2.34, "learning_rate": 4.3751739493459514e-06, - "loss": 0.0321, + "loss": 0.0416, "step": 8421 }, { "epoch": 2.34, "learning_rate": 4.3733184896558125e-06, - "loss": 0.0846, + "loss": 0.0154, "step": 8422 }, { "epoch": 2.34, "learning_rate": 4.371463029965674e-06, - "loss": 0.141, + "loss": 0.0569, "step": 8423 }, { "epoch": 2.34, "learning_rate": 4.369607570275536e-06, - "loss": 0.1394, + "loss": 0.0733, "step": 8424 }, { "epoch": 2.34, "learning_rate": 4.367752110585398e-06, - "loss": 0.138, + "loss": 0.1984, "step": 8425 }, { "epoch": 2.35, "learning_rate": 4.365896650895259e-06, - "loss": 0.1389, + "loss": 0.0166, "step": 8426 }, { "epoch": 2.35, "learning_rate": 4.364041191205121e-06, - "loss": 0.0321, + "loss": 0.138, "step": 8427 }, { "epoch": 2.35, "learning_rate": 4.362185731514983e-06, - "loss": 0.1438, + "loss": 0.0096, "step": 8428 }, { "epoch": 2.35, "learning_rate": 4.360330271824845e-06, - "loss": 0.0858, + "loss": 0.0508, "step": 8429 }, { "epoch": 2.35, "learning_rate": 4.358474812134707e-06, - "loss": 0.1929, + "loss": 0.1432, "step": 8430 }, { "epoch": 2.35, "learning_rate": 4.3566193524445685e-06, - "loss": 0.0313, + "loss": 0.0683, "step": 8431 }, { "epoch": 2.35, "learning_rate": 4.35476389275443e-06, - "loss": 0.083, + "loss": 0.0931, "step": 8432 }, { "epoch": 2.35, "learning_rate": 4.352908433064292e-06, - "loss": 0.0316, + "loss": 0.1203, "step": 8433 }, { "epoch": 2.35, "learning_rate": 4.351052973374153e-06, - "loss": 0.1388, + "loss": 0.0691, "step": 8434 }, { "epoch": 2.35, "learning_rate": 4.349197513684015e-06, - "loss": 0.1404, + "loss": 0.0118, "step": 8435 }, { "epoch": 2.35, "learning_rate": 4.347342053993877e-06, - "loss": 0.2531, + "loss": 0.0905, "step": 8436 }, { "epoch": 2.35, "learning_rate": 4.345486594303739e-06, - "loss": 0.0298, + "loss": 0.1581, "step": 8437 }, { "epoch": 2.35, "learning_rate": 4.343631134613601e-06, - "loss": 0.0834, + "loss": 0.023, "step": 8438 }, { "epoch": 2.35, "learning_rate": 4.3417756749234626e-06, - "loss": 0.0843, + "loss": 0.0161, "step": 8439 }, { "epoch": 2.35, "learning_rate": 4.3399202152333244e-06, - "loss": 0.2516, + "loss": 0.0114, "step": 8440 }, { "epoch": 2.35, "learning_rate": 4.338064755543186e-06, - "loss": 0.0811, + "loss": 0.0425, "step": 8441 }, { "epoch": 2.35, "learning_rate": 4.336209295853047e-06, - "loss": 0.1941, + "loss": 0.106, "step": 8442 }, { "epoch": 2.35, "learning_rate": 4.334353836162909e-06, - "loss": 0.1417, + "loss": 0.0366, "step": 8443 }, { "epoch": 2.35, "learning_rate": 4.332498376472771e-06, - "loss": 0.0834, + "loss": 0.0663, "step": 8444 }, { "epoch": 2.35, "learning_rate": 4.330642916782633e-06, - "loss": 0.1413, + "loss": 0.1052, "step": 8445 }, { "epoch": 2.35, "learning_rate": 4.328787457092495e-06, - "loss": 0.0857, + "loss": 0.1442, "step": 8446 }, { "epoch": 2.35, "learning_rate": 4.326931997402357e-06, - "loss": 0.0299, + "loss": 0.0169, "step": 8447 }, { "epoch": 2.35, "learning_rate": 4.3250765377122186e-06, - "loss": 0.082, + "loss": 0.0792, "step": 8448 }, { "epoch": 2.35, "learning_rate": 4.3232210780220804e-06, - "loss": 0.1416, + "loss": 0.045, "step": 8449 }, { "epoch": 2.35, "learning_rate": 4.321365618331942e-06, - "loss": 0.0291, + "loss": 0.1144, "step": 8450 }, { "epoch": 2.35, "learning_rate": 4.319510158641803e-06, - "loss": 0.2501, + "loss": 0.0601, "step": 8451 }, { "epoch": 2.35, "learning_rate": 4.317654698951665e-06, - "loss": 0.0291, + "loss": 0.0479, "step": 8452 }, { "epoch": 2.35, "learning_rate": 4.315799239261527e-06, - "loss": 0.2486, + "loss": 0.0671, "step": 8453 }, { "epoch": 2.35, "learning_rate": 4.313943779571389e-06, - "loss": 0.2525, + "loss": 0.0539, "step": 8454 }, { "epoch": 2.35, "learning_rate": 4.312088319881251e-06, - "loss": 0.1407, + "loss": 0.052, "step": 8455 }, { "epoch": 2.35, "learning_rate": 4.310232860191113e-06, - "loss": 0.1392, + "loss": 0.1594, "step": 8456 }, { "epoch": 2.35, "learning_rate": 4.3083774005009745e-06, - "loss": 0.0838, + "loss": 0.0237, "step": 8457 }, { "epoch": 2.35, "learning_rate": 4.306521940810836e-06, - "loss": 0.1954, + "loss": 0.0658, "step": 8458 }, { "epoch": 2.35, "learning_rate": 4.3046664811206974e-06, - "loss": 0.1945, + "loss": 0.1024, "step": 8459 }, { "epoch": 2.35, "learning_rate": 4.302811021430559e-06, - "loss": 0.1397, + "loss": 0.0131, "step": 8460 }, { "epoch": 2.35, "learning_rate": 4.300955561740421e-06, - "loss": 0.1391, + "loss": 0.0649, "step": 8461 }, { "epoch": 2.36, "learning_rate": 4.299100102050283e-06, - "loss": 0.0859, + "loss": 0.0118, "step": 8462 }, { "epoch": 2.36, "learning_rate": 4.297244642360145e-06, - "loss": 0.1938, + "loss": 0.1915, "step": 8463 }, { "epoch": 2.36, "learning_rate": 4.295389182670007e-06, - "loss": 0.0882, + "loss": 0.0187, "step": 8464 }, { "epoch": 2.36, "learning_rate": 4.293533722979869e-06, - "loss": 0.14, + "loss": 0.1693, "step": 8465 }, { "epoch": 2.36, "learning_rate": 4.2916782632897305e-06, - "loss": 0.084, + "loss": 0.1352, "step": 8466 }, { "epoch": 2.36, "learning_rate": 4.289822803599592e-06, - "loss": 0.0294, + "loss": 0.0145, "step": 8467 }, { "epoch": 2.36, "learning_rate": 4.2879673439094534e-06, - "loss": 0.1951, + "loss": 0.2196, "step": 8468 }, { "epoch": 2.36, "learning_rate": 4.286111884219315e-06, - "loss": 0.0296, + "loss": 0.0238, "step": 8469 }, { "epoch": 2.36, "learning_rate": 4.284256424529177e-06, - "loss": 0.3058, + "loss": 0.0536, "step": 8470 }, { "epoch": 2.36, "learning_rate": 4.282400964839039e-06, - "loss": 0.029, + "loss": 0.061, "step": 8471 }, { "epoch": 2.36, "learning_rate": 4.280545505148901e-06, - "loss": 0.0851, + "loss": 0.0837, "step": 8472 }, { "epoch": 2.36, "learning_rate": 4.278690045458763e-06, - "loss": 0.2513, + "loss": 0.0222, "step": 8473 }, { "epoch": 2.36, "learning_rate": 4.276834585768625e-06, - "loss": 0.0857, + "loss": 0.0223, "step": 8474 }, { "epoch": 2.36, "learning_rate": 4.2749791260784865e-06, - "loss": 0.0281, + "loss": 0.0302, "step": 8475 }, { "epoch": 2.36, "learning_rate": 4.2731236663883475e-06, - "loss": 0.1355, + "loss": 0.0241, "step": 8476 }, { "epoch": 2.36, "learning_rate": 4.271268206698209e-06, - "loss": 0.195, + "loss": 0.0175, "step": 8477 }, { "epoch": 2.36, "learning_rate": 4.269412747008071e-06, - "loss": 0.3571, + "loss": 0.0607, "step": 8478 }, { "epoch": 2.36, "learning_rate": 4.267557287317933e-06, - "loss": 0.1951, + "loss": 0.0419, "step": 8479 }, { "epoch": 2.36, "learning_rate": 4.265701827627795e-06, - "loss": 0.1424, + "loss": 0.1054, "step": 8480 }, { "epoch": 2.36, "learning_rate": 4.263846367937657e-06, - "loss": 0.1367, + "loss": 0.0618, "step": 8481 }, { "epoch": 2.36, "learning_rate": 4.261990908247519e-06, - "loss": 0.0842, + "loss": 0.0629, "step": 8482 }, { "epoch": 2.36, "learning_rate": 4.260135448557381e-06, - "loss": 0.0292, + "loss": 0.0488, "step": 8483 }, { "epoch": 2.36, "learning_rate": 4.2582799888672425e-06, - "loss": 0.1416, + "loss": 0.2612, "step": 8484 }, { "epoch": 2.36, "learning_rate": 4.2564245291771035e-06, - "loss": 0.0849, + "loss": 0.0894, "step": 8485 }, { "epoch": 2.36, "learning_rate": 4.254569069486965e-06, - "loss": 0.1923, + "loss": 0.0469, "step": 8486 }, { "epoch": 2.36, "learning_rate": 4.252713609796827e-06, - "loss": 0.1382, + "loss": 0.1228, "step": 8487 }, { "epoch": 2.36, "learning_rate": 4.250858150106689e-06, - "loss": 0.0294, + "loss": 0.0786, "step": 8488 }, { "epoch": 2.36, "learning_rate": 4.249002690416551e-06, - "loss": 0.0845, + "loss": 0.0263, "step": 8489 }, { "epoch": 2.36, "learning_rate": 4.247147230726413e-06, - "loss": 0.0843, + "loss": 0.0152, "step": 8490 }, { "epoch": 2.36, "learning_rate": 4.245291771036275e-06, - "loss": 0.1399, + "loss": 0.0815, "step": 8491 }, { "epoch": 2.36, "learning_rate": 4.243436311346137e-06, - "loss": 0.0291, + "loss": 0.1107, "step": 8492 }, { "epoch": 2.36, "learning_rate": 4.2415808516559985e-06, - "loss": 0.0297, + "loss": 0.054, "step": 8493 }, { "epoch": 2.36, "learning_rate": 4.2397253919658595e-06, - "loss": 0.1368, + "loss": 0.0584, "step": 8494 }, { "epoch": 2.36, "learning_rate": 4.237869932275721e-06, - "loss": 0.1976, + "loss": 0.1012, "step": 8495 }, { "epoch": 2.36, "learning_rate": 4.236014472585583e-06, - "loss": 0.1421, + "loss": 0.1014, "step": 8496 }, { "epoch": 2.36, "learning_rate": 4.234159012895445e-06, - "loss": 0.1952, + "loss": 0.0689, "step": 8497 }, { "epoch": 2.37, "learning_rate": 4.232303553205307e-06, - "loss": 0.0832, + "loss": 0.0547, "step": 8498 }, { "epoch": 2.37, "learning_rate": 4.230448093515169e-06, - "loss": 0.0814, + "loss": 0.0979, "step": 8499 }, { "epoch": 2.37, "learning_rate": 4.228592633825031e-06, - "loss": 0.1968, + "loss": 0.2425, "step": 8500 }, { "epoch": 2.37, "learning_rate": 4.226737174134893e-06, - "loss": 0.0292, + "loss": 0.067, "step": 8501 }, { "epoch": 2.37, "learning_rate": 4.224881714444754e-06, - "loss": 0.0808, + "loss": 0.1003, "step": 8502 }, { "epoch": 2.37, "learning_rate": 4.2230262547546155e-06, - "loss": 0.1381, + "loss": 0.0707, "step": 8503 }, { "epoch": 2.37, "learning_rate": 4.221170795064477e-06, - "loss": 0.1936, + "loss": 0.0596, "step": 8504 }, { "epoch": 2.37, "learning_rate": 4.219315335374339e-06, - "loss": 0.0272, + "loss": 0.0453, "step": 8505 }, { "epoch": 2.37, "learning_rate": 4.217459875684201e-06, - "loss": 0.1428, + "loss": 0.018, "step": 8506 }, { "epoch": 2.37, "learning_rate": 4.215604415994063e-06, - "loss": 0.2512, + "loss": 0.1257, "step": 8507 }, { "epoch": 2.37, "learning_rate": 4.213748956303925e-06, - "loss": 0.1958, + "loss": 0.0876, "step": 8508 }, { "epoch": 2.37, "learning_rate": 4.211893496613787e-06, - "loss": 0.136, + "loss": 0.0224, "step": 8509 }, { "epoch": 2.37, "learning_rate": 4.210038036923649e-06, - "loss": 0.2417, + "loss": 0.1022, "step": 8510 }, { "epoch": 2.37, "learning_rate": 4.20818257723351e-06, - "loss": 0.0843, + "loss": 0.0758, "step": 8511 }, { "epoch": 2.37, "learning_rate": 4.2063271175433715e-06, - "loss": 0.202, + "loss": 0.0123, "step": 8512 }, { "epoch": 2.37, "learning_rate": 4.204471657853233e-06, - "loss": 0.1415, + "loss": 0.0892, "step": 8513 }, { "epoch": 2.37, "learning_rate": 4.202616198163095e-06, - "loss": 0.138, + "loss": 0.0477, "step": 8514 }, { "epoch": 2.37, "learning_rate": 4.200760738472957e-06, - "loss": 0.0829, + "loss": 0.0899, "step": 8515 }, { "epoch": 2.37, "learning_rate": 4.198905278782819e-06, - "loss": 0.2468, + "loss": 0.027, "step": 8516 }, { "epoch": 2.37, "learning_rate": 4.197049819092681e-06, - "loss": 0.0835, + "loss": 0.0179, "step": 8517 }, { "epoch": 2.37, "learning_rate": 4.195194359402543e-06, - "loss": 0.3052, + "loss": 0.0523, "step": 8518 }, { "epoch": 2.37, "learning_rate": 4.193338899712404e-06, - "loss": 0.085, + "loss": 0.0832, "step": 8519 }, { "epoch": 2.37, "learning_rate": 4.191483440022266e-06, - "loss": 0.137, + "loss": 0.1262, "step": 8520 }, { "epoch": 2.37, "learning_rate": 4.1896279803321275e-06, - "loss": 0.0861, + "loss": 0.0952, "step": 8521 }, { "epoch": 2.37, "learning_rate": 4.187772520641989e-06, - "loss": 0.0866, + "loss": 0.0173, "step": 8522 }, { "epoch": 2.37, "learning_rate": 4.185917060951851e-06, - "loss": 0.1389, + "loss": 0.0154, "step": 8523 }, { "epoch": 2.37, "learning_rate": 4.184061601261713e-06, - "loss": 0.1948, + "loss": 0.044, "step": 8524 }, { "epoch": 2.37, "learning_rate": 4.182206141571575e-06, - "loss": 0.086, + "loss": 0.041, "step": 8525 }, { "epoch": 2.37, "learning_rate": 4.180350681881437e-06, - "loss": 0.1938, + "loss": 0.0434, "step": 8526 }, { "epoch": 2.37, "learning_rate": 4.178495222191299e-06, - "loss": 0.086, + "loss": 0.113, "step": 8527 }, { "epoch": 2.37, "learning_rate": 4.17663976250116e-06, - "loss": 0.0298, + "loss": 0.1646, "step": 8528 }, { "epoch": 2.37, "learning_rate": 4.174784302811022e-06, - "loss": 0.0304, + "loss": 0.0924, "step": 8529 }, { "epoch": 2.37, "learning_rate": 4.1729288431208835e-06, - "loss": 0.1393, + "loss": 0.0202, "step": 8530 }, { "epoch": 2.37, "learning_rate": 4.171073383430745e-06, - "loss": 0.0836, + "loss": 0.0083, "step": 8531 }, { "epoch": 2.37, "learning_rate": 4.169217923740607e-06, - "loss": 0.133, + "loss": 0.1156, "step": 8532 }, { "epoch": 2.37, "learning_rate": 4.167362464050469e-06, - "loss": 0.0853, + "loss": 0.0796, "step": 8533 }, { "epoch": 2.38, "learning_rate": 4.165507004360331e-06, - "loss": 0.03, + "loss": 0.0452, "step": 8534 }, { "epoch": 2.38, "learning_rate": 4.163651544670193e-06, - "loss": 0.2492, + "loss": 0.0473, "step": 8535 }, { "epoch": 2.38, "learning_rate": 4.161796084980054e-06, - "loss": 0.0849, + "loss": 0.0398, "step": 8536 }, { "epoch": 2.38, "learning_rate": 4.159940625289916e-06, - "loss": 0.1387, + "loss": 0.0873, "step": 8537 }, { "epoch": 2.38, "learning_rate": 4.158085165599778e-06, - "loss": 0.25, + "loss": 0.018, "step": 8538 }, { "epoch": 2.38, "learning_rate": 4.1562297059096395e-06, - "loss": 0.0842, + "loss": 0.1461, "step": 8539 }, { "epoch": 2.38, "learning_rate": 4.154374246219501e-06, - "loss": 0.138, + "loss": 0.1114, "step": 8540 }, { "epoch": 2.38, "learning_rate": 4.152518786529363e-06, - "loss": 0.138, + "loss": 0.1568, "step": 8541 }, { "epoch": 2.38, "learning_rate": 4.150663326839225e-06, - "loss": 0.2475, + "loss": 0.0579, "step": 8542 }, { "epoch": 2.38, "learning_rate": 4.148807867149087e-06, - "loss": 0.3132, + "loss": 0.1634, "step": 8543 }, { "epoch": 2.38, "learning_rate": 4.146952407458949e-06, - "loss": 0.1416, + "loss": 0.1149, "step": 8544 }, { "epoch": 2.38, "learning_rate": 4.14509694776881e-06, - "loss": 0.1393, + "loss": 0.0193, "step": 8545 }, { "epoch": 2.38, "learning_rate": 4.143241488078672e-06, - "loss": 0.3058, + "loss": 0.0175, "step": 8546 }, { "epoch": 2.38, "learning_rate": 4.141386028388534e-06, - "loss": 0.1397, + "loss": 0.0411, "step": 8547 }, { "epoch": 2.38, "learning_rate": 4.1395305686983955e-06, - "loss": 0.1359, + "loss": 0.0472, "step": 8548 }, { "epoch": 2.38, "learning_rate": 4.137675109008257e-06, - "loss": 0.0859, + "loss": 0.0127, "step": 8549 }, { "epoch": 2.38, "learning_rate": 4.135819649318119e-06, - "loss": 0.1391, + "loss": 0.0484, "step": 8550 }, { "epoch": 2.38, "learning_rate": 4.133964189627981e-06, - "loss": 0.141, + "loss": 0.0786, "step": 8551 }, { "epoch": 2.38, "learning_rate": 4.132108729937843e-06, - "loss": 0.1388, + "loss": 0.0283, "step": 8552 }, { "epoch": 2.38, "learning_rate": 4.130253270247705e-06, - "loss": 0.0304, + "loss": 0.1319, "step": 8553 }, { "epoch": 2.38, "learning_rate": 4.128397810557566e-06, - "loss": 0.0848, + "loss": 0.0461, "step": 8554 }, { "epoch": 2.38, "learning_rate": 4.126542350867428e-06, - "loss": 0.138, + "loss": 0.0751, "step": 8555 }, { "epoch": 2.38, "learning_rate": 4.1246868911772896e-06, - "loss": 0.1974, + "loss": 0.1104, "step": 8556 }, { "epoch": 2.38, "learning_rate": 4.1228314314871514e-06, - "loss": 0.0308, + "loss": 0.0381, "step": 8557 }, { "epoch": 2.38, "learning_rate": 4.120975971797013e-06, - "loss": 0.135, + "loss": 0.0258, "step": 8558 }, { "epoch": 2.38, "learning_rate": 4.119120512106875e-06, - "loss": 0.1961, + "loss": 0.0172, "step": 8559 }, { "epoch": 2.38, "learning_rate": 4.117265052416737e-06, - "loss": 0.0839, + "loss": 0.0994, "step": 8560 }, { "epoch": 2.38, "learning_rate": 4.115409592726599e-06, - "loss": 0.0823, + "loss": 0.197, "step": 8561 }, { "epoch": 2.38, "learning_rate": 4.11355413303646e-06, - "loss": 0.0858, + "loss": 0.0834, "step": 8562 }, { "epoch": 2.38, "learning_rate": 4.111698673346322e-06, - "loss": 0.194, + "loss": 0.1111, "step": 8563 }, { "epoch": 2.38, "learning_rate": 4.109843213656184e-06, - "loss": 0.1929, + "loss": 0.1588, "step": 8564 }, { "epoch": 2.38, "learning_rate": 4.1079877539660456e-06, - "loss": 0.0298, + "loss": 0.1051, "step": 8565 }, { "epoch": 2.38, "learning_rate": 4.1061322942759074e-06, - "loss": 0.086, + "loss": 0.0729, "step": 8566 }, { "epoch": 2.38, "learning_rate": 4.104276834585769e-06, - "loss": 0.0856, + "loss": 0.0784, "step": 8567 }, { "epoch": 2.38, "learning_rate": 4.102421374895631e-06, - "loss": 0.194, + "loss": 0.1464, "step": 8568 }, { "epoch": 2.38, "learning_rate": 4.100565915205493e-06, - "loss": 0.0307, + "loss": 0.326, "step": 8569 }, { "epoch": 2.39, "learning_rate": 4.098710455515355e-06, - "loss": 0.1406, + "loss": 0.02, "step": 8570 }, { "epoch": 2.39, "learning_rate": 4.096854995825216e-06, - "loss": 0.1928, + "loss": 0.1019, "step": 8571 }, { "epoch": 2.39, "learning_rate": 4.094999536135078e-06, - "loss": 0.1391, + "loss": 0.0527, "step": 8572 }, { "epoch": 2.39, "learning_rate": 4.09314407644494e-06, - "loss": 0.0862, + "loss": 0.0741, "step": 8573 }, { "epoch": 2.39, "learning_rate": 4.0912886167548015e-06, - "loss": 0.0294, + "loss": 0.0254, "step": 8574 }, { "epoch": 2.39, "learning_rate": 4.089433157064663e-06, - "loss": 0.1906, + "loss": 0.0233, "step": 8575 }, { "epoch": 2.39, "learning_rate": 4.087577697374525e-06, - "loss": 0.2511, + "loss": 0.0185, "step": 8576 }, { "epoch": 2.39, "learning_rate": 4.085722237684387e-06, - "loss": 0.137, + "loss": 0.0156, "step": 8577 }, { "epoch": 2.39, "learning_rate": 4.083866777994248e-06, - "loss": 0.3507, + "loss": 0.0964, "step": 8578 }, { "epoch": 2.39, "learning_rate": 4.08201131830411e-06, - "loss": 0.0867, + "loss": 0.0266, "step": 8579 }, { "epoch": 2.39, "learning_rate": 4.080155858613972e-06, - "loss": 0.0303, + "loss": 0.0598, "step": 8580 }, { "epoch": 2.39, "learning_rate": 4.078300398923834e-06, - "loss": 0.1356, + "loss": 0.0656, "step": 8581 }, { "epoch": 2.39, "learning_rate": 4.076444939233695e-06, - "loss": 0.142, + "loss": 0.0939, "step": 8582 }, { "epoch": 2.39, "learning_rate": 4.074589479543557e-06, - "loss": 0.1914, + "loss": 0.0985, "step": 8583 }, { "epoch": 2.39, "learning_rate": 4.0727340198534186e-06, - "loss": 0.1404, + "loss": 0.0771, "step": 8584 }, { "epoch": 2.39, "learning_rate": 4.0708785601632804e-06, - "loss": 0.0875, + "loss": 0.0747, "step": 8585 }, { "epoch": 2.39, "learning_rate": 4.069023100473142e-06, - "loss": 0.1415, + "loss": 0.1104, "step": 8586 }, { "epoch": 2.39, "learning_rate": 4.067167640783004e-06, - "loss": 0.087, + "loss": 0.1063, "step": 8587 }, { "epoch": 2.39, "learning_rate": 4.065312181092866e-06, - "loss": 0.19, + "loss": 0.0181, "step": 8588 }, { "epoch": 2.39, "learning_rate": 4.063456721402728e-06, - "loss": 0.193, + "loss": 0.013, "step": 8589 }, { "epoch": 2.39, "learning_rate": 4.06160126171259e-06, - "loss": 0.0866, + "loss": 0.0221, "step": 8590 }, { "epoch": 2.39, "learning_rate": 4.059745802022451e-06, - "loss": 0.2524, + "loss": 0.151, "step": 8591 }, { "epoch": 2.39, "learning_rate": 4.057890342332313e-06, - "loss": 0.1928, + "loss": 0.2587, "step": 8592 }, { "epoch": 2.39, "learning_rate": 4.0560348826421745e-06, - "loss": 0.0855, + "loss": 0.0198, "step": 8593 }, { "epoch": 2.39, "learning_rate": 4.054179422952036e-06, - "loss": 0.1411, + "loss": 0.0611, "step": 8594 }, { "epoch": 2.39, "learning_rate": 4.052323963261898e-06, - "loss": 0.1425, + "loss": 0.0606, "step": 8595 }, { "epoch": 2.39, "learning_rate": 4.05046850357176e-06, - "loss": 0.2434, + "loss": 0.0193, "step": 8596 }, { "epoch": 2.39, "learning_rate": 4.048613043881622e-06, - "loss": 0.0855, + "loss": 0.0345, "step": 8597 }, { "epoch": 2.39, "learning_rate": 4.046757584191484e-06, - "loss": 0.139, + "loss": 0.0649, "step": 8598 }, { "epoch": 2.39, "learning_rate": 4.044902124501345e-06, - "loss": 0.1362, + "loss": 0.0107, "step": 8599 }, { "epoch": 2.39, "learning_rate": 4.043046664811207e-06, - "loss": 0.1975, + "loss": 0.1149, "step": 8600 }, { "epoch": 2.39, "learning_rate": 4.041191205121069e-06, - "loss": 0.1937, + "loss": 0.0212, "step": 8601 }, { "epoch": 2.39, "learning_rate": 4.0393357454309305e-06, - "loss": 0.192, + "loss": 0.0139, "step": 8602 }, { "epoch": 2.39, "learning_rate": 4.037480285740792e-06, - "loss": 0.0843, + "loss": 0.0457, "step": 8603 }, { "epoch": 2.39, "learning_rate": 4.035624826050654e-06, - "loss": 0.139, + "loss": 0.074, "step": 8604 }, { "epoch": 2.39, "learning_rate": 4.033769366360516e-06, - "loss": 0.1391, + "loss": 0.1041, "step": 8605 }, { "epoch": 2.4, "learning_rate": 4.031913906670378e-06, - "loss": 0.1425, + "loss": 0.055, "step": 8606 }, { "epoch": 2.4, "learning_rate": 4.03005844698024e-06, - "loss": 0.1405, + "loss": 0.0583, "step": 8607 }, { "epoch": 2.4, "learning_rate": 4.028202987290101e-06, - "loss": 0.2967, + "loss": 0.0553, "step": 8608 }, { "epoch": 2.4, "learning_rate": 4.026347527599963e-06, - "loss": 0.0316, + "loss": 0.0142, "step": 8609 }, { "epoch": 2.4, "learning_rate": 4.024492067909825e-06, - "loss": 0.0857, + "loss": 0.0934, "step": 8610 }, { "epoch": 2.4, "learning_rate": 4.0226366082196865e-06, - "loss": 0.1408, + "loss": 0.0563, "step": 8611 }, { "epoch": 2.4, "learning_rate": 4.020781148529548e-06, - "loss": 0.19, + "loss": 0.0272, "step": 8612 }, { "epoch": 2.4, "learning_rate": 4.01892568883941e-06, - "loss": 0.0847, + "loss": 0.1089, "step": 8613 }, { "epoch": 2.4, "learning_rate": 4.017070229149272e-06, - "loss": 0.1935, + "loss": 0.0803, "step": 8614 }, { "epoch": 2.4, "learning_rate": 4.015214769459134e-06, - "loss": 0.0865, + "loss": 0.0195, "step": 8615 }, { "epoch": 2.4, "learning_rate": 4.013359309768996e-06, - "loss": 0.2429, + "loss": 0.0246, "step": 8616 }, { "epoch": 2.4, "learning_rate": 4.011503850078857e-06, - "loss": 0.1909, + "loss": 0.0654, "step": 8617 }, { "epoch": 2.4, "learning_rate": 4.009648390388719e-06, - "loss": 0.1399, + "loss": 0.086, "step": 8618 }, { "epoch": 2.4, "learning_rate": 4.007792930698581e-06, - "loss": 0.1921, + "loss": 0.0541, "step": 8619 }, { "epoch": 2.4, "learning_rate": 4.0059374710084425e-06, - "loss": 0.0322, + "loss": 0.0438, "step": 8620 }, { "epoch": 2.4, "learning_rate": 4.004082011318304e-06, - "loss": 0.1368, + "loss": 0.0571, "step": 8621 }, { "epoch": 2.4, "learning_rate": 4.002226551628166e-06, - "loss": 0.1901, + "loss": 0.0414, "step": 8622 }, { "epoch": 2.4, "learning_rate": 4.000371091938028e-06, - "loss": 0.2873, + "loss": 0.071, "step": 8623 }, { "epoch": 2.4, "learning_rate": 3.99851563224789e-06, - "loss": 0.1932, + "loss": 0.1629, "step": 8624 }, { "epoch": 2.4, "learning_rate": 3.996660172557751e-06, - "loss": 0.3024, + "loss": 0.015, "step": 8625 }, { "epoch": 2.4, "learning_rate": 3.994804712867613e-06, - "loss": 0.1393, + "loss": 0.0415, "step": 8626 }, { "epoch": 2.4, "learning_rate": 3.992949253177475e-06, - "loss": 0.0875, + "loss": 0.0949, "step": 8627 }, { "epoch": 2.4, "learning_rate": 3.991093793487337e-06, - "loss": 0.1953, + "loss": 0.0908, "step": 8628 }, { "epoch": 2.4, "learning_rate": 3.9892383337971985e-06, - "loss": 0.0874, + "loss": 0.0447, "step": 8629 }, { "epoch": 2.4, "learning_rate": 3.98738287410706e-06, - "loss": 0.1364, + "loss": 0.0061, "step": 8630 }, { "epoch": 2.4, "learning_rate": 3.985527414416922e-06, - "loss": 0.2942, + "loss": 0.0786, "step": 8631 }, { "epoch": 2.4, "learning_rate": 3.983671954726784e-06, - "loss": 0.0339, + "loss": 0.14, "step": 8632 }, { "epoch": 2.4, "learning_rate": 3.981816495036646e-06, - "loss": 0.1382, + "loss": 0.0439, "step": 8633 }, { "epoch": 2.4, "learning_rate": 3.979961035346507e-06, - "loss": 0.088, + "loss": 0.0101, "step": 8634 }, { "epoch": 2.4, "learning_rate": 3.978105575656369e-06, - "loss": 0.2419, + "loss": 0.0182, "step": 8635 }, { "epoch": 2.4, "learning_rate": 3.976250115966231e-06, - "loss": 0.0364, + "loss": 0.0496, "step": 8636 }, { "epoch": 2.4, "learning_rate": 3.974394656276093e-06, - "loss": 0.0833, + "loss": 0.0222, "step": 8637 }, { "epoch": 2.4, "learning_rate": 3.9725391965859545e-06, - "loss": 0.0855, + "loss": 0.0217, "step": 8638 }, { "epoch": 2.4, "learning_rate": 3.970683736895816e-06, - "loss": 0.0361, + "loss": 0.0315, "step": 8639 }, { "epoch": 2.4, "learning_rate": 3.968828277205678e-06, - "loss": 0.1931, + "loss": 0.0815, "step": 8640 }, { "epoch": 2.4, "learning_rate": 3.96697281751554e-06, - "loss": 0.0876, + "loss": 0.0791, "step": 8641 }, { "epoch": 2.41, "learning_rate": 3.965117357825401e-06, - "loss": 0.2462, + "loss": 0.0077, "step": 8642 }, { "epoch": 2.41, "learning_rate": 3.963261898135263e-06, - "loss": 0.1904, + "loss": 0.1225, "step": 8643 }, { "epoch": 2.41, "learning_rate": 3.961406438445125e-06, - "loss": 0.1926, + "loss": 0.0345, "step": 8644 }, { "epoch": 2.41, "learning_rate": 3.959550978754987e-06, - "loss": 0.1374, + "loss": 0.0332, "step": 8645 }, { "epoch": 2.41, "learning_rate": 3.957695519064849e-06, - "loss": 0.1921, + "loss": 0.0875, "step": 8646 }, { "epoch": 2.41, "learning_rate": 3.9558400593747105e-06, - "loss": 0.0327, + "loss": 0.1431, "step": 8647 }, { "epoch": 2.41, "learning_rate": 3.953984599684572e-06, - "loss": 0.139, + "loss": 0.0863, "step": 8648 }, { "epoch": 2.41, "learning_rate": 3.952129139994434e-06, - "loss": 0.0832, + "loss": 0.0461, "step": 8649 }, { "epoch": 2.41, "learning_rate": 3.950273680304296e-06, - "loss": 0.1383, + "loss": 0.2593, "step": 8650 }, { "epoch": 2.41, "learning_rate": 3.948418220614157e-06, - "loss": 0.0869, + "loss": 0.1535, "step": 8651 }, { "epoch": 2.41, "learning_rate": 3.946562760924019e-06, - "loss": 0.1923, + "loss": 0.1302, "step": 8652 }, { "epoch": 2.41, "learning_rate": 3.944707301233881e-06, - "loss": 0.192, + "loss": 0.0395, "step": 8653 }, { "epoch": 2.41, "learning_rate": 3.942851841543743e-06, - "loss": 0.0863, + "loss": 0.1686, "step": 8654 }, { "epoch": 2.41, "learning_rate": 3.940996381853605e-06, - "loss": 0.4056, + "loss": 0.0457, "step": 8655 }, { "epoch": 2.41, "learning_rate": 3.9391409221634665e-06, - "loss": 0.1927, + "loss": 0.2127, "step": 8656 }, { "epoch": 2.41, "learning_rate": 3.937285462473328e-06, - "loss": 0.1893, + "loss": 0.1328, "step": 8657 }, { "epoch": 2.41, "learning_rate": 3.93543000278319e-06, - "loss": 0.1393, + "loss": 0.0862, "step": 8658 }, { "epoch": 2.41, "learning_rate": 3.933574543093051e-06, - "loss": 0.1368, + "loss": 0.111, "step": 8659 }, { "epoch": 2.41, "learning_rate": 3.931719083402913e-06, - "loss": 0.1393, + "loss": 0.1001, "step": 8660 }, { "epoch": 2.41, "learning_rate": 3.929863623712775e-06, - "loss": 0.1382, + "loss": 0.0115, "step": 8661 }, { "epoch": 2.41, "learning_rate": 3.928008164022637e-06, - "loss": 0.1883, + "loss": 0.0121, "step": 8662 }, { "epoch": 2.41, "learning_rate": 3.926152704332499e-06, - "loss": 0.0829, + "loss": 0.0573, "step": 8663 }, { "epoch": 2.41, "learning_rate": 3.924297244642361e-06, - "loss": 0.1382, + "loss": 0.1151, "step": 8664 }, { "epoch": 2.41, "learning_rate": 3.9224417849522225e-06, - "loss": 0.1399, + "loss": 0.0592, "step": 8665 }, { "epoch": 2.41, "learning_rate": 3.920586325262084e-06, - "loss": 0.0869, + "loss": 0.0552, "step": 8666 }, { "epoch": 2.41, "learning_rate": 3.918730865571946e-06, - "loss": 0.0347, + "loss": 0.0301, "step": 8667 }, { "epoch": 2.41, "learning_rate": 3.916875405881807e-06, - "loss": 0.0346, + "loss": 0.0673, "step": 8668 }, { "epoch": 2.41, "learning_rate": 3.915019946191669e-06, - "loss": 0.1913, + "loss": 0.0565, "step": 8669 }, { "epoch": 2.41, "learning_rate": 3.913164486501531e-06, - "loss": 0.2448, + "loss": 0.1266, "step": 8670 }, { "epoch": 2.41, "learning_rate": 3.911309026811393e-06, - "loss": 0.0881, + "loss": 0.0469, "step": 8671 }, { "epoch": 2.41, "learning_rate": 3.909453567121255e-06, - "loss": 0.1905, + "loss": 0.08, "step": 8672 }, { "epoch": 2.41, "learning_rate": 3.9075981074311166e-06, - "loss": 0.0332, + "loss": 0.0806, "step": 8673 }, { "epoch": 2.41, "learning_rate": 3.9057426477409784e-06, - "loss": 0.087, + "loss": 0.0187, "step": 8674 }, { "epoch": 2.41, "learning_rate": 3.90388718805084e-06, - "loss": 0.296, + "loss": 0.0679, "step": 8675 }, { "epoch": 2.41, "learning_rate": 3.902031728360702e-06, - "loss": 0.1907, + "loss": 0.0396, "step": 8676 }, { "epoch": 2.41, "learning_rate": 3.900176268670563e-06, - "loss": 0.1364, + "loss": 0.0156, "step": 8677 }, { "epoch": 2.42, "learning_rate": 3.898320808980425e-06, - "loss": 0.1381, + "loss": 0.0236, "step": 8678 }, { "epoch": 2.42, "learning_rate": 3.896465349290287e-06, - "loss": 0.3005, + "loss": 0.083, "step": 8679 }, { "epoch": 2.42, "learning_rate": 3.894609889600149e-06, - "loss": 0.3585, + "loss": 0.1074, "step": 8680 }, { "epoch": 2.42, "learning_rate": 3.892754429910011e-06, - "loss": 0.1361, + "loss": 0.0818, "step": 8681 }, { "epoch": 2.42, "learning_rate": 3.8908989702198726e-06, - "loss": 0.1416, + "loss": 0.0189, "step": 8682 }, { "epoch": 2.42, "learning_rate": 3.8890435105297344e-06, - "loss": 0.1404, + "loss": 0.0871, "step": 8683 }, { "epoch": 2.42, "learning_rate": 3.887188050839596e-06, - "loss": 0.1903, + "loss": 0.0633, "step": 8684 }, { "epoch": 2.42, "learning_rate": 3.885332591149457e-06, - "loss": 0.1861, + "loss": 0.1676, "step": 8685 }, { "epoch": 2.42, "learning_rate": 3.883477131459319e-06, - "loss": 0.2956, + "loss": 0.0516, "step": 8686 }, { "epoch": 2.42, "learning_rate": 3.881621671769181e-06, - "loss": 0.1882, + "loss": 0.0628, "step": 8687 }, { "epoch": 2.42, "learning_rate": 3.879766212079043e-06, - "loss": 0.086, + "loss": 0.0212, "step": 8688 }, { "epoch": 2.42, "learning_rate": 3.877910752388905e-06, - "loss": 0.1898, + "loss": 0.1193, "step": 8689 }, { "epoch": 2.42, "learning_rate": 3.876055292698767e-06, - "loss": 0.1369, + "loss": 0.0594, "step": 8690 }, { "epoch": 2.42, "learning_rate": 3.8741998330086285e-06, - "loss": 0.0905, + "loss": 0.0734, "step": 8691 }, { "epoch": 2.42, "learning_rate": 3.87234437331849e-06, - "loss": 0.2416, + "loss": 0.1403, "step": 8692 }, { "epoch": 2.42, "learning_rate": 3.870488913628352e-06, - "loss": 0.1885, + "loss": 0.0284, "step": 8693 }, { "epoch": 2.42, "learning_rate": 3.868633453938213e-06, - "loss": 0.0871, + "loss": 0.0747, "step": 8694 }, { "epoch": 2.42, "learning_rate": 3.866777994248075e-06, - "loss": 0.0872, + "loss": 0.0835, "step": 8695 }, { "epoch": 2.42, "learning_rate": 3.864922534557937e-06, - "loss": 0.0356, + "loss": 0.0759, "step": 8696 }, { "epoch": 2.42, "learning_rate": 3.863067074867799e-06, - "loss": 0.087, + "loss": 0.1006, "step": 8697 }, { "epoch": 2.42, "learning_rate": 3.861211615177661e-06, - "loss": 0.0902, + "loss": 0.0504, "step": 8698 }, { "epoch": 2.42, "learning_rate": 3.859356155487523e-06, - "loss": 0.1395, + "loss": 0.1171, "step": 8699 }, { "epoch": 2.42, "learning_rate": 3.8575006957973845e-06, - "loss": 0.1925, + "loss": 0.0735, "step": 8700 }, { "epoch": 2.42, "learning_rate": 3.855645236107246e-06, - "loss": 0.1408, + "loss": 0.0165, "step": 8701 }, { "epoch": 2.42, "learning_rate": 3.8537897764171074e-06, - "loss": 0.3478, + "loss": 0.0187, "step": 8702 }, { "epoch": 2.42, "learning_rate": 3.851934316726969e-06, - "loss": 0.4021, + "loss": 0.0182, "step": 8703 }, { "epoch": 2.42, "learning_rate": 3.850078857036831e-06, - "loss": 0.0897, + "loss": 0.054, "step": 8704 }, { "epoch": 2.42, "learning_rate": 3.848223397346693e-06, - "loss": 0.0876, + "loss": 0.047, "step": 8705 }, { "epoch": 2.42, "learning_rate": 3.846367937656555e-06, - "loss": 0.1915, + "loss": 0.0189, "step": 8706 }, { "epoch": 2.42, "learning_rate": 3.844512477966417e-06, - "loss": 0.1933, + "loss": 0.0509, "step": 8707 }, { "epoch": 2.42, "learning_rate": 3.842657018276279e-06, - "loss": 0.1921, + "loss": 0.0537, "step": 8708 }, { "epoch": 2.42, "learning_rate": 3.8408015585861405e-06, - "loss": 0.291, + "loss": 0.0933, "step": 8709 }, { "epoch": 2.42, "learning_rate": 3.838946098896002e-06, - "loss": 0.2441, + "loss": 0.0812, "step": 8710 }, { "epoch": 2.42, "learning_rate": 3.837090639205863e-06, - "loss": 0.1919, + "loss": 0.0529, "step": 8711 }, { "epoch": 2.42, "learning_rate": 3.835235179515725e-06, - "loss": 0.1874, + "loss": 0.1564, "step": 8712 }, { "epoch": 2.42, "learning_rate": 3.833379719825587e-06, - "loss": 0.1382, + "loss": 0.0131, "step": 8713 }, { "epoch": 2.43, "learning_rate": 3.831524260135449e-06, - "loss": 0.3974, + "loss": 0.0512, "step": 8714 }, { "epoch": 2.43, "learning_rate": 3.829668800445311e-06, - "loss": 0.0907, + "loss": 0.0157, "step": 8715 }, { "epoch": 2.43, "learning_rate": 3.827813340755173e-06, - "loss": 0.287, + "loss": 0.0653, "step": 8716 }, { "epoch": 2.43, "learning_rate": 3.825957881065035e-06, - "loss": 0.1379, + "loss": 0.0953, "step": 8717 }, { "epoch": 2.43, "learning_rate": 3.8241024213748965e-06, - "loss": 0.2898, + "loss": 0.0854, "step": 8718 }, { "epoch": 2.43, "learning_rate": 3.8222469616847575e-06, - "loss": 0.091, + "loss": 0.0831, "step": 8719 }, { "epoch": 2.43, "learning_rate": 3.820391501994619e-06, - "loss": 0.1404, + "loss": 0.0547, "step": 8720 }, { "epoch": 2.43, "learning_rate": 3.818536042304481e-06, - "loss": 0.1913, + "loss": 0.0811, "step": 8721 }, { "epoch": 2.43, "learning_rate": 3.816680582614343e-06, - "loss": 0.1945, + "loss": 0.1104, "step": 8722 }, { "epoch": 2.43, "learning_rate": 3.814825122924205e-06, - "loss": 0.1871, + "loss": 0.0213, "step": 8723 }, { "epoch": 2.43, "learning_rate": 3.812969663234067e-06, - "loss": 0.1862, + "loss": 0.0735, "step": 8724 }, { "epoch": 2.43, "learning_rate": 3.8111142035439288e-06, - "loss": 0.0932, + "loss": 0.0955, "step": 8725 }, { "epoch": 2.43, "learning_rate": 3.80925874385379e-06, - "loss": 0.0925, + "loss": 0.0572, "step": 8726 }, { "epoch": 2.43, "learning_rate": 3.807403284163652e-06, - "loss": 0.1425, + "loss": 0.1588, "step": 8727 }, { "epoch": 2.43, "learning_rate": 3.805547824473514e-06, - "loss": 0.1888, + "loss": 0.0432, "step": 8728 }, { "epoch": 2.43, "learning_rate": 3.803692364783376e-06, - "loss": 0.1859, + "loss": 0.0639, "step": 8729 }, { "epoch": 2.43, "learning_rate": 3.801836905093237e-06, - "loss": 0.0923, + "loss": 0.0472, "step": 8730 }, { "epoch": 2.43, "learning_rate": 3.7999814454030987e-06, - "loss": 0.1403, + "loss": 0.0189, "step": 8731 }, { "epoch": 2.43, "learning_rate": 3.7981259857129606e-06, - "loss": 0.1412, + "loss": 0.0565, "step": 8732 }, { "epoch": 2.43, "learning_rate": 3.796270526022822e-06, - "loss": 0.1907, + "loss": 0.0696, "step": 8733 }, { "epoch": 2.43, "learning_rate": 3.794415066332684e-06, - "loss": 0.142, + "loss": 0.0534, "step": 8734 }, { "epoch": 2.43, "learning_rate": 3.7925596066425458e-06, - "loss": 0.1871, + "loss": 0.0233, "step": 8735 }, { "epoch": 2.43, "learning_rate": 3.7907041469524076e-06, - "loss": 0.1437, + "loss": 0.0593, "step": 8736 }, { "epoch": 2.43, "learning_rate": 3.788848687262269e-06, - "loss": 0.192, + "loss": 0.0205, "step": 8737 }, { "epoch": 2.43, "learning_rate": 3.786993227572131e-06, - "loss": 0.0888, + "loss": 0.1029, "step": 8738 }, { "epoch": 2.43, "learning_rate": 3.785137767881993e-06, - "loss": 0.1392, + "loss": 0.0685, "step": 8739 }, { "epoch": 2.43, "learning_rate": 3.7832823081918547e-06, - "loss": 0.1422, + "loss": 0.0433, "step": 8740 }, { "epoch": 2.43, "learning_rate": 3.7814268485017166e-06, - "loss": 0.1907, + "loss": 0.0201, "step": 8741 }, { "epoch": 2.43, "learning_rate": 3.779571388811578e-06, - "loss": 0.1898, + "loss": 0.016, "step": 8742 }, { "epoch": 2.43, "learning_rate": 3.77771592912144e-06, - "loss": 0.042, + "loss": 0.1929, "step": 8743 }, { "epoch": 2.43, "learning_rate": 3.7758604694313018e-06, - "loss": 0.0908, + "loss": 0.022, "step": 8744 }, { "epoch": 2.43, "learning_rate": 3.7740050097411636e-06, - "loss": 0.0883, + "loss": 0.0889, "step": 8745 }, { "epoch": 2.43, "learning_rate": 3.772149550051025e-06, - "loss": 0.041, + "loss": 0.0617, "step": 8746 }, { "epoch": 2.43, "learning_rate": 3.770294090360887e-06, - "loss": 0.0394, + "loss": 0.0652, "step": 8747 }, { "epoch": 2.43, "learning_rate": 3.768438630670749e-06, - "loss": 0.0389, + "loss": 0.0625, "step": 8748 }, { "epoch": 2.44, "learning_rate": 3.7665831709806107e-06, - "loss": 0.0899, + "loss": 0.0892, "step": 8749 }, { "epoch": 2.44, "learning_rate": 3.764727711290472e-06, - "loss": 0.139, + "loss": 0.0226, "step": 8750 }, { "epoch": 2.44, "learning_rate": 3.762872251600334e-06, - "loss": 0.1921, + "loss": 0.0089, "step": 8751 }, { "epoch": 2.44, "learning_rate": 3.761016791910196e-06, - "loss": 0.0903, + "loss": 0.0353, "step": 8752 }, { "epoch": 2.44, "learning_rate": 3.7591613322200577e-06, - "loss": 0.1383, + "loss": 0.0897, "step": 8753 }, { "epoch": 2.44, "learning_rate": 3.7573058725299196e-06, - "loss": 0.1926, + "loss": 0.1194, "step": 8754 }, { "epoch": 2.44, "learning_rate": 3.755450412839781e-06, - "loss": 0.0889, + "loss": 0.0494, "step": 8755 }, { "epoch": 2.44, "learning_rate": 3.753594953149643e-06, - "loss": 0.2429, + "loss": 0.0712, "step": 8756 }, { "epoch": 2.44, "learning_rate": 3.751739493459505e-06, - "loss": 0.1864, + "loss": 0.0831, "step": 8757 }, { "epoch": 2.44, "learning_rate": 3.7498840337693667e-06, - "loss": 0.0353, + "loss": 0.0266, "step": 8758 }, { "epoch": 2.44, "learning_rate": 3.748028574079228e-06, - "loss": 0.1398, + "loss": 0.1537, "step": 8759 }, { "epoch": 2.44, "learning_rate": 3.74617311438909e-06, - "loss": 0.4002, + "loss": 0.038, "step": 8760 }, { "epoch": 2.44, "learning_rate": 3.744317654698952e-06, - "loss": 0.1899, + "loss": 0.014, "step": 8761 }, { "epoch": 2.44, "learning_rate": 3.7424621950088137e-06, - "loss": 0.0869, + "loss": 0.0177, "step": 8762 }, { "epoch": 2.44, "learning_rate": 3.740606735318675e-06, - "loss": 0.3422, + "loss": 0.0146, "step": 8763 }, { "epoch": 2.44, "learning_rate": 3.738751275628537e-06, - "loss": 0.1859, + "loss": 0.0886, "step": 8764 }, { "epoch": 2.44, "learning_rate": 3.736895815938399e-06, - "loss": 0.2434, + "loss": 0.1975, "step": 8765 }, { "epoch": 2.44, "learning_rate": 3.735040356248261e-06, - "loss": 0.0868, + "loss": 0.1744, "step": 8766 }, { "epoch": 2.44, "learning_rate": 3.7331848965581222e-06, - "loss": 0.1377, + "loss": 0.0141, "step": 8767 }, { "epoch": 2.44, "learning_rate": 3.731329436867984e-06, - "loss": 0.2432, + "loss": 0.0217, "step": 8768 }, { "epoch": 2.44, "learning_rate": 3.729473977177846e-06, - "loss": 0.296, + "loss": 0.0619, "step": 8769 }, { "epoch": 2.44, "learning_rate": 3.727618517487708e-06, - "loss": 0.1888, + "loss": 0.0574, "step": 8770 }, { "epoch": 2.44, "learning_rate": 3.7257630577975697e-06, - "loss": 0.1424, + "loss": 0.0948, "step": 8771 }, { "epoch": 2.44, "learning_rate": 3.723907598107431e-06, - "loss": 0.1375, + "loss": 0.0161, "step": 8772 }, { "epoch": 2.44, "learning_rate": 3.722052138417293e-06, - "loss": 0.0902, + "loss": 0.0859, "step": 8773 }, { "epoch": 2.44, "learning_rate": 3.720196678727155e-06, - "loss": 0.1873, + "loss": 0.1547, "step": 8774 }, { "epoch": 2.44, "learning_rate": 3.7183412190370168e-06, - "loss": 0.0874, + "loss": 0.146, "step": 8775 }, { "epoch": 2.44, "learning_rate": 3.7164857593468782e-06, - "loss": 0.1914, + "loss": 0.1288, "step": 8776 }, { "epoch": 2.44, "learning_rate": 3.71463029965674e-06, - "loss": 0.0883, + "loss": 0.0138, "step": 8777 }, { "epoch": 2.44, "learning_rate": 3.712774839966602e-06, - "loss": 0.1929, + "loss": 0.1129, "step": 8778 }, { "epoch": 2.44, "learning_rate": 3.710919380276464e-06, - "loss": 0.2408, + "loss": 0.1301, "step": 8779 }, { "epoch": 2.44, "learning_rate": 3.7090639205863253e-06, - "loss": 0.0877, + "loss": 0.0186, "step": 8780 }, { "epoch": 2.44, "learning_rate": 3.707208460896187e-06, - "loss": 0.188, + "loss": 0.0907, "step": 8781 }, { "epoch": 2.44, "learning_rate": 3.705353001206049e-06, - "loss": 0.0875, + "loss": 0.0228, "step": 8782 }, { "epoch": 2.44, "learning_rate": 3.703497541515911e-06, - "loss": 0.2443, + "loss": 0.0178, "step": 8783 }, { "epoch": 2.44, "learning_rate": 3.7016420818257728e-06, - "loss": 0.3446, + "loss": 0.0399, "step": 8784 }, { "epoch": 2.45, "learning_rate": 3.6997866221356342e-06, - "loss": 0.1405, + "loss": 0.0678, "step": 8785 }, { "epoch": 2.45, "learning_rate": 3.697931162445496e-06, - "loss": 0.0381, + "loss": 0.0421, "step": 8786 }, { "epoch": 2.45, "learning_rate": 3.696075702755358e-06, - "loss": 0.1906, + "loss": 0.0244, "step": 8787 }, { "epoch": 2.45, "learning_rate": 3.69422024306522e-06, - "loss": 0.0885, + "loss": 0.1183, "step": 8788 }, { "epoch": 2.45, "learning_rate": 3.6923647833750813e-06, - "loss": 0.1384, + "loss": 0.0611, "step": 8789 }, { "epoch": 2.45, "learning_rate": 3.690509323684943e-06, - "loss": 0.0887, + "loss": 0.1019, "step": 8790 }, { "epoch": 2.45, "learning_rate": 3.688653863994805e-06, - "loss": 0.0369, + "loss": 0.1376, "step": 8791 }, { "epoch": 2.45, "learning_rate": 3.686798404304667e-06, - "loss": 0.2447, + "loss": 0.1075, "step": 8792 }, { "epoch": 2.45, "learning_rate": 3.6849429446145283e-06, - "loss": 0.0906, + "loss": 0.0239, "step": 8793 }, { "epoch": 2.45, "learning_rate": 3.68308748492439e-06, - "loss": 0.1903, + "loss": 0.0442, "step": 8794 }, { "epoch": 2.45, "learning_rate": 3.681232025234252e-06, - "loss": 0.1466, + "loss": 0.1435, "step": 8795 }, { "epoch": 2.45, "learning_rate": 3.679376565544114e-06, - "loss": 0.1394, + "loss": 0.0129, "step": 8796 }, { "epoch": 2.45, "learning_rate": 3.6775211058539754e-06, - "loss": 0.1442, + "loss": 0.0193, "step": 8797 }, { "epoch": 2.45, "learning_rate": 3.6756656461638373e-06, - "loss": 0.0881, + "loss": 0.0234, "step": 8798 }, { "epoch": 2.45, "learning_rate": 3.673810186473699e-06, - "loss": 0.1896, + "loss": 0.0674, "step": 8799 }, { "epoch": 2.45, "learning_rate": 3.671954726783561e-06, - "loss": 0.1424, + "loss": 0.0274, "step": 8800 }, { "epoch": 2.45, "learning_rate": 3.670099267093423e-06, - "loss": 0.0899, + "loss": 0.0589, "step": 8801 }, { "epoch": 2.45, "learning_rate": 3.6682438074032843e-06, - "loss": 0.0882, + "loss": 0.0221, "step": 8802 }, { "epoch": 2.45, "learning_rate": 3.666388347713146e-06, - "loss": 0.1912, + "loss": 0.0492, "step": 8803 }, { "epoch": 2.45, "learning_rate": 3.664532888023008e-06, - "loss": 0.0875, + "loss": 0.0254, "step": 8804 }, { "epoch": 2.45, "learning_rate": 3.66267742833287e-06, - "loss": 0.1397, + "loss": 0.1057, "step": 8805 }, { "epoch": 2.45, "learning_rate": 3.6608219686427314e-06, - "loss": 0.1389, + "loss": 0.014, "step": 8806 }, { "epoch": 2.45, "learning_rate": 3.6589665089525932e-06, - "loss": 0.1357, + "loss": 0.0206, "step": 8807 }, { "epoch": 2.45, "learning_rate": 3.657111049262455e-06, - "loss": 0.2423, + "loss": 0.0481, "step": 8808 }, { "epoch": 2.45, "learning_rate": 3.655255589572317e-06, - "loss": 0.1928, + "loss": 0.0147, "step": 8809 }, { "epoch": 2.45, "learning_rate": 3.6534001298821784e-06, - "loss": 0.1909, + "loss": 0.0935, "step": 8810 }, { "epoch": 2.45, "learning_rate": 3.6515446701920403e-06, - "loss": 0.138, + "loss": 0.1457, "step": 8811 }, { "epoch": 2.45, "learning_rate": 3.649689210501902e-06, - "loss": 0.0888, + "loss": 0.0164, "step": 8812 }, { "epoch": 2.45, "learning_rate": 3.647833750811764e-06, - "loss": 0.0886, + "loss": 0.1316, "step": 8813 }, { "epoch": 2.45, "learning_rate": 3.645978291121626e-06, - "loss": 0.0865, + "loss": 0.06, "step": 8814 }, { "epoch": 2.45, "learning_rate": 3.6441228314314874e-06, - "loss": 0.1892, + "loss": 0.1239, "step": 8815 }, { "epoch": 2.45, "learning_rate": 3.6422673717413492e-06, - "loss": 0.0347, + "loss": 0.1323, "step": 8816 }, { "epoch": 2.45, "learning_rate": 3.640411912051211e-06, - "loss": 0.0863, + "loss": 0.1427, "step": 8817 }, { "epoch": 2.45, "learning_rate": 3.638556452361073e-06, - "loss": 0.0353, + "loss": 0.1423, "step": 8818 }, { "epoch": 2.45, "learning_rate": 3.6367009926709344e-06, - "loss": 0.1941, + "loss": 0.0169, "step": 8819 }, { "epoch": 2.45, "learning_rate": 3.6348455329807963e-06, - "loss": 0.0885, + "loss": 0.0763, "step": 8820 }, { "epoch": 2.46, "learning_rate": 3.632990073290658e-06, - "loss": 0.1402, + "loss": 0.1266, "step": 8821 }, { "epoch": 2.46, "learning_rate": 3.63113461360052e-06, - "loss": 0.0858, + "loss": 0.1041, "step": 8822 }, { "epoch": 2.46, "learning_rate": 3.6292791539103815e-06, - "loss": 0.1401, + "loss": 0.0715, "step": 8823 }, { "epoch": 2.46, "learning_rate": 3.6274236942202434e-06, - "loss": 0.192, + "loss": 0.0405, "step": 8824 }, { "epoch": 2.46, "learning_rate": 3.6255682345301052e-06, - "loss": 0.2984, + "loss": 0.0773, "step": 8825 }, { "epoch": 2.46, "learning_rate": 3.623712774839967e-06, - "loss": 0.242, + "loss": 0.0216, "step": 8826 }, { "epoch": 2.46, "learning_rate": 3.6218573151498285e-06, - "loss": 0.1375, + "loss": 0.0086, "step": 8827 }, { "epoch": 2.46, "learning_rate": 3.6200018554596904e-06, - "loss": 0.1379, + "loss": 0.1049, "step": 8828 }, { "epoch": 2.46, "learning_rate": 3.6181463957695523e-06, - "loss": 0.196, + "loss": 0.1002, "step": 8829 }, { "epoch": 2.46, "learning_rate": 3.616290936079414e-06, - "loss": 0.0884, + "loss": 0.0617, "step": 8830 }, { "epoch": 2.46, "learning_rate": 3.614435476389276e-06, - "loss": 0.0334, + "loss": 0.09, "step": 8831 }, { "epoch": 2.46, "learning_rate": 3.6125800166991375e-06, - "loss": 0.2444, + "loss": 0.0979, "step": 8832 }, { "epoch": 2.46, "learning_rate": 3.6107245570089993e-06, - "loss": 0.2461, + "loss": 0.0161, "step": 8833 }, { "epoch": 2.46, "learning_rate": 3.6088690973188612e-06, - "loss": 0.0859, + "loss": 0.0217, "step": 8834 }, { "epoch": 2.46, "learning_rate": 3.607013637628723e-06, - "loss": 0.0846, + "loss": 0.0756, "step": 8835 }, { "epoch": 2.46, "learning_rate": 3.6051581779385845e-06, - "loss": 0.2455, + "loss": 0.0883, "step": 8836 }, { "epoch": 2.46, "learning_rate": 3.6033027182484464e-06, - "loss": 0.2431, + "loss": 0.0979, "step": 8837 }, { "epoch": 2.46, "learning_rate": 3.6014472585583083e-06, - "loss": 0.1913, + "loss": 0.0964, "step": 8838 }, { "epoch": 2.46, "learning_rate": 3.59959179886817e-06, - "loss": 0.087, + "loss": 0.1084, "step": 8839 }, { "epoch": 2.46, "learning_rate": 3.5977363391780316e-06, - "loss": 0.1408, + "loss": 0.0261, "step": 8840 }, { "epoch": 2.46, "learning_rate": 3.5958808794878935e-06, - "loss": 0.2465, + "loss": 0.0901, "step": 8841 }, { "epoch": 2.46, "learning_rate": 3.5940254197977553e-06, - "loss": 0.0874, + "loss": 0.062, "step": 8842 }, { "epoch": 2.46, "learning_rate": 3.592169960107617e-06, - "loss": 0.1909, + "loss": 0.077, "step": 8843 }, { "epoch": 2.46, "learning_rate": 3.590314500417479e-06, - "loss": 0.0337, + "loss": 0.0179, "step": 8844 }, { "epoch": 2.46, "learning_rate": 3.5884590407273405e-06, - "loss": 0.083, + "loss": 0.0845, "step": 8845 }, { "epoch": 2.46, "learning_rate": 3.5866035810372024e-06, - "loss": 0.0852, + "loss": 0.0182, "step": 8846 }, { "epoch": 2.46, "learning_rate": 3.5847481213470643e-06, - "loss": 0.1362, + "loss": 0.0958, "step": 8847 }, { "epoch": 2.46, "learning_rate": 3.582892661656926e-06, - "loss": 0.1907, + "loss": 0.0452, "step": 8848 }, { "epoch": 2.46, "learning_rate": 3.5810372019667876e-06, - "loss": 0.1876, + "loss": 0.0457, "step": 8849 }, { "epoch": 2.46, "learning_rate": 3.5791817422766494e-06, - "loss": 0.297, + "loss": 0.0439, "step": 8850 }, { "epoch": 2.46, "learning_rate": 3.5773262825865113e-06, - "loss": 0.2431, + "loss": 0.0651, "step": 8851 }, { "epoch": 2.46, "learning_rate": 3.575470822896373e-06, - "loss": 0.2457, + "loss": 0.0166, "step": 8852 }, { "epoch": 2.46, "learning_rate": 3.5736153632062346e-06, - "loss": 0.1409, + "loss": 0.018, "step": 8853 }, { "epoch": 2.46, "learning_rate": 3.5717599035160965e-06, - "loss": 0.0348, + "loss": 0.0476, "step": 8854 }, { "epoch": 2.46, "learning_rate": 3.5699044438259584e-06, - "loss": 0.1894, + "loss": 0.0132, "step": 8855 }, { "epoch": 2.46, "learning_rate": 3.5680489841358202e-06, - "loss": 0.0876, + "loss": 0.0622, "step": 8856 }, { "epoch": 2.47, "learning_rate": 3.5661935244456817e-06, - "loss": 0.1931, + "loss": 0.0287, "step": 8857 }, { "epoch": 2.47, "learning_rate": 3.5643380647555436e-06, - "loss": 0.0878, + "loss": 0.0938, "step": 8858 }, { "epoch": 2.47, "learning_rate": 3.5624826050654054e-06, - "loss": 0.3928, + "loss": 0.0641, "step": 8859 }, { "epoch": 2.47, "learning_rate": 3.5606271453752673e-06, - "loss": 0.2958, + "loss": 0.032, "step": 8860 }, { "epoch": 2.47, "learning_rate": 3.558771685685129e-06, - "loss": 0.0888, + "loss": 0.0605, "step": 8861 }, { "epoch": 2.47, "learning_rate": 3.5569162259949906e-06, - "loss": 0.0357, + "loss": 0.0185, "step": 8862 }, { "epoch": 2.47, "learning_rate": 3.5550607663048525e-06, - "loss": 0.1884, + "loss": 0.0488, "step": 8863 }, { "epoch": 2.47, "learning_rate": 3.5532053066147144e-06, - "loss": 0.1406, + "loss": 0.0928, "step": 8864 }, { "epoch": 2.47, "learning_rate": 3.5513498469245762e-06, - "loss": 0.0346, + "loss": 0.0566, "step": 8865 }, { "epoch": 2.47, "learning_rate": 3.5494943872344377e-06, - "loss": 0.2935, + "loss": 0.0291, "step": 8866 }, { "epoch": 2.47, "learning_rate": 3.5476389275442996e-06, - "loss": 0.1417, + "loss": 0.1008, "step": 8867 }, { "epoch": 2.47, "learning_rate": 3.5457834678541614e-06, - "loss": 0.0881, + "loss": 0.0146, "step": 8868 }, { "epoch": 2.47, "learning_rate": 3.5439280081640233e-06, - "loss": 0.2442, + "loss": 0.052, "step": 8869 }, { "epoch": 2.47, "learning_rate": 3.5420725484738847e-06, - "loss": 0.036, + "loss": 0.0994, "step": 8870 }, { "epoch": 2.47, "learning_rate": 3.5402170887837466e-06, - "loss": 0.1357, + "loss": 0.0154, "step": 8871 }, { "epoch": 2.47, "learning_rate": 3.5383616290936085e-06, - "loss": 0.1407, + "loss": 0.0992, "step": 8872 }, { "epoch": 2.47, "learning_rate": 3.5365061694034704e-06, - "loss": 0.1918, + "loss": 0.0106, "step": 8873 }, { "epoch": 2.47, "learning_rate": 3.5346507097133322e-06, - "loss": 0.1943, + "loss": 0.0535, "step": 8874 }, { "epoch": 2.47, "learning_rate": 3.5327952500231937e-06, - "loss": 0.1421, + "loss": 0.1507, "step": 8875 }, { "epoch": 2.47, "learning_rate": 3.5309397903330555e-06, - "loss": 0.0875, + "loss": 0.0652, "step": 8876 }, { "epoch": 2.47, "learning_rate": 3.5290843306429174e-06, - "loss": 0.0868, + "loss": 0.0195, "step": 8877 }, { "epoch": 2.47, "learning_rate": 3.5272288709527793e-06, - "loss": 0.0374, + "loss": 0.2295, "step": 8878 }, { "epoch": 2.47, "learning_rate": 3.5253734112626407e-06, - "loss": 0.089, + "loss": 0.0836, "step": 8879 }, { "epoch": 2.47, "learning_rate": 3.5235179515725026e-06, - "loss": 0.0892, + "loss": 0.0512, "step": 8880 }, { "epoch": 2.47, "learning_rate": 3.5216624918823645e-06, - "loss": 0.1927, + "loss": 0.0498, "step": 8881 }, { "epoch": 2.47, "learning_rate": 3.5198070321922255e-06, - "loss": 0.2429, + "loss": 0.0488, "step": 8882 }, { "epoch": 2.47, "learning_rate": 3.5179515725020874e-06, - "loss": 0.0858, + "loss": 0.0713, "step": 8883 }, { "epoch": 2.47, "learning_rate": 3.5160961128119492e-06, - "loss": 0.2426, + "loss": 0.0659, "step": 8884 }, { "epoch": 2.47, "learning_rate": 3.514240653121811e-06, - "loss": 0.1933, + "loss": 0.0405, "step": 8885 }, { "epoch": 2.47, "learning_rate": 3.5123851934316726e-06, - "loss": 0.188, + "loss": 0.0594, "step": 8886 }, { "epoch": 2.47, "learning_rate": 3.5105297337415344e-06, - "loss": 0.1958, + "loss": 0.1038, "step": 8887 }, { "epoch": 2.47, "learning_rate": 3.5086742740513963e-06, - "loss": 0.2424, + "loss": 0.1327, "step": 8888 }, { "epoch": 2.47, "learning_rate": 3.506818814361258e-06, - "loss": 0.1352, + "loss": 0.049, "step": 8889 }, { "epoch": 2.47, "learning_rate": 3.5049633546711196e-06, - "loss": 0.1387, + "loss": 0.0212, "step": 8890 }, { "epoch": 2.47, "learning_rate": 3.5031078949809815e-06, - "loss": 0.1938, + "loss": 0.1415, "step": 8891 }, { "epoch": 2.47, "learning_rate": 3.5012524352908434e-06, - "loss": 0.0859, + "loss": 0.1567, "step": 8892 }, { "epoch": 2.48, "learning_rate": 3.4993969756007052e-06, - "loss": 0.191, + "loss": 0.016, "step": 8893 }, { "epoch": 2.48, "learning_rate": 3.497541515910567e-06, - "loss": 0.1364, + "loss": 0.0197, "step": 8894 }, { "epoch": 2.48, "learning_rate": 3.4956860562204285e-06, - "loss": 0.0875, + "loss": 0.1397, "step": 8895 }, { "epoch": 2.48, "learning_rate": 3.4938305965302904e-06, - "loss": 0.1913, + "loss": 0.056, "step": 8896 }, { "epoch": 2.48, "learning_rate": 3.4919751368401523e-06, - "loss": 0.0879, + "loss": 0.1123, "step": 8897 }, { "epoch": 2.48, "learning_rate": 3.490119677150014e-06, - "loss": 0.0888, + "loss": 0.0567, "step": 8898 }, { "epoch": 2.48, "learning_rate": 3.4882642174598756e-06, - "loss": 0.0866, + "loss": 0.0197, "step": 8899 }, { "epoch": 2.48, "learning_rate": 3.4864087577697375e-06, - "loss": 0.143, + "loss": 0.0237, "step": 8900 }, { "epoch": 2.48, "learning_rate": 3.4845532980795993e-06, - "loss": 0.086, + "loss": 0.0128, "step": 8901 }, { "epoch": 2.48, "learning_rate": 3.4826978383894612e-06, - "loss": 0.1918, + "loss": 0.0119, "step": 8902 }, { "epoch": 2.48, "learning_rate": 3.4808423786993227e-06, - "loss": 0.2934, + "loss": 0.0204, "step": 8903 }, { "epoch": 2.48, "learning_rate": 3.4789869190091845e-06, - "loss": 0.035, + "loss": 0.0477, "step": 8904 }, { "epoch": 2.48, "learning_rate": 3.4771314593190464e-06, - "loss": 0.24, + "loss": 0.1017, "step": 8905 }, { "epoch": 2.48, "learning_rate": 3.4752759996289083e-06, - "loss": 0.2392, + "loss": 0.0953, "step": 8906 }, { "epoch": 2.48, "learning_rate": 3.47342053993877e-06, - "loss": 0.1401, + "loss": 0.0254, "step": 8907 }, { "epoch": 2.48, "learning_rate": 3.4715650802486316e-06, - "loss": 0.0901, + "loss": 0.0613, "step": 8908 }, { "epoch": 2.48, "learning_rate": 3.4697096205584935e-06, - "loss": 0.0872, + "loss": 0.0533, "step": 8909 }, { "epoch": 2.48, "learning_rate": 3.4678541608683553e-06, - "loss": 0.1986, + "loss": 0.0602, "step": 8910 }, { "epoch": 2.48, "learning_rate": 3.465998701178217e-06, - "loss": 0.0889, + "loss": 0.0581, "step": 8911 }, { "epoch": 2.48, "learning_rate": 3.4641432414880786e-06, - "loss": 0.0877, + "loss": 0.0166, "step": 8912 }, { "epoch": 2.48, "learning_rate": 3.4622877817979405e-06, - "loss": 0.2982, + "loss": 0.0578, "step": 8913 }, { "epoch": 2.48, "learning_rate": 3.4604323221078024e-06, - "loss": 0.2497, + "loss": 0.0098, "step": 8914 }, { "epoch": 2.48, "learning_rate": 3.4585768624176643e-06, - "loss": 0.1412, + "loss": 0.0457, "step": 8915 }, { "epoch": 2.48, "learning_rate": 3.4567214027275257e-06, - "loss": 0.2415, + "loss": 0.1405, "step": 8916 }, { "epoch": 2.48, "learning_rate": 3.4548659430373876e-06, - "loss": 0.0369, + "loss": 0.1408, "step": 8917 }, { "epoch": 2.48, "learning_rate": 3.4530104833472494e-06, - "loss": 0.0347, + "loss": 0.0119, "step": 8918 }, { "epoch": 2.48, "learning_rate": 3.4511550236571113e-06, - "loss": 0.0893, + "loss": 0.0681, "step": 8919 }, { "epoch": 2.48, "learning_rate": 3.4492995639669728e-06, - "loss": 0.1913, + "loss": 0.0096, "step": 8920 }, { "epoch": 2.48, "learning_rate": 3.4474441042768346e-06, - "loss": 0.0885, + "loss": 0.0143, "step": 8921 }, { "epoch": 2.48, "learning_rate": 3.4455886445866965e-06, - "loss": 0.2404, + "loss": 0.1264, "step": 8922 }, { "epoch": 2.48, "learning_rate": 3.4437331848965584e-06, - "loss": 0.1373, + "loss": 0.0959, "step": 8923 }, { "epoch": 2.48, "learning_rate": 3.4418777252064202e-06, - "loss": 0.0885, + "loss": 0.0821, "step": 8924 }, { "epoch": 2.48, "learning_rate": 3.4400222655162817e-06, - "loss": 0.1925, + "loss": 0.169, "step": 8925 }, { "epoch": 2.48, "learning_rate": 3.4381668058261436e-06, - "loss": 0.1373, + "loss": 0.0158, "step": 8926 }, { "epoch": 2.48, "learning_rate": 3.4363113461360054e-06, - "loss": 0.0876, + "loss": 0.1176, "step": 8927 }, { "epoch": 2.48, "learning_rate": 3.4344558864458673e-06, - "loss": 0.1403, + "loss": 0.1221, "step": 8928 }, { "epoch": 2.49, "learning_rate": 3.4326004267557288e-06, - "loss": 0.1381, + "loss": 0.0466, "step": 8929 }, { "epoch": 2.49, "learning_rate": 3.4307449670655906e-06, - "loss": 0.1875, + "loss": 0.022, "step": 8930 }, { "epoch": 2.49, "learning_rate": 3.4288895073754525e-06, - "loss": 0.1934, + "loss": 0.0216, "step": 8931 }, { "epoch": 2.49, "learning_rate": 3.4270340476853144e-06, - "loss": 0.1392, + "loss": 0.0703, "step": 8932 }, { "epoch": 2.49, "learning_rate": 3.425178587995176e-06, - "loss": 0.0878, + "loss": 0.1877, "step": 8933 }, { "epoch": 2.49, "learning_rate": 3.4233231283050377e-06, - "loss": 0.1392, + "loss": 0.0368, "step": 8934 }, { "epoch": 2.49, "learning_rate": 3.4214676686148996e-06, - "loss": 0.1404, + "loss": 0.0851, "step": 8935 }, { "epoch": 2.49, "learning_rate": 3.4196122089247614e-06, - "loss": 0.1899, + "loss": 0.1009, "step": 8936 }, { "epoch": 2.49, "learning_rate": 3.4177567492346233e-06, - "loss": 0.0864, + "loss": 0.1433, "step": 8937 }, { "epoch": 2.49, "learning_rate": 3.4159012895444847e-06, - "loss": 0.0348, + "loss": 0.0191, "step": 8938 }, { "epoch": 2.49, "learning_rate": 3.4140458298543466e-06, - "loss": 0.1891, + "loss": 0.0939, "step": 8939 }, { "epoch": 2.49, "learning_rate": 3.4121903701642085e-06, - "loss": 0.0855, + "loss": 0.0505, "step": 8940 }, { "epoch": 2.49, "learning_rate": 3.4103349104740704e-06, - "loss": 0.1389, + "loss": 0.1138, "step": 8941 }, { "epoch": 2.49, "learning_rate": 3.408479450783932e-06, - "loss": 0.0874, + "loss": 0.044, "step": 8942 }, { "epoch": 2.49, "learning_rate": 3.4066239910937937e-06, - "loss": 0.0843, + "loss": 0.0192, "step": 8943 }, { "epoch": 2.49, "learning_rate": 3.4047685314036555e-06, - "loss": 0.0855, + "loss": 0.0132, "step": 8944 }, { "epoch": 2.49, "learning_rate": 3.4029130717135174e-06, - "loss": 0.0335, + "loss": 0.0631, "step": 8945 }, { "epoch": 2.49, "learning_rate": 3.401057612023379e-06, - "loss": 0.3509, + "loss": 0.1145, "step": 8946 }, { "epoch": 2.49, "learning_rate": 3.3992021523332407e-06, - "loss": 0.0869, + "loss": 0.014, "step": 8947 }, { "epoch": 2.49, "learning_rate": 3.3973466926431026e-06, - "loss": 0.1389, + "loss": 0.0984, "step": 8948 }, { "epoch": 2.49, "learning_rate": 3.3954912329529645e-06, - "loss": 0.0858, + "loss": 0.0125, "step": 8949 }, { "epoch": 2.49, "learning_rate": 3.393635773262826e-06, - "loss": 0.3025, + "loss": 0.0774, "step": 8950 }, { "epoch": 2.49, "learning_rate": 3.391780313572688e-06, - "loss": 0.141, + "loss": 0.1035, "step": 8951 }, { "epoch": 2.49, "learning_rate": 3.3899248538825497e-06, - "loss": 0.1391, + "loss": 0.1493, "step": 8952 }, { "epoch": 2.49, "learning_rate": 3.3880693941924115e-06, - "loss": 0.0864, + "loss": 0.1204, "step": 8953 }, { "epoch": 2.49, "learning_rate": 3.3862139345022734e-06, - "loss": 0.193, + "loss": 0.0444, "step": 8954 }, { "epoch": 2.49, "learning_rate": 3.384358474812135e-06, - "loss": 0.2394, + "loss": 0.0153, "step": 8955 }, { "epoch": 2.49, "learning_rate": 3.3825030151219967e-06, - "loss": 0.0336, + "loss": 0.0609, "step": 8956 }, { "epoch": 2.49, "learning_rate": 3.3806475554318586e-06, - "loss": 0.1386, + "loss": 0.1247, "step": 8957 }, { "epoch": 2.49, "learning_rate": 3.3787920957417205e-06, - "loss": 0.0345, + "loss": 0.0721, "step": 8958 }, { "epoch": 2.49, "learning_rate": 3.376936636051582e-06, - "loss": 0.1911, + "loss": 0.0414, "step": 8959 }, { "epoch": 2.49, "learning_rate": 3.3750811763614438e-06, - "loss": 0.0854, + "loss": 0.0201, "step": 8960 }, { "epoch": 2.49, "learning_rate": 3.3732257166713056e-06, - "loss": 0.1376, + "loss": 0.0237, "step": 8961 }, { "epoch": 2.49, "learning_rate": 3.3713702569811675e-06, - "loss": 0.1396, + "loss": 0.0972, "step": 8962 }, { "epoch": 2.49, "learning_rate": 3.369514797291029e-06, - "loss": 0.0835, + "loss": 0.1121, "step": 8963 }, { "epoch": 2.49, "learning_rate": 3.367659337600891e-06, - "loss": 0.139, + "loss": 0.0963, "step": 8964 }, { "epoch": 2.5, "learning_rate": 3.3658038779107527e-06, - "loss": 0.1418, + "loss": 0.0174, "step": 8965 }, { "epoch": 2.5, "learning_rate": 3.3639484182206146e-06, - "loss": 0.2966, + "loss": 0.0561, "step": 8966 }, { "epoch": 2.5, "learning_rate": 3.3620929585304764e-06, - "loss": 0.1902, + "loss": 0.024, "step": 8967 }, { "epoch": 2.5, "learning_rate": 3.360237498840338e-06, - "loss": 0.2483, + "loss": 0.1054, "step": 8968 }, { "epoch": 2.5, "learning_rate": 3.3583820391501998e-06, - "loss": 0.1389, + "loss": 0.017, "step": 8969 }, { "epoch": 2.5, "learning_rate": 3.3565265794600616e-06, - "loss": 0.1851, + "loss": 0.0774, "step": 8970 }, { "epoch": 2.5, "learning_rate": 3.3546711197699235e-06, - "loss": 0.0325, + "loss": 0.0643, "step": 8971 }, { "epoch": 2.5, "learning_rate": 3.352815660079785e-06, - "loss": 0.2977, + "loss": 0.1674, "step": 8972 }, { "epoch": 2.5, "learning_rate": 3.350960200389647e-06, - "loss": 0.0337, + "loss": 0.0791, "step": 8973 }, { "epoch": 2.5, "learning_rate": 3.3491047406995087e-06, - "loss": 0.136, + "loss": 0.0894, "step": 8974 }, { "epoch": 2.5, "learning_rate": 3.3472492810093706e-06, - "loss": 0.0861, + "loss": 0.3238, "step": 8975 }, { "epoch": 2.5, "learning_rate": 3.345393821319232e-06, - "loss": 0.1359, + "loss": 0.16, "step": 8976 }, { "epoch": 2.5, "learning_rate": 3.343538361629094e-06, - "loss": 0.1418, + "loss": 0.0219, "step": 8977 }, { "epoch": 2.5, "learning_rate": 3.3416829019389558e-06, - "loss": 0.1942, + "loss": 0.0476, "step": 8978 }, { "epoch": 2.5, "learning_rate": 3.3398274422488176e-06, - "loss": 0.1879, + "loss": 0.0892, "step": 8979 }, { "epoch": 2.5, "learning_rate": 3.337971982558679e-06, - "loss": 0.137, + "loss": 0.0903, "step": 8980 }, { "epoch": 2.5, "learning_rate": 3.336116522868541e-06, - "loss": 0.1928, + "loss": 0.1342, "step": 8981 }, { "epoch": 2.5, "learning_rate": 3.334261063178403e-06, - "loss": 0.1394, + "loss": 0.0222, "step": 8982 }, { "epoch": 2.5, "learning_rate": 3.3324056034882647e-06, - "loss": 0.1892, + "loss": 0.0699, "step": 8983 }, { "epoch": 2.5, "learning_rate": 3.3305501437981266e-06, - "loss": 0.1371, + "loss": 0.0517, "step": 8984 }, { "epoch": 2.5, "learning_rate": 3.328694684107988e-06, - "loss": 0.1916, + "loss": 0.194, "step": 8985 }, { "epoch": 2.5, "learning_rate": 3.32683922441785e-06, - "loss": 0.0341, + "loss": 0.048, "step": 8986 }, { "epoch": 2.5, "learning_rate": 3.3249837647277117e-06, - "loss": 0.0869, + "loss": 0.0207, "step": 8987 }, { "epoch": 2.5, "learning_rate": 3.3231283050375736e-06, - "loss": 0.0854, + "loss": 0.0235, "step": 8988 }, { "epoch": 2.5, "learning_rate": 3.321272845347435e-06, - "loss": 0.1375, + "loss": 0.0173, "step": 8989 }, { "epoch": 2.5, "learning_rate": 3.319417385657297e-06, - "loss": 0.1922, + "loss": 0.0967, "step": 8990 }, { "epoch": 2.5, "learning_rate": 3.317561925967159e-06, - "loss": 0.1952, + "loss": 0.0913, "step": 8991 }, { "epoch": 2.5, "learning_rate": 3.3157064662770207e-06, - "loss": 0.1352, + "loss": 0.0266, "step": 8992 }, { "epoch": 2.5, "learning_rate": 3.313851006586882e-06, - "loss": 0.1931, + "loss": 0.0274, "step": 8993 }, { "epoch": 2.5, "learning_rate": 3.311995546896744e-06, - "loss": 0.1397, + "loss": 0.0221, "step": 8994 }, { "epoch": 2.5, "learning_rate": 3.310140087206606e-06, - "loss": 0.1907, + "loss": 0.23, "step": 8995 }, { "epoch": 2.5, "learning_rate": 3.3082846275164677e-06, - "loss": 0.1373, + "loss": 0.0258, "step": 8996 }, { "epoch": 2.5, "learning_rate": 3.3064291678263296e-06, - "loss": 0.1906, + "loss": 0.1201, "step": 8997 }, { "epoch": 2.5, "learning_rate": 3.304573708136191e-06, - "loss": 0.1906, + "loss": 0.0455, "step": 8998 }, { "epoch": 2.5, "learning_rate": 3.302718248446053e-06, - "loss": 0.0863, + "loss": 0.0159, "step": 8999 }, { "epoch": 2.5, "learning_rate": 3.3008627887559148e-06, - "loss": 0.0868, + "loss": 0.0179, "step": 9000 }, { "epoch": 2.51, "learning_rate": 3.2990073290657767e-06, - "loss": 0.249, + "loss": 0.0771, "step": 9001 }, { "epoch": 2.51, "learning_rate": 3.297151869375638e-06, - "loss": 0.2413, + "loss": 0.0545, "step": 9002 }, { "epoch": 2.51, "learning_rate": 3.2952964096855e-06, - "loss": 0.1378, + "loss": 0.056, "step": 9003 }, { "epoch": 2.51, "learning_rate": 3.293440949995362e-06, - "loss": 0.1409, + "loss": 0.1444, "step": 9004 }, { "epoch": 2.51, "learning_rate": 3.2915854903052237e-06, - "loss": 0.0888, + "loss": 0.0191, "step": 9005 }, { "epoch": 2.51, "learning_rate": 3.289730030615085e-06, - "loss": 0.0826, + "loss": 0.0667, "step": 9006 }, { "epoch": 2.51, "learning_rate": 3.287874570924947e-06, - "loss": 0.0862, + "loss": 0.0377, "step": 9007 }, { "epoch": 2.51, "learning_rate": 3.286019111234809e-06, - "loss": 0.0339, + "loss": 0.0123, "step": 9008 }, { "epoch": 2.51, "learning_rate": 3.2841636515446708e-06, - "loss": 0.2958, + "loss": 0.1079, "step": 9009 }, { "epoch": 2.51, "learning_rate": 3.2823081918545322e-06, - "loss": 0.1937, + "loss": 0.0224, "step": 9010 }, { "epoch": 2.51, "learning_rate": 3.280452732164394e-06, - "loss": 0.0881, + "loss": 0.1427, "step": 9011 }, { "epoch": 2.51, "learning_rate": 3.278597272474256e-06, - "loss": 0.193, + "loss": 0.0599, "step": 9012 }, { "epoch": 2.51, "learning_rate": 3.276741812784118e-06, - "loss": 0.0885, + "loss": 0.044, "step": 9013 }, { "epoch": 2.51, "learning_rate": 3.2748863530939797e-06, - "loss": 0.0345, + "loss": 0.1236, "step": 9014 }, { "epoch": 2.51, "learning_rate": 3.273030893403841e-06, - "loss": 0.1928, + "loss": 0.1622, "step": 9015 }, { "epoch": 2.51, "learning_rate": 3.271175433713703e-06, - "loss": 0.1918, + "loss": 0.0705, "step": 9016 }, { "epoch": 2.51, "learning_rate": 3.269319974023565e-06, - "loss": 0.1926, + "loss": 0.0164, "step": 9017 }, { "epoch": 2.51, "learning_rate": 3.2674645143334268e-06, - "loss": 0.0346, + "loss": 0.0849, "step": 9018 }, { "epoch": 2.51, "learning_rate": 3.265609054643288e-06, - "loss": 0.1916, + "loss": 0.1252, "step": 9019 }, { "epoch": 2.51, "learning_rate": 3.26375359495315e-06, - "loss": 0.1913, + "loss": 0.0822, "step": 9020 }, { "epoch": 2.51, "learning_rate": 3.261898135263012e-06, - "loss": 0.0857, + "loss": 0.0503, "step": 9021 }, { "epoch": 2.51, "learning_rate": 3.260042675572874e-06, - "loss": 0.1406, + "loss": 0.0161, "step": 9022 }, { "epoch": 2.51, "learning_rate": 3.2581872158827353e-06, - "loss": 0.0338, + "loss": 0.038, "step": 9023 }, { "epoch": 2.51, "learning_rate": 3.256331756192597e-06, - "loss": 0.0873, + "loss": 0.1131, "step": 9024 }, { "epoch": 2.51, "learning_rate": 3.254476296502459e-06, - "loss": 0.1396, + "loss": 0.2051, "step": 9025 }, { "epoch": 2.51, "learning_rate": 3.252620836812321e-06, - "loss": 0.144, + "loss": 0.062, "step": 9026 }, { "epoch": 2.51, "learning_rate": 3.2507653771221828e-06, - "loss": 0.192, + "loss": 0.0166, "step": 9027 }, { "epoch": 2.51, "learning_rate": 3.248909917432044e-06, - "loss": 0.0339, + "loss": 0.0153, "step": 9028 }, { "epoch": 2.51, "learning_rate": 3.247054457741906e-06, - "loss": 0.0865, + "loss": 0.1714, "step": 9029 }, { "epoch": 2.51, "learning_rate": 3.245198998051768e-06, - "loss": 0.1416, + "loss": 0.0534, "step": 9030 }, { "epoch": 2.51, "learning_rate": 3.24334353836163e-06, - "loss": 0.0844, + "loss": 0.1145, "step": 9031 }, { "epoch": 2.51, "learning_rate": 3.2414880786714913e-06, - "loss": 0.0902, + "loss": 0.0452, "step": 9032 }, { "epoch": 2.51, "learning_rate": 3.239632618981353e-06, - "loss": 0.2507, + "loss": 0.0511, "step": 9033 }, { "epoch": 2.51, "learning_rate": 3.2377771592912146e-06, - "loss": 0.1405, + "loss": 0.0738, "step": 9034 }, { "epoch": 2.51, "learning_rate": 3.235921699601076e-06, - "loss": 0.087, + "loss": 0.0896, "step": 9035 }, { "epoch": 2.51, "learning_rate": 3.234066239910938e-06, - "loss": 0.0322, + "loss": 0.0534, "step": 9036 }, { "epoch": 2.52, "learning_rate": 3.2322107802207998e-06, - "loss": 0.193, + "loss": 0.0469, "step": 9037 }, { "epoch": 2.52, "learning_rate": 3.2303553205306616e-06, - "loss": 0.1391, + "loss": 0.0749, "step": 9038 }, { "epoch": 2.52, "learning_rate": 3.228499860840523e-06, - "loss": 0.138, + "loss": 0.0578, "step": 9039 }, { "epoch": 2.52, "learning_rate": 3.226644401150385e-06, - "loss": 0.1416, + "loss": 0.0472, "step": 9040 }, { "epoch": 2.52, "learning_rate": 3.224788941460247e-06, - "loss": 0.0847, + "loss": 0.0158, "step": 9041 }, { "epoch": 2.52, "learning_rate": 3.2229334817701087e-06, - "loss": 0.085, + "loss": 0.0586, "step": 9042 }, { "epoch": 2.52, "learning_rate": 3.22107802207997e-06, - "loss": 0.1385, + "loss": 0.0626, "step": 9043 }, { "epoch": 2.52, "learning_rate": 3.219222562389832e-06, - "loss": 0.2461, + "loss": 0.0424, "step": 9044 }, { "epoch": 2.52, "learning_rate": 3.217367102699694e-06, - "loss": 0.1913, + "loss": 0.0614, "step": 9045 }, { "epoch": 2.52, "learning_rate": 3.2155116430095558e-06, - "loss": 0.0845, + "loss": 0.1045, "step": 9046 }, { "epoch": 2.52, "learning_rate": 3.2136561833194176e-06, - "loss": 0.0319, + "loss": 0.1186, "step": 9047 }, { "epoch": 2.52, "learning_rate": 3.211800723629279e-06, - "loss": 0.0307, + "loss": 0.1435, "step": 9048 }, { "epoch": 2.52, "learning_rate": 3.209945263939141e-06, - "loss": 0.1394, + "loss": 0.033, "step": 9049 }, { "epoch": 2.52, "learning_rate": 3.208089804249003e-06, - "loss": 0.2981, + "loss": 0.3016, "step": 9050 }, { "epoch": 2.52, "learning_rate": 3.2062343445588647e-06, - "loss": 0.0311, + "loss": 0.1904, "step": 9051 }, { "epoch": 2.52, "learning_rate": 3.204378884868726e-06, - "loss": 0.1959, + "loss": 0.0142, "step": 9052 }, { "epoch": 2.52, "learning_rate": 3.202523425178588e-06, - "loss": 0.1351, + "loss": 0.1108, "step": 9053 }, { "epoch": 2.52, "learning_rate": 3.20066796548845e-06, - "loss": 0.0853, + "loss": 0.0823, "step": 9054 }, { "epoch": 2.52, "learning_rate": 3.1988125057983117e-06, - "loss": 0.2462, + "loss": 0.0763, "step": 9055 }, { "epoch": 2.52, "learning_rate": 3.196957046108173e-06, - "loss": 0.1897, + "loss": 0.085, "step": 9056 }, { "epoch": 2.52, "learning_rate": 3.195101586418035e-06, - "loss": 0.0857, + "loss": 0.0249, "step": 9057 }, { "epoch": 2.52, "learning_rate": 3.193246126727897e-06, - "loss": 0.0867, + "loss": 0.0988, "step": 9058 }, { "epoch": 2.52, "learning_rate": 3.191390667037759e-06, - "loss": 0.0844, + "loss": 0.0679, "step": 9059 }, { "epoch": 2.52, "learning_rate": 3.1895352073476202e-06, - "loss": 0.0866, + "loss": 0.1058, "step": 9060 }, { "epoch": 2.52, "learning_rate": 3.187679747657482e-06, - "loss": 0.1929, + "loss": 0.1264, "step": 9061 }, { "epoch": 2.52, "learning_rate": 3.185824287967344e-06, - "loss": 0.1428, + "loss": 0.0669, "step": 9062 }, { "epoch": 2.52, "learning_rate": 3.183968828277206e-06, - "loss": 0.0869, + "loss": 0.1435, "step": 9063 }, { "epoch": 2.52, "learning_rate": 3.1821133685870677e-06, - "loss": 0.0837, + "loss": 0.0583, "step": 9064 }, { "epoch": 2.52, "learning_rate": 3.180257908896929e-06, - "loss": 0.1951, + "loss": 0.0572, "step": 9065 }, { "epoch": 2.52, "learning_rate": 3.178402449206791e-06, - "loss": 0.1978, + "loss": 0.1045, "step": 9066 }, { "epoch": 2.52, "learning_rate": 3.176546989516653e-06, - "loss": 0.3027, + "loss": 0.1262, "step": 9067 }, { "epoch": 2.52, "learning_rate": 3.1746915298265148e-06, - "loss": 0.0854, + "loss": 0.0552, "step": 9068 }, { "epoch": 2.52, "learning_rate": 3.1728360701363762e-06, - "loss": 0.3013, + "loss": 0.0293, "step": 9069 }, { "epoch": 2.52, "learning_rate": 3.170980610446238e-06, - "loss": 0.1403, + "loss": 0.0959, "step": 9070 }, { "epoch": 2.52, "learning_rate": 3.1691251507561e-06, - "loss": 0.1917, + "loss": 0.06, "step": 9071 }, { "epoch": 2.52, "learning_rate": 3.167269691065962e-06, - "loss": 0.0881, + "loss": 0.0347, "step": 9072 }, { "epoch": 2.53, "learning_rate": 3.1654142313758233e-06, - "loss": 0.0844, + "loss": 0.1364, "step": 9073 }, { "epoch": 2.53, "learning_rate": 3.163558771685685e-06, - "loss": 0.1934, + "loss": 0.0237, "step": 9074 }, { "epoch": 2.53, "learning_rate": 3.161703311995547e-06, - "loss": 0.0304, + "loss": 0.0746, "step": 9075 }, { "epoch": 2.53, "learning_rate": 3.159847852305409e-06, - "loss": 0.0861, + "loss": 0.0227, "step": 9076 }, { "epoch": 2.53, "learning_rate": 3.1579923926152708e-06, - "loss": 0.1954, + "loss": 0.0548, "step": 9077 }, { "epoch": 2.53, "learning_rate": 3.1561369329251322e-06, - "loss": 0.0862, + "loss": 0.0222, "step": 9078 }, { "epoch": 2.53, "learning_rate": 3.154281473234994e-06, - "loss": 0.0833, + "loss": 0.0436, "step": 9079 }, { "epoch": 2.53, "learning_rate": 3.152426013544856e-06, - "loss": 0.0846, + "loss": 0.0495, "step": 9080 }, { "epoch": 2.53, "learning_rate": 3.150570553854718e-06, - "loss": 0.1929, + "loss": 0.045, "step": 9081 }, { "epoch": 2.53, "learning_rate": 3.1487150941645793e-06, - "loss": 0.0823, + "loss": 0.0164, "step": 9082 }, { "epoch": 2.53, "learning_rate": 3.146859634474441e-06, - "loss": 0.2456, + "loss": 0.026, "step": 9083 }, { "epoch": 2.53, "learning_rate": 3.145004174784303e-06, - "loss": 0.0854, + "loss": 0.0167, "step": 9084 }, { "epoch": 2.53, "learning_rate": 3.143148715094165e-06, - "loss": 0.1367, + "loss": 0.0245, "step": 9085 }, { "epoch": 2.53, "learning_rate": 3.1412932554040263e-06, - "loss": 0.1387, + "loss": 0.0223, "step": 9086 }, { "epoch": 2.53, "learning_rate": 3.139437795713888e-06, - "loss": 0.0876, + "loss": 0.1171, "step": 9087 }, { "epoch": 2.53, "learning_rate": 3.13758233602375e-06, - "loss": 0.2505, + "loss": 0.0241, "step": 9088 }, { "epoch": 2.53, "learning_rate": 3.135726876333612e-06, - "loss": 0.1386, + "loss": 0.0809, "step": 9089 }, { "epoch": 2.53, "learning_rate": 3.1338714166434734e-06, - "loss": 0.1898, + "loss": 0.0457, "step": 9090 }, { "epoch": 2.53, "learning_rate": 3.1320159569533353e-06, - "loss": 0.0302, + "loss": 0.0918, "step": 9091 }, { "epoch": 2.53, "learning_rate": 3.130160497263197e-06, - "loss": 0.1385, + "loss": 0.0614, "step": 9092 }, { "epoch": 2.53, "learning_rate": 3.128305037573059e-06, - "loss": 0.1399, + "loss": 0.0897, "step": 9093 }, { "epoch": 2.53, "learning_rate": 3.126449577882921e-06, - "loss": 0.0305, + "loss": 0.0718, "step": 9094 }, { "epoch": 2.53, "learning_rate": 3.1245941181927823e-06, - "loss": 0.0855, + "loss": 0.0927, "step": 9095 }, { "epoch": 2.53, "learning_rate": 3.122738658502644e-06, - "loss": 0.2487, + "loss": 0.1133, "step": 9096 }, { "epoch": 2.53, "learning_rate": 3.120883198812506e-06, - "loss": 0.0817, + "loss": 0.0179, "step": 9097 }, { "epoch": 2.53, "learning_rate": 3.119027739122368e-06, - "loss": 0.1401, + "loss": 0.0517, "step": 9098 }, { "epoch": 2.53, "learning_rate": 3.1171722794322294e-06, - "loss": 0.137, + "loss": 0.0826, "step": 9099 }, { "epoch": 2.53, "learning_rate": 3.1153168197420913e-06, - "loss": 0.0313, + "loss": 0.1236, "step": 9100 }, { "epoch": 2.53, "learning_rate": 3.113461360051953e-06, - "loss": 0.1327, + "loss": 0.0596, "step": 9101 }, { "epoch": 2.53, "learning_rate": 3.111605900361815e-06, - "loss": 0.1901, + "loss": 0.0182, "step": 9102 }, { "epoch": 2.53, "learning_rate": 3.1097504406716764e-06, - "loss": 0.138, + "loss": 0.0242, "step": 9103 }, { "epoch": 2.53, "learning_rate": 3.1078949809815383e-06, - "loss": 0.197, + "loss": 0.0646, "step": 9104 }, { "epoch": 2.53, "learning_rate": 3.1060395212914e-06, - "loss": 0.1367, + "loss": 0.0109, "step": 9105 }, { "epoch": 2.53, "learning_rate": 3.104184061601262e-06, - "loss": 0.1405, + "loss": 0.1649, "step": 9106 }, { "epoch": 2.53, "learning_rate": 3.102328601911124e-06, - "loss": 0.0843, + "loss": 0.0512, "step": 9107 }, { "epoch": 2.53, "learning_rate": 3.1004731422209854e-06, - "loss": 0.1924, + "loss": 0.0383, "step": 9108 }, { "epoch": 2.54, "learning_rate": 3.0986176825308472e-06, - "loss": 0.3548, + "loss": 0.0832, "step": 9109 }, { "epoch": 2.54, "learning_rate": 3.096762222840709e-06, - "loss": 0.0309, + "loss": 0.0553, "step": 9110 }, { "epoch": 2.54, "learning_rate": 3.094906763150571e-06, - "loss": 0.085, + "loss": 0.0199, "step": 9111 }, { "epoch": 2.54, "learning_rate": 3.0930513034604324e-06, - "loss": 0.0819, + "loss": 0.1449, "step": 9112 }, { "epoch": 2.54, "learning_rate": 3.0911958437702943e-06, - "loss": 0.0836, + "loss": 0.0109, "step": 9113 }, { "epoch": 2.54, "learning_rate": 3.089340384080156e-06, - "loss": 0.0834, + "loss": 0.0697, "step": 9114 }, { "epoch": 2.54, "learning_rate": 3.087484924390018e-06, - "loss": 0.1961, + "loss": 0.0692, "step": 9115 }, { "epoch": 2.54, "learning_rate": 3.0856294646998795e-06, - "loss": 0.136, + "loss": 0.1059, "step": 9116 }, { "epoch": 2.54, "learning_rate": 3.0837740050097414e-06, - "loss": 0.2473, + "loss": 0.047, "step": 9117 }, { "epoch": 2.54, "learning_rate": 3.0819185453196032e-06, - "loss": 0.0841, + "loss": 0.1268, "step": 9118 }, { "epoch": 2.54, "learning_rate": 3.080063085629465e-06, - "loss": 0.0309, + "loss": 0.0143, "step": 9119 }, { "epoch": 2.54, "learning_rate": 3.0782076259393266e-06, - "loss": 0.0875, + "loss": 0.1336, "step": 9120 }, { "epoch": 2.54, "learning_rate": 3.0763521662491884e-06, - "loss": 0.1407, + "loss": 0.0099, "step": 9121 }, { "epoch": 2.54, "learning_rate": 3.0744967065590503e-06, - "loss": 0.1437, + "loss": 0.122, "step": 9122 }, { "epoch": 2.54, "learning_rate": 3.072641246868912e-06, - "loss": 0.1961, + "loss": 0.0144, "step": 9123 }, { "epoch": 2.54, "learning_rate": 3.070785787178774e-06, - "loss": 0.1415, + "loss": 0.0596, "step": 9124 }, { "epoch": 2.54, "learning_rate": 3.0689303274886355e-06, - "loss": 0.1366, + "loss": 0.0113, "step": 9125 }, { "epoch": 2.54, "learning_rate": 3.0670748677984974e-06, - "loss": 0.135, + "loss": 0.1015, "step": 9126 }, { "epoch": 2.54, "learning_rate": 3.0652194081083592e-06, - "loss": 0.1436, + "loss": 0.0155, "step": 9127 }, { "epoch": 2.54, "learning_rate": 3.063363948418221e-06, - "loss": 0.0882, + "loss": 0.0873, "step": 9128 }, { "epoch": 2.54, "learning_rate": 3.0615084887280825e-06, - "loss": 0.1928, + "loss": 0.0646, "step": 9129 }, { "epoch": 2.54, "learning_rate": 3.0596530290379444e-06, - "loss": 0.0866, + "loss": 0.0159, "step": 9130 }, { "epoch": 2.54, "learning_rate": 3.0577975693478063e-06, - "loss": 0.1417, + "loss": 0.0163, "step": 9131 }, { "epoch": 2.54, "learning_rate": 3.055942109657668e-06, - "loss": 0.0315, + "loss": 0.0382, "step": 9132 }, { "epoch": 2.54, "learning_rate": 3.0540866499675296e-06, - "loss": 0.1985, + "loss": 0.0237, "step": 9133 }, { "epoch": 2.54, "learning_rate": 3.0522311902773915e-06, - "loss": 0.1394, + "loss": 0.1258, "step": 9134 }, { "epoch": 2.54, "learning_rate": 3.0503757305872533e-06, - "loss": 0.2429, + "loss": 0.0413, "step": 9135 }, { "epoch": 2.54, "learning_rate": 3.048520270897115e-06, - "loss": 0.1933, + "loss": 0.0387, "step": 9136 }, { "epoch": 2.54, "learning_rate": 3.046664811206977e-06, - "loss": 0.1948, + "loss": 0.1631, "step": 9137 }, { "epoch": 2.54, "learning_rate": 3.0448093515168385e-06, - "loss": 0.2495, + "loss": 0.1916, "step": 9138 }, { "epoch": 2.54, "learning_rate": 3.0429538918267004e-06, - "loss": 0.0823, + "loss": 0.0737, "step": 9139 }, { "epoch": 2.54, "learning_rate": 3.0410984321365623e-06, - "loss": 0.1411, + "loss": 0.0239, "step": 9140 }, { "epoch": 2.54, "learning_rate": 3.039242972446424e-06, - "loss": 0.1376, + "loss": 0.1031, "step": 9141 }, { "epoch": 2.54, "learning_rate": 3.0373875127562856e-06, - "loss": 0.1874, + "loss": 0.1975, "step": 9142 }, { "epoch": 2.54, "learning_rate": 3.0355320530661475e-06, - "loss": 0.0863, + "loss": 0.1117, "step": 9143 }, { "epoch": 2.54, "learning_rate": 3.0336765933760093e-06, - "loss": 0.0318, + "loss": 0.0582, "step": 9144 }, { "epoch": 2.55, "learning_rate": 3.031821133685871e-06, - "loss": 0.2425, + "loss": 0.0172, "step": 9145 }, { "epoch": 2.55, "learning_rate": 3.0299656739957326e-06, - "loss": 0.1993, + "loss": 0.0247, "step": 9146 }, { "epoch": 2.55, "learning_rate": 3.0281102143055945e-06, - "loss": 0.0849, + "loss": 0.0955, "step": 9147 }, { "epoch": 2.55, "learning_rate": 3.0262547546154564e-06, - "loss": 0.1405, + "loss": 0.0245, "step": 9148 }, { "epoch": 2.55, "learning_rate": 3.0243992949253183e-06, - "loss": 0.0868, + "loss": 0.0388, "step": 9149 }, { "epoch": 2.55, "learning_rate": 3.0225438352351797e-06, - "loss": 0.1985, + "loss": 0.1758, "step": 9150 }, { "epoch": 2.55, "learning_rate": 3.0206883755450416e-06, - "loss": 0.2442, + "loss": 0.1367, "step": 9151 }, { "epoch": 2.55, "learning_rate": 3.0188329158549034e-06, - "loss": 0.1363, + "loss": 0.1143, "step": 9152 }, { "epoch": 2.55, "learning_rate": 3.0169774561647653e-06, - "loss": 0.032, + "loss": 0.0189, "step": 9153 }, { "epoch": 2.55, "learning_rate": 3.015121996474627e-06, - "loss": 0.0819, + "loss": 0.1093, "step": 9154 }, { "epoch": 2.55, "learning_rate": 3.0132665367844886e-06, - "loss": 0.1392, + "loss": 0.0203, "step": 9155 }, { "epoch": 2.55, "learning_rate": 3.0114110770943505e-06, - "loss": 0.086, + "loss": 0.126, "step": 9156 }, { "epoch": 2.55, "learning_rate": 3.0095556174042124e-06, - "loss": 0.1393, + "loss": 0.0903, "step": 9157 }, { "epoch": 2.55, "learning_rate": 3.0077001577140742e-06, - "loss": 0.0324, + "loss": 0.1127, "step": 9158 }, { "epoch": 2.55, "learning_rate": 3.0058446980239357e-06, - "loss": 0.1357, + "loss": 0.0523, "step": 9159 }, { "epoch": 2.55, "learning_rate": 3.0039892383337976e-06, - "loss": 0.1422, + "loss": 0.0208, "step": 9160 }, { "epoch": 2.55, "learning_rate": 3.0021337786436594e-06, - "loss": 0.0313, + "loss": 0.0528, "step": 9161 }, { "epoch": 2.55, "learning_rate": 3.0002783189535213e-06, - "loss": 0.4174, + "loss": 0.0121, "step": 9162 }, { "epoch": 2.55, "learning_rate": 2.9984228592633828e-06, - "loss": 0.192, + "loss": 0.1026, "step": 9163 }, { "epoch": 2.55, "learning_rate": 2.9965673995732446e-06, - "loss": 0.0848, + "loss": 0.0687, "step": 9164 }, { "epoch": 2.55, "learning_rate": 2.9947119398831065e-06, - "loss": 0.1956, + "loss": 0.017, "step": 9165 }, { "epoch": 2.55, "learning_rate": 2.9928564801929684e-06, - "loss": 0.2986, + "loss": 0.1171, "step": 9166 }, { "epoch": 2.55, "learning_rate": 2.9910010205028302e-06, - "loss": 0.2461, + "loss": 0.0381, "step": 9167 }, { "epoch": 2.55, "learning_rate": 2.9891455608126917e-06, - "loss": 0.139, + "loss": 0.0207, "step": 9168 }, { "epoch": 2.55, "learning_rate": 2.9872901011225536e-06, - "loss": 0.1358, + "loss": 0.2333, "step": 9169 }, { "epoch": 2.55, "learning_rate": 2.9854346414324154e-06, - "loss": 0.2493, + "loss": 0.0231, "step": 9170 }, { "epoch": 2.55, "learning_rate": 2.9835791817422773e-06, - "loss": 0.245, + "loss": 0.0136, "step": 9171 }, { "epoch": 2.55, "learning_rate": 2.9817237220521387e-06, - "loss": 0.0322, + "loss": 0.1316, "step": 9172 }, { "epoch": 2.55, "learning_rate": 2.9798682623620006e-06, - "loss": 0.246, + "loss": 0.1129, "step": 9173 }, { "epoch": 2.55, "learning_rate": 2.9780128026718625e-06, - "loss": 0.0326, + "loss": 0.0522, "step": 9174 }, { "epoch": 2.55, "learning_rate": 2.9761573429817244e-06, - "loss": 0.1396, + "loss": 0.0412, "step": 9175 }, { "epoch": 2.55, "learning_rate": 2.974301883291586e-06, - "loss": 0.088, + "loss": 0.0602, "step": 9176 }, { "epoch": 2.55, "learning_rate": 2.9724464236014477e-06, - "loss": 0.1891, + "loss": 0.0687, "step": 9177 }, { "epoch": 2.55, "learning_rate": 2.9705909639113095e-06, - "loss": 0.1949, + "loss": 0.0382, "step": 9178 }, { "epoch": 2.55, "learning_rate": 2.9687355042211714e-06, - "loss": 0.0338, + "loss": 0.0327, "step": 9179 }, { "epoch": 2.55, "learning_rate": 2.966880044531033e-06, - "loss": 0.1935, + "loss": 0.127, "step": 9180 }, { "epoch": 2.56, "learning_rate": 2.9650245848408947e-06, - "loss": 0.1891, + "loss": 0.0461, "step": 9181 }, { "epoch": 2.56, "learning_rate": 2.9631691251507566e-06, - "loss": 0.3034, + "loss": 0.0861, "step": 9182 }, { "epoch": 2.56, "learning_rate": 2.9613136654606185e-06, - "loss": 0.0861, + "loss": 0.0164, "step": 9183 }, { "epoch": 2.56, "learning_rate": 2.9594582057704803e-06, - "loss": 0.1351, + "loss": 0.0147, "step": 9184 }, { "epoch": 2.56, "learning_rate": 2.9576027460803414e-06, - "loss": 0.1378, + "loss": 0.015, "step": 9185 }, { "epoch": 2.56, "learning_rate": 2.9557472863902032e-06, - "loss": 0.0346, + "loss": 0.0164, "step": 9186 }, { "epoch": 2.56, "learning_rate": 2.953891826700065e-06, - "loss": 0.1393, + "loss": 0.1492, "step": 9187 }, { "epoch": 2.56, "learning_rate": 2.9520363670099266e-06, - "loss": 0.1374, + "loss": 0.1612, "step": 9188 }, { "epoch": 2.56, "learning_rate": 2.9501809073197884e-06, - "loss": 0.0877, + "loss": 0.1191, "step": 9189 }, { "epoch": 2.56, "learning_rate": 2.9483254476296503e-06, - "loss": 0.0894, + "loss": 0.0468, "step": 9190 }, { "epoch": 2.56, "learning_rate": 2.946469987939512e-06, - "loss": 0.1429, + "loss": 0.0442, "step": 9191 }, { "epoch": 2.56, "learning_rate": 2.9446145282493736e-06, - "loss": 0.0844, + "loss": 0.0527, "step": 9192 }, { "epoch": 2.56, "learning_rate": 2.9427590685592355e-06, - "loss": 0.0883, + "loss": 0.1261, "step": 9193 }, { "epoch": 2.56, "learning_rate": 2.9409036088690974e-06, - "loss": 0.034, + "loss": 0.0198, "step": 9194 }, { "epoch": 2.56, "learning_rate": 2.9390481491789592e-06, - "loss": 0.0847, + "loss": 0.0191, "step": 9195 }, { "epoch": 2.56, "learning_rate": 2.9371926894888207e-06, - "loss": 0.084, + "loss": 0.1045, "step": 9196 }, { "epoch": 2.56, "learning_rate": 2.9353372297986825e-06, - "loss": 0.0333, + "loss": 0.0428, "step": 9197 }, { "epoch": 2.56, "learning_rate": 2.9334817701085444e-06, - "loss": 0.0882, + "loss": 0.0155, "step": 9198 }, { "epoch": 2.56, "learning_rate": 2.9316263104184063e-06, - "loss": 0.0881, + "loss": 0.0157, "step": 9199 }, { "epoch": 2.56, "learning_rate": 2.929770850728268e-06, - "loss": 0.0833, + "loss": 0.1016, "step": 9200 }, { "epoch": 2.56, "learning_rate": 2.9279153910381296e-06, - "loss": 0.1415, + "loss": 0.0492, "step": 9201 }, { "epoch": 2.56, "learning_rate": 2.9260599313479915e-06, - "loss": 0.1406, + "loss": 0.0198, "step": 9202 }, { "epoch": 2.56, "learning_rate": 2.9242044716578533e-06, - "loss": 0.0845, + "loss": 0.0894, "step": 9203 }, { "epoch": 2.56, "learning_rate": 2.922349011967715e-06, - "loss": 0.1968, + "loss": 0.1425, "step": 9204 }, { "epoch": 2.56, "learning_rate": 2.9204935522775767e-06, - "loss": 0.0314, + "loss": 0.0584, "step": 9205 }, { "epoch": 2.56, "learning_rate": 2.9186380925874385e-06, - "loss": 0.1926, + "loss": 0.0511, "step": 9206 }, { "epoch": 2.56, "learning_rate": 2.9167826328973004e-06, - "loss": 0.2497, + "loss": 0.022, "step": 9207 }, { "epoch": 2.56, "learning_rate": 2.9149271732071623e-06, - "loss": 0.0307, + "loss": 0.0448, "step": 9208 }, { "epoch": 2.56, "learning_rate": 2.9130717135170237e-06, - "loss": 0.1958, + "loss": 0.018, "step": 9209 }, { "epoch": 2.56, "learning_rate": 2.9112162538268856e-06, - "loss": 0.2002, + "loss": 0.0294, "step": 9210 }, { "epoch": 2.56, "learning_rate": 2.9093607941367475e-06, - "loss": 0.1362, + "loss": 0.0949, "step": 9211 }, { "epoch": 2.56, "learning_rate": 2.9075053344466093e-06, - "loss": 0.2433, + "loss": 0.0355, "step": 9212 }, { "epoch": 2.56, "learning_rate": 2.9056498747564708e-06, - "loss": 0.0856, + "loss": 0.0547, "step": 9213 }, { "epoch": 2.56, "learning_rate": 2.9037944150663326e-06, - "loss": 0.0851, + "loss": 0.062, "step": 9214 }, { "epoch": 2.56, "learning_rate": 2.9019389553761945e-06, - "loss": 0.1955, + "loss": 0.0943, "step": 9215 }, { "epoch": 2.56, "learning_rate": 2.9000834956860564e-06, - "loss": 0.0821, + "loss": 0.0605, "step": 9216 }, { "epoch": 2.57, "learning_rate": 2.8982280359959183e-06, - "loss": 0.0876, + "loss": 0.0184, "step": 9217 }, { "epoch": 2.57, "learning_rate": 2.8963725763057797e-06, - "loss": 0.138, + "loss": 0.0146, "step": 9218 }, { "epoch": 2.57, "learning_rate": 2.8945171166156416e-06, - "loss": 0.0295, + "loss": 0.094, "step": 9219 }, { "epoch": 2.57, "learning_rate": 2.8926616569255034e-06, - "loss": 0.0855, + "loss": 0.0157, "step": 9220 }, { "epoch": 2.57, "learning_rate": 2.8908061972353653e-06, - "loss": 0.0854, + "loss": 0.015, "step": 9221 }, { "epoch": 2.57, "learning_rate": 2.8889507375452268e-06, - "loss": 0.1414, + "loss": 0.0123, "step": 9222 }, { "epoch": 2.57, "learning_rate": 2.8870952778550886e-06, - "loss": 0.1373, + "loss": 0.0884, "step": 9223 }, { "epoch": 2.57, "learning_rate": 2.8852398181649505e-06, - "loss": 0.1388, + "loss": 0.0465, "step": 9224 }, { "epoch": 2.57, "learning_rate": 2.8833843584748124e-06, - "loss": 0.1387, + "loss": 0.0946, "step": 9225 }, { "epoch": 2.57, "learning_rate": 2.881528898784674e-06, - "loss": 0.1375, + "loss": 0.0075, "step": 9226 }, { "epoch": 2.57, "learning_rate": 2.8796734390945357e-06, - "loss": 0.14, + "loss": 0.2284, "step": 9227 }, { "epoch": 2.57, "learning_rate": 2.8778179794043976e-06, - "loss": 0.1393, + "loss": 0.0973, "step": 9228 }, { "epoch": 2.57, "learning_rate": 2.8759625197142594e-06, - "loss": 0.0308, + "loss": 0.01, "step": 9229 }, { "epoch": 2.57, "learning_rate": 2.8741070600241213e-06, - "loss": 0.1902, + "loss": 0.1158, "step": 9230 }, { "epoch": 2.57, "learning_rate": 2.8722516003339827e-06, - "loss": 0.1934, + "loss": 0.0703, "step": 9231 }, { "epoch": 2.57, "learning_rate": 2.8703961406438446e-06, - "loss": 0.1989, + "loss": 0.0187, "step": 9232 }, { "epoch": 2.57, "learning_rate": 2.8685406809537065e-06, - "loss": 0.086, + "loss": 0.0201, "step": 9233 }, { "epoch": 2.57, "learning_rate": 2.8666852212635684e-06, - "loss": 0.1363, + "loss": 0.0773, "step": 9234 }, { "epoch": 2.57, "learning_rate": 2.86482976157343e-06, - "loss": 0.0833, + "loss": 0.0395, "step": 9235 }, { "epoch": 2.57, "learning_rate": 2.8629743018832917e-06, - "loss": 0.1425, + "loss": 0.0647, "step": 9236 }, { "epoch": 2.57, "learning_rate": 2.8611188421931535e-06, - "loss": 0.0298, + "loss": 0.0121, "step": 9237 }, { "epoch": 2.57, "learning_rate": 2.8592633825030154e-06, - "loss": 0.1407, + "loss": 0.0085, "step": 9238 }, { "epoch": 2.57, "learning_rate": 2.857407922812877e-06, - "loss": 0.0294, + "loss": 0.0589, "step": 9239 }, { "epoch": 2.57, "learning_rate": 2.8555524631227387e-06, - "loss": 0.0833, + "loss": 0.0824, "step": 9240 }, { "epoch": 2.57, "learning_rate": 2.8536970034326006e-06, - "loss": 0.084, + "loss": 0.0711, "step": 9241 }, { "epoch": 2.57, "learning_rate": 2.8518415437424625e-06, - "loss": 0.0847, + "loss": 0.0603, "step": 9242 }, { "epoch": 2.57, "learning_rate": 2.849986084052324e-06, - "loss": 0.0288, + "loss": 0.0882, "step": 9243 }, { "epoch": 2.57, "learning_rate": 2.848130624362186e-06, - "loss": 0.2532, + "loss": 0.2251, "step": 9244 }, { "epoch": 2.57, "learning_rate": 2.8462751646720477e-06, - "loss": 0.085, + "loss": 0.0162, "step": 9245 }, { "epoch": 2.57, "learning_rate": 2.8444197049819095e-06, - "loss": 0.1329, + "loss": 0.0486, "step": 9246 }, { "epoch": 2.57, "learning_rate": 2.8425642452917714e-06, - "loss": 0.2515, + "loss": 0.0585, "step": 9247 }, { "epoch": 2.57, "learning_rate": 2.840708785601633e-06, - "loss": 0.2571, + "loss": 0.015, "step": 9248 }, { "epoch": 2.57, "learning_rate": 2.8388533259114947e-06, - "loss": 0.1396, + "loss": 0.0354, "step": 9249 }, { "epoch": 2.57, "learning_rate": 2.8369978662213566e-06, - "loss": 0.3622, + "loss": 0.0802, "step": 9250 }, { "epoch": 2.57, "learning_rate": 2.8351424065312185e-06, - "loss": 0.0852, + "loss": 0.0122, "step": 9251 }, { "epoch": 2.58, "learning_rate": 2.83328694684108e-06, - "loss": 0.0286, + "loss": 0.0583, "step": 9252 }, { "epoch": 2.58, "learning_rate": 2.8314314871509418e-06, - "loss": 0.1981, + "loss": 0.0643, "step": 9253 }, { "epoch": 2.58, "learning_rate": 2.8295760274608037e-06, - "loss": 0.2473, + "loss": 0.0535, "step": 9254 }, { "epoch": 2.58, "learning_rate": 2.8277205677706655e-06, - "loss": 0.0298, + "loss": 0.0629, "step": 9255 }, { "epoch": 2.58, "learning_rate": 2.825865108080527e-06, - "loss": 0.1397, + "loss": 0.0273, "step": 9256 }, { "epoch": 2.58, "learning_rate": 2.824009648390389e-06, - "loss": 0.0829, + "loss": 0.0839, "step": 9257 }, { "epoch": 2.58, "learning_rate": 2.8221541887002507e-06, - "loss": 0.1389, + "loss": 0.0491, "step": 9258 }, { "epoch": 2.58, "learning_rate": 2.8202987290101126e-06, - "loss": 0.0831, + "loss": 0.0521, "step": 9259 }, { "epoch": 2.58, "learning_rate": 2.8184432693199745e-06, - "loss": 0.1385, + "loss": 0.1103, "step": 9260 }, { "epoch": 2.58, "learning_rate": 2.816587809629836e-06, - "loss": 0.1432, + "loss": 0.1207, "step": 9261 }, { "epoch": 2.58, "learning_rate": 2.8147323499396978e-06, - "loss": 0.0294, + "loss": 0.0115, "step": 9262 }, { "epoch": 2.58, "learning_rate": 2.8128768902495596e-06, - "loss": 0.1389, + "loss": 0.1834, "step": 9263 }, { "epoch": 2.58, "learning_rate": 2.8110214305594215e-06, - "loss": 0.251, + "loss": 0.0569, "step": 9264 }, { "epoch": 2.58, "learning_rate": 2.809165970869283e-06, - "loss": 0.086, + "loss": 0.0136, "step": 9265 }, { "epoch": 2.58, "learning_rate": 2.807310511179145e-06, - "loss": 0.1905, + "loss": 0.0814, "step": 9266 }, { "epoch": 2.58, "learning_rate": 2.8054550514890067e-06, - "loss": 0.1964, + "loss": 0.0171, "step": 9267 }, { "epoch": 2.58, "learning_rate": 2.8035995917988686e-06, - "loss": 0.1385, + "loss": 0.0476, "step": 9268 }, { "epoch": 2.58, "learning_rate": 2.80174413210873e-06, - "loss": 0.0836, + "loss": 0.0192, "step": 9269 }, { "epoch": 2.58, "learning_rate": 2.799888672418592e-06, - "loss": 0.2523, + "loss": 0.0122, "step": 9270 }, { "epoch": 2.58, "learning_rate": 2.7980332127284538e-06, - "loss": 0.0815, + "loss": 0.1772, "step": 9271 }, { "epoch": 2.58, "learning_rate": 2.7961777530383156e-06, - "loss": 0.1933, + "loss": 0.0396, "step": 9272 }, { "epoch": 2.58, "learning_rate": 2.794322293348177e-06, - "loss": 0.1376, + "loss": 0.0877, "step": 9273 }, { "epoch": 2.58, "learning_rate": 2.792466833658039e-06, - "loss": 0.0862, + "loss": 0.0144, "step": 9274 }, { "epoch": 2.58, "learning_rate": 2.790611373967901e-06, - "loss": 0.3041, + "loss": 0.0369, "step": 9275 }, { "epoch": 2.58, "learning_rate": 2.7887559142777627e-06, - "loss": 0.0836, + "loss": 0.0972, "step": 9276 }, { "epoch": 2.58, "learning_rate": 2.7869004545876246e-06, - "loss": 0.1375, + "loss": 0.0612, "step": 9277 }, { "epoch": 2.58, "learning_rate": 2.785044994897486e-06, - "loss": 0.1974, + "loss": 0.0466, "step": 9278 }, { "epoch": 2.58, "learning_rate": 2.783189535207348e-06, - "loss": 0.0301, + "loss": 0.0512, "step": 9279 }, { "epoch": 2.58, "learning_rate": 2.7813340755172097e-06, - "loss": 0.197, + "loss": 0.0755, "step": 9280 }, { "epoch": 2.58, "learning_rate": 2.7794786158270716e-06, - "loss": 0.1944, + "loss": 0.0982, "step": 9281 }, { "epoch": 2.58, "learning_rate": 2.777623156136933e-06, - "loss": 0.1932, + "loss": 0.0702, "step": 9282 }, { "epoch": 2.58, "learning_rate": 2.775767696446795e-06, - "loss": 0.1388, + "loss": 0.0686, "step": 9283 }, { "epoch": 2.58, "learning_rate": 2.773912236756657e-06, - "loss": 0.0301, + "loss": 0.0455, "step": 9284 }, { "epoch": 2.58, "learning_rate": 2.7720567770665187e-06, - "loss": 0.1378, + "loss": 0.0419, "step": 9285 }, { "epoch": 2.58, "learning_rate": 2.77020131737638e-06, - "loss": 0.0301, + "loss": 0.0921, "step": 9286 }, { "epoch": 2.58, "learning_rate": 2.768345857686242e-06, - "loss": 0.2518, + "loss": 0.0289, "step": 9287 }, { "epoch": 2.59, "learning_rate": 2.766490397996104e-06, - "loss": 0.1985, + "loss": 0.0823, "step": 9288 }, { "epoch": 2.59, "learning_rate": 2.7646349383059657e-06, - "loss": 0.1381, + "loss": 0.102, "step": 9289 }, { "epoch": 2.59, "learning_rate": 2.7627794786158276e-06, - "loss": 0.3005, + "loss": 0.0628, "step": 9290 }, { "epoch": 2.59, "learning_rate": 2.760924018925689e-06, - "loss": 0.0314, + "loss": 0.0125, "step": 9291 }, { "epoch": 2.59, "learning_rate": 2.759068559235551e-06, - "loss": 0.1382, + "loss": 0.0201, "step": 9292 }, { "epoch": 2.59, "learning_rate": 2.757213099545413e-06, - "loss": 0.141, + "loss": 0.0982, "step": 9293 }, { "epoch": 2.59, "learning_rate": 2.7553576398552747e-06, - "loss": 0.251, + "loss": 0.114, "step": 9294 }, { "epoch": 2.59, "learning_rate": 2.753502180165136e-06, - "loss": 0.1354, + "loss": 0.122, "step": 9295 }, { "epoch": 2.59, "learning_rate": 2.751646720474998e-06, - "loss": 0.0317, + "loss": 0.0595, "step": 9296 }, { "epoch": 2.59, "learning_rate": 2.74979126078486e-06, - "loss": 0.0312, + "loss": 0.1707, "step": 9297 }, { "epoch": 2.59, "learning_rate": 2.7479358010947217e-06, - "loss": 0.2434, + "loss": 0.0651, "step": 9298 }, { "epoch": 2.59, "learning_rate": 2.746080341404583e-06, - "loss": 0.0847, + "loss": 0.0157, "step": 9299 }, { "epoch": 2.59, "learning_rate": 2.744224881714445e-06, - "loss": 0.3557, + "loss": 0.0832, "step": 9300 }, { "epoch": 2.59, "learning_rate": 2.742369422024307e-06, - "loss": 0.2468, + "loss": 0.0203, "step": 9301 }, { "epoch": 2.59, "learning_rate": 2.7405139623341688e-06, - "loss": 0.1948, + "loss": 0.1308, "step": 9302 }, { "epoch": 2.59, "learning_rate": 2.7386585026440302e-06, - "loss": 0.0866, + "loss": 0.0553, "step": 9303 }, { "epoch": 2.59, "learning_rate": 2.736803042953892e-06, - "loss": 0.0312, + "loss": 0.1168, "step": 9304 }, { "epoch": 2.59, "learning_rate": 2.734947583263754e-06, - "loss": 0.1382, + "loss": 0.0601, "step": 9305 }, { "epoch": 2.59, "learning_rate": 2.733092123573616e-06, - "loss": 0.0865, + "loss": 0.0174, "step": 9306 }, { "epoch": 2.59, "learning_rate": 2.7312366638834777e-06, - "loss": 0.1909, + "loss": 0.0904, "step": 9307 }, { "epoch": 2.59, "learning_rate": 2.729381204193339e-06, - "loss": 0.0859, + "loss": 0.0164, "step": 9308 }, { "epoch": 2.59, "learning_rate": 2.727525744503201e-06, - "loss": 0.2483, + "loss": 0.1479, "step": 9309 }, { "epoch": 2.59, "learning_rate": 2.725670284813063e-06, - "loss": 0.0326, + "loss": 0.0148, "step": 9310 }, { "epoch": 2.59, "learning_rate": 2.7238148251229248e-06, - "loss": 0.1407, + "loss": 0.0175, "step": 9311 }, { "epoch": 2.59, "learning_rate": 2.7219593654327862e-06, - "loss": 0.0853, + "loss": 0.0807, "step": 9312 }, { "epoch": 2.59, "learning_rate": 2.720103905742648e-06, - "loss": 0.0872, + "loss": 0.0471, "step": 9313 }, { "epoch": 2.59, "learning_rate": 2.71824844605251e-06, - "loss": 0.245, + "loss": 0.0252, "step": 9314 }, { "epoch": 2.59, "learning_rate": 2.716392986362372e-06, - "loss": 0.0837, + "loss": 0.0581, "step": 9315 }, { "epoch": 2.59, "learning_rate": 2.7145375266722333e-06, - "loss": 0.084, + "loss": 0.0103, "step": 9316 }, { "epoch": 2.59, "learning_rate": 2.712682066982095e-06, - "loss": 0.1406, + "loss": 0.0904, "step": 9317 }, { "epoch": 2.59, "learning_rate": 2.710826607291957e-06, - "loss": 0.2484, + "loss": 0.0707, "step": 9318 }, { "epoch": 2.59, "learning_rate": 2.708971147601819e-06, - "loss": 0.0878, + "loss": 0.0686, "step": 9319 }, { "epoch": 2.59, "learning_rate": 2.7071156879116808e-06, - "loss": 0.2478, + "loss": 0.0653, "step": 9320 }, { "epoch": 2.59, "learning_rate": 2.705260228221542e-06, - "loss": 0.1398, + "loss": 0.1608, "step": 9321 }, { "epoch": 2.59, "learning_rate": 2.703404768531404e-06, - "loss": 0.2971, + "loss": 0.0191, "step": 9322 }, { "epoch": 2.59, "learning_rate": 2.701549308841266e-06, - "loss": 0.0845, + "loss": 0.0543, "step": 9323 }, { "epoch": 2.6, "learning_rate": 2.699693849151128e-06, - "loss": 0.1409, + "loss": 0.0576, "step": 9324 }, { "epoch": 2.6, "learning_rate": 2.6978383894609893e-06, - "loss": 0.1406, + "loss": 0.0577, "step": 9325 }, { "epoch": 2.6, "learning_rate": 2.695982929770851e-06, - "loss": 0.0861, + "loss": 0.0943, "step": 9326 }, { "epoch": 2.6, "learning_rate": 2.694127470080713e-06, - "loss": 0.0844, + "loss": 0.02, "step": 9327 }, { "epoch": 2.6, "learning_rate": 2.692272010390575e-06, - "loss": 0.1383, + "loss": 0.0143, "step": 9328 }, { "epoch": 2.6, "learning_rate": 2.6904165507004363e-06, - "loss": 0.14, + "loss": 0.0198, "step": 9329 }, { "epoch": 2.6, "learning_rate": 2.688561091010298e-06, - "loss": 0.1396, + "loss": 0.0112, "step": 9330 }, { "epoch": 2.6, "learning_rate": 2.68670563132016e-06, - "loss": 0.1377, + "loss": 0.2465, "step": 9331 }, { "epoch": 2.6, "learning_rate": 2.684850171630022e-06, - "loss": 0.1412, + "loss": 0.271, "step": 9332 }, { "epoch": 2.6, "learning_rate": 2.6829947119398834e-06, - "loss": 0.1369, + "loss": 0.0424, "step": 9333 }, { "epoch": 2.6, "learning_rate": 2.6811392522497453e-06, - "loss": 0.197, + "loss": 0.0746, "step": 9334 }, { "epoch": 2.6, "learning_rate": 2.679283792559607e-06, - "loss": 0.032, + "loss": 0.085, "step": 9335 }, { "epoch": 2.6, "learning_rate": 2.677428332869469e-06, - "loss": 0.2461, + "loss": 0.0449, "step": 9336 }, { "epoch": 2.6, "learning_rate": 2.67557287317933e-06, - "loss": 0.1379, + "loss": 0.0483, "step": 9337 }, { "epoch": 2.6, "learning_rate": 2.673717413489192e-06, - "loss": 0.1352, + "loss": 0.1389, "step": 9338 }, { "epoch": 2.6, "learning_rate": 2.6718619537990538e-06, - "loss": 0.0851, + "loss": 0.0441, "step": 9339 }, { "epoch": 2.6, "learning_rate": 2.6700064941089156e-06, - "loss": 0.2964, + "loss": 0.0872, "step": 9340 }, { "epoch": 2.6, "learning_rate": 2.668151034418777e-06, - "loss": 0.1387, + "loss": 0.0433, "step": 9341 }, { "epoch": 2.6, "learning_rate": 2.666295574728639e-06, - "loss": 0.1378, + "loss": 0.0222, "step": 9342 }, { "epoch": 2.6, "learning_rate": 2.664440115038501e-06, - "loss": 0.1375, + "loss": 0.1809, "step": 9343 }, { "epoch": 2.6, "learning_rate": 2.6625846553483627e-06, - "loss": 0.1437, + "loss": 0.0983, "step": 9344 }, { "epoch": 2.6, "learning_rate": 2.660729195658224e-06, - "loss": 0.0882, + "loss": 0.0252, "step": 9345 }, { "epoch": 2.6, "learning_rate": 2.658873735968086e-06, - "loss": 0.2982, + "loss": 0.0602, "step": 9346 }, { "epoch": 2.6, "learning_rate": 2.657018276277948e-06, - "loss": 0.087, + "loss": 0.0718, "step": 9347 }, { "epoch": 2.6, "learning_rate": 2.6551628165878097e-06, - "loss": 0.1368, + "loss": 0.1584, "step": 9348 }, { "epoch": 2.6, "learning_rate": 2.653307356897671e-06, - "loss": 0.1433, + "loss": 0.1003, "step": 9349 }, { "epoch": 2.6, "learning_rate": 2.651451897207533e-06, - "loss": 0.0887, + "loss": 0.0786, "step": 9350 }, { "epoch": 2.6, "learning_rate": 2.649596437517395e-06, - "loss": 0.3014, + "loss": 0.1346, "step": 9351 }, { "epoch": 2.6, "learning_rate": 2.647740977827257e-06, - "loss": 0.0858, + "loss": 0.2276, "step": 9352 }, { "epoch": 2.6, "learning_rate": 2.6458855181371187e-06, - "loss": 0.1973, + "loss": 0.0627, "step": 9353 }, { "epoch": 2.6, "learning_rate": 2.64403005844698e-06, - "loss": 0.2408, + "loss": 0.0219, "step": 9354 }, { "epoch": 2.6, "learning_rate": 2.642174598756842e-06, - "loss": 0.1407, + "loss": 0.102, "step": 9355 }, { "epoch": 2.6, "learning_rate": 2.640319139066704e-06, - "loss": 0.1398, + "loss": 0.027, "step": 9356 }, { "epoch": 2.6, "learning_rate": 2.6384636793765657e-06, - "loss": 0.0335, + "loss": 0.1838, "step": 9357 }, { "epoch": 2.6, "learning_rate": 2.636608219686427e-06, - "loss": 0.238, + "loss": 0.029, "step": 9358 }, { "epoch": 2.6, "learning_rate": 2.634752759996289e-06, - "loss": 0.1919, + "loss": 0.02, "step": 9359 }, { "epoch": 2.61, "learning_rate": 2.632897300306151e-06, - "loss": 0.0326, + "loss": 0.0804, "step": 9360 }, { "epoch": 2.61, "learning_rate": 2.631041840616013e-06, - "loss": 0.2413, + "loss": 0.0176, "step": 9361 }, { "epoch": 2.61, "learning_rate": 2.6291863809258742e-06, - "loss": 0.1418, + "loss": 0.0671, "step": 9362 }, { "epoch": 2.61, "learning_rate": 2.627330921235736e-06, - "loss": 0.1911, + "loss": 0.0571, "step": 9363 }, { "epoch": 2.61, "learning_rate": 2.625475461545598e-06, - "loss": 0.1377, + "loss": 0.0092, "step": 9364 }, { "epoch": 2.61, "learning_rate": 2.62362000185546e-06, - "loss": 0.1436, + "loss": 0.1102, "step": 9365 }, { "epoch": 2.61, "learning_rate": 2.6217645421653213e-06, - "loss": 0.3509, + "loss": 0.0686, "step": 9366 }, { "epoch": 2.61, "learning_rate": 2.619909082475183e-06, - "loss": 0.1417, + "loss": 0.0336, "step": 9367 }, { "epoch": 2.61, "learning_rate": 2.618053622785045e-06, - "loss": 0.1395, + "loss": 0.0859, "step": 9368 }, { "epoch": 2.61, "learning_rate": 2.616198163094907e-06, - "loss": 0.1909, + "loss": 0.1805, "step": 9369 }, { "epoch": 2.61, "learning_rate": 2.6143427034047688e-06, - "loss": 0.198, + "loss": 0.0411, "step": 9370 }, { "epoch": 2.61, "learning_rate": 2.6124872437146302e-06, - "loss": 0.1396, + "loss": 0.0965, "step": 9371 }, { "epoch": 2.61, "learning_rate": 2.610631784024492e-06, - "loss": 0.1373, + "loss": 0.0866, "step": 9372 }, { "epoch": 2.61, "learning_rate": 2.608776324334354e-06, - "loss": 0.1918, + "loss": 0.0601, "step": 9373 }, { "epoch": 2.61, "learning_rate": 2.606920864644216e-06, - "loss": 0.0887, + "loss": 0.103, "step": 9374 }, { "epoch": 2.61, "learning_rate": 2.6050654049540773e-06, - "loss": 0.0875, + "loss": 0.1098, "step": 9375 }, { "epoch": 2.61, "learning_rate": 2.603209945263939e-06, - "loss": 0.1926, + "loss": 0.0603, "step": 9376 }, { "epoch": 2.61, "learning_rate": 2.601354485573801e-06, - "loss": 0.0347, + "loss": 0.1624, "step": 9377 }, { "epoch": 2.61, "learning_rate": 2.599499025883663e-06, - "loss": 0.1921, + "loss": 0.0632, "step": 9378 }, { "epoch": 2.61, "learning_rate": 2.5976435661935243e-06, - "loss": 0.2999, + "loss": 0.022, "step": 9379 }, { "epoch": 2.61, "learning_rate": 2.5957881065033862e-06, - "loss": 0.0849, + "loss": 0.0848, "step": 9380 }, { "epoch": 2.61, "learning_rate": 2.593932646813248e-06, - "loss": 0.1388, + "loss": 0.0186, "step": 9381 }, { "epoch": 2.61, "learning_rate": 2.59207718712311e-06, - "loss": 0.1921, + "loss": 0.059, "step": 9382 }, { "epoch": 2.61, "learning_rate": 2.590221727432972e-06, - "loss": 0.1421, + "loss": 0.113, "step": 9383 }, { "epoch": 2.61, "learning_rate": 2.5883662677428333e-06, - "loss": 0.0344, + "loss": 0.0883, "step": 9384 }, { "epoch": 2.61, "learning_rate": 2.586510808052695e-06, - "loss": 0.1403, + "loss": 0.204, "step": 9385 }, { "epoch": 2.61, "learning_rate": 2.584655348362557e-06, - "loss": 0.1952, + "loss": 0.096, "step": 9386 }, { "epoch": 2.61, "learning_rate": 2.582799888672419e-06, - "loss": 0.2403, + "loss": 0.0906, "step": 9387 }, { "epoch": 2.61, "learning_rate": 2.5809444289822803e-06, - "loss": 0.0884, + "loss": 0.0219, "step": 9388 }, { "epoch": 2.61, "learning_rate": 2.579088969292142e-06, - "loss": 0.1398, + "loss": 0.0987, "step": 9389 }, { "epoch": 2.61, "learning_rate": 2.577233509602004e-06, - "loss": 0.1373, + "loss": 0.1263, "step": 9390 }, { "epoch": 2.61, "learning_rate": 2.575378049911866e-06, - "loss": 0.141, + "loss": 0.1234, "step": 9391 }, { "epoch": 2.61, "learning_rate": 2.5735225902217274e-06, - "loss": 0.0846, + "loss": 0.0552, "step": 9392 }, { "epoch": 2.61, "learning_rate": 2.5716671305315893e-06, - "loss": 0.0873, + "loss": 0.0159, "step": 9393 }, { "epoch": 2.61, "learning_rate": 2.569811670841451e-06, - "loss": 0.0343, + "loss": 0.0219, "step": 9394 }, { "epoch": 2.61, "learning_rate": 2.567956211151313e-06, - "loss": 0.0847, + "loss": 0.1053, "step": 9395 }, { "epoch": 2.62, "learning_rate": 2.5661007514611745e-06, - "loss": 0.0341, + "loss": 0.0213, "step": 9396 }, { "epoch": 2.62, "learning_rate": 2.5642452917710363e-06, - "loss": 0.1372, + "loss": 0.1149, "step": 9397 }, { "epoch": 2.62, "learning_rate": 2.562389832080898e-06, - "loss": 0.2961, + "loss": 0.0357, "step": 9398 }, { "epoch": 2.62, "learning_rate": 2.56053437239076e-06, - "loss": 0.0347, + "loss": 0.0907, "step": 9399 }, { "epoch": 2.62, "learning_rate": 2.558678912700622e-06, - "loss": 0.0877, + "loss": 0.0096, "step": 9400 }, { "epoch": 2.62, "learning_rate": 2.5568234530104834e-06, - "loss": 0.1413, + "loss": 0.1576, "step": 9401 }, { "epoch": 2.62, "learning_rate": 2.5549679933203453e-06, - "loss": 0.1413, + "loss": 0.0225, "step": 9402 }, { "epoch": 2.62, "learning_rate": 2.553112533630207e-06, - "loss": 0.0338, + "loss": 0.0689, "step": 9403 }, { "epoch": 2.62, "learning_rate": 2.551257073940069e-06, - "loss": 0.0874, + "loss": 0.0787, "step": 9404 }, { "epoch": 2.62, "learning_rate": 2.5494016142499304e-06, - "loss": 0.199, + "loss": 0.0471, "step": 9405 }, { "epoch": 2.62, "learning_rate": 2.5475461545597923e-06, - "loss": 0.1403, + "loss": 0.0137, "step": 9406 }, { "epoch": 2.62, "learning_rate": 2.545690694869654e-06, - "loss": 0.137, + "loss": 0.084, "step": 9407 }, { "epoch": 2.62, "learning_rate": 2.543835235179516e-06, - "loss": 0.1881, + "loss": 0.2517, "step": 9408 }, { "epoch": 2.62, "learning_rate": 2.5419797754893775e-06, - "loss": 0.2465, + "loss": 0.076, "step": 9409 }, { "epoch": 2.62, "learning_rate": 2.5401243157992394e-06, - "loss": 0.0885, + "loss": 0.0118, "step": 9410 }, { "epoch": 2.62, "learning_rate": 2.5382688561091012e-06, - "loss": 0.0338, + "loss": 0.0336, "step": 9411 }, { "epoch": 2.62, "learning_rate": 2.536413396418963e-06, - "loss": 0.088, + "loss": 0.1218, "step": 9412 }, { "epoch": 2.62, "learning_rate": 2.534557936728825e-06, - "loss": 0.1887, + "loss": 0.0688, "step": 9413 }, { "epoch": 2.62, "learning_rate": 2.5327024770386864e-06, - "loss": 0.0873, + "loss": 0.0362, "step": 9414 }, { "epoch": 2.62, "learning_rate": 2.5308470173485483e-06, - "loss": 0.1391, + "loss": 0.053, "step": 9415 }, { "epoch": 2.62, "learning_rate": 2.52899155765841e-06, - "loss": 0.0852, + "loss": 0.0679, "step": 9416 }, { "epoch": 2.62, "learning_rate": 2.527136097968272e-06, - "loss": 0.1382, + "loss": 0.0126, "step": 9417 }, { "epoch": 2.62, "learning_rate": 2.5252806382781335e-06, - "loss": 0.1922, + "loss": 0.0242, "step": 9418 }, { "epoch": 2.62, "learning_rate": 2.5234251785879954e-06, - "loss": 0.0327, + "loss": 0.0111, "step": 9419 }, { "epoch": 2.62, "learning_rate": 2.5215697188978572e-06, - "loss": 0.1414, + "loss": 0.1102, "step": 9420 }, { "epoch": 2.62, "learning_rate": 2.519714259207719e-06, - "loss": 0.0318, + "loss": 0.0514, "step": 9421 }, { "epoch": 2.62, "learning_rate": 2.5178587995175805e-06, - "loss": 0.0891, + "loss": 0.0208, "step": 9422 }, { "epoch": 2.62, "learning_rate": 2.5160033398274424e-06, - "loss": 0.1345, + "loss": 0.0162, "step": 9423 }, { "epoch": 2.62, "learning_rate": 2.5141478801373043e-06, - "loss": 0.0314, + "loss": 0.0189, "step": 9424 }, { "epoch": 2.62, "learning_rate": 2.512292420447166e-06, - "loss": 0.1369, + "loss": 0.094, "step": 9425 }, { "epoch": 2.62, "learning_rate": 2.5104369607570276e-06, - "loss": 0.2391, + "loss": 0.2002, "step": 9426 }, { "epoch": 2.62, "learning_rate": 2.5085815010668895e-06, - "loss": 0.1427, + "loss": 0.0145, "step": 9427 }, { "epoch": 2.62, "learning_rate": 2.5067260413767513e-06, - "loss": 0.0849, + "loss": 0.0086, "step": 9428 }, { "epoch": 2.62, "learning_rate": 2.5048705816866132e-06, - "loss": 0.0311, + "loss": 0.0451, "step": 9429 }, { "epoch": 2.62, "learning_rate": 2.503015121996475e-06, - "loss": 0.3087, + "loss": 0.0586, "step": 9430 }, { "epoch": 2.62, "learning_rate": 2.5011596623063365e-06, - "loss": 0.0858, + "loss": 0.0593, "step": 9431 }, { "epoch": 2.63, "learning_rate": 2.4993042026161984e-06, - "loss": 0.0873, + "loss": 0.0259, "step": 9432 }, { "epoch": 2.63, "learning_rate": 2.4974487429260603e-06, - "loss": 0.0851, + "loss": 0.0586, "step": 9433 }, { "epoch": 2.63, "learning_rate": 2.495593283235922e-06, - "loss": 0.0863, + "loss": 0.0169, "step": 9434 }, { "epoch": 2.63, "learning_rate": 2.4937378235457836e-06, - "loss": 0.1969, + "loss": 0.0644, "step": 9435 }, { "epoch": 2.63, "learning_rate": 2.4918823638556455e-06, - "loss": 0.3034, + "loss": 0.0318, "step": 9436 }, { "epoch": 2.63, "learning_rate": 2.4900269041655073e-06, - "loss": 0.1933, + "loss": 0.0373, "step": 9437 }, { "epoch": 2.63, "learning_rate": 2.488171444475369e-06, - "loss": 0.0836, + "loss": 0.0292, "step": 9438 }, { "epoch": 2.63, "learning_rate": 2.4863159847852307e-06, - "loss": 0.1383, + "loss": 0.0688, "step": 9439 }, { "epoch": 2.63, "learning_rate": 2.4844605250950925e-06, - "loss": 0.0831, + "loss": 0.0207, "step": 9440 }, { "epoch": 2.63, "learning_rate": 2.4826050654049544e-06, - "loss": 0.1384, + "loss": 0.1256, "step": 9441 }, { "epoch": 2.63, "learning_rate": 2.4807496057148163e-06, - "loss": 0.0314, + "loss": 0.1065, "step": 9442 }, { "epoch": 2.63, "learning_rate": 2.478894146024678e-06, - "loss": 0.0304, + "loss": 0.0159, "step": 9443 }, { "epoch": 2.63, "learning_rate": 2.4770386863345396e-06, - "loss": 0.0851, + "loss": 0.058, "step": 9444 }, { "epoch": 2.63, "learning_rate": 2.4751832266444015e-06, - "loss": 0.139, + "loss": 0.0114, "step": 9445 }, { "epoch": 2.63, "learning_rate": 2.4733277669542633e-06, - "loss": 0.0822, + "loss": 0.0736, "step": 9446 }, { "epoch": 2.63, "learning_rate": 2.471472307264125e-06, - "loss": 0.137, + "loss": 0.0529, "step": 9447 }, { "epoch": 2.63, "learning_rate": 2.4696168475739866e-06, - "loss": 0.1953, + "loss": 0.0112, "step": 9448 }, { "epoch": 2.63, "learning_rate": 2.4677613878838485e-06, - "loss": 0.1444, + "loss": 0.0146, "step": 9449 }, { "epoch": 2.63, "learning_rate": 2.4659059281937104e-06, - "loss": 0.1366, + "loss": 0.1063, "step": 9450 }, { "epoch": 2.63, "learning_rate": 2.464050468503572e-06, - "loss": 0.0838, + "loss": 0.0878, "step": 9451 }, { "epoch": 2.63, "learning_rate": 2.4621950088134337e-06, - "loss": 0.0839, + "loss": 0.0629, "step": 9452 }, { "epoch": 2.63, "learning_rate": 2.4603395491232956e-06, - "loss": 0.0843, + "loss": 0.0284, "step": 9453 }, { "epoch": 2.63, "learning_rate": 2.458484089433157e-06, - "loss": 0.1412, + "loss": 0.1068, "step": 9454 }, { "epoch": 2.63, "learning_rate": 2.456628629743019e-06, - "loss": 0.0308, + "loss": 0.0684, "step": 9455 }, { "epoch": 2.63, "learning_rate": 2.4547731700528808e-06, - "loss": 0.0825, + "loss": 0.0117, "step": 9456 }, { "epoch": 2.63, "learning_rate": 2.4529177103627426e-06, - "loss": 0.1918, + "loss": 0.0715, "step": 9457 }, { "epoch": 2.63, "learning_rate": 2.451062250672604e-06, - "loss": 0.03, + "loss": 0.0603, "step": 9458 }, { "epoch": 2.63, "learning_rate": 2.449206790982466e-06, - "loss": 0.0844, + "loss": 0.0948, "step": 9459 }, { "epoch": 2.63, "learning_rate": 2.447351331292328e-06, - "loss": 0.3039, + "loss": 0.0953, "step": 9460 }, { "epoch": 2.63, "learning_rate": 2.4454958716021897e-06, - "loss": 0.1971, + "loss": 0.0156, "step": 9461 }, { "epoch": 2.63, "learning_rate": 2.443640411912051e-06, - "loss": 0.1392, + "loss": 0.0915, "step": 9462 }, { "epoch": 2.63, "learning_rate": 2.441784952221913e-06, - "loss": 0.0874, + "loss": 0.0518, "step": 9463 }, { "epoch": 2.63, "learning_rate": 2.439929492531775e-06, - "loss": 0.0301, + "loss": 0.0537, "step": 9464 }, { "epoch": 2.63, "learning_rate": 2.4380740328416367e-06, - "loss": 0.0862, + "loss": 0.0095, "step": 9465 }, { "epoch": 2.63, "learning_rate": 2.436218573151498e-06, - "loss": 0.0867, + "loss": 0.0195, "step": 9466 }, { "epoch": 2.63, "learning_rate": 2.43436311346136e-06, - "loss": 0.0867, + "loss": 0.1344, "step": 9467 }, { "epoch": 2.64, "learning_rate": 2.432507653771222e-06, - "loss": 0.1919, + "loss": 0.0335, "step": 9468 }, { "epoch": 2.64, "learning_rate": 2.430652194081084e-06, - "loss": 0.19, + "loss": 0.0541, "step": 9469 }, { "epoch": 2.64, "learning_rate": 2.4287967343909457e-06, - "loss": 0.1401, + "loss": 0.0928, "step": 9470 }, { "epoch": 2.64, "learning_rate": 2.426941274700807e-06, - "loss": 0.0844, + "loss": 0.0585, "step": 9471 }, { "epoch": 2.64, "learning_rate": 2.425085815010669e-06, - "loss": 0.1368, + "loss": 0.2303, "step": 9472 }, { "epoch": 2.64, "learning_rate": 2.423230355320531e-06, - "loss": 0.2527, + "loss": 0.0794, "step": 9473 }, { "epoch": 2.64, "learning_rate": 2.4213748956303927e-06, - "loss": 0.2487, + "loss": 0.0804, "step": 9474 }, { "epoch": 2.64, "learning_rate": 2.419519435940254e-06, - "loss": 0.1378, + "loss": 0.0647, "step": 9475 }, { "epoch": 2.64, "learning_rate": 2.417663976250116e-06, - "loss": 0.1963, + "loss": 0.0403, "step": 9476 }, { "epoch": 2.64, "learning_rate": 2.415808516559978e-06, - "loss": 0.0295, + "loss": 0.1083, "step": 9477 }, { "epoch": 2.64, "learning_rate": 2.41395305686984e-06, - "loss": 0.0302, + "loss": 0.0199, "step": 9478 }, { "epoch": 2.64, "learning_rate": 2.4120975971797012e-06, - "loss": 0.0297, + "loss": 0.0926, "step": 9479 }, { "epoch": 2.64, "learning_rate": 2.410242137489563e-06, - "loss": 0.2517, + "loss": 0.0479, "step": 9480 }, { "epoch": 2.64, "learning_rate": 2.408386677799425e-06, - "loss": 0.0831, + "loss": 0.041, "step": 9481 }, { "epoch": 2.64, "learning_rate": 2.406531218109287e-06, - "loss": 0.0833, + "loss": 0.0152, "step": 9482 }, { "epoch": 2.64, "learning_rate": 2.4046757584191487e-06, - "loss": 0.0875, + "loss": 0.1814, "step": 9483 }, { "epoch": 2.64, "learning_rate": 2.40282029872901e-06, - "loss": 0.1389, + "loss": 0.0262, "step": 9484 }, { "epoch": 2.64, "learning_rate": 2.400964839038872e-06, - "loss": 0.1907, + "loss": 0.1523, "step": 9485 }, { "epoch": 2.64, "learning_rate": 2.399109379348734e-06, - "loss": 0.0293, + "loss": 0.0225, "step": 9486 }, { "epoch": 2.64, "learning_rate": 2.3972539196585958e-06, - "loss": 0.1399, + "loss": 0.0191, "step": 9487 }, { "epoch": 2.64, "learning_rate": 2.3953984599684572e-06, - "loss": 0.2495, + "loss": 0.1026, "step": 9488 }, { "epoch": 2.64, "learning_rate": 2.393543000278319e-06, - "loss": 0.1939, + "loss": 0.0584, "step": 9489 }, { "epoch": 2.64, "learning_rate": 2.391687540588181e-06, - "loss": 0.1389, + "loss": 0.1038, "step": 9490 }, { "epoch": 2.64, "learning_rate": 2.389832080898043e-06, - "loss": 0.0292, + "loss": 0.1426, "step": 9491 }, { "epoch": 2.64, "learning_rate": 2.3879766212079043e-06, - "loss": 0.1407, + "loss": 0.0158, "step": 9492 }, { "epoch": 2.64, "learning_rate": 2.386121161517766e-06, - "loss": 0.0284, + "loss": 0.0695, "step": 9493 }, { "epoch": 2.64, "learning_rate": 2.384265701827628e-06, - "loss": 0.2509, + "loss": 0.1201, "step": 9494 }, { "epoch": 2.64, "learning_rate": 2.38241024213749e-06, - "loss": 0.1396, + "loss": 0.0634, "step": 9495 }, { "epoch": 2.64, "learning_rate": 2.3805547824473513e-06, - "loss": 0.141, + "loss": 0.0138, "step": 9496 }, { "epoch": 2.64, "learning_rate": 2.3786993227572132e-06, - "loss": 0.1433, + "loss": 0.0511, "step": 9497 }, { "epoch": 2.64, "learning_rate": 2.376843863067075e-06, - "loss": 0.193, + "loss": 0.1255, "step": 9498 }, { "epoch": 2.64, "learning_rate": 2.374988403376937e-06, - "loss": 0.0809, + "loss": 0.0987, "step": 9499 }, { "epoch": 2.64, "learning_rate": 2.373132943686799e-06, - "loss": 0.3543, + "loss": 0.1437, "step": 9500 }, { "epoch": 2.64, "learning_rate": 2.3712774839966603e-06, - "loss": 0.087, + "loss": 0.1188, "step": 9501 }, { "epoch": 2.64, "learning_rate": 2.369422024306522e-06, - "loss": 0.0284, + "loss": 0.1088, "step": 9502 }, { "epoch": 2.64, "learning_rate": 2.367566564616384e-06, - "loss": 0.2437, + "loss": 0.1047, "step": 9503 }, { "epoch": 2.65, "learning_rate": 2.365711104926246e-06, - "loss": 0.0291, + "loss": 0.0649, "step": 9504 }, { "epoch": 2.65, "learning_rate": 2.3638556452361073e-06, - "loss": 0.0292, + "loss": 0.0794, "step": 9505 }, { "epoch": 2.65, "learning_rate": 2.362000185545969e-06, - "loss": 0.0291, + "loss": 0.1141, "step": 9506 }, { "epoch": 2.65, "learning_rate": 2.360144725855831e-06, - "loss": 0.0289, + "loss": 0.0862, "step": 9507 }, { "epoch": 2.65, "learning_rate": 2.358289266165693e-06, - "loss": 0.252, + "loss": 0.0863, "step": 9508 }, { "epoch": 2.65, "learning_rate": 2.3564338064755544e-06, - "loss": 0.142, + "loss": 0.0231, "step": 9509 }, { "epoch": 2.65, "learning_rate": 2.3545783467854163e-06, - "loss": 0.0828, + "loss": 0.1892, "step": 9510 }, { "epoch": 2.65, "learning_rate": 2.352722887095278e-06, - "loss": 0.1389, + "loss": 0.0238, "step": 9511 }, { "epoch": 2.65, "learning_rate": 2.35086742740514e-06, - "loss": 0.1953, + "loss": 0.1213, "step": 9512 }, { "epoch": 2.65, "learning_rate": 2.349011967715002e-06, - "loss": 0.1944, + "loss": 0.0166, "step": 9513 }, { "epoch": 2.65, "learning_rate": 2.3471565080248633e-06, - "loss": 0.1957, + "loss": 0.0915, "step": 9514 }, { "epoch": 2.65, "learning_rate": 2.345301048334725e-06, - "loss": 0.1934, + "loss": 0.0383, "step": 9515 }, { "epoch": 2.65, "learning_rate": 2.343445588644587e-06, - "loss": 0.194, + "loss": 0.0607, "step": 9516 }, { "epoch": 2.65, "learning_rate": 2.341590128954449e-06, - "loss": 0.1421, + "loss": 0.1361, "step": 9517 }, { "epoch": 2.65, "learning_rate": 2.3397346692643104e-06, - "loss": 0.0854, + "loss": 0.0337, "step": 9518 }, { "epoch": 2.65, "learning_rate": 2.3378792095741723e-06, - "loss": 0.1364, + "loss": 0.0563, "step": 9519 }, { "epoch": 2.65, "learning_rate": 2.336023749884034e-06, - "loss": 0.0861, + "loss": 0.0653, "step": 9520 }, { "epoch": 2.65, "learning_rate": 2.334168290193896e-06, - "loss": 0.2535, + "loss": 0.026, "step": 9521 }, { "epoch": 2.65, "learning_rate": 2.3323128305037574e-06, - "loss": 0.0816, + "loss": 0.0488, "step": 9522 }, { "epoch": 2.65, "learning_rate": 2.3304573708136193e-06, - "loss": 0.0856, + "loss": 0.0676, "step": 9523 }, { "epoch": 2.65, "learning_rate": 2.328601911123481e-06, - "loss": 0.3068, + "loss": 0.0919, "step": 9524 }, { "epoch": 2.65, "learning_rate": 2.326746451433343e-06, - "loss": 0.1929, + "loss": 0.1011, "step": 9525 }, { "epoch": 2.65, "learning_rate": 2.3248909917432045e-06, - "loss": 0.1929, + "loss": 0.015, "step": 9526 }, { "epoch": 2.65, "learning_rate": 2.3230355320530664e-06, - "loss": 0.2478, + "loss": 0.0267, "step": 9527 }, { "epoch": 2.65, "learning_rate": 2.321180072362928e-06, - "loss": 0.3046, + "loss": 0.0538, "step": 9528 }, { "epoch": 2.65, "learning_rate": 2.3193246126727897e-06, - "loss": 0.0857, + "loss": 0.093, "step": 9529 }, { "epoch": 2.65, "learning_rate": 2.3174691529826516e-06, - "loss": 0.1417, + "loss": 0.0267, "step": 9530 }, { "epoch": 2.65, "learning_rate": 2.3156136932925134e-06, - "loss": 0.137, + "loss": 0.0191, "step": 9531 }, { "epoch": 2.65, "learning_rate": 2.313758233602375e-06, - "loss": 0.0314, + "loss": 0.1319, "step": 9532 }, { "epoch": 2.65, "learning_rate": 2.3119027739122367e-06, - "loss": 0.1935, + "loss": 0.0911, "step": 9533 }, { "epoch": 2.65, "learning_rate": 2.3100473142220986e-06, - "loss": 0.0307, + "loss": 0.1581, "step": 9534 }, { "epoch": 2.65, "learning_rate": 2.3081918545319605e-06, - "loss": 0.1412, + "loss": 0.0519, "step": 9535 }, { "epoch": 2.65, "learning_rate": 2.3063363948418224e-06, - "loss": 0.1421, + "loss": 0.0453, "step": 9536 }, { "epoch": 2.65, "learning_rate": 2.304480935151684e-06, - "loss": 0.0869, + "loss": 0.0713, "step": 9537 }, { "epoch": 2.65, "learning_rate": 2.3026254754615457e-06, - "loss": 0.1418, + "loss": 0.0551, "step": 9538 }, { "epoch": 2.65, "learning_rate": 2.3007700157714075e-06, - "loss": 0.0297, + "loss": 0.1203, "step": 9539 }, { "epoch": 2.66, "learning_rate": 2.2989145560812694e-06, - "loss": 0.1401, + "loss": 0.0701, "step": 9540 }, { "epoch": 2.66, "learning_rate": 2.297059096391131e-06, - "loss": 0.0315, + "loss": 0.0225, "step": 9541 }, { "epoch": 2.66, "learning_rate": 2.2952036367009927e-06, - "loss": 0.1892, + "loss": 0.0243, "step": 9542 }, { "epoch": 2.66, "learning_rate": 2.2933481770108546e-06, - "loss": 0.2492, + "loss": 0.0683, "step": 9543 }, { "epoch": 2.66, "learning_rate": 2.2914927173207165e-06, - "loss": 0.0838, + "loss": 0.1414, "step": 9544 }, { "epoch": 2.66, "learning_rate": 2.289637257630578e-06, - "loss": 0.1405, + "loss": 0.0756, "step": 9545 }, { "epoch": 2.66, "learning_rate": 2.28778179794044e-06, - "loss": 0.0833, + "loss": 0.266, "step": 9546 }, { "epoch": 2.66, "learning_rate": 2.2859263382503017e-06, - "loss": 0.086, + "loss": 0.0446, "step": 9547 }, { "epoch": 2.66, "learning_rate": 2.2840708785601635e-06, - "loss": 0.0304, + "loss": 0.0544, "step": 9548 }, { "epoch": 2.66, "learning_rate": 2.282215418870025e-06, - "loss": 0.1383, + "loss": 0.0402, "step": 9549 }, { "epoch": 2.66, "learning_rate": 2.280359959179887e-06, - "loss": 0.192, + "loss": 0.1839, "step": 9550 }, { "epoch": 2.66, "learning_rate": 2.2785044994897487e-06, - "loss": 0.2982, + "loss": 0.0439, "step": 9551 }, { "epoch": 2.66, "learning_rate": 2.2766490397996106e-06, - "loss": 0.0876, + "loss": 0.126, "step": 9552 }, { "epoch": 2.66, "learning_rate": 2.2747935801094725e-06, - "loss": 0.1859, + "loss": 0.0141, "step": 9553 }, { "epoch": 2.66, "learning_rate": 2.272938120419334e-06, - "loss": 0.252, + "loss": 0.0163, "step": 9554 }, { "epoch": 2.66, "learning_rate": 2.2710826607291958e-06, - "loss": 0.1977, + "loss": 0.0454, "step": 9555 }, { "epoch": 2.66, "learning_rate": 2.2692272010390577e-06, - "loss": 0.0839, + "loss": 0.0565, "step": 9556 }, { "epoch": 2.66, "learning_rate": 2.2673717413489195e-06, - "loss": 0.1906, + "loss": 0.102, "step": 9557 }, { "epoch": 2.66, "learning_rate": 2.265516281658781e-06, - "loss": 0.0309, + "loss": 0.1176, "step": 9558 }, { "epoch": 2.66, "learning_rate": 2.263660821968643e-06, - "loss": 0.0303, + "loss": 0.0276, "step": 9559 }, { "epoch": 2.66, "learning_rate": 2.2618053622785047e-06, - "loss": 0.0864, + "loss": 0.1508, "step": 9560 }, { "epoch": 2.66, "learning_rate": 2.2599499025883666e-06, - "loss": 0.2988, + "loss": 0.0521, "step": 9561 }, { "epoch": 2.66, "learning_rate": 2.258094442898228e-06, - "loss": 0.1408, + "loss": 0.1282, "step": 9562 }, { "epoch": 2.66, "learning_rate": 2.25623898320809e-06, - "loss": 0.1392, + "loss": 0.0597, "step": 9563 }, { "epoch": 2.66, "learning_rate": 2.2543835235179518e-06, - "loss": 0.1421, + "loss": 0.0227, "step": 9564 }, { "epoch": 2.66, "learning_rate": 2.2525280638278136e-06, - "loss": 0.1392, + "loss": 0.0399, "step": 9565 }, { "epoch": 2.66, "learning_rate": 2.2506726041376755e-06, - "loss": 0.1368, + "loss": 0.0667, "step": 9566 }, { "epoch": 2.66, "learning_rate": 2.248817144447537e-06, - "loss": 0.0847, + "loss": 0.0159, "step": 9567 }, { "epoch": 2.66, "learning_rate": 2.246961684757399e-06, - "loss": 0.1398, + "loss": 0.0938, "step": 9568 }, { "epoch": 2.66, "learning_rate": 2.2451062250672607e-06, - "loss": 0.0844, + "loss": 0.0543, "step": 9569 }, { "epoch": 2.66, "learning_rate": 2.2432507653771226e-06, - "loss": 0.0311, + "loss": 0.0122, "step": 9570 }, { "epoch": 2.66, "learning_rate": 2.241395305686984e-06, - "loss": 0.0314, + "loss": 0.02, "step": 9571 }, { "epoch": 2.66, "learning_rate": 2.239539845996846e-06, - "loss": 0.1402, + "loss": 0.0203, "step": 9572 }, { "epoch": 2.66, "learning_rate": 2.2376843863067078e-06, - "loss": 0.0868, + "loss": 0.1, "step": 9573 }, { "epoch": 2.66, "learning_rate": 2.2358289266165696e-06, - "loss": 0.194, + "loss": 0.0085, "step": 9574 }, { "epoch": 2.66, "learning_rate": 2.233973466926431e-06, - "loss": 0.1397, + "loss": 0.0427, "step": 9575 }, { "epoch": 2.67, "learning_rate": 2.232118007236293e-06, - "loss": 0.2458, + "loss": 0.0163, "step": 9576 }, { "epoch": 2.67, "learning_rate": 2.230262547546155e-06, - "loss": 0.1923, + "loss": 0.0526, "step": 9577 }, { "epoch": 2.67, "learning_rate": 2.2284070878560167e-06, - "loss": 0.1932, + "loss": 0.1316, "step": 9578 }, { "epoch": 2.67, "learning_rate": 2.226551628165878e-06, - "loss": 0.1396, + "loss": 0.0497, "step": 9579 }, { "epoch": 2.67, "learning_rate": 2.22469616847574e-06, - "loss": 0.0857, + "loss": 0.0492, "step": 9580 }, { "epoch": 2.67, "learning_rate": 2.222840708785602e-06, - "loss": 0.1362, + "loss": 0.0434, "step": 9581 }, { "epoch": 2.67, "learning_rate": 2.2209852490954637e-06, - "loss": 0.138, + "loss": 0.0448, "step": 9582 }, { "epoch": 2.67, "learning_rate": 2.2191297894053256e-06, - "loss": 0.1927, + "loss": 0.0829, "step": 9583 }, { "epoch": 2.67, "learning_rate": 2.217274329715187e-06, - "loss": 0.0822, + "loss": 0.1416, "step": 9584 }, { "epoch": 2.67, "learning_rate": 2.215418870025049e-06, - "loss": 0.1881, + "loss": 0.0198, "step": 9585 }, { "epoch": 2.67, "learning_rate": 2.213563410334911e-06, - "loss": 0.1437, + "loss": 0.0233, "step": 9586 }, { "epoch": 2.67, "learning_rate": 2.2117079506447727e-06, - "loss": 0.14, + "loss": 0.0572, "step": 9587 }, { "epoch": 2.67, "learning_rate": 2.209852490954634e-06, - "loss": 0.0303, + "loss": 0.0484, "step": 9588 }, { "epoch": 2.67, "learning_rate": 2.207997031264496e-06, - "loss": 0.194, + "loss": 0.0541, "step": 9589 }, { "epoch": 2.67, "learning_rate": 2.206141571574358e-06, - "loss": 0.1337, + "loss": 0.0556, "step": 9590 }, { "epoch": 2.67, "learning_rate": 2.2042861118842197e-06, - "loss": 0.0858, + "loss": 0.0189, "step": 9591 }, { "epoch": 2.67, "learning_rate": 2.202430652194081e-06, - "loss": 0.0848, + "loss": 0.1574, "step": 9592 }, { "epoch": 2.67, "learning_rate": 2.200575192503943e-06, - "loss": 0.0864, + "loss": 0.0628, "step": 9593 }, { "epoch": 2.67, "learning_rate": 2.198719732813805e-06, - "loss": 0.0851, + "loss": 0.0465, "step": 9594 }, { "epoch": 2.67, "learning_rate": 2.196864273123667e-06, - "loss": 0.1383, + "loss": 0.0219, "step": 9595 }, { "epoch": 2.67, "learning_rate": 2.1950088134335287e-06, - "loss": 0.031, + "loss": 0.0197, "step": 9596 }, { "epoch": 2.67, "learning_rate": 2.19315335374339e-06, - "loss": 0.1392, + "loss": 0.0521, "step": 9597 }, { "epoch": 2.67, "learning_rate": 2.191297894053252e-06, - "loss": 0.1362, + "loss": 0.0558, "step": 9598 }, { "epoch": 2.67, "learning_rate": 2.189442434363114e-06, - "loss": 0.1871, + "loss": 0.2971, "step": 9599 }, { "epoch": 2.67, "learning_rate": 2.1875869746729757e-06, - "loss": 0.3619, + "loss": 0.0631, "step": 9600 }, { "epoch": 2.67, "learning_rate": 2.185731514982837e-06, - "loss": 0.1375, + "loss": 0.0672, "step": 9601 }, { "epoch": 2.67, "learning_rate": 2.183876055292699e-06, - "loss": 0.0298, + "loss": 0.0399, "step": 9602 }, { "epoch": 2.67, "learning_rate": 2.1820205956025605e-06, - "loss": 0.2453, + "loss": 0.0444, "step": 9603 }, { "epoch": 2.67, "learning_rate": 2.1801651359124224e-06, - "loss": 0.2455, + "loss": 0.0172, "step": 9604 }, { "epoch": 2.67, "learning_rate": 2.1783096762222842e-06, - "loss": 0.0316, + "loss": 0.1561, "step": 9605 }, { "epoch": 2.67, "learning_rate": 2.176454216532146e-06, - "loss": 0.0305, + "loss": 0.016, "step": 9606 }, { "epoch": 2.67, "learning_rate": 2.1745987568420075e-06, - "loss": 0.14, + "loss": 0.0909, "step": 9607 }, { "epoch": 2.67, "learning_rate": 2.1727432971518694e-06, - "loss": 0.085, + "loss": 0.0152, "step": 9608 }, { "epoch": 2.67, "learning_rate": 2.1708878374617313e-06, - "loss": 0.0849, + "loss": 0.1557, "step": 9609 }, { "epoch": 2.67, "learning_rate": 2.169032377771593e-06, - "loss": 0.1939, + "loss": 0.0832, "step": 9610 }, { "epoch": 2.67, "learning_rate": 2.1671769180814546e-06, - "loss": 0.3073, + "loss": 0.0192, "step": 9611 }, { "epoch": 2.68, "learning_rate": 2.1653214583913165e-06, - "loss": 0.1388, + "loss": 0.0249, "step": 9612 }, { "epoch": 2.68, "learning_rate": 2.1634659987011783e-06, - "loss": 0.2495, + "loss": 0.0956, "step": 9613 }, { "epoch": 2.68, "learning_rate": 2.1616105390110402e-06, - "loss": 0.0826, + "loss": 0.0545, "step": 9614 }, { "epoch": 2.68, "learning_rate": 2.1597550793209017e-06, - "loss": 0.1901, + "loss": 0.013, "step": 9615 }, { "epoch": 2.68, "learning_rate": 2.1578996196307635e-06, - "loss": 0.084, + "loss": 0.1359, "step": 9616 }, { "epoch": 2.68, "learning_rate": 2.1560441599406254e-06, - "loss": 0.0848, + "loss": 0.0187, "step": 9617 }, { "epoch": 2.68, "learning_rate": 2.1541887002504873e-06, - "loss": 0.1949, + "loss": 0.0395, "step": 9618 }, { "epoch": 2.68, "learning_rate": 2.1523332405603487e-06, - "loss": 0.0315, + "loss": 0.097, "step": 9619 }, { "epoch": 2.68, "learning_rate": 2.1504777808702106e-06, - "loss": 0.1355, + "loss": 0.0562, "step": 9620 }, { "epoch": 2.68, "learning_rate": 2.1486223211800725e-06, - "loss": 0.1376, + "loss": 0.0154, "step": 9621 }, { "epoch": 2.68, "learning_rate": 2.1467668614899343e-06, - "loss": 0.0855, + "loss": 0.0746, "step": 9622 }, { "epoch": 2.68, "learning_rate": 2.144911401799796e-06, - "loss": 0.085, + "loss": 0.0493, "step": 9623 }, { "epoch": 2.68, "learning_rate": 2.1430559421096577e-06, - "loss": 0.1401, + "loss": 0.1516, "step": 9624 }, { "epoch": 2.68, "learning_rate": 2.1412004824195195e-06, - "loss": 0.0306, + "loss": 0.0654, "step": 9625 }, { "epoch": 2.68, "learning_rate": 2.1393450227293814e-06, - "loss": 0.1385, + "loss": 0.0134, "step": 9626 }, { "epoch": 2.68, "learning_rate": 2.1374895630392433e-06, - "loss": 0.0855, + "loss": 0.1004, "step": 9627 }, { "epoch": 2.68, "learning_rate": 2.1356341033491047e-06, - "loss": 0.2548, + "loss": 0.0104, "step": 9628 }, { "epoch": 2.68, "learning_rate": 2.1337786436589666e-06, - "loss": 0.1946, + "loss": 0.0954, "step": 9629 }, { "epoch": 2.68, "learning_rate": 2.1319231839688285e-06, - "loss": 0.0842, + "loss": 0.0726, "step": 9630 }, { "epoch": 2.68, "learning_rate": 2.1300677242786903e-06, - "loss": 0.0297, + "loss": 0.0385, "step": 9631 }, { "epoch": 2.68, "learning_rate": 2.1282122645885518e-06, - "loss": 0.3581, + "loss": 0.0135, "step": 9632 }, { "epoch": 2.68, "learning_rate": 2.1263568048984136e-06, - "loss": 0.1416, + "loss": 0.0578, "step": 9633 }, { "epoch": 2.68, "learning_rate": 2.1245013452082755e-06, - "loss": 0.1411, + "loss": 0.1547, "step": 9634 }, { "epoch": 2.68, "learning_rate": 2.1226458855181374e-06, - "loss": 0.0862, + "loss": 0.0133, "step": 9635 }, { "epoch": 2.68, "learning_rate": 2.1207904258279993e-06, - "loss": 0.1923, + "loss": 0.014, "step": 9636 }, { "epoch": 2.68, "learning_rate": 2.1189349661378607e-06, - "loss": 0.1414, + "loss": 0.0534, "step": 9637 }, { "epoch": 2.68, "learning_rate": 2.1170795064477226e-06, - "loss": 0.1411, + "loss": 0.1177, "step": 9638 }, { "epoch": 2.68, "learning_rate": 2.1152240467575844e-06, - "loss": 0.0305, + "loss": 0.0253, "step": 9639 }, { "epoch": 2.68, "learning_rate": 2.1133685870674463e-06, - "loss": 0.1402, + "loss": 0.0072, "step": 9640 }, { "epoch": 2.68, "learning_rate": 2.1115131273773078e-06, - "loss": 0.1956, + "loss": 0.018, "step": 9641 }, { "epoch": 2.68, "learning_rate": 2.1096576676871696e-06, - "loss": 0.0902, + "loss": 0.0501, "step": 9642 }, { "epoch": 2.68, "learning_rate": 2.1078022079970315e-06, - "loss": 0.1381, + "loss": 0.0775, "step": 9643 }, { "epoch": 2.68, "learning_rate": 2.1059467483068934e-06, - "loss": 0.1409, + "loss": 0.0641, "step": 9644 }, { "epoch": 2.68, "learning_rate": 2.104091288616755e-06, - "loss": 0.1416, + "loss": 0.0814, "step": 9645 }, { "epoch": 2.68, "learning_rate": 2.1022358289266167e-06, - "loss": 0.1382, + "loss": 0.0128, "step": 9646 }, { "epoch": 2.68, "learning_rate": 2.1003803692364786e-06, - "loss": 0.1411, + "loss": 0.055, "step": 9647 }, { "epoch": 2.69, "learning_rate": 2.0985249095463404e-06, - "loss": 0.3087, + "loss": 0.0496, "step": 9648 }, { "epoch": 2.69, "learning_rate": 2.096669449856202e-06, - "loss": 0.1963, + "loss": 0.0759, "step": 9649 }, { "epoch": 2.69, "learning_rate": 2.0948139901660637e-06, - "loss": 0.2458, + "loss": 0.0228, "step": 9650 }, { "epoch": 2.69, "learning_rate": 2.0929585304759256e-06, - "loss": 0.1395, + "loss": 0.1987, "step": 9651 }, { "epoch": 2.69, "learning_rate": 2.0911030707857875e-06, - "loss": 0.0853, + "loss": 0.0912, "step": 9652 }, { "epoch": 2.69, "learning_rate": 2.0892476110956494e-06, - "loss": 0.0308, + "loss": 0.0124, "step": 9653 }, { "epoch": 2.69, "learning_rate": 2.087392151405511e-06, - "loss": 0.0843, + "loss": 0.0644, "step": 9654 }, { "epoch": 2.69, "learning_rate": 2.0855366917153727e-06, - "loss": 0.0312, + "loss": 0.0807, "step": 9655 }, { "epoch": 2.69, "learning_rate": 2.0836812320252345e-06, - "loss": 0.3019, + "loss": 0.0247, "step": 9656 }, { "epoch": 2.69, "learning_rate": 2.0818257723350964e-06, - "loss": 0.1954, + "loss": 0.118, "step": 9657 }, { "epoch": 2.69, "learning_rate": 2.079970312644958e-06, - "loss": 0.3046, + "loss": 0.0657, "step": 9658 }, { "epoch": 2.69, "learning_rate": 2.0781148529548197e-06, - "loss": 0.0861, + "loss": 0.0115, "step": 9659 }, { "epoch": 2.69, "learning_rate": 2.0762593932646816e-06, - "loss": 0.1408, + "loss": 0.0595, "step": 9660 }, { "epoch": 2.69, "learning_rate": 2.0744039335745435e-06, - "loss": 0.1391, + "loss": 0.1379, "step": 9661 }, { "epoch": 2.69, "learning_rate": 2.072548473884405e-06, - "loss": 0.2544, + "loss": 0.0445, "step": 9662 }, { "epoch": 2.69, "learning_rate": 2.070693014194267e-06, - "loss": 0.2998, + "loss": 0.0513, "step": 9663 }, { "epoch": 2.69, "learning_rate": 2.0688375545041287e-06, - "loss": 0.0836, + "loss": 0.0547, "step": 9664 }, { "epoch": 2.69, "learning_rate": 2.0669820948139905e-06, - "loss": 0.1951, + "loss": 0.0114, "step": 9665 }, { "epoch": 2.69, "learning_rate": 2.0651266351238524e-06, - "loss": 0.0321, + "loss": 0.1387, "step": 9666 }, { "epoch": 2.69, "learning_rate": 2.063271175433714e-06, - "loss": 0.1896, + "loss": 0.0514, "step": 9667 }, { "epoch": 2.69, "learning_rate": 2.0614157157435757e-06, - "loss": 0.0866, + "loss": 0.0665, "step": 9668 }, { "epoch": 2.69, "learning_rate": 2.0595602560534376e-06, - "loss": 0.0318, + "loss": 0.0185, "step": 9669 }, { "epoch": 2.69, "learning_rate": 2.0577047963632995e-06, - "loss": 0.0852, + "loss": 0.044, "step": 9670 }, { "epoch": 2.69, "learning_rate": 2.055849336673161e-06, - "loss": 0.143, + "loss": 0.0345, "step": 9671 }, { "epoch": 2.69, "learning_rate": 2.0539938769830228e-06, - "loss": 0.1407, + "loss": 0.0239, "step": 9672 }, { "epoch": 2.69, "learning_rate": 2.0521384172928846e-06, - "loss": 0.0861, + "loss": 0.0231, "step": 9673 }, { "epoch": 2.69, "learning_rate": 2.0502829576027465e-06, - "loss": 0.1929, + "loss": 0.1225, "step": 9674 }, { "epoch": 2.69, "learning_rate": 2.048427497912608e-06, - "loss": 0.0329, + "loss": 0.0749, "step": 9675 }, { "epoch": 2.69, "learning_rate": 2.04657203822247e-06, - "loss": 0.1417, + "loss": 0.0131, "step": 9676 }, { "epoch": 2.69, "learning_rate": 2.0447165785323317e-06, - "loss": 0.1944, + "loss": 0.0983, "step": 9677 }, { "epoch": 2.69, "learning_rate": 2.0428611188421936e-06, - "loss": 0.1954, + "loss": 0.0442, "step": 9678 }, { "epoch": 2.69, "learning_rate": 2.041005659152055e-06, - "loss": 0.3064, + "loss": 0.0185, "step": 9679 }, { "epoch": 2.69, "learning_rate": 2.039150199461917e-06, - "loss": 0.1415, + "loss": 0.0972, "step": 9680 }, { "epoch": 2.69, "learning_rate": 2.0372947397717783e-06, - "loss": 0.1953, + "loss": 0.2024, "step": 9681 }, { "epoch": 2.69, "learning_rate": 2.0354392800816402e-06, - "loss": 0.1399, + "loss": 0.0166, "step": 9682 }, { "epoch": 2.69, "learning_rate": 2.033583820391502e-06, - "loss": 0.1378, + "loss": 0.0573, "step": 9683 }, { "epoch": 2.7, "learning_rate": 2.031728360701364e-06, - "loss": 0.1872, + "loss": 0.0866, "step": 9684 }, { "epoch": 2.7, "learning_rate": 2.0298729010112254e-06, - "loss": 0.1941, + "loss": 0.0172, "step": 9685 }, { "epoch": 2.7, "learning_rate": 2.0280174413210873e-06, - "loss": 0.0889, + "loss": 0.0163, "step": 9686 }, { "epoch": 2.7, "learning_rate": 2.026161981630949e-06, - "loss": 0.0323, + "loss": 0.1116, "step": 9687 }, { "epoch": 2.7, "learning_rate": 2.024306521940811e-06, - "loss": 0.0851, + "loss": 0.0093, "step": 9688 }, { "epoch": 2.7, "learning_rate": 2.0224510622506725e-06, - "loss": 0.2447, + "loss": 0.0718, "step": 9689 }, { "epoch": 2.7, "learning_rate": 2.0205956025605343e-06, - "loss": 0.1907, + "loss": 0.107, "step": 9690 }, { "epoch": 2.7, "learning_rate": 2.018740142870396e-06, - "loss": 0.0848, + "loss": 0.0201, "step": 9691 }, { "epoch": 2.7, "learning_rate": 2.016884683180258e-06, - "loss": 0.0882, + "loss": 0.0535, "step": 9692 }, { "epoch": 2.7, "learning_rate": 2.01502922349012e-06, - "loss": 0.087, + "loss": 0.0116, "step": 9693 }, { "epoch": 2.7, "learning_rate": 2.0131737637999814e-06, - "loss": 0.1943, + "loss": 0.0112, "step": 9694 }, { "epoch": 2.7, "learning_rate": 2.0113183041098433e-06, - "loss": 0.1384, + "loss": 0.0705, "step": 9695 }, { "epoch": 2.7, "learning_rate": 2.009462844419705e-06, - "loss": 0.1378, + "loss": 0.1876, "step": 9696 }, { "epoch": 2.7, "learning_rate": 2.007607384729567e-06, - "loss": 0.1922, + "loss": 0.0117, "step": 9697 }, { "epoch": 2.7, "learning_rate": 2.0057519250394284e-06, - "loss": 0.0827, + "loss": 0.0839, "step": 9698 }, { "epoch": 2.7, "learning_rate": 2.0038964653492903e-06, - "loss": 0.2434, + "loss": 0.0824, "step": 9699 }, { "epoch": 2.7, "learning_rate": 2.002041005659152e-06, - "loss": 0.1441, + "loss": 0.0674, "step": 9700 }, { "epoch": 2.7, "learning_rate": 2.000185545969014e-06, - "loss": 0.138, + "loss": 0.0145, "step": 9701 }, { "epoch": 2.7, "learning_rate": 1.9983300862788755e-06, - "loss": 0.0881, + "loss": 0.0061, "step": 9702 }, { "epoch": 2.7, "learning_rate": 1.9964746265887374e-06, - "loss": 0.1397, + "loss": 0.0856, "step": 9703 }, { "epoch": 2.7, "learning_rate": 1.9946191668985992e-06, - "loss": 0.1419, + "loss": 0.0096, "step": 9704 }, { "epoch": 2.7, "learning_rate": 1.992763707208461e-06, - "loss": 0.244, + "loss": 0.0131, "step": 9705 }, { "epoch": 2.7, "learning_rate": 1.990908247518323e-06, - "loss": 0.0327, + "loss": 0.0455, "step": 9706 }, { "epoch": 2.7, "learning_rate": 1.9890527878281844e-06, - "loss": 0.1339, + "loss": 0.1068, "step": 9707 }, { "epoch": 2.7, "learning_rate": 1.9871973281380463e-06, - "loss": 0.1394, + "loss": 0.1438, "step": 9708 }, { "epoch": 2.7, "learning_rate": 1.985341868447908e-06, - "loss": 0.1384, + "loss": 0.0057, "step": 9709 }, { "epoch": 2.7, "learning_rate": 1.98348640875777e-06, - "loss": 0.2499, + "loss": 0.0593, "step": 9710 }, { "epoch": 2.7, "learning_rate": 1.9816309490676315e-06, - "loss": 0.1952, + "loss": 0.0886, "step": 9711 }, { "epoch": 2.7, "learning_rate": 1.9797754893774934e-06, - "loss": 0.1407, + "loss": 0.1304, "step": 9712 }, { "epoch": 2.7, "learning_rate": 1.9779200296873552e-06, - "loss": 0.0848, + "loss": 0.0657, "step": 9713 }, { "epoch": 2.7, "learning_rate": 1.976064569997217e-06, - "loss": 0.247, + "loss": 0.0714, "step": 9714 }, { "epoch": 2.7, "learning_rate": 1.9742091103070786e-06, - "loss": 0.0855, + "loss": 0.0755, "step": 9715 }, { "epoch": 2.7, "learning_rate": 1.9723536506169404e-06, - "loss": 0.1934, + "loss": 0.015, "step": 9716 }, { "epoch": 2.7, "learning_rate": 1.9704981909268023e-06, - "loss": 0.2445, + "loss": 0.0554, "step": 9717 }, { "epoch": 2.7, "learning_rate": 1.968642731236664e-06, - "loss": 0.14, + "loss": 0.0426, "step": 9718 }, { "epoch": 2.7, "learning_rate": 1.9667872715465256e-06, - "loss": 0.1923, + "loss": 0.018, "step": 9719 }, { "epoch": 2.71, "learning_rate": 1.9649318118563875e-06, - "loss": 0.0874, + "loss": 0.0181, "step": 9720 }, { "epoch": 2.71, "learning_rate": 1.9630763521662494e-06, - "loss": 0.2983, + "loss": 0.092, "step": 9721 }, { "epoch": 2.71, "learning_rate": 1.9612208924761112e-06, - "loss": 0.0876, + "loss": 0.019, "step": 9722 }, { "epoch": 2.71, "learning_rate": 1.959365432785973e-06, - "loss": 0.1419, + "loss": 0.0494, "step": 9723 }, { "epoch": 2.71, "learning_rate": 1.9575099730958345e-06, - "loss": 0.2464, + "loss": 0.0148, "step": 9724 }, { "epoch": 2.71, "learning_rate": 1.9556545134056964e-06, - "loss": 0.2449, + "loss": 0.0829, "step": 9725 }, { "epoch": 2.71, "learning_rate": 1.9537990537155583e-06, - "loss": 0.1383, + "loss": 0.1581, "step": 9726 }, { "epoch": 2.71, "learning_rate": 1.95194359402542e-06, - "loss": 0.0859, + "loss": 0.0426, "step": 9727 }, { "epoch": 2.71, "learning_rate": 1.9500881343352816e-06, - "loss": 0.0874, + "loss": 0.0541, "step": 9728 }, { "epoch": 2.71, "learning_rate": 1.9482326746451435e-06, - "loss": 0.0854, + "loss": 0.2004, "step": 9729 }, { "epoch": 2.71, "learning_rate": 1.9463772149550053e-06, - "loss": 0.3559, + "loss": 0.046, "step": 9730 }, { "epoch": 2.71, "learning_rate": 1.9445217552648672e-06, - "loss": 0.1422, + "loss": 0.0189, "step": 9731 }, { "epoch": 2.71, "learning_rate": 1.9426662955747287e-06, - "loss": 0.0845, + "loss": 0.1353, "step": 9732 }, { "epoch": 2.71, "learning_rate": 1.9408108358845905e-06, - "loss": 0.246, + "loss": 0.1362, "step": 9733 }, { "epoch": 2.71, "learning_rate": 1.9389553761944524e-06, - "loss": 0.2995, + "loss": 0.0532, "step": 9734 }, { "epoch": 2.71, "learning_rate": 1.9370999165043143e-06, - "loss": 0.0872, + "loss": 0.0428, "step": 9735 }, { "epoch": 2.71, "learning_rate": 1.935244456814176e-06, - "loss": 0.191, + "loss": 0.0681, "step": 9736 }, { "epoch": 2.71, "learning_rate": 1.9333889971240376e-06, - "loss": 0.1432, + "loss": 0.0484, "step": 9737 }, { "epoch": 2.71, "learning_rate": 1.9315335374338995e-06, - "loss": 0.0867, + "loss": 0.013, "step": 9738 }, { "epoch": 2.71, "learning_rate": 1.9296780777437613e-06, - "loss": 0.1374, + "loss": 0.0099, "step": 9739 }, { "epoch": 2.71, "learning_rate": 1.927822618053623e-06, - "loss": 0.2438, + "loss": 0.0468, "step": 9740 }, { "epoch": 2.71, "learning_rate": 1.9259671583634846e-06, - "loss": 0.0867, + "loss": 0.0433, "step": 9741 }, { "epoch": 2.71, "learning_rate": 1.9241116986733465e-06, - "loss": 0.1398, + "loss": 0.0182, "step": 9742 }, { "epoch": 2.71, "learning_rate": 1.9222562389832084e-06, - "loss": 0.292, + "loss": 0.017, "step": 9743 }, { "epoch": 2.71, "learning_rate": 1.9204007792930703e-06, - "loss": 0.1397, + "loss": 0.0559, "step": 9744 }, { "epoch": 2.71, "learning_rate": 1.9185453196029317e-06, - "loss": 0.0346, + "loss": 0.0982, "step": 9745 }, { "epoch": 2.71, "learning_rate": 1.9166898599127936e-06, - "loss": 0.193, + "loss": 0.013, "step": 9746 }, { "epoch": 2.71, "learning_rate": 1.9148344002226554e-06, - "loss": 0.2464, + "loss": 0.0409, "step": 9747 }, { "epoch": 2.71, "learning_rate": 1.9129789405325173e-06, - "loss": 0.0848, + "loss": 0.0607, "step": 9748 }, { "epoch": 2.71, "learning_rate": 1.9111234808423788e-06, - "loss": 0.1393, + "loss": 0.1555, "step": 9749 }, { "epoch": 2.71, "learning_rate": 1.9092680211522406e-06, - "loss": 0.1422, + "loss": 0.2465, "step": 9750 }, { "epoch": 2.71, "learning_rate": 1.9074125614621025e-06, - "loss": 0.1941, + "loss": 0.0611, "step": 9751 }, { "epoch": 2.71, "learning_rate": 1.9055571017719644e-06, - "loss": 0.2408, + "loss": 0.0208, "step": 9752 }, { "epoch": 2.71, "learning_rate": 1.903701642081826e-06, - "loss": 0.0851, + "loss": 0.0122, "step": 9753 }, { "epoch": 2.71, "learning_rate": 1.901846182391688e-06, - "loss": 0.188, + "loss": 0.0522, "step": 9754 }, { "epoch": 2.72, "learning_rate": 1.8999907227015494e-06, - "loss": 0.0873, + "loss": 0.0479, "step": 9755 }, { "epoch": 2.72, "learning_rate": 1.898135263011411e-06, - "loss": 0.0886, + "loss": 0.0562, "step": 9756 }, { "epoch": 2.72, "learning_rate": 1.8962798033212729e-06, - "loss": 0.0906, + "loss": 0.0456, "step": 9757 }, { "epoch": 2.72, "learning_rate": 1.8944243436311345e-06, - "loss": 0.1385, + "loss": 0.1891, "step": 9758 }, { "epoch": 2.72, "learning_rate": 1.8925688839409964e-06, - "loss": 0.0343, + "loss": 0.0145, "step": 9759 }, { "epoch": 2.72, "learning_rate": 1.8907134242508583e-06, - "loss": 0.0341, + "loss": 0.0084, "step": 9760 }, { "epoch": 2.72, "learning_rate": 1.88885796456072e-06, - "loss": 0.0869, + "loss": 0.0101, "step": 9761 }, { "epoch": 2.72, "learning_rate": 1.8870025048705818e-06, - "loss": 0.2517, + "loss": 0.0239, "step": 9762 }, { "epoch": 2.72, "learning_rate": 1.8851470451804435e-06, - "loss": 0.0846, + "loss": 0.1026, "step": 9763 }, { "epoch": 2.72, "learning_rate": 1.8832915854903053e-06, - "loss": 0.0349, + "loss": 0.0151, "step": 9764 }, { "epoch": 2.72, "learning_rate": 1.881436125800167e-06, - "loss": 0.2458, + "loss": 0.01, "step": 9765 }, { "epoch": 2.72, "learning_rate": 1.8795806661100289e-06, - "loss": 0.0355, + "loss": 0.0872, "step": 9766 }, { "epoch": 2.72, "learning_rate": 1.8777252064198905e-06, - "loss": 0.1413, + "loss": 0.0602, "step": 9767 }, { "epoch": 2.72, "learning_rate": 1.8758697467297524e-06, - "loss": 0.1888, + "loss": 0.0735, "step": 9768 }, { "epoch": 2.72, "learning_rate": 1.874014287039614e-06, - "loss": 0.2928, + "loss": 0.1058, "step": 9769 }, { "epoch": 2.72, "learning_rate": 1.872158827349476e-06, - "loss": 0.0865, + "loss": 0.1295, "step": 9770 }, { "epoch": 2.72, "learning_rate": 1.8703033676593376e-06, - "loss": 0.0857, + "loss": 0.0471, "step": 9771 }, { "epoch": 2.72, "learning_rate": 1.8684479079691995e-06, - "loss": 0.1375, + "loss": 0.0802, "step": 9772 }, { "epoch": 2.72, "learning_rate": 1.8665924482790611e-06, - "loss": 0.035, + "loss": 0.1044, "step": 9773 }, { "epoch": 2.72, "learning_rate": 1.864736988588923e-06, - "loss": 0.3493, + "loss": 0.0603, "step": 9774 }, { "epoch": 2.72, "learning_rate": 1.8628815288987849e-06, - "loss": 0.1951, + "loss": 0.0342, "step": 9775 }, { "epoch": 2.72, "learning_rate": 1.8610260692086465e-06, - "loss": 0.0345, + "loss": 0.013, "step": 9776 }, { "epoch": 2.72, "learning_rate": 1.8591706095185084e-06, - "loss": 0.0883, + "loss": 0.1205, "step": 9777 }, { "epoch": 2.72, "learning_rate": 1.85731514982837e-06, - "loss": 0.0346, + "loss": 0.0211, "step": 9778 }, { "epoch": 2.72, "learning_rate": 1.855459690138232e-06, - "loss": 0.2437, + "loss": 0.0824, "step": 9779 }, { "epoch": 2.72, "learning_rate": 1.8536042304480936e-06, - "loss": 0.0874, + "loss": 0.0184, "step": 9780 }, { "epoch": 2.72, "learning_rate": 1.8517487707579554e-06, - "loss": 0.0343, + "loss": 0.0598, "step": 9781 }, { "epoch": 2.72, "learning_rate": 1.8498933110678171e-06, - "loss": 0.1365, + "loss": 0.0166, "step": 9782 }, { "epoch": 2.72, "learning_rate": 1.848037851377679e-06, - "loss": 0.0884, + "loss": 0.0108, "step": 9783 }, { "epoch": 2.72, "learning_rate": 1.8461823916875406e-06, - "loss": 0.1428, + "loss": 0.1402, "step": 9784 }, { "epoch": 2.72, "learning_rate": 1.8443269319974025e-06, - "loss": 0.1947, + "loss": 0.064, "step": 9785 }, { "epoch": 2.72, "learning_rate": 1.8424714723072642e-06, - "loss": 0.1389, + "loss": 0.0103, "step": 9786 }, { "epoch": 2.72, "learning_rate": 1.840616012617126e-06, - "loss": 0.1923, + "loss": 0.1969, "step": 9787 }, { "epoch": 2.72, "learning_rate": 1.8387605529269877e-06, - "loss": 0.137, + "loss": 0.0167, "step": 9788 }, { "epoch": 2.72, "learning_rate": 1.8369050932368496e-06, - "loss": 0.0885, + "loss": 0.0158, "step": 9789 }, { "epoch": 2.72, "learning_rate": 1.8350496335467114e-06, - "loss": 0.0864, + "loss": 0.1802, "step": 9790 }, { "epoch": 2.73, "learning_rate": 1.833194173856573e-06, - "loss": 0.2456, + "loss": 0.113, "step": 9791 }, { "epoch": 2.73, "learning_rate": 1.831338714166435e-06, - "loss": 0.1391, + "loss": 0.0652, "step": 9792 }, { "epoch": 2.73, "learning_rate": 1.8294832544762966e-06, - "loss": 0.089, + "loss": 0.1921, "step": 9793 }, { "epoch": 2.73, "learning_rate": 1.8276277947861585e-06, - "loss": 0.0875, + "loss": 0.0215, "step": 9794 }, { "epoch": 2.73, "learning_rate": 1.8257723350960202e-06, - "loss": 0.0847, + "loss": 0.0484, "step": 9795 }, { "epoch": 2.73, "learning_rate": 1.823916875405882e-06, - "loss": 0.0839, + "loss": 0.042, "step": 9796 }, { "epoch": 2.73, "learning_rate": 1.8220614157157437e-06, - "loss": 0.249, + "loss": 0.058, "step": 9797 }, { "epoch": 2.73, "learning_rate": 1.8202059560256056e-06, - "loss": 0.1428, + "loss": 0.0475, "step": 9798 }, { "epoch": 2.73, "learning_rate": 1.8183504963354672e-06, - "loss": 0.1891, + "loss": 0.0787, "step": 9799 }, { "epoch": 2.73, "learning_rate": 1.816495036645329e-06, - "loss": 0.0333, + "loss": 0.0822, "step": 9800 }, { "epoch": 2.73, "learning_rate": 1.8146395769551907e-06, - "loss": 0.1372, + "loss": 0.1184, "step": 9801 }, { "epoch": 2.73, "learning_rate": 1.8127841172650526e-06, - "loss": 0.0864, + "loss": 0.0506, "step": 9802 }, { "epoch": 2.73, "learning_rate": 1.8109286575749143e-06, - "loss": 0.1365, + "loss": 0.0176, "step": 9803 }, { "epoch": 2.73, "learning_rate": 1.8090731978847761e-06, - "loss": 0.1419, + "loss": 0.0906, "step": 9804 }, { "epoch": 2.73, "learning_rate": 1.807217738194638e-06, - "loss": 0.0871, + "loss": 0.0111, "step": 9805 }, { "epoch": 2.73, "learning_rate": 1.8053622785044997e-06, - "loss": 0.189, + "loss": 0.0469, "step": 9806 }, { "epoch": 2.73, "learning_rate": 1.8035068188143615e-06, - "loss": 0.1391, + "loss": 0.0198, "step": 9807 }, { "epoch": 2.73, "learning_rate": 1.8016513591242232e-06, - "loss": 0.1366, + "loss": 0.1264, "step": 9808 }, { "epoch": 2.73, "learning_rate": 1.799795899434085e-06, - "loss": 0.2445, + "loss": 0.0355, "step": 9809 }, { "epoch": 2.73, "learning_rate": 1.7979404397439467e-06, - "loss": 0.2438, + "loss": 0.0534, "step": 9810 }, { "epoch": 2.73, "learning_rate": 1.7960849800538086e-06, - "loss": 0.0336, + "loss": 0.0408, "step": 9811 }, { "epoch": 2.73, "learning_rate": 1.7942295203636703e-06, - "loss": 0.1944, + "loss": 0.0187, "step": 9812 }, { "epoch": 2.73, "learning_rate": 1.7923740606735321e-06, - "loss": 0.1399, + "loss": 0.0063, "step": 9813 }, { "epoch": 2.73, "learning_rate": 1.7905186009833938e-06, - "loss": 0.0336, + "loss": 0.0495, "step": 9814 }, { "epoch": 2.73, "learning_rate": 1.7886631412932557e-06, - "loss": 0.1402, + "loss": 0.1234, "step": 9815 }, { "epoch": 2.73, "learning_rate": 1.7868076816031173e-06, - "loss": 0.0883, + "loss": 0.0793, "step": 9816 }, { "epoch": 2.73, "learning_rate": 1.7849522219129792e-06, - "loss": 0.1381, + "loss": 0.0467, "step": 9817 }, { "epoch": 2.73, "learning_rate": 1.7830967622228408e-06, - "loss": 0.1954, + "loss": 0.142, "step": 9818 }, { "epoch": 2.73, "learning_rate": 1.7812413025327027e-06, - "loss": 0.1375, + "loss": 0.0722, "step": 9819 }, { "epoch": 2.73, "learning_rate": 1.7793858428425646e-06, - "loss": 0.1986, + "loss": 0.0511, "step": 9820 }, { "epoch": 2.73, "learning_rate": 1.7775303831524262e-06, - "loss": 0.0325, + "loss": 0.0154, "step": 9821 }, { "epoch": 2.73, "learning_rate": 1.7756749234622881e-06, - "loss": 0.2397, + "loss": 0.0169, "step": 9822 }, { "epoch": 2.73, "learning_rate": 1.7738194637721498e-06, - "loss": 0.0839, + "loss": 0.0925, "step": 9823 }, { "epoch": 2.73, "learning_rate": 1.7719640040820116e-06, - "loss": 0.0316, + "loss": 0.0201, "step": 9824 }, { "epoch": 2.73, "learning_rate": 1.7701085443918733e-06, - "loss": 0.2478, + "loss": 0.0807, "step": 9825 }, { "epoch": 2.73, "learning_rate": 1.7682530847017352e-06, - "loss": 0.1919, + "loss": 0.0097, "step": 9826 }, { "epoch": 2.74, "learning_rate": 1.7663976250115968e-06, - "loss": 0.0336, + "loss": 0.0632, "step": 9827 }, { "epoch": 2.74, "learning_rate": 1.7645421653214587e-06, - "loss": 0.0851, + "loss": 0.0596, "step": 9828 }, { "epoch": 2.74, "learning_rate": 1.7626867056313204e-06, - "loss": 0.2433, + "loss": 0.0426, "step": 9829 }, { "epoch": 2.74, "learning_rate": 1.7608312459411822e-06, - "loss": 0.0322, + "loss": 0.0213, "step": 9830 }, { "epoch": 2.74, "learning_rate": 1.7589757862510437e-06, - "loss": 0.0841, + "loss": 0.1043, "step": 9831 }, { "epoch": 2.74, "learning_rate": 1.7571203265609056e-06, - "loss": 0.1403, + "loss": 0.1198, "step": 9832 }, { "epoch": 2.74, "learning_rate": 1.7552648668707672e-06, - "loss": 0.2429, + "loss": 0.0489, "step": 9833 }, { "epoch": 2.74, "learning_rate": 1.753409407180629e-06, - "loss": 0.0846, + "loss": 0.1004, "step": 9834 }, { "epoch": 2.74, "learning_rate": 1.7515539474904907e-06, - "loss": 0.0856, + "loss": 0.1127, "step": 9835 }, { "epoch": 2.74, "learning_rate": 1.7496984878003526e-06, - "loss": 0.1936, + "loss": 0.2209, "step": 9836 }, { "epoch": 2.74, "learning_rate": 1.7478430281102143e-06, - "loss": 0.1417, + "loss": 0.1532, "step": 9837 }, { "epoch": 2.74, "learning_rate": 1.7459875684200761e-06, - "loss": 0.0317, + "loss": 0.0575, "step": 9838 }, { "epoch": 2.74, "learning_rate": 1.7441321087299378e-06, - "loss": 0.1376, + "loss": 0.1165, "step": 9839 }, { "epoch": 2.74, "learning_rate": 1.7422766490397997e-06, - "loss": 0.0844, + "loss": 0.012, "step": 9840 }, { "epoch": 2.74, "learning_rate": 1.7404211893496613e-06, - "loss": 0.0847, + "loss": 0.0656, "step": 9841 }, { "epoch": 2.74, "learning_rate": 1.7385657296595232e-06, - "loss": 0.0849, + "loss": 0.1481, "step": 9842 }, { "epoch": 2.74, "learning_rate": 1.736710269969385e-06, - "loss": 0.1959, + "loss": 0.0587, "step": 9843 }, { "epoch": 2.74, "learning_rate": 1.7348548102792467e-06, - "loss": 0.1925, + "loss": 0.0938, "step": 9844 }, { "epoch": 2.74, "learning_rate": 1.7329993505891086e-06, - "loss": 0.3454, + "loss": 0.0486, "step": 9845 }, { "epoch": 2.74, "learning_rate": 1.7311438908989703e-06, - "loss": 0.0878, + "loss": 0.0586, "step": 9846 }, { "epoch": 2.74, "learning_rate": 1.7292884312088321e-06, - "loss": 0.191, + "loss": 0.0564, "step": 9847 }, { "epoch": 2.74, "learning_rate": 1.7274329715186938e-06, - "loss": 0.1378, + "loss": 0.1236, "step": 9848 }, { "epoch": 2.74, "learning_rate": 1.7255775118285557e-06, - "loss": 0.1926, + "loss": 0.0602, "step": 9849 }, { "epoch": 2.74, "learning_rate": 1.7237220521384173e-06, - "loss": 0.1387, + "loss": 0.0739, "step": 9850 }, { "epoch": 2.74, "learning_rate": 1.7218665924482792e-06, - "loss": 0.1416, + "loss": 0.1007, "step": 9851 }, { "epoch": 2.74, "learning_rate": 1.7200111327581408e-06, - "loss": 0.1407, + "loss": 0.0521, "step": 9852 }, { "epoch": 2.74, "learning_rate": 1.7181556730680027e-06, - "loss": 0.1917, + "loss": 0.098, "step": 9853 }, { "epoch": 2.74, "learning_rate": 1.7163002133778644e-06, - "loss": 0.0842, + "loss": 0.0532, "step": 9854 }, { "epoch": 2.74, "learning_rate": 1.7144447536877262e-06, - "loss": 0.0874, + "loss": 0.0981, "step": 9855 }, { "epoch": 2.74, "learning_rate": 1.712589293997588e-06, - "loss": 0.2517, + "loss": 0.0586, "step": 9856 }, { "epoch": 2.74, "learning_rate": 1.7107338343074498e-06, - "loss": 0.1396, + "loss": 0.1212, "step": 9857 }, { "epoch": 2.74, "learning_rate": 1.7088783746173116e-06, - "loss": 0.1382, + "loss": 0.0476, "step": 9858 }, { "epoch": 2.74, "learning_rate": 1.7070229149271733e-06, - "loss": 0.1953, + "loss": 0.0264, "step": 9859 }, { "epoch": 2.74, "learning_rate": 1.7051674552370352e-06, - "loss": 0.0325, + "loss": 0.0239, "step": 9860 }, { "epoch": 2.74, "learning_rate": 1.7033119955468968e-06, - "loss": 0.1403, + "loss": 0.0131, "step": 9861 }, { "epoch": 2.74, "learning_rate": 1.7014565358567587e-06, - "loss": 0.2462, + "loss": 0.0438, "step": 9862 }, { "epoch": 2.75, "learning_rate": 1.6996010761666204e-06, - "loss": 0.0889, + "loss": 0.0622, "step": 9863 }, { "epoch": 2.75, "learning_rate": 1.6977456164764822e-06, - "loss": 0.1901, + "loss": 0.0158, "step": 9864 }, { "epoch": 2.75, "learning_rate": 1.695890156786344e-06, - "loss": 0.1943, + "loss": 0.0977, "step": 9865 }, { "epoch": 2.75, "learning_rate": 1.6940346970962058e-06, - "loss": 0.0832, + "loss": 0.0526, "step": 9866 }, { "epoch": 2.75, "learning_rate": 1.6921792374060674e-06, - "loss": 0.1953, + "loss": 0.0128, "step": 9867 }, { "epoch": 2.75, "learning_rate": 1.6903237777159293e-06, - "loss": 0.2437, + "loss": 0.1272, "step": 9868 }, { "epoch": 2.75, "learning_rate": 1.688468318025791e-06, - "loss": 0.0861, + "loss": 0.0481, "step": 9869 }, { "epoch": 2.75, "learning_rate": 1.6866128583356528e-06, - "loss": 0.3014, + "loss": 0.0137, "step": 9870 }, { "epoch": 2.75, "learning_rate": 1.6847573986455145e-06, - "loss": 0.243, + "loss": 0.092, "step": 9871 }, { "epoch": 2.75, "learning_rate": 1.6829019389553764e-06, - "loss": 0.0323, + "loss": 0.0551, "step": 9872 }, { "epoch": 2.75, "learning_rate": 1.6810464792652382e-06, - "loss": 0.0327, + "loss": 0.0134, "step": 9873 }, { "epoch": 2.75, "learning_rate": 1.6791910195750999e-06, - "loss": 0.1912, + "loss": 0.0538, "step": 9874 }, { "epoch": 2.75, "learning_rate": 1.6773355598849618e-06, - "loss": 0.0882, + "loss": 0.1241, "step": 9875 }, { "epoch": 2.75, "learning_rate": 1.6754801001948234e-06, - "loss": 0.0843, + "loss": 0.0472, "step": 9876 }, { "epoch": 2.75, "learning_rate": 1.6736246405046853e-06, - "loss": 0.1387, + "loss": 0.0179, "step": 9877 }, { "epoch": 2.75, "learning_rate": 1.671769180814547e-06, - "loss": 0.1907, + "loss": 0.0213, "step": 9878 }, { "epoch": 2.75, "learning_rate": 1.6699137211244088e-06, - "loss": 0.1414, + "loss": 0.0546, "step": 9879 }, { "epoch": 2.75, "learning_rate": 1.6680582614342705e-06, - "loss": 0.0877, + "loss": 0.0362, "step": 9880 }, { "epoch": 2.75, "learning_rate": 1.6662028017441323e-06, - "loss": 0.1405, + "loss": 0.0531, "step": 9881 }, { "epoch": 2.75, "learning_rate": 1.664347342053994e-06, - "loss": 0.0835, + "loss": 0.0205, "step": 9882 }, { "epoch": 2.75, "learning_rate": 1.6624918823638559e-06, - "loss": 0.1412, + "loss": 0.0451, "step": 9883 }, { "epoch": 2.75, "learning_rate": 1.6606364226737175e-06, - "loss": 0.0864, + "loss": 0.0159, "step": 9884 }, { "epoch": 2.75, "learning_rate": 1.6587809629835794e-06, - "loss": 0.1922, + "loss": 0.0941, "step": 9885 }, { "epoch": 2.75, "learning_rate": 1.656925503293441e-06, - "loss": 0.0326, + "loss": 0.0377, "step": 9886 }, { "epoch": 2.75, "learning_rate": 1.655070043603303e-06, - "loss": 0.1387, + "loss": 0.0112, "step": 9887 }, { "epoch": 2.75, "learning_rate": 1.6532145839131648e-06, - "loss": 0.1412, + "loss": 0.0173, "step": 9888 }, { "epoch": 2.75, "learning_rate": 1.6513591242230265e-06, - "loss": 0.2497, + "loss": 0.0173, "step": 9889 }, { "epoch": 2.75, "learning_rate": 1.6495036645328883e-06, - "loss": 0.086, + "loss": 0.0989, "step": 9890 }, { "epoch": 2.75, "learning_rate": 1.64764820484275e-06, - "loss": 0.0883, + "loss": 0.0128, "step": 9891 }, { "epoch": 2.75, "learning_rate": 1.6457927451526119e-06, - "loss": 0.0866, + "loss": 0.0127, "step": 9892 }, { "epoch": 2.75, "learning_rate": 1.6439372854624735e-06, - "loss": 0.1388, + "loss": 0.0455, "step": 9893 }, { "epoch": 2.75, "learning_rate": 1.6420818257723354e-06, - "loss": 0.0855, + "loss": 0.0506, "step": 9894 }, { "epoch": 2.75, "learning_rate": 1.640226366082197e-06, - "loss": 0.1927, + "loss": 0.0455, "step": 9895 }, { "epoch": 2.75, "learning_rate": 1.638370906392059e-06, - "loss": 0.0851, + "loss": 0.1903, "step": 9896 }, { "epoch": 2.75, "learning_rate": 1.6365154467019206e-06, - "loss": 0.1419, + "loss": 0.0199, "step": 9897 }, { "epoch": 2.75, "learning_rate": 1.6346599870117824e-06, - "loss": 0.1934, + "loss": 0.0814, "step": 9898 }, { "epoch": 2.76, "learning_rate": 1.632804527321644e-06, - "loss": 0.1928, + "loss": 0.0098, "step": 9899 }, { "epoch": 2.76, "learning_rate": 1.630949067631506e-06, - "loss": 0.1383, + "loss": 0.0136, "step": 9900 }, { "epoch": 2.76, "learning_rate": 1.6290936079413676e-06, - "loss": 0.1953, + "loss": 0.0441, "step": 9901 }, { "epoch": 2.76, "learning_rate": 1.6272381482512295e-06, - "loss": 0.0856, + "loss": 0.0561, "step": 9902 }, { "epoch": 2.76, "learning_rate": 1.6253826885610914e-06, - "loss": 0.1441, + "loss": 0.093, "step": 9903 }, { "epoch": 2.76, "learning_rate": 1.623527228870953e-06, - "loss": 0.1929, + "loss": 0.0674, "step": 9904 }, { "epoch": 2.76, "learning_rate": 1.621671769180815e-06, - "loss": 0.2429, + "loss": 0.0113, "step": 9905 }, { "epoch": 2.76, "learning_rate": 1.6198163094906766e-06, - "loss": 0.0854, + "loss": 0.0208, "step": 9906 }, { "epoch": 2.76, "learning_rate": 1.617960849800538e-06, - "loss": 0.087, + "loss": 0.0447, "step": 9907 }, { "epoch": 2.76, "learning_rate": 1.6161053901103999e-06, - "loss": 0.0837, + "loss": 0.0133, "step": 9908 }, { "epoch": 2.76, "learning_rate": 1.6142499304202615e-06, - "loss": 0.1359, + "loss": 0.0495, "step": 9909 }, { "epoch": 2.76, "learning_rate": 1.6123944707301234e-06, - "loss": 0.1923, + "loss": 0.1472, "step": 9910 }, { "epoch": 2.76, "learning_rate": 1.610539011039985e-06, - "loss": 0.0315, + "loss": 0.1216, "step": 9911 }, { "epoch": 2.76, "learning_rate": 1.608683551349847e-06, - "loss": 0.1407, + "loss": 0.0128, "step": 9912 }, { "epoch": 2.76, "learning_rate": 1.6068280916597088e-06, - "loss": 0.032, + "loss": 0.0362, "step": 9913 }, { "epoch": 2.76, "learning_rate": 1.6049726319695705e-06, - "loss": 0.033, + "loss": 0.0099, "step": 9914 }, { "epoch": 2.76, "learning_rate": 1.6031171722794323e-06, - "loss": 0.0853, + "loss": 0.105, "step": 9915 }, { "epoch": 2.76, "learning_rate": 1.601261712589294e-06, - "loss": 0.1402, + "loss": 0.0566, "step": 9916 }, { "epoch": 2.76, "learning_rate": 1.5994062528991559e-06, - "loss": 0.135, + "loss": 0.1104, "step": 9917 }, { "epoch": 2.76, "learning_rate": 1.5975507932090175e-06, - "loss": 0.139, + "loss": 0.0368, "step": 9918 }, { "epoch": 2.76, "learning_rate": 1.5956953335188794e-06, - "loss": 0.0873, + "loss": 0.103, "step": 9919 }, { "epoch": 2.76, "learning_rate": 1.593839873828741e-06, - "loss": 0.195, + "loss": 0.023, "step": 9920 }, { "epoch": 2.76, "learning_rate": 1.591984414138603e-06, - "loss": 0.0883, + "loss": 0.0123, "step": 9921 }, { "epoch": 2.76, "learning_rate": 1.5901289544484646e-06, - "loss": 0.2956, + "loss": 0.0575, "step": 9922 }, { "epoch": 2.76, "learning_rate": 1.5882734947583265e-06, - "loss": 0.0316, + "loss": 0.1447, "step": 9923 }, { "epoch": 2.76, "learning_rate": 1.5864180350681881e-06, - "loss": 0.086, + "loss": 0.0112, "step": 9924 }, { "epoch": 2.76, "learning_rate": 1.58456257537805e-06, - "loss": 0.0834, + "loss": 0.0932, "step": 9925 }, { "epoch": 2.76, "learning_rate": 1.5827071156879116e-06, - "loss": 0.1952, + "loss": 0.06, "step": 9926 }, { "epoch": 2.76, "learning_rate": 1.5808516559977735e-06, - "loss": 0.1388, + "loss": 0.014, "step": 9927 }, { "epoch": 2.76, "learning_rate": 1.5789961963076354e-06, - "loss": 0.1407, + "loss": 0.0144, "step": 9928 }, { "epoch": 2.76, "learning_rate": 1.577140736617497e-06, - "loss": 0.0808, + "loss": 0.2019, "step": 9929 }, { "epoch": 2.76, "learning_rate": 1.575285276927359e-06, - "loss": 0.1386, + "loss": 0.0698, "step": 9930 }, { "epoch": 2.76, "learning_rate": 1.5734298172372206e-06, - "loss": 0.0868, + "loss": 0.0373, "step": 9931 }, { "epoch": 2.76, "learning_rate": 1.5715743575470824e-06, - "loss": 0.1376, + "loss": 0.0489, "step": 9932 }, { "epoch": 2.76, "learning_rate": 1.569718897856944e-06, - "loss": 0.1408, + "loss": 0.0469, "step": 9933 }, { "epoch": 2.76, "learning_rate": 1.567863438166806e-06, - "loss": 0.0837, + "loss": 0.1112, "step": 9934 }, { "epoch": 2.77, "learning_rate": 1.5660079784766676e-06, - "loss": 0.1392, + "loss": 0.0192, "step": 9935 }, { "epoch": 2.77, "learning_rate": 1.5641525187865295e-06, - "loss": 0.1989, + "loss": 0.0399, "step": 9936 }, { "epoch": 2.77, "learning_rate": 1.5622970590963912e-06, - "loss": 0.1441, + "loss": 0.052, "step": 9937 }, { "epoch": 2.77, "learning_rate": 1.560441599406253e-06, - "loss": 0.082, + "loss": 0.0151, "step": 9938 }, { "epoch": 2.77, "learning_rate": 1.5585861397161147e-06, - "loss": 0.0869, + "loss": 0.0709, "step": 9939 }, { "epoch": 2.77, "learning_rate": 1.5567306800259766e-06, - "loss": 0.0313, + "loss": 0.0903, "step": 9940 }, { "epoch": 2.77, "learning_rate": 1.5548752203358382e-06, - "loss": 0.0837, + "loss": 0.0159, "step": 9941 }, { "epoch": 2.77, "learning_rate": 1.5530197606457e-06, - "loss": 0.0848, + "loss": 0.135, "step": 9942 }, { "epoch": 2.77, "learning_rate": 1.551164300955562e-06, - "loss": 0.0309, + "loss": 0.0144, "step": 9943 }, { "epoch": 2.77, "learning_rate": 1.5493088412654236e-06, - "loss": 0.2466, + "loss": 0.0997, "step": 9944 }, { "epoch": 2.77, "learning_rate": 1.5474533815752855e-06, - "loss": 0.0863, + "loss": 0.0502, "step": 9945 }, { "epoch": 2.77, "learning_rate": 1.5455979218851472e-06, - "loss": 0.1404, + "loss": 0.0248, "step": 9946 }, { "epoch": 2.77, "learning_rate": 1.543742462195009e-06, - "loss": 0.0847, + "loss": 0.0621, "step": 9947 }, { "epoch": 2.77, "learning_rate": 1.5418870025048707e-06, - "loss": 0.1393, + "loss": 0.0127, "step": 9948 }, { "epoch": 2.77, "learning_rate": 1.5400315428147326e-06, - "loss": 0.0304, + "loss": 0.1489, "step": 9949 }, { "epoch": 2.77, "learning_rate": 1.5381760831245942e-06, - "loss": 0.0849, + "loss": 0.0404, "step": 9950 }, { "epoch": 2.77, "learning_rate": 1.536320623434456e-06, - "loss": 0.0305, + "loss": 0.0825, "step": 9951 }, { "epoch": 2.77, "learning_rate": 1.5344651637443177e-06, - "loss": 0.1361, + "loss": 0.0787, "step": 9952 }, { "epoch": 2.77, "learning_rate": 1.5326097040541796e-06, - "loss": 0.1401, + "loss": 0.0126, "step": 9953 }, { "epoch": 2.77, "learning_rate": 1.5307542443640413e-06, - "loss": 0.0309, + "loss": 0.0567, "step": 9954 }, { "epoch": 2.77, "learning_rate": 1.5288987846739031e-06, - "loss": 0.141, + "loss": 0.1391, "step": 9955 }, { "epoch": 2.77, "learning_rate": 1.5270433249837648e-06, - "loss": 0.3079, + "loss": 0.1405, "step": 9956 }, { "epoch": 2.77, "learning_rate": 1.5251878652936267e-06, - "loss": 0.2954, + "loss": 0.0845, "step": 9957 }, { "epoch": 2.77, "learning_rate": 1.5233324056034885e-06, - "loss": 0.1424, + "loss": 0.0672, "step": 9958 }, { "epoch": 2.77, "learning_rate": 1.5214769459133502e-06, - "loss": 0.0863, + "loss": 0.0714, "step": 9959 }, { "epoch": 2.77, "learning_rate": 1.519621486223212e-06, - "loss": 0.0848, + "loss": 0.052, "step": 9960 }, { "epoch": 2.77, "learning_rate": 1.5177660265330737e-06, - "loss": 0.1924, + "loss": 0.0437, "step": 9961 }, { "epoch": 2.77, "learning_rate": 1.5159105668429356e-06, - "loss": 0.0843, + "loss": 0.0519, "step": 9962 }, { "epoch": 2.77, "learning_rate": 1.5140551071527973e-06, - "loss": 0.2513, + "loss": 0.0146, "step": 9963 }, { "epoch": 2.77, "learning_rate": 1.5121996474626591e-06, - "loss": 0.1942, + "loss": 0.1211, "step": 9964 }, { "epoch": 2.77, "learning_rate": 1.5103441877725208e-06, - "loss": 0.133, + "loss": 0.0928, "step": 9965 }, { "epoch": 2.77, "learning_rate": 1.5084887280823827e-06, - "loss": 0.1374, + "loss": 0.0676, "step": 9966 }, { "epoch": 2.77, "learning_rate": 1.5066332683922443e-06, - "loss": 0.252, + "loss": 0.0837, "step": 9967 }, { "epoch": 2.77, "learning_rate": 1.5047778087021062e-06, - "loss": 0.0299, + "loss": 0.1452, "step": 9968 }, { "epoch": 2.77, "learning_rate": 1.5029223490119678e-06, - "loss": 0.1401, + "loss": 0.0144, "step": 9969 }, { "epoch": 2.77, "learning_rate": 1.5010668893218297e-06, - "loss": 0.0297, + "loss": 0.0461, "step": 9970 }, { "epoch": 2.78, "learning_rate": 1.4992114296316914e-06, - "loss": 0.4186, + "loss": 0.0387, "step": 9971 }, { "epoch": 2.78, "learning_rate": 1.4973559699415532e-06, - "loss": 0.1934, + "loss": 0.1719, "step": 9972 }, { "epoch": 2.78, "learning_rate": 1.4955005102514151e-06, - "loss": 0.1419, + "loss": 0.0415, "step": 9973 }, { "epoch": 2.78, "learning_rate": 1.4936450505612768e-06, - "loss": 0.1965, + "loss": 0.0729, "step": 9974 }, { "epoch": 2.78, "learning_rate": 1.4917895908711386e-06, - "loss": 0.0833, + "loss": 0.0559, "step": 9975 }, { "epoch": 2.78, "learning_rate": 1.4899341311810003e-06, - "loss": 0.2491, + "loss": 0.1997, "step": 9976 }, { "epoch": 2.78, "learning_rate": 1.4880786714908622e-06, - "loss": 0.0852, + "loss": 0.01, "step": 9977 }, { "epoch": 2.78, "learning_rate": 1.4862232118007238e-06, - "loss": 0.1405, + "loss": 0.0597, "step": 9978 }, { "epoch": 2.78, "learning_rate": 1.4843677521105857e-06, - "loss": 0.1396, + "loss": 0.1113, "step": 9979 }, { "epoch": 2.78, "learning_rate": 1.4825122924204474e-06, - "loss": 0.0863, + "loss": 0.0563, "step": 9980 }, { "epoch": 2.78, "learning_rate": 1.4806568327303092e-06, - "loss": 0.0844, + "loss": 0.0314, "step": 9981 }, { "epoch": 2.78, "learning_rate": 1.4788013730401707e-06, - "loss": 0.0832, + "loss": 0.0646, "step": 9982 }, { "epoch": 2.78, "learning_rate": 1.4769459133500326e-06, - "loss": 0.0882, + "loss": 0.0526, "step": 9983 }, { "epoch": 2.78, "learning_rate": 1.4750904536598942e-06, - "loss": 0.138, + "loss": 0.1253, "step": 9984 }, { "epoch": 2.78, "learning_rate": 1.473234993969756e-06, - "loss": 0.0303, + "loss": 0.0197, "step": 9985 }, { "epoch": 2.78, "learning_rate": 1.4713795342796177e-06, - "loss": 0.0305, + "loss": 0.0637, "step": 9986 }, { "epoch": 2.78, "learning_rate": 1.4695240745894796e-06, - "loss": 0.0862, + "loss": 0.0434, "step": 9987 }, { "epoch": 2.78, "learning_rate": 1.4676686148993413e-06, - "loss": 0.1955, + "loss": 0.0474, "step": 9988 }, { "epoch": 2.78, "learning_rate": 1.4658131552092031e-06, - "loss": 0.2483, + "loss": 0.0452, "step": 9989 }, { "epoch": 2.78, "learning_rate": 1.4639576955190648e-06, - "loss": 0.2501, + "loss": 0.0417, "step": 9990 }, { "epoch": 2.78, "learning_rate": 1.4621022358289267e-06, - "loss": 0.0304, + "loss": 0.0598, "step": 9991 }, { "epoch": 2.78, "learning_rate": 1.4602467761387883e-06, - "loss": 0.3013, + "loss": 0.0261, "step": 9992 }, { "epoch": 2.78, "learning_rate": 1.4583913164486502e-06, - "loss": 0.2482, + "loss": 0.0977, "step": 9993 }, { "epoch": 2.78, "learning_rate": 1.4565358567585119e-06, - "loss": 0.1359, + "loss": 0.0681, "step": 9994 }, { "epoch": 2.78, "learning_rate": 1.4546803970683737e-06, - "loss": 0.2531, + "loss": 0.0422, "step": 9995 }, { "epoch": 2.78, "learning_rate": 1.4528249373782354e-06, - "loss": 0.2516, + "loss": 0.0189, "step": 9996 }, { "epoch": 2.78, "learning_rate": 1.4509694776880973e-06, - "loss": 0.0843, + "loss": 0.1173, "step": 9997 }, { "epoch": 2.78, "learning_rate": 1.4491140179979591e-06, - "loss": 0.0869, + "loss": 0.0594, "step": 9998 }, { "epoch": 2.78, "learning_rate": 1.4472585583078208e-06, - "loss": 0.0304, + "loss": 0.0178, "step": 9999 }, { "epoch": 2.78, "learning_rate": 1.4454030986176827e-06, - "loss": 0.03, + "loss": 0.0197, "step": 10000 }, { "epoch": 2.78, "learning_rate": 1.4435476389275443e-06, - "loss": 0.2392, + "loss": 0.176, "step": 10001 }, { "epoch": 2.78, "learning_rate": 1.4416921792374062e-06, - "loss": 0.0305, + "loss": 0.0103, "step": 10002 }, { "epoch": 2.78, "learning_rate": 1.4398367195472678e-06, - "loss": 0.1905, + "loss": 0.0094, "step": 10003 }, { "epoch": 2.78, "learning_rate": 1.4379812598571297e-06, - "loss": 0.0861, + "loss": 0.0126, "step": 10004 }, { "epoch": 2.78, "learning_rate": 1.4361258001669914e-06, - "loss": 0.0857, + "loss": 0.0281, "step": 10005 }, { "epoch": 2.78, "learning_rate": 1.4342703404768532e-06, - "loss": 0.0855, + "loss": 0.0154, "step": 10006 }, { "epoch": 2.79, "learning_rate": 1.432414880786715e-06, - "loss": 0.1935, + "loss": 0.0702, "step": 10007 }, { "epoch": 2.79, "learning_rate": 1.4305594210965768e-06, - "loss": 0.0833, + "loss": 0.02, "step": 10008 }, { "epoch": 2.79, "learning_rate": 1.4287039614064384e-06, - "loss": 0.2474, + "loss": 0.0634, "step": 10009 }, { "epoch": 2.79, "learning_rate": 1.4268485017163003e-06, - "loss": 0.0835, + "loss": 0.0131, "step": 10010 }, { "epoch": 2.79, "learning_rate": 1.424993042026162e-06, - "loss": 0.1415, + "loss": 0.0932, "step": 10011 }, { "epoch": 2.79, "learning_rate": 1.4231375823360238e-06, - "loss": 0.0825, + "loss": 0.0243, "step": 10012 }, { "epoch": 2.79, "learning_rate": 1.4212821226458857e-06, - "loss": 0.087, + "loss": 0.0127, "step": 10013 }, { "epoch": 2.79, "learning_rate": 1.4194266629557474e-06, - "loss": 0.137, + "loss": 0.0859, "step": 10014 }, { "epoch": 2.79, "learning_rate": 1.4175712032656092e-06, - "loss": 0.0849, + "loss": 0.0525, "step": 10015 }, { "epoch": 2.79, "learning_rate": 1.4157157435754709e-06, - "loss": 0.2438, + "loss": 0.0956, "step": 10016 }, { "epoch": 2.79, "learning_rate": 1.4138602838853328e-06, - "loss": 0.087, + "loss": 0.0697, "step": 10017 }, { "epoch": 2.79, "learning_rate": 1.4120048241951944e-06, - "loss": 0.139, + "loss": 0.083, "step": 10018 }, { "epoch": 2.79, "learning_rate": 1.4101493645050563e-06, - "loss": 0.0304, + "loss": 0.009, "step": 10019 }, { "epoch": 2.79, "learning_rate": 1.408293904814918e-06, - "loss": 0.0829, + "loss": 0.0465, "step": 10020 }, { "epoch": 2.79, "learning_rate": 1.4064384451247798e-06, - "loss": 0.3645, + "loss": 0.0536, "step": 10021 }, { "epoch": 2.79, "learning_rate": 1.4045829854346415e-06, - "loss": 0.0303, + "loss": 0.2392, "step": 10022 }, { "epoch": 2.79, "learning_rate": 1.4027275257445034e-06, - "loss": 0.0838, + "loss": 0.081, "step": 10023 }, { "epoch": 2.79, "learning_rate": 1.400872066054365e-06, - "loss": 0.1408, + "loss": 0.0661, "step": 10024 }, { "epoch": 2.79, "learning_rate": 1.3990166063642269e-06, - "loss": 0.0835, + "loss": 0.0972, "step": 10025 }, { "epoch": 2.79, "learning_rate": 1.3971611466740885e-06, - "loss": 0.0852, + "loss": 0.1017, "step": 10026 }, { "epoch": 2.79, "learning_rate": 1.3953056869839504e-06, - "loss": 0.1391, + "loss": 0.0424, "step": 10027 }, { "epoch": 2.79, "learning_rate": 1.3934502272938123e-06, - "loss": 0.246, + "loss": 0.0156, "step": 10028 }, { "epoch": 2.79, "learning_rate": 1.391594767603674e-06, - "loss": 0.2475, + "loss": 0.1177, "step": 10029 }, { "epoch": 2.79, "learning_rate": 1.3897393079135358e-06, - "loss": 0.0306, + "loss": 0.0755, "step": 10030 }, { "epoch": 2.79, "learning_rate": 1.3878838482233975e-06, - "loss": 0.0846, + "loss": 0.1349, "step": 10031 }, { "epoch": 2.79, "learning_rate": 1.3860283885332593e-06, - "loss": 0.1939, + "loss": 0.0613, "step": 10032 }, { "epoch": 2.79, "learning_rate": 1.384172928843121e-06, - "loss": 0.0868, + "loss": 0.0186, "step": 10033 }, { "epoch": 2.79, "learning_rate": 1.3823174691529829e-06, - "loss": 0.3654, + "loss": 0.016, "step": 10034 }, { "epoch": 2.79, "learning_rate": 1.3804620094628445e-06, - "loss": 0.0302, + "loss": 0.0612, "step": 10035 }, { "epoch": 2.79, "learning_rate": 1.3786065497727064e-06, - "loss": 0.1359, + "loss": 0.0481, "step": 10036 }, { "epoch": 2.79, "learning_rate": 1.376751090082568e-06, - "loss": 0.1931, + "loss": 0.0117, "step": 10037 }, { "epoch": 2.79, "learning_rate": 1.37489563039243e-06, - "loss": 0.0306, + "loss": 0.0765, "step": 10038 }, { "epoch": 2.79, "learning_rate": 1.3730401707022916e-06, - "loss": 0.1391, + "loss": 0.0294, "step": 10039 }, { "epoch": 2.79, "learning_rate": 1.3711847110121535e-06, - "loss": 0.1947, + "loss": 0.0213, "step": 10040 }, { "epoch": 2.79, "learning_rate": 1.3693292513220151e-06, - "loss": 0.0847, + "loss": 0.0081, "step": 10041 }, { "epoch": 2.79, "learning_rate": 1.367473791631877e-06, - "loss": 0.0855, + "loss": 0.1394, "step": 10042 }, { "epoch": 2.8, "learning_rate": 1.3656183319417389e-06, - "loss": 0.0303, + "loss": 0.0129, "step": 10043 }, { "epoch": 2.8, "learning_rate": 1.3637628722516005e-06, - "loss": 0.0308, + "loss": 0.0237, "step": 10044 }, { "epoch": 2.8, "learning_rate": 1.3619074125614624e-06, - "loss": 0.0856, + "loss": 0.0183, "step": 10045 }, { "epoch": 2.8, "learning_rate": 1.360051952871324e-06, - "loss": 0.0839, + "loss": 0.0199, "step": 10046 }, { "epoch": 2.8, "learning_rate": 1.358196493181186e-06, - "loss": 0.1909, + "loss": 0.0182, "step": 10047 }, { "epoch": 2.8, "learning_rate": 1.3563410334910476e-06, - "loss": 0.3009, + "loss": 0.0733, "step": 10048 }, { "epoch": 2.8, "learning_rate": 1.3544855738009094e-06, - "loss": 0.0824, + "loss": 0.0875, "step": 10049 }, { "epoch": 2.8, "learning_rate": 1.352630114110771e-06, - "loss": 0.1388, + "loss": 0.1599, "step": 10050 }, { "epoch": 2.8, "learning_rate": 1.350774654420633e-06, - "loss": 0.0302, + "loss": 0.1017, "step": 10051 }, { "epoch": 2.8, "learning_rate": 1.3489191947304946e-06, - "loss": 0.193, + "loss": 0.1256, "step": 10052 }, { "epoch": 2.8, "learning_rate": 1.3470637350403565e-06, - "loss": 0.0301, + "loss": 0.109, "step": 10053 }, { "epoch": 2.8, "learning_rate": 1.3452082753502182e-06, - "loss": 0.0869, + "loss": 0.0536, "step": 10054 }, { "epoch": 2.8, "learning_rate": 1.34335281566008e-06, - "loss": 0.1949, + "loss": 0.0412, "step": 10055 }, { "epoch": 2.8, "learning_rate": 1.3414973559699417e-06, - "loss": 0.0828, + "loss": 0.0136, "step": 10056 }, { "epoch": 2.8, "learning_rate": 1.3396418962798036e-06, - "loss": 0.1397, + "loss": 0.1214, "step": 10057 }, { "epoch": 2.8, "learning_rate": 1.337786436589665e-06, - "loss": 0.3047, + "loss": 0.1372, "step": 10058 }, { "epoch": 2.8, "learning_rate": 1.3359309768995269e-06, - "loss": 0.2463, + "loss": 0.0471, "step": 10059 }, { "epoch": 2.8, "learning_rate": 1.3340755172093885e-06, - "loss": 0.0882, + "loss": 0.0118, "step": 10060 }, { "epoch": 2.8, "learning_rate": 1.3322200575192504e-06, - "loss": 0.0307, + "loss": 0.0531, "step": 10061 }, { "epoch": 2.8, "learning_rate": 1.330364597829112e-06, - "loss": 0.0847, + "loss": 0.1311, "step": 10062 }, { "epoch": 2.8, "learning_rate": 1.328509138138974e-06, - "loss": 0.193, + "loss": 0.0639, "step": 10063 }, { "epoch": 2.8, "learning_rate": 1.3266536784488356e-06, - "loss": 0.2493, + "loss": 0.0217, "step": 10064 }, { "epoch": 2.8, "learning_rate": 1.3247982187586975e-06, - "loss": 0.0847, + "loss": 0.007, "step": 10065 }, { "epoch": 2.8, "learning_rate": 1.3229427590685593e-06, - "loss": 0.142, + "loss": 0.0148, "step": 10066 }, { "epoch": 2.8, "learning_rate": 1.321087299378421e-06, - "loss": 0.1929, + "loss": 0.0591, "step": 10067 }, { "epoch": 2.8, "learning_rate": 1.3192318396882829e-06, - "loss": 0.3033, + "loss": 0.1349, "step": 10068 }, { "epoch": 2.8, "learning_rate": 1.3173763799981445e-06, - "loss": 0.0847, + "loss": 0.0234, "step": 10069 }, { "epoch": 2.8, "learning_rate": 1.3155209203080064e-06, - "loss": 0.137, + "loss": 0.0117, "step": 10070 }, { "epoch": 2.8, "learning_rate": 1.313665460617868e-06, - "loss": 0.1345, + "loss": 0.05, "step": 10071 }, { "epoch": 2.8, "learning_rate": 1.31181000092773e-06, - "loss": 0.139, + "loss": 0.0725, "step": 10072 }, { "epoch": 2.8, "learning_rate": 1.3099545412375916e-06, - "loss": 0.0817, + "loss": 0.0454, "step": 10073 }, { "epoch": 2.8, "learning_rate": 1.3080990815474535e-06, - "loss": 0.0835, + "loss": 0.1145, "step": 10074 }, { "epoch": 2.8, "learning_rate": 1.3062436218573151e-06, - "loss": 0.1417, + "loss": 0.0133, "step": 10075 }, { "epoch": 2.8, "learning_rate": 1.304388162167177e-06, - "loss": 0.0872, + "loss": 0.0128, "step": 10076 }, { "epoch": 2.8, "learning_rate": 1.3025327024770386e-06, - "loss": 0.0838, + "loss": 0.0154, "step": 10077 }, { "epoch": 2.8, "learning_rate": 1.3006772427869005e-06, - "loss": 0.2482, + "loss": 0.0119, "step": 10078 }, { "epoch": 2.81, "learning_rate": 1.2988217830967622e-06, - "loss": 0.1909, + "loss": 0.0165, "step": 10079 }, { "epoch": 2.81, "learning_rate": 1.296966323406624e-06, - "loss": 0.2471, + "loss": 0.0161, "step": 10080 }, { "epoch": 2.81, "learning_rate": 1.295110863716486e-06, - "loss": 0.3035, + "loss": 0.0723, "step": 10081 }, { "epoch": 2.81, "learning_rate": 1.2932554040263476e-06, - "loss": 0.087, + "loss": 0.0401, "step": 10082 }, { "epoch": 2.81, "learning_rate": 1.2913999443362094e-06, - "loss": 0.1967, + "loss": 0.0566, "step": 10083 }, { "epoch": 2.81, "learning_rate": 1.289544484646071e-06, - "loss": 0.1379, + "loss": 0.0443, "step": 10084 }, { "epoch": 2.81, "learning_rate": 1.287689024955933e-06, - "loss": 0.1376, + "loss": 0.1233, "step": 10085 }, { "epoch": 2.81, "learning_rate": 1.2858335652657946e-06, - "loss": 0.0313, + "loss": 0.1466, "step": 10086 }, { "epoch": 2.81, "learning_rate": 1.2839781055756565e-06, - "loss": 0.1903, + "loss": 0.0661, "step": 10087 }, { "epoch": 2.81, "learning_rate": 1.2821226458855182e-06, - "loss": 0.1371, + "loss": 0.0596, "step": 10088 }, { "epoch": 2.81, "learning_rate": 1.28026718619538e-06, - "loss": 0.2979, + "loss": 0.134, "step": 10089 }, { "epoch": 2.81, "learning_rate": 1.2784117265052417e-06, - "loss": 0.194, + "loss": 0.0126, "step": 10090 }, { "epoch": 2.81, "learning_rate": 1.2765562668151036e-06, - "loss": 0.0833, + "loss": 0.2342, "step": 10091 }, { "epoch": 2.81, "learning_rate": 1.2747008071249652e-06, - "loss": 0.1367, + "loss": 0.0817, "step": 10092 }, { "epoch": 2.81, "learning_rate": 1.272845347434827e-06, - "loss": 0.1898, + "loss": 0.1469, "step": 10093 }, { "epoch": 2.81, "learning_rate": 1.2709898877446888e-06, - "loss": 0.2437, + "loss": 0.0124, "step": 10094 }, { "epoch": 2.81, "learning_rate": 1.2691344280545506e-06, - "loss": 0.1384, + "loss": 0.038, "step": 10095 }, { "epoch": 2.81, "learning_rate": 1.2672789683644125e-06, - "loss": 0.1396, + "loss": 0.0462, "step": 10096 }, { "epoch": 2.81, "learning_rate": 1.2654235086742742e-06, - "loss": 0.0843, + "loss": 0.0486, "step": 10097 }, { "epoch": 2.81, "learning_rate": 1.263568048984136e-06, - "loss": 0.1958, + "loss": 0.0346, "step": 10098 }, { "epoch": 2.81, "learning_rate": 1.2617125892939977e-06, - "loss": 0.2449, + "loss": 0.089, "step": 10099 }, { "epoch": 2.81, "learning_rate": 1.2598571296038596e-06, - "loss": 0.2469, + "loss": 0.0118, "step": 10100 }, { "epoch": 2.81, "learning_rate": 1.2580016699137212e-06, - "loss": 0.248, + "loss": 0.072, "step": 10101 }, { "epoch": 2.81, "learning_rate": 1.256146210223583e-06, - "loss": 0.0311, + "loss": 0.0527, "step": 10102 }, { "epoch": 2.81, "learning_rate": 1.2542907505334447e-06, - "loss": 0.086, + "loss": 0.0633, "step": 10103 }, { "epoch": 2.81, "learning_rate": 1.2524352908433066e-06, - "loss": 0.0824, + "loss": 0.0733, "step": 10104 }, { "epoch": 2.81, "learning_rate": 1.2505798311531683e-06, - "loss": 0.0309, + "loss": 0.0656, "step": 10105 }, { "epoch": 2.81, "learning_rate": 1.2487243714630301e-06, - "loss": 0.0317, + "loss": 0.064, "step": 10106 }, { "epoch": 2.81, "learning_rate": 1.2468689117728918e-06, - "loss": 0.139, + "loss": 0.1262, "step": 10107 }, { "epoch": 2.81, "learning_rate": 1.2450134520827537e-06, - "loss": 0.0863, + "loss": 0.0548, "step": 10108 }, { "epoch": 2.81, "learning_rate": 1.2431579923926153e-06, - "loss": 0.0842, + "loss": 0.0635, "step": 10109 }, { "epoch": 2.81, "learning_rate": 1.2413025327024772e-06, - "loss": 0.2417, + "loss": 0.0645, "step": 10110 }, { "epoch": 2.81, "learning_rate": 1.239447073012339e-06, - "loss": 0.0842, + "loss": 0.079, "step": 10111 }, { "epoch": 2.81, "learning_rate": 1.2375916133222007e-06, - "loss": 0.1416, + "loss": 0.0296, "step": 10112 }, { "epoch": 2.81, "learning_rate": 1.2357361536320626e-06, - "loss": 0.0869, + "loss": 0.1153, "step": 10113 }, { "epoch": 2.81, "learning_rate": 1.2338806939419243e-06, - "loss": 0.0854, + "loss": 0.0188, "step": 10114 }, { "epoch": 2.82, "learning_rate": 1.232025234251786e-06, - "loss": 0.081, + "loss": 0.1452, "step": 10115 }, { "epoch": 2.82, "learning_rate": 1.2301697745616478e-06, - "loss": 0.0323, + "loss": 0.007, "step": 10116 }, { "epoch": 2.82, "learning_rate": 1.2283143148715094e-06, - "loss": 0.0833, + "loss": 0.0435, "step": 10117 }, { "epoch": 2.82, "learning_rate": 1.2264588551813713e-06, - "loss": 0.1366, + "loss": 0.1119, "step": 10118 }, { "epoch": 2.82, "learning_rate": 1.224603395491233e-06, - "loss": 0.1415, + "loss": 0.0096, "step": 10119 }, { "epoch": 2.82, "learning_rate": 1.2227479358010948e-06, - "loss": 0.0852, + "loss": 0.2002, "step": 10120 }, { "epoch": 2.82, "learning_rate": 1.2208924761109565e-06, - "loss": 0.031, + "loss": 0.2265, "step": 10121 }, { "epoch": 2.82, "learning_rate": 1.2190370164208184e-06, - "loss": 0.1363, + "loss": 0.0152, "step": 10122 }, { "epoch": 2.82, "learning_rate": 1.21718155673068e-06, - "loss": 0.1935, + "loss": 0.0141, "step": 10123 }, { "epoch": 2.82, "learning_rate": 1.215326097040542e-06, - "loss": 0.2482, + "loss": 0.0653, "step": 10124 }, { "epoch": 2.82, "learning_rate": 1.2134706373504036e-06, - "loss": 0.0829, + "loss": 0.0067, "step": 10125 }, { "epoch": 2.82, "learning_rate": 1.2116151776602654e-06, - "loss": 0.1373, + "loss": 0.0383, "step": 10126 }, { "epoch": 2.82, "learning_rate": 1.209759717970127e-06, - "loss": 0.0842, + "loss": 0.1311, "step": 10127 }, { "epoch": 2.82, "learning_rate": 1.207904258279989e-06, - "loss": 0.1388, + "loss": 0.1112, "step": 10128 }, { "epoch": 2.82, "learning_rate": 1.2060487985898506e-06, - "loss": 0.1393, + "loss": 0.0893, "step": 10129 }, { "epoch": 2.82, "learning_rate": 1.2041933388997125e-06, - "loss": 0.0878, + "loss": 0.0514, "step": 10130 }, { "epoch": 2.82, "learning_rate": 1.2023378792095744e-06, - "loss": 0.1381, + "loss": 0.0466, "step": 10131 }, { "epoch": 2.82, "learning_rate": 1.200482419519436e-06, - "loss": 0.0855, + "loss": 0.0235, "step": 10132 }, { "epoch": 2.82, "learning_rate": 1.1986269598292979e-06, - "loss": 0.1913, + "loss": 0.0307, "step": 10133 }, { "epoch": 2.82, "learning_rate": 1.1967715001391595e-06, - "loss": 0.1375, + "loss": 0.096, "step": 10134 }, { "epoch": 2.82, "learning_rate": 1.1949160404490214e-06, - "loss": 0.1409, + "loss": 0.0309, "step": 10135 }, { "epoch": 2.82, "learning_rate": 1.193060580758883e-06, - "loss": 0.2469, + "loss": 0.0385, "step": 10136 }, { "epoch": 2.82, "learning_rate": 1.191205121068745e-06, - "loss": 0.0306, + "loss": 0.0703, "step": 10137 }, { "epoch": 2.82, "learning_rate": 1.1893496613786066e-06, - "loss": 0.0837, + "loss": 0.0179, "step": 10138 }, { "epoch": 2.82, "learning_rate": 1.1874942016884685e-06, - "loss": 0.4146, + "loss": 0.1105, "step": 10139 }, { "epoch": 2.82, "learning_rate": 1.1856387419983301e-06, - "loss": 0.1386, + "loss": 0.1145, "step": 10140 }, { "epoch": 2.82, "learning_rate": 1.183783282308192e-06, - "loss": 0.0862, + "loss": 0.0119, "step": 10141 }, { "epoch": 2.82, "learning_rate": 1.1819278226180537e-06, - "loss": 0.142, + "loss": 0.0228, "step": 10142 }, { "epoch": 2.82, "learning_rate": 1.1800723629279155e-06, - "loss": 0.136, + "loss": 0.1764, "step": 10143 }, { "epoch": 2.82, "learning_rate": 1.1782169032377772e-06, - "loss": 0.139, + "loss": 0.0172, "step": 10144 }, { "epoch": 2.82, "learning_rate": 1.176361443547639e-06, - "loss": 0.0864, + "loss": 0.019, "step": 10145 }, { "epoch": 2.82, "learning_rate": 1.174505983857501e-06, - "loss": 0.1954, + "loss": 0.0276, "step": 10146 }, { "epoch": 2.82, "learning_rate": 1.1726505241673626e-06, - "loss": 0.1411, + "loss": 0.2058, "step": 10147 }, { "epoch": 2.82, "learning_rate": 1.1707950644772245e-06, - "loss": 0.2965, + "loss": 0.1773, "step": 10148 }, { "epoch": 2.82, "learning_rate": 1.1689396047870861e-06, - "loss": 0.1407, + "loss": 0.0109, "step": 10149 }, { "epoch": 2.82, "learning_rate": 1.167084145096948e-06, - "loss": 0.0877, + "loss": 0.1071, "step": 10150 }, { "epoch": 2.83, "learning_rate": 1.1652286854068097e-06, - "loss": 0.1418, + "loss": 0.0965, "step": 10151 }, { "epoch": 2.83, "learning_rate": 1.1633732257166715e-06, - "loss": 0.1394, + "loss": 0.0236, "step": 10152 }, { "epoch": 2.83, "learning_rate": 1.1615177660265332e-06, - "loss": 0.0312, + "loss": 0.0915, "step": 10153 }, { "epoch": 2.83, "learning_rate": 1.1596623063363948e-06, - "loss": 0.247, + "loss": 0.0177, "step": 10154 }, { "epoch": 2.83, "learning_rate": 1.1578068466462567e-06, - "loss": 0.0857, + "loss": 0.0688, "step": 10155 }, { "epoch": 2.83, "learning_rate": 1.1559513869561184e-06, - "loss": 0.1902, + "loss": 0.041, "step": 10156 }, { "epoch": 2.83, "learning_rate": 1.1540959272659802e-06, - "loss": 0.1991, + "loss": 0.0123, "step": 10157 }, { "epoch": 2.83, "learning_rate": 1.152240467575842e-06, - "loss": 0.1414, + "loss": 0.0138, "step": 10158 }, { "epoch": 2.83, "learning_rate": 1.1503850078857038e-06, - "loss": 0.198, + "loss": 0.0216, "step": 10159 }, { "epoch": 2.83, "learning_rate": 1.1485295481955654e-06, - "loss": 0.0315, + "loss": 0.0764, "step": 10160 }, { "epoch": 2.83, "learning_rate": 1.1466740885054273e-06, - "loss": 0.1371, + "loss": 0.0942, "step": 10161 }, { "epoch": 2.83, "learning_rate": 1.144818628815289e-06, - "loss": 0.1358, + "loss": 0.0119, "step": 10162 }, { "epoch": 2.83, "learning_rate": 1.1429631691251508e-06, - "loss": 0.0837, + "loss": 0.0151, "step": 10163 }, { "epoch": 2.83, "learning_rate": 1.1411077094350125e-06, - "loss": 0.3, + "loss": 0.0285, "step": 10164 }, { "epoch": 2.83, "learning_rate": 1.1392522497448744e-06, - "loss": 0.1392, + "loss": 0.0602, "step": 10165 }, { "epoch": 2.83, "learning_rate": 1.1373967900547362e-06, - "loss": 0.0313, + "loss": 0.0079, "step": 10166 }, { "epoch": 2.83, "learning_rate": 1.1355413303645979e-06, - "loss": 0.0307, + "loss": 0.0771, "step": 10167 }, { "epoch": 2.83, "learning_rate": 1.1336858706744598e-06, - "loss": 0.0863, + "loss": 0.0744, "step": 10168 }, { "epoch": 2.83, "learning_rate": 1.1318304109843214e-06, - "loss": 0.0842, + "loss": 0.0777, "step": 10169 }, { "epoch": 2.83, "learning_rate": 1.1299749512941833e-06, - "loss": 0.0867, + "loss": 0.0925, "step": 10170 }, { "epoch": 2.83, "learning_rate": 1.128119491604045e-06, - "loss": 0.1429, + "loss": 0.0502, "step": 10171 }, { "epoch": 2.83, "learning_rate": 1.1262640319139068e-06, - "loss": 0.1915, + "loss": 0.0586, "step": 10172 }, { "epoch": 2.83, "learning_rate": 1.1244085722237685e-06, - "loss": 0.1373, + "loss": 0.0705, "step": 10173 }, { "epoch": 2.83, "learning_rate": 1.1225531125336303e-06, - "loss": 0.0308, + "loss": 0.0205, "step": 10174 }, { "epoch": 2.83, "learning_rate": 1.120697652843492e-06, - "loss": 0.1369, + "loss": 0.0234, "step": 10175 }, { "epoch": 2.83, "learning_rate": 1.1188421931533539e-06, - "loss": 0.0817, + "loss": 0.0406, "step": 10176 }, { "epoch": 2.83, "learning_rate": 1.1169867334632155e-06, - "loss": 0.3491, + "loss": 0.1139, "step": 10177 }, { "epoch": 2.83, "learning_rate": 1.1151312737730774e-06, - "loss": 0.0859, + "loss": 0.0812, "step": 10178 }, { "epoch": 2.83, "learning_rate": 1.113275814082939e-06, - "loss": 0.0838, + "loss": 0.0845, "step": 10179 }, { "epoch": 2.83, "learning_rate": 1.111420354392801e-06, - "loss": 0.1372, + "loss": 0.0442, "step": 10180 }, { "epoch": 2.83, "learning_rate": 1.1095648947026628e-06, - "loss": 0.1971, + "loss": 0.1057, "step": 10181 }, { "epoch": 2.83, "learning_rate": 1.1077094350125245e-06, - "loss": 0.0858, + "loss": 0.1349, "step": 10182 }, { "epoch": 2.83, "learning_rate": 1.1058539753223863e-06, - "loss": 0.0841, + "loss": 0.0832, "step": 10183 }, { "epoch": 2.83, "learning_rate": 1.103998515632248e-06, - "loss": 0.0855, + "loss": 0.0736, "step": 10184 }, { "epoch": 2.83, "learning_rate": 1.1021430559421099e-06, - "loss": 0.0318, + "loss": 0.0438, "step": 10185 }, { "epoch": 2.83, "learning_rate": 1.1002875962519715e-06, - "loss": 0.0837, + "loss": 0.1931, "step": 10186 }, { "epoch": 2.84, "learning_rate": 1.0984321365618334e-06, - "loss": 0.1895, + "loss": 0.1462, "step": 10187 }, { "epoch": 2.84, "learning_rate": 1.096576676871695e-06, - "loss": 0.1942, + "loss": 0.0125, "step": 10188 }, { "epoch": 2.84, "learning_rate": 1.094721217181557e-06, - "loss": 0.139, + "loss": 0.0745, "step": 10189 }, { "epoch": 2.84, "learning_rate": 1.0928657574914186e-06, - "loss": 0.1369, + "loss": 0.0167, "step": 10190 }, { "epoch": 2.84, "learning_rate": 1.0910102978012802e-06, - "loss": 0.0308, + "loss": 0.0126, "step": 10191 }, { "epoch": 2.84, "learning_rate": 1.0891548381111421e-06, - "loss": 0.2512, + "loss": 0.0076, "step": 10192 }, { "epoch": 2.84, "learning_rate": 1.0872993784210038e-06, - "loss": 0.1381, + "loss": 0.0634, "step": 10193 }, { "epoch": 2.84, "learning_rate": 1.0854439187308656e-06, - "loss": 0.2486, + "loss": 0.1168, "step": 10194 }, { "epoch": 2.84, "learning_rate": 1.0835884590407273e-06, - "loss": 0.0852, + "loss": 0.0275, "step": 10195 }, { "epoch": 2.84, "learning_rate": 1.0817329993505892e-06, - "loss": 0.0848, + "loss": 0.0595, "step": 10196 }, { "epoch": 2.84, "learning_rate": 1.0798775396604508e-06, - "loss": 0.1938, + "loss": 0.1057, "step": 10197 }, { "epoch": 2.84, "learning_rate": 1.0780220799703127e-06, - "loss": 0.1382, + "loss": 0.089, "step": 10198 }, { "epoch": 2.84, "learning_rate": 1.0761666202801744e-06, - "loss": 0.1956, + "loss": 0.0614, "step": 10199 }, { "epoch": 2.84, "learning_rate": 1.0743111605900362e-06, - "loss": 0.0854, + "loss": 0.0718, "step": 10200 }, { "epoch": 2.84, "learning_rate": 1.072455700899898e-06, - "loss": 0.1957, + "loss": 0.0147, "step": 10201 }, { "epoch": 2.84, "learning_rate": 1.0706002412097598e-06, - "loss": 0.193, + "loss": 0.0104, "step": 10202 }, { "epoch": 2.84, "learning_rate": 1.0687447815196216e-06, - "loss": 0.1409, + "loss": 0.0496, "step": 10203 }, { "epoch": 2.84, "learning_rate": 1.0668893218294833e-06, - "loss": 0.0869, + "loss": 0.1189, "step": 10204 }, { "epoch": 2.84, "learning_rate": 1.0650338621393452e-06, - "loss": 0.0865, + "loss": 0.0313, "step": 10205 }, { "epoch": 2.84, "learning_rate": 1.0631784024492068e-06, - "loss": 0.1396, + "loss": 0.0137, "step": 10206 }, { "epoch": 2.84, "learning_rate": 1.0613229427590687e-06, - "loss": 0.2507, + "loss": 0.0901, "step": 10207 }, { "epoch": 2.84, "learning_rate": 1.0594674830689303e-06, - "loss": 0.1418, + "loss": 0.0933, "step": 10208 }, { "epoch": 2.84, "learning_rate": 1.0576120233787922e-06, - "loss": 0.2513, + "loss": 0.0368, "step": 10209 }, { "epoch": 2.84, "learning_rate": 1.0557565636886539e-06, - "loss": 0.1928, + "loss": 0.0154, "step": 10210 }, { "epoch": 2.84, "learning_rate": 1.0539011039985157e-06, - "loss": 0.0864, + "loss": 0.0175, "step": 10211 }, { "epoch": 2.84, "learning_rate": 1.0520456443083774e-06, - "loss": 0.136, + "loss": 0.1067, "step": 10212 }, { "epoch": 2.84, "learning_rate": 1.0501901846182393e-06, - "loss": 0.1949, + "loss": 0.1388, "step": 10213 }, { "epoch": 2.84, "learning_rate": 1.048334724928101e-06, - "loss": 0.1392, + "loss": 0.0912, "step": 10214 }, { "epoch": 2.84, "learning_rate": 1.0464792652379628e-06, - "loss": 0.0304, + "loss": 0.0163, "step": 10215 }, { "epoch": 2.84, "learning_rate": 1.0446238055478247e-06, - "loss": 0.0834, + "loss": 0.0764, "step": 10216 }, { "epoch": 2.84, "learning_rate": 1.0427683458576863e-06, - "loss": 0.0859, + "loss": 0.0504, "step": 10217 }, { "epoch": 2.84, "learning_rate": 1.0409128861675482e-06, - "loss": 0.0863, + "loss": 0.0559, "step": 10218 }, { "epoch": 2.84, "learning_rate": 1.0390574264774099e-06, - "loss": 0.2495, + "loss": 0.0201, "step": 10219 }, { "epoch": 2.84, "learning_rate": 1.0372019667872717e-06, - "loss": 0.1915, + "loss": 0.1127, "step": 10220 }, { "epoch": 2.84, "learning_rate": 1.0353465070971334e-06, - "loss": 0.1383, + "loss": 0.1033, "step": 10221 }, { "epoch": 2.84, "learning_rate": 1.0334910474069953e-06, - "loss": 0.0848, + "loss": 0.1505, "step": 10222 }, { "epoch": 2.85, "learning_rate": 1.031635587716857e-06, - "loss": 0.0854, + "loss": 0.0248, "step": 10223 }, { "epoch": 2.85, "learning_rate": 1.0297801280267188e-06, - "loss": 0.3532, + "loss": 0.0586, "step": 10224 }, { "epoch": 2.85, "learning_rate": 1.0279246683365805e-06, - "loss": 0.2465, + "loss": 0.1641, "step": 10225 }, { "epoch": 2.85, "learning_rate": 1.0260692086464423e-06, - "loss": 0.2451, + "loss": 0.0619, "step": 10226 }, { "epoch": 2.85, "learning_rate": 1.024213748956304e-06, - "loss": 0.2442, + "loss": 0.0163, "step": 10227 }, { "epoch": 2.85, "learning_rate": 1.0223582892661659e-06, - "loss": 0.031, + "loss": 0.0134, "step": 10228 }, { "epoch": 2.85, "learning_rate": 1.0205028295760275e-06, - "loss": 0.1379, + "loss": 0.0777, "step": 10229 }, { "epoch": 2.85, "learning_rate": 1.0186473698858892e-06, - "loss": 0.192, + "loss": 0.0685, "step": 10230 }, { "epoch": 2.85, "learning_rate": 1.016791910195751e-06, - "loss": 0.2479, + "loss": 0.0802, "step": 10231 }, { "epoch": 2.85, "learning_rate": 1.0149364505056127e-06, - "loss": 0.0838, + "loss": 0.0182, "step": 10232 }, { "epoch": 2.85, "learning_rate": 1.0130809908154746e-06, - "loss": 0.3001, + "loss": 0.0917, "step": 10233 }, { "epoch": 2.85, "learning_rate": 1.0112255311253362e-06, - "loss": 0.1976, + "loss": 0.0262, "step": 10234 }, { "epoch": 2.85, "learning_rate": 1.009370071435198e-06, - "loss": 0.1887, + "loss": 0.0503, "step": 10235 }, { "epoch": 2.85, "learning_rate": 1.00751461174506e-06, - "loss": 0.1942, + "loss": 0.0996, "step": 10236 }, { "epoch": 2.85, "learning_rate": 1.0056591520549216e-06, - "loss": 0.14, + "loss": 0.0267, "step": 10237 }, { "epoch": 2.85, "learning_rate": 1.0038036923647835e-06, - "loss": 0.0847, + "loss": 0.0316, "step": 10238 }, { "epoch": 2.85, "learning_rate": 1.0019482326746452e-06, - "loss": 0.0317, + "loss": 0.0174, "step": 10239 }, { "epoch": 2.85, "learning_rate": 1.000092772984507e-06, - "loss": 0.0855, + "loss": 0.0305, "step": 10240 }, { "epoch": 2.85, "learning_rate": 9.982373132943687e-07, - "loss": 0.1928, + "loss": 0.0195, "step": 10241 }, { "epoch": 2.85, "learning_rate": 9.963818536042306e-07, - "loss": 0.1401, + "loss": 0.0873, "step": 10242 }, { "epoch": 2.85, "learning_rate": 9.945263939140922e-07, - "loss": 0.2448, + "loss": 0.0549, "step": 10243 }, { "epoch": 2.85, "learning_rate": 9.92670934223954e-07, - "loss": 0.0804, + "loss": 0.0614, "step": 10244 }, { "epoch": 2.85, "learning_rate": 9.908154745338157e-07, - "loss": 0.1393, + "loss": 0.012, "step": 10245 }, { "epoch": 2.85, "learning_rate": 9.889600148436776e-07, - "loss": 0.4088, + "loss": 0.0098, "step": 10246 }, { "epoch": 2.85, "learning_rate": 9.871045551535393e-07, - "loss": 0.0862, + "loss": 0.0132, "step": 10247 }, { "epoch": 2.85, "learning_rate": 9.852490954634011e-07, - "loss": 0.1915, + "loss": 0.0207, "step": 10248 }, { "epoch": 2.85, "learning_rate": 9.833936357732628e-07, - "loss": 0.032, + "loss": 0.2286, "step": 10249 }, { "epoch": 2.85, "learning_rate": 9.815381760831247e-07, - "loss": 0.0837, + "loss": 0.0221, "step": 10250 }, { "epoch": 2.85, "learning_rate": 9.796827163929865e-07, - "loss": 0.1338, + "loss": 0.0543, "step": 10251 }, { "epoch": 2.85, "learning_rate": 9.778272567028482e-07, - "loss": 0.0854, + "loss": 0.071, "step": 10252 }, { "epoch": 2.85, "learning_rate": 9.7597179701271e-07, - "loss": 0.1922, + "loss": 0.0552, "step": 10253 }, { "epoch": 2.85, "learning_rate": 9.741163373225717e-07, - "loss": 0.1378, + "loss": 0.1421, "step": 10254 }, { "epoch": 2.85, "learning_rate": 9.722608776324336e-07, - "loss": 0.0862, + "loss": 0.0765, "step": 10255 }, { "epoch": 2.85, "learning_rate": 9.704054179422953e-07, - "loss": 0.1932, + "loss": 0.0708, "step": 10256 }, { "epoch": 2.85, "learning_rate": 9.685499582521571e-07, - "loss": 0.1427, + "loss": 0.0878, "step": 10257 }, { "epoch": 2.85, "learning_rate": 9.666944985620188e-07, - "loss": 0.1903, + "loss": 0.1051, "step": 10258 }, { "epoch": 2.86, "learning_rate": 9.648390388718807e-07, - "loss": 0.1923, + "loss": 0.0247, "step": 10259 }, { "epoch": 2.86, "learning_rate": 9.629835791817423e-07, - "loss": 0.0842, + "loss": 0.0219, "step": 10260 }, { "epoch": 2.86, "learning_rate": 9.611281194916042e-07, - "loss": 0.1383, + "loss": 0.104, "step": 10261 }, { "epoch": 2.86, "learning_rate": 9.592726598014659e-07, - "loss": 0.1916, + "loss": 0.144, "step": 10262 }, { "epoch": 2.86, "learning_rate": 9.574172001113277e-07, - "loss": 0.2976, + "loss": 0.0461, "step": 10263 }, { "epoch": 2.86, "learning_rate": 9.555617404211894e-07, - "loss": 0.2509, + "loss": 0.0773, "step": 10264 }, { "epoch": 2.86, "learning_rate": 9.537062807310513e-07, - "loss": 0.1898, + "loss": 0.0398, "step": 10265 }, { "epoch": 2.86, "learning_rate": 9.51850821040913e-07, - "loss": 0.1377, + "loss": 0.0439, "step": 10266 }, { "epoch": 2.86, "learning_rate": 9.499953613507747e-07, - "loss": 0.1947, + "loss": 0.1123, "step": 10267 }, { "epoch": 2.86, "learning_rate": 9.481399016606364e-07, - "loss": 0.0859, + "loss": 0.0161, "step": 10268 }, { "epoch": 2.86, "learning_rate": 9.462844419704982e-07, - "loss": 0.032, + "loss": 0.019, "step": 10269 }, { "epoch": 2.86, "learning_rate": 9.4442898228036e-07, - "loss": 0.084, + "loss": 0.0116, "step": 10270 }, { "epoch": 2.86, "learning_rate": 9.425735225902217e-07, - "loss": 0.0873, + "loss": 0.02, "step": 10271 }, { "epoch": 2.86, "learning_rate": 9.407180629000835e-07, - "loss": 0.1862, + "loss": 0.0577, "step": 10272 }, { "epoch": 2.86, "learning_rate": 9.388626032099453e-07, - "loss": 0.1421, + "loss": 0.0641, "step": 10273 }, { "epoch": 2.86, "learning_rate": 9.37007143519807e-07, - "loss": 0.1949, + "loss": 0.0153, "step": 10274 }, { "epoch": 2.86, "learning_rate": 9.351516838296688e-07, - "loss": 0.1399, + "loss": 0.0203, "step": 10275 }, { "epoch": 2.86, "learning_rate": 9.332962241395306e-07, - "loss": 0.1927, + "loss": 0.0218, "step": 10276 }, { "epoch": 2.86, "learning_rate": 9.314407644493924e-07, - "loss": 0.2455, + "loss": 0.0448, "step": 10277 }, { "epoch": 2.86, "learning_rate": 9.295853047592542e-07, - "loss": 0.0321, + "loss": 0.1394, "step": 10278 }, { "epoch": 2.86, "learning_rate": 9.27729845069116e-07, - "loss": 0.0865, + "loss": 0.0256, "step": 10279 }, { "epoch": 2.86, "learning_rate": 9.258743853789777e-07, - "loss": 0.0325, + "loss": 0.113, "step": 10280 }, { "epoch": 2.86, "learning_rate": 9.240189256888395e-07, - "loss": 0.199, + "loss": 0.0993, "step": 10281 }, { "epoch": 2.86, "learning_rate": 9.221634659987013e-07, - "loss": 0.0832, + "loss": 0.0539, "step": 10282 }, { "epoch": 2.86, "learning_rate": 9.20308006308563e-07, - "loss": 0.351, + "loss": 0.0938, "step": 10283 }, { "epoch": 2.86, "learning_rate": 9.184525466184248e-07, - "loss": 0.1434, + "loss": 0.1164, "step": 10284 }, { "epoch": 2.86, "learning_rate": 9.165970869282865e-07, - "loss": 0.086, + "loss": 0.0607, "step": 10285 }, { "epoch": 2.86, "learning_rate": 9.147416272381483e-07, - "loss": 0.1388, + "loss": 0.0171, "step": 10286 }, { "epoch": 2.86, "learning_rate": 9.128861675480101e-07, - "loss": 0.1938, + "loss": 0.0438, "step": 10287 }, { "epoch": 2.86, "learning_rate": 9.110307078578718e-07, - "loss": 0.0326, + "loss": 0.0953, "step": 10288 }, { "epoch": 2.86, "learning_rate": 9.091752481677336e-07, - "loss": 0.139, + "loss": 0.0118, "step": 10289 }, { "epoch": 2.86, "learning_rate": 9.073197884775954e-07, - "loss": 0.1387, + "loss": 0.0171, "step": 10290 }, { "epoch": 2.86, "learning_rate": 9.054643287874571e-07, - "loss": 0.1378, + "loss": 0.1301, "step": 10291 }, { "epoch": 2.86, "learning_rate": 9.03608869097319e-07, - "loss": 0.0871, + "loss": 0.0922, "step": 10292 }, { "epoch": 2.86, "learning_rate": 9.017534094071808e-07, - "loss": 0.1934, + "loss": 0.084, "step": 10293 }, { "epoch": 2.87, "learning_rate": 8.998979497170425e-07, - "loss": 0.14, + "loss": 0.093, "step": 10294 }, { "epoch": 2.87, "learning_rate": 8.980424900269043e-07, - "loss": 0.1892, + "loss": 0.0536, "step": 10295 }, { "epoch": 2.87, "learning_rate": 8.961870303367661e-07, - "loss": 0.0872, + "loss": 0.0086, "step": 10296 }, { "epoch": 2.87, "learning_rate": 8.943315706466278e-07, - "loss": 0.1398, + "loss": 0.0361, "step": 10297 }, { "epoch": 2.87, "learning_rate": 8.924761109564896e-07, - "loss": 0.086, + "loss": 0.0603, "step": 10298 }, { "epoch": 2.87, "learning_rate": 8.906206512663514e-07, - "loss": 0.0334, + "loss": 0.047, "step": 10299 }, { "epoch": 2.87, "learning_rate": 8.887651915762131e-07, - "loss": 0.1388, + "loss": 0.1052, "step": 10300 }, { "epoch": 2.87, "learning_rate": 8.869097318860749e-07, - "loss": 0.0839, + "loss": 0.0106, "step": 10301 }, { "epoch": 2.87, "learning_rate": 8.850542721959367e-07, - "loss": 0.0878, + "loss": 0.0479, "step": 10302 }, { "epoch": 2.87, "learning_rate": 8.831988125057984e-07, - "loss": 0.2445, + "loss": 0.1083, "step": 10303 }, { "epoch": 2.87, "learning_rate": 8.813433528156602e-07, - "loss": 0.0867, + "loss": 0.099, "step": 10304 }, { "epoch": 2.87, "learning_rate": 8.794878931255218e-07, - "loss": 0.301, + "loss": 0.0439, "step": 10305 }, { "epoch": 2.87, "learning_rate": 8.776324334353836e-07, - "loss": 0.0855, + "loss": 0.0126, "step": 10306 }, { "epoch": 2.87, "learning_rate": 8.757769737452454e-07, - "loss": 0.0864, + "loss": 0.0618, "step": 10307 }, { "epoch": 2.87, "learning_rate": 8.739215140551071e-07, - "loss": 0.1396, + "loss": 0.0617, "step": 10308 }, { "epoch": 2.87, "learning_rate": 8.720660543649689e-07, - "loss": 0.1379, + "loss": 0.0591, "step": 10309 }, { "epoch": 2.87, "learning_rate": 8.702105946748307e-07, - "loss": 0.3532, + "loss": 0.0871, "step": 10310 }, { "epoch": 2.87, "learning_rate": 8.683551349846925e-07, - "loss": 0.2445, + "loss": 0.1141, "step": 10311 }, { "epoch": 2.87, "learning_rate": 8.664996752945543e-07, - "loss": 0.0863, + "loss": 0.1059, "step": 10312 }, { "epoch": 2.87, "learning_rate": 8.646442156044161e-07, - "loss": 0.1395, + "loss": 0.2237, "step": 10313 }, { "epoch": 2.87, "learning_rate": 8.627887559142778e-07, - "loss": 0.0846, + "loss": 0.0405, "step": 10314 }, { "epoch": 2.87, "learning_rate": 8.609332962241396e-07, - "loss": 0.0327, + "loss": 0.047, "step": 10315 }, { "epoch": 2.87, "learning_rate": 8.590778365340014e-07, - "loss": 0.0875, + "loss": 0.016, "step": 10316 }, { "epoch": 2.87, "learning_rate": 8.572223768438631e-07, - "loss": 0.1366, + "loss": 0.0807, "step": 10317 }, { "epoch": 2.87, "learning_rate": 8.553669171537249e-07, - "loss": 0.1902, + "loss": 0.0164, "step": 10318 }, { "epoch": 2.87, "learning_rate": 8.535114574635867e-07, - "loss": 0.1934, + "loss": 0.1319, "step": 10319 }, { "epoch": 2.87, "learning_rate": 8.516559977734484e-07, - "loss": 0.1358, + "loss": 0.0774, "step": 10320 }, { "epoch": 2.87, "learning_rate": 8.498005380833102e-07, - "loss": 0.0838, + "loss": 0.0818, "step": 10321 }, { "epoch": 2.87, "learning_rate": 8.47945078393172e-07, - "loss": 0.0865, + "loss": 0.0234, "step": 10322 }, { "epoch": 2.87, "learning_rate": 8.460896187030337e-07, - "loss": 0.1396, + "loss": 0.0472, "step": 10323 }, { "epoch": 2.87, "learning_rate": 8.442341590128955e-07, - "loss": 0.2492, + "loss": 0.0633, "step": 10324 }, { "epoch": 2.87, "learning_rate": 8.423786993227572e-07, - "loss": 0.087, + "loss": 0.0097, "step": 10325 }, { "epoch": 2.87, "learning_rate": 8.405232396326191e-07, - "loss": 0.1389, + "loss": 0.0221, "step": 10326 }, { "epoch": 2.87, "learning_rate": 8.386677799424809e-07, - "loss": 0.0883, + "loss": 0.069, "step": 10327 }, { "epoch": 2.87, "learning_rate": 8.368123202523426e-07, - "loss": 0.0317, + "loss": 0.0135, "step": 10328 }, { "epoch": 2.87, "learning_rate": 8.349568605622044e-07, - "loss": 0.0878, + "loss": 0.0164, "step": 10329 }, { "epoch": 2.88, "learning_rate": 8.331014008720662e-07, - "loss": 0.1924, + "loss": 0.0206, "step": 10330 }, { "epoch": 2.88, "learning_rate": 8.312459411819279e-07, - "loss": 0.1393, + "loss": 0.0302, "step": 10331 }, { "epoch": 2.88, "learning_rate": 8.293904814917897e-07, - "loss": 0.1399, + "loss": 0.1034, "step": 10332 }, { "epoch": 2.88, "learning_rate": 8.275350218016515e-07, - "loss": 0.0317, + "loss": 0.1437, "step": 10333 }, { "epoch": 2.88, "learning_rate": 8.256795621115132e-07, - "loss": 0.1351, + "loss": 0.0141, "step": 10334 }, { "epoch": 2.88, "learning_rate": 8.23824102421375e-07, - "loss": 0.0871, + "loss": 0.0817, "step": 10335 }, { "epoch": 2.88, "learning_rate": 8.219686427312368e-07, - "loss": 0.1922, + "loss": 0.0824, "step": 10336 }, { "epoch": 2.88, "learning_rate": 8.201131830410985e-07, - "loss": 0.1395, + "loss": 0.0491, "step": 10337 }, { "epoch": 2.88, "learning_rate": 8.182577233509603e-07, - "loss": 0.1857, + "loss": 0.1059, "step": 10338 }, { "epoch": 2.88, "learning_rate": 8.16402263660822e-07, - "loss": 0.1434, + "loss": 0.0945, "step": 10339 }, { "epoch": 2.88, "learning_rate": 8.145468039706838e-07, - "loss": 0.1898, + "loss": 0.0509, "step": 10340 }, { "epoch": 2.88, "learning_rate": 8.126913442805457e-07, - "loss": 0.0863, + "loss": 0.1001, "step": 10341 }, { "epoch": 2.88, "learning_rate": 8.108358845904075e-07, - "loss": 0.0327, + "loss": 0.0441, "step": 10342 }, { "epoch": 2.88, "learning_rate": 8.08980424900269e-07, - "loss": 0.0321, + "loss": 0.019, "step": 10343 }, { "epoch": 2.88, "learning_rate": 8.071249652101308e-07, - "loss": 0.2463, + "loss": 0.0513, "step": 10344 }, { "epoch": 2.88, "learning_rate": 8.052695055199925e-07, - "loss": 0.0858, + "loss": 0.057, "step": 10345 }, { "epoch": 2.88, "learning_rate": 8.034140458298544e-07, - "loss": 0.0881, + "loss": 0.0429, "step": 10346 }, { "epoch": 2.88, "learning_rate": 8.015585861397162e-07, - "loss": 0.2401, + "loss": 0.0409, "step": 10347 }, { "epoch": 2.88, "learning_rate": 7.997031264495779e-07, - "loss": 0.0861, + "loss": 0.1348, "step": 10348 }, { "epoch": 2.88, "learning_rate": 7.978476667594397e-07, - "loss": 0.3017, + "loss": 0.0724, "step": 10349 }, { "epoch": 2.88, "learning_rate": 7.959922070693015e-07, - "loss": 0.0901, + "loss": 0.0159, "step": 10350 }, { "epoch": 2.88, "learning_rate": 7.941367473791632e-07, - "loss": 0.1364, + "loss": 0.0676, "step": 10351 }, { "epoch": 2.88, "learning_rate": 7.92281287689025e-07, - "loss": 0.1936, + "loss": 0.0164, "step": 10352 }, { "epoch": 2.88, "learning_rate": 7.904258279988868e-07, - "loss": 0.0326, + "loss": 0.0421, "step": 10353 }, { "epoch": 2.88, "learning_rate": 7.885703683087485e-07, - "loss": 0.1949, + "loss": 0.0309, "step": 10354 }, { "epoch": 2.88, "learning_rate": 7.867149086186103e-07, - "loss": 0.0317, + "loss": 0.0103, "step": 10355 }, { "epoch": 2.88, "learning_rate": 7.84859448928472e-07, - "loss": 0.2424, + "loss": 0.0185, "step": 10356 }, { "epoch": 2.88, "learning_rate": 7.830039892383338e-07, - "loss": 0.0853, + "loss": 0.0065, "step": 10357 }, { "epoch": 2.88, "learning_rate": 7.811485295481956e-07, - "loss": 0.139, + "loss": 0.1519, "step": 10358 }, { "epoch": 2.88, "learning_rate": 7.792930698580573e-07, - "loss": 0.0832, + "loss": 0.201, "step": 10359 }, { "epoch": 2.88, "learning_rate": 7.774376101679191e-07, - "loss": 0.0321, + "loss": 0.1277, "step": 10360 }, { "epoch": 2.88, "learning_rate": 7.75582150477781e-07, - "loss": 0.1949, + "loss": 0.0448, "step": 10361 }, { "epoch": 2.88, "learning_rate": 7.737266907876427e-07, - "loss": 0.1419, + "loss": 0.0871, "step": 10362 }, { "epoch": 2.88, "learning_rate": 7.718712310975045e-07, - "loss": 0.141, + "loss": 0.0597, "step": 10363 }, { "epoch": 2.88, "learning_rate": 7.700157714073663e-07, - "loss": 0.0853, + "loss": 0.059, "step": 10364 }, { "epoch": 2.88, "learning_rate": 7.68160311717228e-07, - "loss": 0.0854, + "loss": 0.0849, "step": 10365 }, { "epoch": 2.89, "learning_rate": 7.663048520270898e-07, - "loss": 0.0851, + "loss": 0.0425, "step": 10366 }, { "epoch": 2.89, "learning_rate": 7.644493923369516e-07, - "loss": 0.1401, + "loss": 0.0409, "step": 10367 }, { "epoch": 2.89, "learning_rate": 7.625939326468133e-07, - "loss": 0.0856, + "loss": 0.005, "step": 10368 }, { "epoch": 2.89, "learning_rate": 7.607384729566751e-07, - "loss": 0.1929, + "loss": 0.0715, "step": 10369 }, { "epoch": 2.89, "learning_rate": 7.588830132665369e-07, - "loss": 0.0327, + "loss": 0.0158, "step": 10370 }, { "epoch": 2.89, "learning_rate": 7.570275535763986e-07, - "loss": 0.0875, + "loss": 0.0197, "step": 10371 }, { "epoch": 2.89, "learning_rate": 7.551720938862604e-07, - "loss": 0.032, + "loss": 0.0692, "step": 10372 }, { "epoch": 2.89, "learning_rate": 7.533166341961222e-07, - "loss": 0.0319, + "loss": 0.0162, "step": 10373 }, { "epoch": 2.89, "learning_rate": 7.514611745059839e-07, - "loss": 0.1924, + "loss": 0.022, "step": 10374 }, { "epoch": 2.89, "learning_rate": 7.496057148158457e-07, - "loss": 0.0317, + "loss": 0.018, "step": 10375 }, { "epoch": 2.89, "learning_rate": 7.477502551257076e-07, - "loss": 0.1912, + "loss": 0.085, "step": 10376 }, { "epoch": 2.89, "learning_rate": 7.458947954355693e-07, - "loss": 0.1416, + "loss": 0.0473, "step": 10377 }, { "epoch": 2.89, "learning_rate": 7.440393357454311e-07, - "loss": 0.1881, + "loss": 0.1538, "step": 10378 }, { "epoch": 2.89, "learning_rate": 7.421838760552929e-07, - "loss": 0.194, + "loss": 0.0678, "step": 10379 }, { "epoch": 2.89, "learning_rate": 7.403284163651546e-07, - "loss": 0.0868, + "loss": 0.1183, "step": 10380 }, { "epoch": 2.89, "learning_rate": 7.384729566750163e-07, - "loss": 0.1407, + "loss": 0.1167, "step": 10381 }, { "epoch": 2.89, "learning_rate": 7.36617496984878e-07, - "loss": 0.1368, + "loss": 0.0422, "step": 10382 }, { "epoch": 2.89, "learning_rate": 7.347620372947398e-07, - "loss": 0.1935, + "loss": 0.1047, "step": 10383 }, { "epoch": 2.89, "learning_rate": 7.329065776046016e-07, - "loss": 0.14, + "loss": 0.0186, "step": 10384 }, { "epoch": 2.89, "learning_rate": 7.310511179144633e-07, - "loss": 0.0843, + "loss": 0.1083, "step": 10385 }, { "epoch": 2.89, "learning_rate": 7.291956582243251e-07, - "loss": 0.1387, + "loss": 0.0342, "step": 10386 }, { "epoch": 2.89, "learning_rate": 7.273401985341869e-07, - "loss": 0.0845, + "loss": 0.0237, "step": 10387 }, { "epoch": 2.89, "learning_rate": 7.254847388440486e-07, - "loss": 0.0837, + "loss": 0.1076, "step": 10388 }, { "epoch": 2.89, "learning_rate": 7.236292791539104e-07, - "loss": 0.0319, + "loss": 0.0817, "step": 10389 }, { "epoch": 2.89, "learning_rate": 7.217738194637722e-07, - "loss": 0.1403, + "loss": 0.0468, "step": 10390 }, { "epoch": 2.89, "learning_rate": 7.199183597736339e-07, - "loss": 0.1396, + "loss": 0.0113, "step": 10391 }, { "epoch": 2.89, "learning_rate": 7.180629000834957e-07, - "loss": 0.1374, + "loss": 0.037, "step": 10392 }, { "epoch": 2.89, "learning_rate": 7.162074403933575e-07, - "loss": 0.1943, + "loss": 0.0467, "step": 10393 }, { "epoch": 2.89, "learning_rate": 7.143519807032192e-07, - "loss": 0.0832, + "loss": 0.0784, "step": 10394 }, { "epoch": 2.89, "learning_rate": 7.12496521013081e-07, - "loss": 0.1916, + "loss": 0.0456, "step": 10395 }, { "epoch": 2.89, "learning_rate": 7.106410613229429e-07, - "loss": 0.1945, + "loss": 0.0215, "step": 10396 }, { "epoch": 2.89, "learning_rate": 7.087856016328046e-07, - "loss": 0.1405, + "loss": 0.0759, "step": 10397 }, { "epoch": 2.89, "learning_rate": 7.069301419426664e-07, - "loss": 0.1334, + "loss": 0.0553, "step": 10398 }, { "epoch": 2.89, "learning_rate": 7.050746822525281e-07, - "loss": 0.1412, + "loss": 0.2002, "step": 10399 }, { "epoch": 2.89, "learning_rate": 7.032192225623899e-07, - "loss": 0.1366, + "loss": 0.0607, "step": 10400 }, { "epoch": 2.89, "learning_rate": 7.013637628722517e-07, - "loss": 0.1358, + "loss": 0.0455, "step": 10401 }, { "epoch": 2.9, "learning_rate": 6.995083031821134e-07, - "loss": 0.2482, + "loss": 0.1021, "step": 10402 }, { "epoch": 2.9, "learning_rate": 6.976528434919752e-07, - "loss": 0.0872, + "loss": 0.0134, "step": 10403 }, { "epoch": 2.9, "learning_rate": 6.95797383801837e-07, - "loss": 0.0309, + "loss": 0.1584, "step": 10404 }, { "epoch": 2.9, "learning_rate": 6.939419241116987e-07, - "loss": 0.0865, + "loss": 0.1306, "step": 10405 }, { "epoch": 2.9, "learning_rate": 6.920864644215605e-07, - "loss": 0.2515, + "loss": 0.1368, "step": 10406 }, { "epoch": 2.9, "learning_rate": 6.902310047314223e-07, - "loss": 0.1937, + "loss": 0.0122, "step": 10407 }, { "epoch": 2.9, "learning_rate": 6.88375545041284e-07, - "loss": 0.2966, + "loss": 0.1537, "step": 10408 }, { "epoch": 2.9, "learning_rate": 6.865200853511458e-07, - "loss": 0.0311, + "loss": 0.0652, "step": 10409 }, { "epoch": 2.9, "learning_rate": 6.846646256610076e-07, - "loss": 0.1384, + "loss": 0.0303, "step": 10410 }, { "epoch": 2.9, "learning_rate": 6.828091659708694e-07, - "loss": 0.0848, + "loss": 0.0156, "step": 10411 }, { "epoch": 2.9, "learning_rate": 6.809537062807312e-07, - "loss": 0.1953, + "loss": 0.1284, "step": 10412 }, { "epoch": 2.9, "learning_rate": 6.79098246590593e-07, - "loss": 0.0865, + "loss": 0.1043, "step": 10413 }, { "epoch": 2.9, "learning_rate": 6.772427869004547e-07, - "loss": 0.0839, + "loss": 0.0102, "step": 10414 }, { "epoch": 2.9, "learning_rate": 6.753873272103165e-07, - "loss": 0.1379, + "loss": 0.0308, "step": 10415 }, { "epoch": 2.9, "learning_rate": 6.735318675201783e-07, - "loss": 0.0863, + "loss": 0.1367, "step": 10416 }, { "epoch": 2.9, "learning_rate": 6.7167640783004e-07, - "loss": 0.0319, + "loss": 0.0583, "step": 10417 }, { "epoch": 2.9, "learning_rate": 6.698209481399018e-07, - "loss": 0.0863, + "loss": 0.0626, "step": 10418 }, { "epoch": 2.9, "learning_rate": 6.679654884497634e-07, - "loss": 0.1393, + "loss": 0.1605, "step": 10419 }, { "epoch": 2.9, "learning_rate": 6.661100287596252e-07, - "loss": 0.2425, + "loss": 0.0247, "step": 10420 }, { "epoch": 2.9, "learning_rate": 6.64254569069487e-07, - "loss": 0.139, + "loss": 0.0889, "step": 10421 }, { "epoch": 2.9, "learning_rate": 6.623991093793487e-07, - "loss": 0.1387, + "loss": 0.0193, "step": 10422 }, { "epoch": 2.9, "learning_rate": 6.605436496892105e-07, - "loss": 0.1427, + "loss": 0.055, "step": 10423 }, { "epoch": 2.9, "learning_rate": 6.586881899990723e-07, - "loss": 0.1934, + "loss": 0.0786, "step": 10424 }, { "epoch": 2.9, "learning_rate": 6.56832730308934e-07, - "loss": 0.0874, + "loss": 0.0707, "step": 10425 }, { "epoch": 2.9, "learning_rate": 6.549772706187958e-07, - "loss": 0.0868, + "loss": 0.0737, "step": 10426 }, { "epoch": 2.9, "learning_rate": 6.531218109286576e-07, - "loss": 0.1418, + "loss": 0.0144, "step": 10427 }, { "epoch": 2.9, "learning_rate": 6.512663512385193e-07, - "loss": 0.0849, + "loss": 0.0544, "step": 10428 }, { "epoch": 2.9, "learning_rate": 6.494108915483811e-07, - "loss": 0.1378, + "loss": 0.1018, "step": 10429 }, { "epoch": 2.9, "learning_rate": 6.47555431858243e-07, - "loss": 0.1912, + "loss": 0.1747, "step": 10430 }, { "epoch": 2.9, "learning_rate": 6.456999721681047e-07, - "loss": 0.2976, + "loss": 0.0827, "step": 10431 }, { "epoch": 2.9, "learning_rate": 6.438445124779665e-07, - "loss": 0.0897, + "loss": 0.0499, "step": 10432 }, { "epoch": 2.9, "learning_rate": 6.419890527878283e-07, - "loss": 0.0321, + "loss": 0.1171, "step": 10433 }, { "epoch": 2.9, "learning_rate": 6.4013359309769e-07, - "loss": 0.3007, + "loss": 0.0162, "step": 10434 }, { "epoch": 2.9, "learning_rate": 6.382781334075518e-07, - "loss": 0.0865, + "loss": 0.0156, "step": 10435 }, { "epoch": 2.9, "learning_rate": 6.364226737174135e-07, - "loss": 0.2389, + "loss": 0.1229, "step": 10436 }, { "epoch": 2.9, "learning_rate": 6.345672140272753e-07, - "loss": 0.2457, + "loss": 0.0487, "step": 10437 }, { "epoch": 2.91, "learning_rate": 6.327117543371371e-07, - "loss": 0.3002, + "loss": 0.0159, "step": 10438 }, { "epoch": 2.91, "learning_rate": 6.308562946469988e-07, - "loss": 0.1933, + "loss": 0.092, "step": 10439 }, { "epoch": 2.91, "learning_rate": 6.290008349568606e-07, - "loss": 0.2466, + "loss": 0.0739, "step": 10440 }, { "epoch": 2.91, "learning_rate": 6.271453752667224e-07, - "loss": 0.1376, + "loss": 0.0236, "step": 10441 }, { "epoch": 2.91, "learning_rate": 6.252899155765841e-07, - "loss": 0.1921, + "loss": 0.0489, "step": 10442 }, { "epoch": 2.91, "learning_rate": 6.234344558864459e-07, - "loss": 0.1366, + "loss": 0.0952, "step": 10443 }, { "epoch": 2.91, "learning_rate": 6.215789961963077e-07, - "loss": 0.0872, + "loss": 0.0502, "step": 10444 }, { "epoch": 2.91, "learning_rate": 6.197235365061695e-07, - "loss": 0.0316, + "loss": 0.0176, "step": 10445 }, { "epoch": 2.91, "learning_rate": 6.178680768160313e-07, - "loss": 0.0859, + "loss": 0.0117, "step": 10446 }, { "epoch": 2.91, "learning_rate": 6.16012617125893e-07, - "loss": 0.1348, + "loss": 0.1065, "step": 10447 }, { "epoch": 2.91, "learning_rate": 6.141571574357547e-07, - "loss": 0.1391, + "loss": 0.0103, "step": 10448 }, { "epoch": 2.91, "learning_rate": 6.123016977456165e-07, - "loss": 0.0845, + "loss": 0.018, "step": 10449 }, { "epoch": 2.91, "learning_rate": 6.104462380554783e-07, - "loss": 0.1897, + "loss": 0.0536, "step": 10450 }, { "epoch": 2.91, "learning_rate": 6.0859077836534e-07, - "loss": 0.138, + "loss": 0.0133, "step": 10451 }, { "epoch": 2.91, "learning_rate": 6.067353186752018e-07, - "loss": 0.244, + "loss": 0.0523, "step": 10452 }, { "epoch": 2.91, "learning_rate": 6.048798589850635e-07, - "loss": 0.1915, + "loss": 0.0836, "step": 10453 }, { "epoch": 2.91, "learning_rate": 6.030243992949253e-07, - "loss": 0.1377, + "loss": 0.0497, "step": 10454 }, { "epoch": 2.91, "learning_rate": 6.011689396047872e-07, - "loss": 0.0855, + "loss": 0.2478, "step": 10455 }, { "epoch": 2.91, "learning_rate": 5.993134799146489e-07, - "loss": 0.0866, + "loss": 0.109, "step": 10456 }, { "epoch": 2.91, "learning_rate": 5.974580202245107e-07, - "loss": 0.1392, + "loss": 0.0104, "step": 10457 }, { "epoch": 2.91, "learning_rate": 5.956025605343725e-07, - "loss": 0.2936, + "loss": 0.1061, "step": 10458 }, { "epoch": 2.91, "learning_rate": 5.937471008442342e-07, - "loss": 0.0885, + "loss": 0.1224, "step": 10459 }, { "epoch": 2.91, "learning_rate": 5.91891641154096e-07, - "loss": 0.0851, + "loss": 0.0515, "step": 10460 }, { "epoch": 2.91, "learning_rate": 5.900361814639578e-07, - "loss": 0.0862, + "loss": 0.0661, "step": 10461 }, { "epoch": 2.91, "learning_rate": 5.881807217738195e-07, - "loss": 0.0869, + "loss": 0.0892, "step": 10462 }, { "epoch": 2.91, "learning_rate": 5.863252620836813e-07, - "loss": 0.1372, + "loss": 0.0333, "step": 10463 }, { "epoch": 2.91, "learning_rate": 5.844698023935431e-07, - "loss": 0.0313, + "loss": 0.048, "step": 10464 }, { "epoch": 2.91, "learning_rate": 5.826143427034048e-07, - "loss": 0.141, + "loss": 0.084, "step": 10465 }, { "epoch": 2.91, "learning_rate": 5.807588830132666e-07, - "loss": 0.0314, + "loss": 0.0157, "step": 10466 }, { "epoch": 2.91, "learning_rate": 5.789034233231284e-07, - "loss": 0.0846, + "loss": 0.0506, "step": 10467 }, { "epoch": 2.91, "learning_rate": 5.770479636329901e-07, - "loss": 0.1897, + "loss": 0.1483, "step": 10468 }, { "epoch": 2.91, "learning_rate": 5.751925039428519e-07, - "loss": 0.1403, + "loss": 0.0149, "step": 10469 }, { "epoch": 2.91, "learning_rate": 5.733370442527137e-07, - "loss": 0.1357, + "loss": 0.0677, "step": 10470 }, { "epoch": 2.91, "learning_rate": 5.714815845625754e-07, - "loss": 0.1378, + "loss": 0.0184, "step": 10471 }, { "epoch": 2.91, "learning_rate": 5.696261248724372e-07, - "loss": 0.2415, + "loss": 0.0943, "step": 10472 }, { "epoch": 2.91, "learning_rate": 5.677706651822989e-07, - "loss": 0.1874, + "loss": 0.0865, "step": 10473 }, { "epoch": 2.92, "learning_rate": 5.659152054921607e-07, - "loss": 0.1373, + "loss": 0.0485, "step": 10474 }, { "epoch": 2.92, "learning_rate": 5.640597458020225e-07, - "loss": 0.0877, + "loss": 0.1826, "step": 10475 }, { "epoch": 2.92, "learning_rate": 5.622042861118842e-07, - "loss": 0.2479, + "loss": 0.0381, "step": 10476 }, { "epoch": 2.92, "learning_rate": 5.60348826421746e-07, - "loss": 0.1386, + "loss": 0.0174, "step": 10477 }, { "epoch": 2.92, "learning_rate": 5.584933667316078e-07, - "loss": 0.0836, + "loss": 0.1203, "step": 10478 }, { "epoch": 2.92, "learning_rate": 5.566379070414695e-07, - "loss": 0.03, + "loss": 0.1108, "step": 10479 }, { "epoch": 2.92, "learning_rate": 5.547824473513314e-07, - "loss": 0.0865, + "loss": 0.0521, "step": 10480 }, { "epoch": 2.92, "learning_rate": 5.529269876611932e-07, - "loss": 0.0846, + "loss": 0.0438, "step": 10481 }, { "epoch": 2.92, "learning_rate": 5.510715279710549e-07, - "loss": 0.1395, + "loss": 0.0628, "step": 10482 }, { "epoch": 2.92, "learning_rate": 5.492160682809167e-07, - "loss": 0.1372, + "loss": 0.1428, "step": 10483 }, { "epoch": 2.92, "learning_rate": 5.473606085907785e-07, - "loss": 0.1389, + "loss": 0.1039, "step": 10484 }, { "epoch": 2.92, "learning_rate": 5.455051489006401e-07, - "loss": 0.1936, + "loss": 0.0143, "step": 10485 }, { "epoch": 2.92, "learning_rate": 5.436496892105019e-07, - "loss": 0.0848, + "loss": 0.0589, "step": 10486 }, { "epoch": 2.92, "learning_rate": 5.417942295203637e-07, - "loss": 0.192, + "loss": 0.1215, "step": 10487 }, { "epoch": 2.92, "learning_rate": 5.399387698302254e-07, - "loss": 0.0309, + "loss": 0.0216, "step": 10488 }, { "epoch": 2.92, "learning_rate": 5.380833101400872e-07, - "loss": 0.1384, + "loss": 0.0098, "step": 10489 }, { "epoch": 2.92, "learning_rate": 5.36227850449949e-07, - "loss": 0.2471, + "loss": 0.0978, "step": 10490 }, { "epoch": 2.92, "learning_rate": 5.343723907598108e-07, - "loss": 0.0314, + "loss": 0.0835, "step": 10491 }, { "epoch": 2.92, "learning_rate": 5.325169310696726e-07, - "loss": 0.1387, + "loss": 0.0082, "step": 10492 }, { "epoch": 2.92, "learning_rate": 5.306614713795343e-07, - "loss": 0.0851, + "loss": 0.1399, "step": 10493 }, { "epoch": 2.92, "learning_rate": 5.288060116893961e-07, - "loss": 0.1377, + "loss": 0.0529, "step": 10494 }, { "epoch": 2.92, "learning_rate": 5.269505519992579e-07, - "loss": 0.0317, + "loss": 0.0498, "step": 10495 }, { "epoch": 2.92, "learning_rate": 5.250950923091196e-07, - "loss": 0.0315, + "loss": 0.0113, "step": 10496 }, { "epoch": 2.92, "learning_rate": 5.232396326189814e-07, - "loss": 0.0804, + "loss": 0.0115, "step": 10497 }, { "epoch": 2.92, "learning_rate": 5.213841729288432e-07, - "loss": 0.0857, + "loss": 0.1537, "step": 10498 }, { "epoch": 2.92, "learning_rate": 5.195287132387049e-07, - "loss": 0.1923, + "loss": 0.0545, "step": 10499 }, { "epoch": 2.92, "learning_rate": 5.176732535485667e-07, - "loss": 0.0316, + "loss": 0.0672, "step": 10500 }, { "epoch": 2.92, "learning_rate": 5.158177938584285e-07, - "loss": 0.1952, + "loss": 0.0232, "step": 10501 }, { "epoch": 2.92, "learning_rate": 5.139623341682902e-07, - "loss": 0.2473, + "loss": 0.1498, "step": 10502 }, { "epoch": 2.92, "learning_rate": 5.12106874478152e-07, - "loss": 0.0876, + "loss": 0.093, "step": 10503 }, { "epoch": 2.92, "learning_rate": 5.102514147880138e-07, - "loss": 0.1391, + "loss": 0.0114, "step": 10504 }, { "epoch": 2.92, "learning_rate": 5.083959550978755e-07, - "loss": 0.0831, + "loss": 0.015, "step": 10505 }, { "epoch": 2.92, "learning_rate": 5.065404954077373e-07, - "loss": 0.2441, + "loss": 0.0746, "step": 10506 }, { "epoch": 2.92, "learning_rate": 5.04685035717599e-07, - "loss": 0.0852, + "loss": 0.0514, "step": 10507 }, { "epoch": 2.92, "learning_rate": 5.028295760274608e-07, - "loss": 0.1413, + "loss": 0.0115, "step": 10508 }, { "epoch": 2.92, "learning_rate": 5.009741163373226e-07, - "loss": 0.1917, + "loss": 0.0776, "step": 10509 }, { "epoch": 2.93, "learning_rate": 4.991186566471843e-07, - "loss": 0.1354, + "loss": 0.0177, "step": 10510 }, { "epoch": 2.93, "learning_rate": 4.972631969570461e-07, - "loss": 0.0838, + "loss": 0.1928, "step": 10511 }, { "epoch": 2.93, "learning_rate": 4.954077372669079e-07, - "loss": 0.0871, + "loss": 0.1063, "step": 10512 }, { "epoch": 2.93, "learning_rate": 4.935522775767696e-07, - "loss": 0.1425, + "loss": 0.1605, "step": 10513 }, { "epoch": 2.93, "learning_rate": 4.916968178866314e-07, - "loss": 0.1426, + "loss": 0.0138, "step": 10514 }, { "epoch": 2.93, "learning_rate": 4.898413581964933e-07, - "loss": 0.1407, + "loss": 0.0393, "step": 10515 }, { "epoch": 2.93, "learning_rate": 4.87985898506355e-07, - "loss": 0.2461, + "loss": 0.0148, "step": 10516 }, { "epoch": 2.93, "learning_rate": 4.861304388162168e-07, - "loss": 0.1399, + "loss": 0.1484, "step": 10517 }, { "epoch": 2.93, "learning_rate": 4.842749791260786e-07, - "loss": 0.0873, + "loss": 0.0219, "step": 10518 }, { "epoch": 2.93, "learning_rate": 4.824195194359403e-07, - "loss": 0.1404, + "loss": 0.012, "step": 10519 }, { "epoch": 2.93, "learning_rate": 4.805640597458021e-07, - "loss": 0.3016, + "loss": 0.1033, "step": 10520 }, { "epoch": 2.93, "learning_rate": 4.787086000556639e-07, - "loss": 0.2453, + "loss": 0.0408, "step": 10521 }, { "epoch": 2.93, "learning_rate": 4.768531403655256e-07, - "loss": 0.1903, + "loss": 0.0209, "step": 10522 }, { "epoch": 2.93, "learning_rate": 4.7499768067538734e-07, - "loss": 0.1399, + "loss": 0.0482, "step": 10523 }, { "epoch": 2.93, "learning_rate": 4.731422209852491e-07, - "loss": 0.0824, + "loss": 0.016, "step": 10524 }, { "epoch": 2.93, "learning_rate": 4.7128676129511087e-07, - "loss": 0.1399, + "loss": 0.0499, "step": 10525 }, { "epoch": 2.93, "learning_rate": 4.6943130160497263e-07, - "loss": 0.1922, + "loss": 0.1465, "step": 10526 }, { "epoch": 2.93, "learning_rate": 4.675758419148344e-07, - "loss": 0.3046, + "loss": 0.0729, "step": 10527 }, { "epoch": 2.93, "learning_rate": 4.657203822246962e-07, - "loss": 0.0842, + "loss": 0.1424, "step": 10528 }, { "epoch": 2.93, "learning_rate": 4.63864922534558e-07, - "loss": 0.1384, + "loss": 0.1382, "step": 10529 }, { "epoch": 2.93, "learning_rate": 4.6200946284441974e-07, - "loss": 0.085, + "loss": 0.0191, "step": 10530 }, { "epoch": 2.93, "learning_rate": 4.601540031542815e-07, - "loss": 0.1404, + "loss": 0.0288, "step": 10531 }, { "epoch": 2.93, "learning_rate": 4.582985434641433e-07, - "loss": 0.3567, + "loss": 0.0214, "step": 10532 }, { "epoch": 2.93, "learning_rate": 4.5644308377400504e-07, - "loss": 0.0829, + "loss": 0.0176, "step": 10533 }, { "epoch": 2.93, "learning_rate": 4.545876240838668e-07, - "loss": 0.2428, + "loss": 0.0705, "step": 10534 }, { "epoch": 2.93, "learning_rate": 4.5273216439372857e-07, - "loss": 0.0882, + "loss": 0.0817, "step": 10535 }, { "epoch": 2.93, "learning_rate": 4.508767047035904e-07, - "loss": 0.1357, + "loss": 0.081, "step": 10536 }, { "epoch": 2.93, "learning_rate": 4.4902124501345215e-07, - "loss": 0.1397, + "loss": 0.0251, "step": 10537 }, { "epoch": 2.93, "learning_rate": 4.471657853233139e-07, - "loss": 0.1382, + "loss": 0.0764, "step": 10538 }, { "epoch": 2.93, "learning_rate": 4.453103256331757e-07, - "loss": 0.031, + "loss": 0.0186, "step": 10539 }, { "epoch": 2.93, "learning_rate": 4.4345486594303744e-07, - "loss": 0.0322, + "loss": 0.0138, "step": 10540 }, { "epoch": 2.93, "learning_rate": 4.415994062528992e-07, - "loss": 0.1391, + "loss": 0.0318, "step": 10541 }, { "epoch": 2.93, "learning_rate": 4.397439465627609e-07, - "loss": 0.1949, + "loss": 0.0229, "step": 10542 }, { "epoch": 2.93, "learning_rate": 4.378884868726227e-07, - "loss": 0.1377, + "loss": 0.0811, "step": 10543 }, { "epoch": 2.93, "learning_rate": 4.3603302718248445e-07, - "loss": 0.1394, + "loss": 0.0874, "step": 10544 }, { "epoch": 2.93, "learning_rate": 4.3417756749234627e-07, - "loss": 0.1959, + "loss": 0.1433, "step": 10545 }, { "epoch": 2.94, "learning_rate": 4.3232210780220803e-07, - "loss": 0.0857, + "loss": 0.0647, "step": 10546 }, { "epoch": 2.94, "learning_rate": 4.304666481120698e-07, - "loss": 0.1398, + "loss": 0.0327, "step": 10547 }, { "epoch": 2.94, "learning_rate": 4.2861118842193156e-07, - "loss": 0.1365, + "loss": 0.0249, "step": 10548 }, { "epoch": 2.94, "learning_rate": 4.2675572873179333e-07, - "loss": 0.0842, + "loss": 0.0229, "step": 10549 }, { "epoch": 2.94, "learning_rate": 4.249002690416551e-07, - "loss": 0.1905, + "loss": 0.0329, "step": 10550 }, { "epoch": 2.94, "learning_rate": 4.2304480935151686e-07, - "loss": 0.0827, + "loss": 0.0515, "step": 10551 }, { "epoch": 2.94, "learning_rate": 4.211893496613786e-07, - "loss": 0.1941, + "loss": 0.0508, "step": 10552 }, { "epoch": 2.94, "learning_rate": 4.1933388997124044e-07, - "loss": 0.0306, + "loss": 0.0243, "step": 10553 }, { "epoch": 2.94, "learning_rate": 4.174784302811022e-07, - "loss": 0.1426, + "loss": 0.0223, "step": 10554 }, { "epoch": 2.94, "learning_rate": 4.1562297059096397e-07, - "loss": 0.085, + "loss": 0.1112, "step": 10555 }, { "epoch": 2.94, "learning_rate": 4.1376751090082573e-07, - "loss": 0.0318, + "loss": 0.0096, "step": 10556 }, { "epoch": 2.94, "learning_rate": 4.119120512106875e-07, - "loss": 0.192, + "loss": 0.0174, "step": 10557 }, { "epoch": 2.94, "learning_rate": 4.1005659152054926e-07, - "loss": 0.1957, + "loss": 0.0203, "step": 10558 }, { "epoch": 2.94, "learning_rate": 4.08201131830411e-07, - "loss": 0.0822, + "loss": 0.0608, "step": 10559 }, { "epoch": 2.94, "learning_rate": 4.0634567214027284e-07, - "loss": 0.1363, + "loss": 0.0247, "step": 10560 }, { "epoch": 2.94, "learning_rate": 4.044902124501345e-07, - "loss": 0.1978, + "loss": 0.1765, "step": 10561 }, { "epoch": 2.94, "learning_rate": 4.0263475275999627e-07, - "loss": 0.1417, + "loss": 0.0181, "step": 10562 }, { "epoch": 2.94, "learning_rate": 4.007792930698581e-07, - "loss": 0.2477, + "loss": 0.0493, "step": 10563 }, { "epoch": 2.94, "learning_rate": 3.9892383337971985e-07, - "loss": 0.0854, + "loss": 0.0194, "step": 10564 }, { "epoch": 2.94, "learning_rate": 3.970683736895816e-07, - "loss": 0.0317, + "loss": 0.0465, "step": 10565 }, { "epoch": 2.94, "learning_rate": 3.952129139994434e-07, - "loss": 0.0309, + "loss": 0.0609, "step": 10566 }, { "epoch": 2.94, "learning_rate": 3.9335745430930514e-07, - "loss": 0.0857, + "loss": 0.0098, "step": 10567 }, { "epoch": 2.94, "learning_rate": 3.915019946191669e-07, - "loss": 0.1372, + "loss": 0.0166, "step": 10568 }, { "epoch": 2.94, "learning_rate": 3.8964653492902867e-07, - "loss": 0.0319, + "loss": 0.1125, "step": 10569 }, { "epoch": 2.94, "learning_rate": 3.877910752388905e-07, - "loss": 0.0861, + "loss": 0.0781, "step": 10570 }, { "epoch": 2.94, "learning_rate": 3.8593561554875226e-07, - "loss": 0.0844, + "loss": 0.0489, "step": 10571 }, { "epoch": 2.94, "learning_rate": 3.84080155858614e-07, - "loss": 0.1349, + "loss": 0.0212, "step": 10572 }, { "epoch": 2.94, "learning_rate": 3.822246961684758e-07, - "loss": 0.1919, + "loss": 0.035, "step": 10573 }, { "epoch": 2.94, "learning_rate": 3.8036923647833755e-07, - "loss": 0.0845, + "loss": 0.051, "step": 10574 }, { "epoch": 2.94, "learning_rate": 3.785137767881993e-07, - "loss": 0.0828, + "loss": 0.045, "step": 10575 }, { "epoch": 2.94, "learning_rate": 3.766583170980611e-07, - "loss": 0.1374, + "loss": 0.1049, "step": 10576 }, { "epoch": 2.94, "learning_rate": 3.7480285740792284e-07, - "loss": 0.0318, + "loss": 0.0226, "step": 10577 }, { "epoch": 2.94, "learning_rate": 3.7294739771778466e-07, - "loss": 0.0312, + "loss": 0.1186, "step": 10578 }, { "epoch": 2.94, "learning_rate": 3.710919380276464e-07, - "loss": 0.1381, + "loss": 0.0648, "step": 10579 }, { "epoch": 2.94, "learning_rate": 3.6923647833750814e-07, - "loss": 0.2447, + "loss": 0.1213, "step": 10580 }, { "epoch": 2.94, "learning_rate": 3.673810186473699e-07, - "loss": 0.1404, + "loss": 0.0656, "step": 10581 }, { "epoch": 2.95, "learning_rate": 3.6552555895723167e-07, - "loss": 0.2975, + "loss": 0.0427, "step": 10582 }, { "epoch": 2.95, "learning_rate": 3.6367009926709343e-07, - "loss": 0.1425, + "loss": 0.0291, "step": 10583 }, { "epoch": 2.95, "learning_rate": 3.618146395769552e-07, - "loss": 0.0877, + "loss": 0.0701, "step": 10584 }, { "epoch": 2.95, "learning_rate": 3.5995917988681696e-07, - "loss": 0.0884, + "loss": 0.022, "step": 10585 }, { "epoch": 2.95, "learning_rate": 3.581037201966787e-07, - "loss": 0.0853, + "loss": 0.2428, "step": 10586 }, { "epoch": 2.95, "learning_rate": 3.562482605065405e-07, - "loss": 0.1925, + "loss": 0.1191, "step": 10587 }, { "epoch": 2.95, "learning_rate": 3.543928008164023e-07, - "loss": 0.1878, + "loss": 0.0103, "step": 10588 }, { "epoch": 2.95, "learning_rate": 3.5253734112626407e-07, - "loss": 0.1968, + "loss": 0.0939, "step": 10589 }, { "epoch": 2.95, "learning_rate": 3.5068188143612584e-07, - "loss": 0.1905, + "loss": 0.0311, "step": 10590 }, { "epoch": 2.95, "learning_rate": 3.488264217459876e-07, - "loss": 0.085, + "loss": 0.0474, "step": 10591 }, { "epoch": 2.95, "learning_rate": 3.4697096205584937e-07, - "loss": 0.0309, + "loss": 0.1411, "step": 10592 }, { "epoch": 2.95, "learning_rate": 3.4511550236571113e-07, - "loss": 0.1949, + "loss": 0.0058, "step": 10593 }, { "epoch": 2.95, "learning_rate": 3.432600426755729e-07, - "loss": 0.2462, + "loss": 0.0081, "step": 10594 }, { "epoch": 2.95, "learning_rate": 3.414045829854347e-07, - "loss": 0.1935, + "loss": 0.0755, "step": 10595 }, { "epoch": 2.95, "learning_rate": 3.395491232952965e-07, - "loss": 0.1931, + "loss": 0.0138, "step": 10596 }, { "epoch": 2.95, "learning_rate": 3.3769366360515824e-07, - "loss": 0.0312, + "loss": 0.1377, "step": 10597 }, { "epoch": 2.95, "learning_rate": 3.3583820391502e-07, - "loss": 0.2447, + "loss": 0.206, "step": 10598 }, { "epoch": 2.95, "learning_rate": 3.339827442248817e-07, - "loss": 0.1891, + "loss": 0.0597, "step": 10599 }, { "epoch": 2.95, "learning_rate": 3.321272845347435e-07, - "loss": 0.1392, + "loss": 0.0772, "step": 10600 }, { "epoch": 2.95, "learning_rate": 3.3027182484460525e-07, - "loss": 0.1397, + "loss": 0.0191, "step": 10601 }, { "epoch": 2.95, "learning_rate": 3.28416365154467e-07, - "loss": 0.143, + "loss": 0.0642, "step": 10602 }, { "epoch": 2.95, "learning_rate": 3.265609054643288e-07, - "loss": 0.0843, + "loss": 0.0184, "step": 10603 }, { "epoch": 2.95, "learning_rate": 3.2470544577419054e-07, - "loss": 0.2484, + "loss": 0.0697, "step": 10604 }, { "epoch": 2.95, "learning_rate": 3.2284998608405236e-07, - "loss": 0.0849, + "loss": 0.0437, "step": 10605 }, { "epoch": 2.95, "learning_rate": 3.209945263939141e-07, - "loss": 0.084, + "loss": 0.0745, "step": 10606 }, { "epoch": 2.95, "learning_rate": 3.191390667037759e-07, - "loss": 0.0308, + "loss": 0.0792, "step": 10607 }, { "epoch": 2.95, "learning_rate": 3.1728360701363766e-07, - "loss": 0.0854, + "loss": 0.1062, "step": 10608 }, { "epoch": 2.95, "learning_rate": 3.154281473234994e-07, - "loss": 0.0319, + "loss": 0.019, "step": 10609 }, { "epoch": 2.95, "learning_rate": 3.135726876333612e-07, - "loss": 0.1388, + "loss": 0.0487, "step": 10610 }, { "epoch": 2.95, "learning_rate": 3.1171722794322295e-07, - "loss": 0.1904, + "loss": 0.0488, "step": 10611 }, { "epoch": 2.95, "learning_rate": 3.0986176825308477e-07, - "loss": 0.1424, + "loss": 0.0626, "step": 10612 }, { "epoch": 2.95, "learning_rate": 3.080063085629465e-07, - "loss": 0.138, + "loss": 0.0424, "step": 10613 }, { "epoch": 2.95, "learning_rate": 3.0615084887280824e-07, - "loss": 0.0805, + "loss": 0.071, "step": 10614 }, { "epoch": 2.95, "learning_rate": 3.0429538918267e-07, - "loss": 0.0863, + "loss": 0.0664, "step": 10615 }, { "epoch": 2.95, "learning_rate": 3.0243992949253177e-07, - "loss": 0.0832, + "loss": 0.0817, "step": 10616 }, { "epoch": 2.95, "learning_rate": 3.005844698023936e-07, - "loss": 0.1381, + "loss": 0.0489, "step": 10617 }, { "epoch": 2.96, "learning_rate": 2.9872901011225536e-07, - "loss": 0.1913, + "loss": 0.0146, "step": 10618 }, { "epoch": 2.96, "learning_rate": 2.968735504221171e-07, - "loss": 0.0849, + "loss": 0.0471, "step": 10619 }, { "epoch": 2.96, "learning_rate": 2.950180907319789e-07, - "loss": 0.1409, + "loss": 0.0713, "step": 10620 }, { "epoch": 2.96, "learning_rate": 2.9316263104184065e-07, - "loss": 0.1879, + "loss": 0.0117, "step": 10621 }, { "epoch": 2.96, "learning_rate": 2.913071713517024e-07, - "loss": 0.0858, + "loss": 0.1375, "step": 10622 }, { "epoch": 2.96, "learning_rate": 2.894517116615642e-07, - "loss": 0.1392, + "loss": 0.0177, "step": 10623 }, { "epoch": 2.96, "learning_rate": 2.8759625197142594e-07, - "loss": 0.1348, + "loss": 0.0859, "step": 10624 }, { "epoch": 2.96, "learning_rate": 2.857407922812877e-07, - "loss": 0.0836, + "loss": 0.0833, "step": 10625 }, { "epoch": 2.96, "learning_rate": 2.8388533259114947e-07, - "loss": 0.0306, + "loss": 0.0538, "step": 10626 }, { "epoch": 2.96, "learning_rate": 2.8202987290101124e-07, - "loss": 0.0867, + "loss": 0.0103, "step": 10627 }, { "epoch": 2.96, "learning_rate": 2.80174413210873e-07, - "loss": 0.0309, + "loss": 0.0237, "step": 10628 }, { "epoch": 2.96, "learning_rate": 2.7831895352073477e-07, - "loss": 0.1408, + "loss": 0.1, "step": 10629 }, { "epoch": 2.96, "learning_rate": 2.764634938305966e-07, - "loss": 0.1399, + "loss": 0.1174, "step": 10630 }, { "epoch": 2.96, "learning_rate": 2.7460803414045835e-07, - "loss": 0.0873, + "loss": 0.1109, "step": 10631 }, { "epoch": 2.96, "learning_rate": 2.7275257445032006e-07, - "loss": 0.1996, + "loss": 0.0426, "step": 10632 }, { "epoch": 2.96, "learning_rate": 2.708971147601818e-07, - "loss": 0.248, + "loss": 0.0626, "step": 10633 }, { "epoch": 2.96, "learning_rate": 2.690416550700436e-07, - "loss": 0.194, + "loss": 0.1537, "step": 10634 }, { "epoch": 2.96, "learning_rate": 2.671861953799054e-07, - "loss": 0.0845, + "loss": 0.0131, "step": 10635 }, { "epoch": 2.96, "learning_rate": 2.6533073568976717e-07, - "loss": 0.1931, + "loss": 0.0104, "step": 10636 }, { "epoch": 2.96, "learning_rate": 2.6347527599962894e-07, - "loss": 0.0311, + "loss": 0.0148, "step": 10637 }, { "epoch": 2.96, "learning_rate": 2.616198163094907e-07, - "loss": 0.1894, + "loss": 0.043, "step": 10638 }, { "epoch": 2.96, "learning_rate": 2.5976435661935247e-07, - "loss": 0.1423, + "loss": 0.0503, "step": 10639 }, { "epoch": 2.96, "learning_rate": 2.5790889692921423e-07, - "loss": 0.084, + "loss": 0.1547, "step": 10640 }, { "epoch": 2.96, "learning_rate": 2.56053437239076e-07, - "loss": 0.1353, + "loss": 0.0574, "step": 10641 }, { "epoch": 2.96, "learning_rate": 2.5419797754893776e-07, - "loss": 0.0859, + "loss": 0.0262, "step": 10642 }, { "epoch": 2.96, "learning_rate": 2.523425178587995e-07, - "loss": 0.0847, + "loss": 0.0869, "step": 10643 }, { "epoch": 2.96, "learning_rate": 2.504870581686613e-07, - "loss": 0.0827, + "loss": 0.1493, "step": 10644 }, { "epoch": 2.96, "learning_rate": 2.4863159847852305e-07, - "loss": 0.0875, + "loss": 0.1046, "step": 10645 }, { "epoch": 2.96, "learning_rate": 2.467761387883848e-07, - "loss": 0.1384, + "loss": 0.0854, "step": 10646 }, { "epoch": 2.96, "learning_rate": 2.4492067909824664e-07, - "loss": 0.0309, + "loss": 0.1379, "step": 10647 }, { "epoch": 2.96, "learning_rate": 2.430652194081084e-07, - "loss": 0.0843, + "loss": 0.0762, "step": 10648 }, { "epoch": 2.96, "learning_rate": 2.4120975971797017e-07, - "loss": 0.191, + "loss": 0.0107, "step": 10649 }, { "epoch": 2.96, "learning_rate": 2.3935430002783193e-07, - "loss": 0.0864, + "loss": 0.1028, "step": 10650 }, { "epoch": 2.96, "learning_rate": 2.3749884033769367e-07, - "loss": 0.1397, + "loss": 0.0118, "step": 10651 }, { "epoch": 2.96, "learning_rate": 2.3564338064755543e-07, - "loss": 0.0307, + "loss": 0.0441, "step": 10652 }, { "epoch": 2.96, "learning_rate": 2.337879209574172e-07, - "loss": 0.0855, + "loss": 0.1073, "step": 10653 }, { "epoch": 2.97, "learning_rate": 2.31932461267279e-07, - "loss": 0.2473, + "loss": 0.2426, "step": 10654 }, { "epoch": 2.97, "learning_rate": 2.3007700157714075e-07, - "loss": 0.1385, + "loss": 0.008, "step": 10655 }, { "epoch": 2.97, "learning_rate": 2.2822154188700252e-07, - "loss": 0.186, + "loss": 0.0795, "step": 10656 }, { "epoch": 2.97, "learning_rate": 2.2636608219686428e-07, - "loss": 0.0859, + "loss": 0.0351, "step": 10657 }, { "epoch": 2.97, "learning_rate": 2.2451062250672608e-07, - "loss": 0.1945, + "loss": 0.1286, "step": 10658 }, { "epoch": 2.97, "learning_rate": 2.2265516281658784e-07, - "loss": 0.1362, + "loss": 0.0853, "step": 10659 }, { "epoch": 2.97, "learning_rate": 2.207997031264496e-07, - "loss": 0.299, + "loss": 0.0499, "step": 10660 }, { "epoch": 2.97, "learning_rate": 2.1894424343631134e-07, - "loss": 0.0834, + "loss": 0.2199, "step": 10661 }, { "epoch": 2.97, "learning_rate": 2.1708878374617313e-07, - "loss": 0.1409, + "loss": 0.1117, "step": 10662 }, { "epoch": 2.97, "learning_rate": 2.152333240560349e-07, - "loss": 0.1967, + "loss": 0.0146, "step": 10663 }, { "epoch": 2.97, "learning_rate": 2.1337786436589666e-07, - "loss": 0.0304, + "loss": 0.0919, "step": 10664 }, { "epoch": 2.97, "learning_rate": 2.1152240467575843e-07, - "loss": 0.0309, + "loss": 0.0846, "step": 10665 }, { "epoch": 2.97, "learning_rate": 2.0966694498562022e-07, - "loss": 0.1388, + "loss": 0.0764, "step": 10666 }, { "epoch": 2.97, "learning_rate": 2.0781148529548198e-07, - "loss": 0.1405, + "loss": 0.0385, "step": 10667 }, { "epoch": 2.97, "learning_rate": 2.0595602560534375e-07, - "loss": 0.0854, + "loss": 0.0624, "step": 10668 }, { "epoch": 2.97, "learning_rate": 2.041005659152055e-07, - "loss": 0.0854, + "loss": 0.2446, "step": 10669 }, { "epoch": 2.97, "learning_rate": 2.0224510622506725e-07, - "loss": 0.1919, + "loss": 0.1337, "step": 10670 }, { "epoch": 2.97, "learning_rate": 2.0038964653492904e-07, - "loss": 0.0312, + "loss": 0.0711, "step": 10671 }, { "epoch": 2.97, "learning_rate": 1.985341868447908e-07, - "loss": 0.1395, + "loss": 0.0214, "step": 10672 }, { "epoch": 2.97, "learning_rate": 1.9667872715465257e-07, - "loss": 0.1935, + "loss": 0.0179, "step": 10673 }, { "epoch": 2.97, "learning_rate": 1.9482326746451434e-07, - "loss": 0.0856, + "loss": 0.1166, "step": 10674 }, { "epoch": 2.97, "learning_rate": 1.9296780777437613e-07, - "loss": 0.1433, + "loss": 0.0635, "step": 10675 }, { "epoch": 2.97, "learning_rate": 1.911123480842379e-07, - "loss": 0.0826, + "loss": 0.1135, "step": 10676 }, { "epoch": 2.97, "learning_rate": 1.8925688839409966e-07, - "loss": 0.0309, + "loss": 0.0811, "step": 10677 }, { "epoch": 2.97, "learning_rate": 1.8740142870396142e-07, - "loss": 0.1903, + "loss": 0.1191, "step": 10678 }, { "epoch": 2.97, "learning_rate": 1.855459690138232e-07, - "loss": 0.2437, + "loss": 0.0594, "step": 10679 }, { "epoch": 2.97, "learning_rate": 1.8369050932368495e-07, - "loss": 0.086, + "loss": 0.0877, "step": 10680 }, { "epoch": 2.97, "learning_rate": 1.8183504963354672e-07, - "loss": 0.25, + "loss": 0.0383, "step": 10681 }, { "epoch": 2.97, "learning_rate": 1.7997958994340848e-07, - "loss": 0.0844, + "loss": 0.0592, "step": 10682 }, { "epoch": 2.97, "learning_rate": 1.7812413025327025e-07, - "loss": 0.2467, + "loss": 0.0683, "step": 10683 }, { "epoch": 2.97, "learning_rate": 1.7626867056313204e-07, - "loss": 0.1364, + "loss": 0.0079, "step": 10684 }, { "epoch": 2.97, "learning_rate": 1.744132108729938e-07, - "loss": 0.197, + "loss": 0.1219, "step": 10685 }, { "epoch": 2.97, "learning_rate": 1.7255775118285557e-07, - "loss": 0.0311, + "loss": 0.0742, "step": 10686 }, { "epoch": 2.97, "learning_rate": 1.7070229149271736e-07, - "loss": 0.1374, + "loss": 0.0169, "step": 10687 }, { "epoch": 2.97, "learning_rate": 1.6884683180257912e-07, - "loss": 0.0311, + "loss": 0.0463, "step": 10688 }, { "epoch": 2.97, "learning_rate": 1.6699137211244086e-07, - "loss": 0.194, + "loss": 0.0593, "step": 10689 }, { "epoch": 2.98, "learning_rate": 1.6513591242230262e-07, - "loss": 0.0861, + "loss": 0.0692, "step": 10690 }, { "epoch": 2.98, "learning_rate": 1.632804527321644e-07, - "loss": 0.0319, + "loss": 0.1678, "step": 10691 }, { "epoch": 2.98, "learning_rate": 1.6142499304202618e-07, - "loss": 0.1397, + "loss": 0.0081, "step": 10692 }, { "epoch": 2.98, "learning_rate": 1.5956953335188795e-07, - "loss": 0.087, + "loss": 0.1084, "step": 10693 }, { "epoch": 2.98, "learning_rate": 1.577140736617497e-07, - "loss": 0.0853, + "loss": 0.0407, "step": 10694 }, { "epoch": 2.98, "learning_rate": 1.5585861397161147e-07, - "loss": 0.1928, + "loss": 0.1241, "step": 10695 }, { "epoch": 2.98, "learning_rate": 1.5400315428147324e-07, - "loss": 0.1961, + "loss": 0.0116, "step": 10696 }, { "epoch": 2.98, "learning_rate": 1.52147694591335e-07, - "loss": 0.1923, + "loss": 0.0498, "step": 10697 }, { "epoch": 2.98, "learning_rate": 1.502922349011968e-07, - "loss": 0.139, + "loss": 0.1836, "step": 10698 }, { "epoch": 2.98, "learning_rate": 1.4843677521105856e-07, - "loss": 0.1452, + "loss": 0.0178, "step": 10699 }, { "epoch": 2.98, "learning_rate": 1.4658131552092032e-07, - "loss": 0.1401, + "loss": 0.0262, "step": 10700 }, { "epoch": 2.98, "learning_rate": 1.447258558307821e-07, - "loss": 0.1344, + "loss": 0.0401, "step": 10701 }, { "epoch": 2.98, "learning_rate": 1.4287039614064385e-07, - "loss": 0.0855, + "loss": 0.0308, "step": 10702 }, { "epoch": 2.98, "learning_rate": 1.4101493645050562e-07, - "loss": 0.1403, + "loss": 0.0278, "step": 10703 }, { "epoch": 2.98, "learning_rate": 1.3915947676036738e-07, - "loss": 0.4137, + "loss": 0.1341, "step": 10704 }, { "epoch": 2.98, "learning_rate": 1.3730401707022917e-07, - "loss": 0.248, + "loss": 0.0748, "step": 10705 }, { "epoch": 2.98, "learning_rate": 1.354485573800909e-07, - "loss": 0.1398, + "loss": 0.0427, "step": 10706 }, { "epoch": 2.98, "learning_rate": 1.335930976899527e-07, - "loss": 0.0829, + "loss": 0.0565, "step": 10707 }, { "epoch": 2.98, "learning_rate": 1.3173763799981447e-07, - "loss": 0.1945, + "loss": 0.0175, "step": 10708 }, { "epoch": 2.98, "learning_rate": 1.2988217830967623e-07, - "loss": 0.1378, + "loss": 0.0452, "step": 10709 }, { "epoch": 2.98, "learning_rate": 1.28026718619538e-07, - "loss": 0.0817, + "loss": 0.0189, "step": 10710 }, { "epoch": 2.98, "learning_rate": 1.2617125892939976e-07, - "loss": 0.2465, + "loss": 0.0701, "step": 10711 }, { "epoch": 2.98, "learning_rate": 1.2431579923926153e-07, - "loss": 0.1958, + "loss": 0.1807, "step": 10712 }, { "epoch": 2.98, "learning_rate": 1.2246033954912332e-07, - "loss": 0.0849, + "loss": 0.037, "step": 10713 }, { "epoch": 2.98, "learning_rate": 1.2060487985898508e-07, - "loss": 0.2468, + "loss": 0.0184, "step": 10714 }, { "epoch": 2.98, "learning_rate": 1.1874942016884683e-07, - "loss": 0.3004, + "loss": 0.2282, "step": 10715 }, { "epoch": 2.98, "learning_rate": 1.168939604787086e-07, - "loss": 0.0858, + "loss": 0.0573, "step": 10716 }, { "epoch": 2.98, "learning_rate": 1.1503850078857038e-07, - "loss": 0.0822, + "loss": 0.0774, "step": 10717 }, { "epoch": 2.98, "learning_rate": 1.1318304109843214e-07, - "loss": 0.0312, + "loss": 0.083, "step": 10718 }, { "epoch": 2.98, "learning_rate": 1.1132758140829392e-07, - "loss": 0.0314, + "loss": 0.0422, "step": 10719 }, { "epoch": 2.98, "learning_rate": 1.0947212171815567e-07, - "loss": 0.1925, + "loss": 0.0664, "step": 10720 }, { "epoch": 2.98, "learning_rate": 1.0761666202801745e-07, - "loss": 0.0311, + "loss": 0.1073, "step": 10721 }, { "epoch": 2.98, "learning_rate": 1.0576120233787921e-07, - "loss": 0.0318, + "loss": 0.0655, "step": 10722 }, { "epoch": 2.98, "learning_rate": 1.0390574264774099e-07, - "loss": 0.2518, + "loss": 0.0492, "step": 10723 }, { "epoch": 2.98, "learning_rate": 1.0205028295760276e-07, - "loss": 0.0863, + "loss": 0.0507, "step": 10724 }, { "epoch": 2.98, "learning_rate": 1.0019482326746452e-07, - "loss": 0.0863, + "loss": 0.1453, "step": 10725 }, { "epoch": 2.99, "learning_rate": 9.833936357732629e-08, - "loss": 0.14, + "loss": 0.1869, "step": 10726 }, { "epoch": 2.99, "learning_rate": 9.648390388718806e-08, - "loss": 0.3062, + "loss": 0.109, "step": 10727 }, { "epoch": 2.99, "learning_rate": 9.462844419704983e-08, - "loss": 0.084, + "loss": 0.0667, "step": 10728 }, { "epoch": 2.99, "learning_rate": 9.27729845069116e-08, - "loss": 0.0866, + "loss": 0.1347, "step": 10729 }, { "epoch": 2.99, "learning_rate": 9.091752481677336e-08, - "loss": 0.1432, + "loss": 0.1424, "step": 10730 }, { "epoch": 2.99, "learning_rate": 8.906206512663512e-08, - "loss": 0.2995, + "loss": 0.0464, "step": 10731 }, { "epoch": 2.99, "learning_rate": 8.72066054364969e-08, - "loss": 0.0869, + "loss": 0.0569, "step": 10732 }, { "epoch": 2.99, "learning_rate": 8.535114574635868e-08, - "loss": 0.0309, + "loss": 0.0772, "step": 10733 }, { "epoch": 2.99, "learning_rate": 8.349568605622043e-08, - "loss": 0.1985, + "loss": 0.0413, "step": 10734 }, { "epoch": 2.99, "learning_rate": 8.16402263660822e-08, - "loss": 0.0865, + "loss": 0.0736, "step": 10735 }, { "epoch": 2.99, "learning_rate": 7.978476667594397e-08, - "loss": 0.1383, + "loss": 0.0731, "step": 10736 }, { "epoch": 2.99, "learning_rate": 7.792930698580574e-08, - "loss": 0.0849, + "loss": 0.0232, "step": 10737 }, { "epoch": 2.99, "learning_rate": 7.60738472956675e-08, - "loss": 0.2452, + "loss": 0.0229, "step": 10738 }, { "epoch": 2.99, "learning_rate": 7.421838760552928e-08, - "loss": 0.1963, + "loss": 0.178, "step": 10739 }, { "epoch": 2.99, "learning_rate": 7.236292791539104e-08, - "loss": 0.1933, + "loss": 0.0136, "step": 10740 }, { "epoch": 2.99, "learning_rate": 7.050746822525281e-08, - "loss": 0.4133, + "loss": 0.0528, "step": 10741 }, { "epoch": 2.99, "learning_rate": 6.865200853511459e-08, - "loss": 0.1396, + "loss": 0.0177, "step": 10742 }, { "epoch": 2.99, "learning_rate": 6.679654884497635e-08, - "loss": 0.1948, + "loss": 0.0433, "step": 10743 }, { "epoch": 2.99, "learning_rate": 6.494108915483812e-08, - "loss": 0.3507, + "loss": 0.0569, "step": 10744 }, { "epoch": 2.99, "learning_rate": 6.308562946469988e-08, - "loss": 0.0303, + "loss": 0.0756, "step": 10745 }, { "epoch": 2.99, "learning_rate": 6.123016977456166e-08, - "loss": 0.2427, + "loss": 0.019, "step": 10746 }, { "epoch": 2.99, "learning_rate": 5.937471008442342e-08, - "loss": 0.3592, + "loss": 0.0164, "step": 10747 }, { "epoch": 2.99, "learning_rate": 5.751925039428519e-08, - "loss": 0.0843, + "loss": 0.0975, "step": 10748 }, { "epoch": 2.99, "learning_rate": 5.566379070414696e-08, - "loss": 0.0307, + "loss": 0.1497, "step": 10749 }, { "epoch": 2.99, "learning_rate": 5.3808331014008725e-08, - "loss": 0.2501, + "loss": 0.0612, "step": 10750 }, { "epoch": 2.99, "learning_rate": 5.1952871323870496e-08, - "loss": 0.1925, + "loss": 0.076, "step": 10751 }, { "epoch": 2.99, "learning_rate": 5.009741163373226e-08, - "loss": 0.2435, + "loss": 0.0708, "step": 10752 }, { "epoch": 2.99, "learning_rate": 4.824195194359403e-08, - "loss": 0.194, + "loss": 0.1878, "step": 10753 }, { "epoch": 2.99, "learning_rate": 4.63864922534558e-08, - "loss": 0.0871, + "loss": 0.0579, "step": 10754 }, { "epoch": 2.99, "learning_rate": 4.453103256331756e-08, - "loss": 0.1937, + "loss": 0.028, "step": 10755 }, { "epoch": 2.99, "learning_rate": 4.267557287317934e-08, - "loss": 0.1955, + "loss": 0.084, "step": 10756 }, { "epoch": 2.99, "learning_rate": 4.08201131830411e-08, - "loss": 0.0854, + "loss": 0.0494, "step": 10757 }, { "epoch": 2.99, "learning_rate": 3.896465349290287e-08, - "loss": 0.0883, + "loss": 0.0373, "step": 10758 }, { "epoch": 2.99, "learning_rate": 3.710919380276464e-08, - "loss": 0.0305, + "loss": 0.0497, "step": 10759 }, { "epoch": 2.99, "learning_rate": 3.5253734112626405e-08, - "loss": 0.1374, + "loss": 0.0704, "step": 10760 }, { "epoch": 2.99, "learning_rate": 3.3398274422488176e-08, - "loss": 0.142, + "loss": 0.0517, "step": 10761 }, { "epoch": 3.0, "learning_rate": 3.154281473234994e-08, - "loss": 0.031, + "loss": 0.063, "step": 10762 }, { "epoch": 3.0, "learning_rate": 2.968735504221171e-08, - "loss": 0.0854, + "loss": 0.1295, "step": 10763 }, { "epoch": 3.0, "learning_rate": 2.783189535207348e-08, - "loss": 0.2395, + "loss": 0.0513, "step": 10764 }, { "epoch": 3.0, "learning_rate": 2.5976435661935248e-08, - "loss": 0.2981, + "loss": 0.0592, "step": 10765 }, { "epoch": 3.0, "learning_rate": 2.4120975971797016e-08, - "loss": 0.1938, + "loss": 0.0338, "step": 10766 }, { "epoch": 3.0, "learning_rate": 2.226551628165878e-08, - "loss": 0.1966, + "loss": 0.0112, "step": 10767 }, { "epoch": 3.0, "learning_rate": 2.041005659152055e-08, - "loss": 0.0858, + "loss": 0.092, "step": 10768 }, { "epoch": 3.0, "learning_rate": 1.855459690138232e-08, - "loss": 0.1933, + "loss": 0.1126, "step": 10769 }, { "epoch": 3.0, "learning_rate": 1.6699137211244088e-08, - "loss": 0.0876, + "loss": 0.0537, "step": 10770 }, { "epoch": 3.0, "learning_rate": 1.4843677521105854e-08, - "loss": 0.1418, + "loss": 0.026, "step": 10771 }, { "epoch": 3.0, "learning_rate": 1.2988217830967624e-08, - "loss": 0.0852, + "loss": 0.123, "step": 10772 }, { "epoch": 3.0, "learning_rate": 1.113275814082939e-08, - "loss": 0.1367, + "loss": 0.0397, "step": 10773 }, { "epoch": 3.0, "learning_rate": 9.27729845069116e-09, - "loss": 0.1403, + "loss": 0.0209, "step": 10774 }, { "epoch": 3.0, "learning_rate": 7.421838760552927e-09, - "loss": 0.1379, + "loss": 0.015, "step": 10775 }, { "epoch": 3.0, "learning_rate": 5.566379070414695e-09, - "loss": 0.0813, + "loss": 0.0184, "step": 10776 }, { "epoch": 3.0, "learning_rate": 3.7109193802764636e-09, - "loss": 0.1317, + "loss": 0.1582, "step": 10777 }, { "epoch": 3.0, "learning_rate": 1.8554596901382318e-09, - "loss": 0.2475, + "loss": 0.0178, "step": 10778 }, { "epoch": 3.0, "learning_rate": 0.0, - "loss": 0.1077, + "loss": 0.0138, "step": 10779 }, { "epoch": 3.0, "step": 10779, - "total_flos": 5.663507435559936e+16, - "train_loss": 0.14667468480848764, - "train_runtime": 2515.9928, - "train_samples_per_second": 274.165, - "train_steps_per_second": 4.284 + "total_flos": 9.061611896895898e+17, + "train_loss": 0.08240926686040774, + "train_runtime": 7566.0702, + "train_samples_per_second": 91.17, + "train_steps_per_second": 1.425 } ], "max_steps": 10779, "num_train_epochs": 3, - "total_flos": 5.663507435559936e+16, + "total_flos": 9.061611896895898e+17, "trial_name": null, "trial_params": null }