{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1000.0, "eval_steps": 500, "global_step": 13000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 0.003992, "loss": 10.6341, "step": 13 }, { "epoch": 2.0, "learning_rate": 0.003984, "loss": 8.0261, "step": 26 }, { "epoch": 3.0, "learning_rate": 0.003976, "loss": 7.6356, "step": 39 }, { "epoch": 4.0, "learning_rate": 0.003968, "loss": 7.489, "step": 52 }, { "epoch": 5.0, "learning_rate": 0.00396, "loss": 7.3955, "step": 65 }, { "epoch": 6.0, "learning_rate": 0.003952, "loss": 7.3814, "step": 78 }, { "epoch": 7.0, "learning_rate": 0.0039440000000000005, "loss": 7.3919, "step": 91 }, { "epoch": 8.0, "learning_rate": 0.003936, "loss": 7.2877, "step": 104 }, { "epoch": 9.0, "learning_rate": 0.003928, "loss": 7.0588, "step": 117 }, { "epoch": 10.0, "learning_rate": 0.00392, "loss": 6.9853, "step": 130 }, { "epoch": 11.0, "learning_rate": 0.003912, "loss": 6.9981, "step": 143 }, { "epoch": 12.0, "learning_rate": 0.003904, "loss": 6.8759, "step": 156 }, { "epoch": 13.0, "learning_rate": 0.003896, "loss": 6.8897, "step": 169 }, { "epoch": 14.0, "learning_rate": 0.003888, "loss": 7.1851, "step": 182 }, { "epoch": 15.0, "learning_rate": 0.0038799999999999998, "loss": 7.3121, "step": 195 }, { "epoch": 16.0, "learning_rate": 0.003872, "loss": 7.2602, "step": 208 }, { "epoch": 17.0, "learning_rate": 0.003864, "loss": 7.2026, "step": 221 }, { "epoch": 18.0, "learning_rate": 0.003856, "loss": 7.2713, "step": 234 }, { "epoch": 19.0, "learning_rate": 0.003848, "loss": 7.1885, "step": 247 }, { "epoch": 20.0, "learning_rate": 0.00384, "loss": 7.2042, "step": 260 }, { "epoch": 21.0, "learning_rate": 0.003832, "loss": 7.1744, "step": 273 }, { "epoch": 22.0, "learning_rate": 0.0038239999999999997, "loss": 7.0481, "step": 286 }, { "epoch": 23.0, "learning_rate": 0.003816, "loss": 6.8698, "step": 299 }, { "epoch": 24.0, "learning_rate": 0.0038079999999999998, "loss": 6.7722, "step": 312 }, { "epoch": 25.0, "learning_rate": 0.0038, "loss": 6.7018, "step": 325 }, { "epoch": 26.0, "learning_rate": 0.003792, "loss": 6.6881, "step": 338 }, { "epoch": 27.0, "learning_rate": 0.003784, "loss": 6.7485, "step": 351 }, { "epoch": 28.0, "learning_rate": 0.003776, "loss": 6.5876, "step": 364 }, { "epoch": 29.0, "learning_rate": 0.003768, "loss": 6.5597, "step": 377 }, { "epoch": 30.0, "learning_rate": 0.00376, "loss": 6.5379, "step": 390 }, { "epoch": 31.0, "learning_rate": 0.0037519999999999997, "loss": 6.3772, "step": 403 }, { "epoch": 32.0, "learning_rate": 0.0037440000000000004, "loss": 6.3651, "step": 416 }, { "epoch": 33.0, "learning_rate": 0.003736, "loss": 6.305, "step": 429 }, { "epoch": 34.0, "learning_rate": 0.0037280000000000004, "loss": 6.2724, "step": 442 }, { "epoch": 35.0, "learning_rate": 0.00372, "loss": 6.183, "step": 455 }, { "epoch": 36.0, "learning_rate": 0.0037120000000000005, "loss": 6.2141, "step": 468 }, { "epoch": 37.0, "learning_rate": 0.0037040000000000003, "loss": 6.1447, "step": 481 }, { "epoch": 38.0, "learning_rate": 0.003696, "loss": 6.3683, "step": 494 }, { "epoch": 39.0, "learning_rate": 0.0036880000000000003, "loss": 6.2738, "step": 507 }, { "epoch": 40.0, "learning_rate": 0.00368, "loss": 6.0499, "step": 520 }, { "epoch": 41.0, "learning_rate": 0.0036720000000000004, "loss": 5.9005, "step": 533 }, { "epoch": 42.0, "learning_rate": 0.003664, "loss": 5.8533, "step": 546 }, { "epoch": 43.0, "learning_rate": 0.0036560000000000004, "loss": 5.8199, "step": 559 }, { "epoch": 44.0, "learning_rate": 0.003648, "loss": 6.051, "step": 572 }, { "epoch": 45.0, "learning_rate": 0.00364, "loss": 5.8496, "step": 585 }, { "epoch": 46.0, "learning_rate": 0.0036320000000000002, "loss": 5.7252, "step": 598 }, { "epoch": 47.0, "learning_rate": 0.003624, "loss": 5.6958, "step": 611 }, { "epoch": 48.0, "learning_rate": 0.0036160000000000003, "loss": 5.7218, "step": 624 }, { "epoch": 49.0, "learning_rate": 0.003608, "loss": 5.6656, "step": 637 }, { "epoch": 50.0, "learning_rate": 0.0036000000000000003, "loss": 5.612, "step": 650 }, { "epoch": 51.0, "learning_rate": 0.003592, "loss": 5.5532, "step": 663 }, { "epoch": 52.0, "learning_rate": 0.003584, "loss": 5.4327, "step": 676 }, { "epoch": 53.0, "learning_rate": 0.003576, "loss": 5.3979, "step": 689 }, { "epoch": 54.0, "learning_rate": 0.003568, "loss": 5.2903, "step": 702 }, { "epoch": 55.0, "learning_rate": 0.0035600000000000002, "loss": 5.4521, "step": 715 }, { "epoch": 56.0, "learning_rate": 0.003552, "loss": 5.6021, "step": 728 }, { "epoch": 57.0, "learning_rate": 0.0035440000000000003, "loss": 5.5058, "step": 741 }, { "epoch": 58.0, "learning_rate": 0.003536, "loss": 5.2167, "step": 754 }, { "epoch": 59.0, "learning_rate": 0.003528, "loss": 5.2102, "step": 767 }, { "epoch": 60.0, "learning_rate": 0.00352, "loss": 5.2617, "step": 780 }, { "epoch": 61.0, "learning_rate": 0.003512, "loss": 5.3012, "step": 793 }, { "epoch": 62.0, "learning_rate": 0.003504, "loss": 5.2158, "step": 806 }, { "epoch": 63.0, "learning_rate": 0.003496, "loss": 5.1959, "step": 819 }, { "epoch": 64.0, "learning_rate": 0.003488, "loss": 5.1716, "step": 832 }, { "epoch": 65.0, "learning_rate": 0.00348, "loss": 5.0796, "step": 845 }, { "epoch": 66.0, "learning_rate": 0.0034720000000000003, "loss": 4.9764, "step": 858 }, { "epoch": 67.0, "learning_rate": 0.003464, "loss": 4.974, "step": 871 }, { "epoch": 68.0, "learning_rate": 0.003456, "loss": 4.876, "step": 884 }, { "epoch": 69.0, "learning_rate": 0.003448, "loss": 4.8596, "step": 897 }, { "epoch": 70.0, "learning_rate": 0.00344, "loss": 4.7792, "step": 910 }, { "epoch": 71.0, "learning_rate": 0.003432, "loss": 4.765, "step": 923 }, { "epoch": 72.0, "learning_rate": 0.003424, "loss": 4.7933, "step": 936 }, { "epoch": 73.0, "learning_rate": 0.003416, "loss": 4.7636, "step": 949 }, { "epoch": 74.0, "learning_rate": 0.003408, "loss": 4.7114, "step": 962 }, { "epoch": 75.0, "learning_rate": 0.0034, "loss": 4.7079, "step": 975 }, { "epoch": 76.0, "learning_rate": 0.003392, "loss": 4.6745, "step": 988 }, { "epoch": 77.0, "learning_rate": 0.003384, "loss": 4.6765, "step": 1001 }, { "epoch": 78.0, "learning_rate": 0.003376, "loss": 4.5913, "step": 1014 }, { "epoch": 79.0, "learning_rate": 0.003368, "loss": 4.7949, "step": 1027 }, { "epoch": 80.0, "learning_rate": 0.00336, "loss": 4.6311, "step": 1040 }, { "epoch": 81.0, "learning_rate": 0.003352, "loss": 4.4818, "step": 1053 }, { "epoch": 82.0, "learning_rate": 0.0033439999999999998, "loss": 4.4462, "step": 1066 }, { "epoch": 83.0, "learning_rate": 0.003336, "loss": 4.5129, "step": 1079 }, { "epoch": 84.0, "learning_rate": 0.003328, "loss": 4.4626, "step": 1092 }, { "epoch": 85.0, "learning_rate": 0.00332, "loss": 4.3505, "step": 1105 }, { "epoch": 86.0, "learning_rate": 0.003312, "loss": 4.3377, "step": 1118 }, { "epoch": 87.0, "learning_rate": 0.003304, "loss": 4.4076, "step": 1131 }, { "epoch": 88.0, "learning_rate": 0.003296, "loss": 4.3765, "step": 1144 }, { "epoch": 89.0, "learning_rate": 0.0032879999999999997, "loss": 4.2473, "step": 1157 }, { "epoch": 90.0, "learning_rate": 0.00328, "loss": 4.2142, "step": 1170 }, { "epoch": 91.0, "learning_rate": 0.0032719999999999997, "loss": 4.1567, "step": 1183 }, { "epoch": 92.0, "learning_rate": 0.003264, "loss": 4.1569, "step": 1196 }, { "epoch": 93.0, "learning_rate": 0.0032559999999999998, "loss": 4.1347, "step": 1209 }, { "epoch": 94.0, "learning_rate": 0.0032480000000000005, "loss": 4.0786, "step": 1222 }, { "epoch": 95.0, "learning_rate": 0.0032400000000000003, "loss": 4.0796, "step": 1235 }, { "epoch": 96.0, "learning_rate": 0.003232, "loss": 4.0432, "step": 1248 }, { "epoch": 97.0, "learning_rate": 0.0032240000000000003, "loss": 4.033, "step": 1261 }, { "epoch": 98.0, "learning_rate": 0.003216, "loss": 3.952, "step": 1274 }, { "epoch": 99.0, "learning_rate": 0.0032080000000000003, "loss": 4.0043, "step": 1287 }, { "epoch": 100.0, "learning_rate": 0.0032, "loss": 4.2161, "step": 1300 }, { "epoch": 101.0, "learning_rate": 0.0031920000000000004, "loss": 4.1006, "step": 1313 }, { "epoch": 102.0, "learning_rate": 0.003184, "loss": 4.1527, "step": 1326 }, { "epoch": 103.0, "learning_rate": 0.0031760000000000004, "loss": 3.9791, "step": 1339 }, { "epoch": 104.0, "learning_rate": 0.0031680000000000002, "loss": 3.9599, "step": 1352 }, { "epoch": 105.0, "learning_rate": 0.00316, "loss": 3.9998, "step": 1365 }, { "epoch": 106.0, "learning_rate": 0.0031520000000000003, "loss": 3.973, "step": 1378 }, { "epoch": 107.0, "learning_rate": 0.003144, "loss": 3.9976, "step": 1391 }, { "epoch": 108.0, "learning_rate": 0.0031360000000000003, "loss": 3.9862, "step": 1404 }, { "epoch": 109.0, "learning_rate": 0.003128, "loss": 3.8562, "step": 1417 }, { "epoch": 110.0, "learning_rate": 0.0031200000000000004, "loss": 3.8322, "step": 1430 }, { "epoch": 111.0, "learning_rate": 0.003112, "loss": 3.8451, "step": 1443 }, { "epoch": 112.0, "learning_rate": 0.003104, "loss": 3.8274, "step": 1456 }, { "epoch": 113.0, "learning_rate": 0.0030960000000000002, "loss": 3.8483, "step": 1469 }, { "epoch": 114.0, "learning_rate": 0.003088, "loss": 3.7911, "step": 1482 }, { "epoch": 115.0, "learning_rate": 0.0030800000000000003, "loss": 3.8203, "step": 1495 }, { "epoch": 116.0, "learning_rate": 0.003072, "loss": 3.7111, "step": 1508 }, { "epoch": 117.0, "learning_rate": 0.0030640000000000003, "loss": 3.7186, "step": 1521 }, { "epoch": 118.0, "learning_rate": 0.003056, "loss": 3.6357, "step": 1534 }, { "epoch": 119.0, "learning_rate": 0.003048, "loss": 3.6484, "step": 1547 }, { "epoch": 120.0, "learning_rate": 0.00304, "loss": 3.7188, "step": 1560 }, { "epoch": 121.0, "learning_rate": 0.003032, "loss": 3.6217, "step": 1573 }, { "epoch": 122.0, "learning_rate": 0.003024, "loss": 3.5853, "step": 1586 }, { "epoch": 123.0, "learning_rate": 0.003016, "loss": 3.6381, "step": 1599 }, { "epoch": 124.0, "learning_rate": 0.0030080000000000003, "loss": 3.6051, "step": 1612 }, { "epoch": 125.0, "learning_rate": 0.003, "loss": 3.6293, "step": 1625 }, { "epoch": 126.0, "learning_rate": 0.002992, "loss": 3.626, "step": 1638 }, { "epoch": 127.0, "learning_rate": 0.002984, "loss": 3.6121, "step": 1651 }, { "epoch": 128.0, "learning_rate": 0.002976, "loss": 3.5777, "step": 1664 }, { "epoch": 129.0, "learning_rate": 0.002968, "loss": 3.551, "step": 1677 }, { "epoch": 130.0, "learning_rate": 0.00296, "loss": 3.534, "step": 1690 }, { "epoch": 131.0, "learning_rate": 0.002952, "loss": 3.5946, "step": 1703 }, { "epoch": 132.0, "learning_rate": 0.002944, "loss": 3.6511, "step": 1716 }, { "epoch": 133.0, "learning_rate": 0.002936, "loss": 3.5556, "step": 1729 }, { "epoch": 134.0, "learning_rate": 0.002928, "loss": 3.5453, "step": 1742 }, { "epoch": 135.0, "learning_rate": 0.00292, "loss": 3.5641, "step": 1755 }, { "epoch": 136.0, "learning_rate": 0.002912, "loss": 3.5357, "step": 1768 }, { "epoch": 137.0, "learning_rate": 0.002904, "loss": 3.5738, "step": 1781 }, { "epoch": 138.0, "learning_rate": 0.002896, "loss": 3.4697, "step": 1794 }, { "epoch": 139.0, "learning_rate": 0.002888, "loss": 3.4405, "step": 1807 }, { "epoch": 140.0, "learning_rate": 0.0028799999999999997, "loss": 3.3998, "step": 1820 }, { "epoch": 141.0, "learning_rate": 0.002872, "loss": 3.4035, "step": 1833 }, { "epoch": 142.0, "learning_rate": 0.002864, "loss": 3.4335, "step": 1846 }, { "epoch": 143.0, "learning_rate": 0.002856, "loss": 3.4105, "step": 1859 }, { "epoch": 144.0, "learning_rate": 0.002848, "loss": 3.3161, "step": 1872 }, { "epoch": 145.0, "learning_rate": 0.00284, "loss": 3.2802, "step": 1885 }, { "epoch": 146.0, "learning_rate": 0.002832, "loss": 3.2573, "step": 1898 }, { "epoch": 147.0, "learning_rate": 0.0028239999999999997, "loss": 3.265, "step": 1911 }, { "epoch": 148.0, "learning_rate": 0.002816, "loss": 3.3362, "step": 1924 }, { "epoch": 149.0, "learning_rate": 0.0028079999999999997, "loss": 3.2085, "step": 1937 }, { "epoch": 150.0, "learning_rate": 0.0028, "loss": 3.2445, "step": 1950 }, { "epoch": 151.0, "learning_rate": 0.0027919999999999998, "loss": 3.2212, "step": 1963 }, { "epoch": 152.0, "learning_rate": 0.002784, "loss": 3.2135, "step": 1976 }, { "epoch": 153.0, "learning_rate": 0.002776, "loss": 3.173, "step": 1989 }, { "epoch": 154.0, "learning_rate": 0.002768, "loss": 3.1946, "step": 2002 }, { "epoch": 155.0, "learning_rate": 0.00276, "loss": 3.1739, "step": 2015 }, { "epoch": 156.0, "learning_rate": 0.0027519999999999997, "loss": 3.1975, "step": 2028 }, { "epoch": 157.0, "learning_rate": 0.0027440000000000003, "loss": 3.148, "step": 2041 }, { "epoch": 158.0, "learning_rate": 0.002736, "loss": 3.1124, "step": 2054 }, { "epoch": 159.0, "learning_rate": 0.0027280000000000004, "loss": 3.1101, "step": 2067 }, { "epoch": 160.0, "learning_rate": 0.00272, "loss": 3.155, "step": 2080 }, { "epoch": 161.0, "learning_rate": 0.0027120000000000004, "loss": 3.091, "step": 2093 }, { "epoch": 162.0, "learning_rate": 0.0027040000000000002, "loss": 3.0156, "step": 2106 }, { "epoch": 163.0, "learning_rate": 0.002696, "loss": 3.031, "step": 2119 }, { "epoch": 164.0, "learning_rate": 0.0026880000000000003, "loss": 3.0426, "step": 2132 }, { "epoch": 165.0, "learning_rate": 0.00268, "loss": 2.9667, "step": 2145 }, { "epoch": 166.0, "learning_rate": 0.0026720000000000003, "loss": 2.9496, "step": 2158 }, { "epoch": 167.0, "learning_rate": 0.002664, "loss": 3.0151, "step": 2171 }, { "epoch": 168.0, "learning_rate": 0.0026560000000000004, "loss": 3.0202, "step": 2184 }, { "epoch": 169.0, "learning_rate": 0.002648, "loss": 3.1202, "step": 2197 }, { "epoch": 170.0, "learning_rate": 0.00264, "loss": 3.0814, "step": 2210 }, { "epoch": 171.0, "learning_rate": 0.0026320000000000002, "loss": 2.9501, "step": 2223 }, { "epoch": 172.0, "learning_rate": 0.002624, "loss": 2.8994, "step": 2236 }, { "epoch": 173.0, "learning_rate": 0.0026160000000000003, "loss": 2.8437, "step": 2249 }, { "epoch": 174.0, "learning_rate": 0.002608, "loss": 2.8867, "step": 2262 }, { "epoch": 175.0, "learning_rate": 0.0026000000000000003, "loss": 2.8977, "step": 2275 }, { "epoch": 176.0, "learning_rate": 0.002592, "loss": 2.8601, "step": 2288 }, { "epoch": 177.0, "learning_rate": 0.002584, "loss": 2.9511, "step": 2301 }, { "epoch": 178.0, "learning_rate": 0.002576, "loss": 2.8396, "step": 2314 }, { "epoch": 179.0, "learning_rate": 0.002568, "loss": 2.8238, "step": 2327 }, { "epoch": 180.0, "learning_rate": 0.00256, "loss": 2.8048, "step": 2340 }, { "epoch": 181.0, "learning_rate": 0.002552, "loss": 2.7583, "step": 2353 }, { "epoch": 182.0, "learning_rate": 0.0025440000000000003, "loss": 2.7443, "step": 2366 }, { "epoch": 183.0, "learning_rate": 0.002536, "loss": 2.7362, "step": 2379 }, { "epoch": 184.0, "learning_rate": 0.002528, "loss": 2.7878, "step": 2392 }, { "epoch": 185.0, "learning_rate": 0.00252, "loss": 2.7811, "step": 2405 }, { "epoch": 186.0, "learning_rate": 0.002512, "loss": 2.7213, "step": 2418 }, { "epoch": 187.0, "learning_rate": 0.002504, "loss": 2.7716, "step": 2431 }, { "epoch": 188.0, "learning_rate": 0.002496, "loss": 2.7761, "step": 2444 }, { "epoch": 189.0, "learning_rate": 0.002488, "loss": 2.7456, "step": 2457 }, { "epoch": 190.0, "learning_rate": 0.00248, "loss": 2.9211, "step": 2470 }, { "epoch": 191.0, "learning_rate": 0.0024720000000000002, "loss": 2.9644, "step": 2483 }, { "epoch": 192.0, "learning_rate": 0.002464, "loss": 2.7444, "step": 2496 }, { "epoch": 193.0, "learning_rate": 0.002456, "loss": 2.7094, "step": 2509 }, { "epoch": 194.0, "learning_rate": 0.002448, "loss": 2.6593, "step": 2522 }, { "epoch": 195.0, "learning_rate": 0.00244, "loss": 2.6424, "step": 2535 }, { "epoch": 196.0, "learning_rate": 0.002432, "loss": 2.5913, "step": 2548 }, { "epoch": 197.0, "learning_rate": 0.002424, "loss": 2.6003, "step": 2561 }, { "epoch": 198.0, "learning_rate": 0.002416, "loss": 2.6317, "step": 2574 }, { "epoch": 199.0, "learning_rate": 0.002408, "loss": 2.6468, "step": 2587 }, { "epoch": 200.0, "learning_rate": 0.0024, "loss": 2.5951, "step": 2600 }, { "epoch": 201.0, "learning_rate": 0.002392, "loss": 2.5915, "step": 2613 }, { "epoch": 202.0, "learning_rate": 0.002384, "loss": 2.568, "step": 2626 }, { "epoch": 203.0, "learning_rate": 0.002376, "loss": 2.5466, "step": 2639 }, { "epoch": 204.0, "learning_rate": 0.002368, "loss": 2.6858, "step": 2652 }, { "epoch": 205.0, "learning_rate": 0.00236, "loss": 2.5551, "step": 2665 }, { "epoch": 206.0, "learning_rate": 0.002352, "loss": 2.5618, "step": 2678 }, { "epoch": 207.0, "learning_rate": 0.0023439999999999997, "loss": 2.5309, "step": 2691 }, { "epoch": 208.0, "learning_rate": 0.002336, "loss": 2.5307, "step": 2704 }, { "epoch": 209.0, "learning_rate": 0.0023279999999999998, "loss": 2.5008, "step": 2717 }, { "epoch": 210.0, "learning_rate": 0.00232, "loss": 2.5485, "step": 2730 }, { "epoch": 211.0, "learning_rate": 0.002312, "loss": 2.547, "step": 2743 }, { "epoch": 212.0, "learning_rate": 0.002304, "loss": 2.461, "step": 2756 }, { "epoch": 213.0, "learning_rate": 0.002296, "loss": 2.4375, "step": 2769 }, { "epoch": 214.0, "learning_rate": 0.0022879999999999997, "loss": 2.4417, "step": 2782 }, { "epoch": 215.0, "learning_rate": 0.00228, "loss": 2.4427, "step": 2795 }, { "epoch": 216.0, "learning_rate": 0.0022719999999999997, "loss": 2.4756, "step": 2808 }, { "epoch": 217.0, "learning_rate": 0.002264, "loss": 2.4662, "step": 2821 }, { "epoch": 218.0, "learning_rate": 0.0022559999999999998, "loss": 2.4931, "step": 2834 }, { "epoch": 219.0, "learning_rate": 0.0022480000000000004, "loss": 2.4438, "step": 2847 }, { "epoch": 220.0, "learning_rate": 0.0022400000000000002, "loss": 2.3834, "step": 2860 }, { "epoch": 221.0, "learning_rate": 0.002232, "loss": 2.4078, "step": 2873 }, { "epoch": 222.0, "learning_rate": 0.0022240000000000003, "loss": 2.3813, "step": 2886 }, { "epoch": 223.0, "learning_rate": 0.002216, "loss": 2.382, "step": 2899 }, { "epoch": 224.0, "learning_rate": 0.0022080000000000003, "loss": 2.361, "step": 2912 }, { "epoch": 225.0, "learning_rate": 0.0022, "loss": 2.3106, "step": 2925 }, { "epoch": 226.0, "learning_rate": 0.0021920000000000004, "loss": 2.2991, "step": 2938 }, { "epoch": 227.0, "learning_rate": 0.002184, "loss": 2.231, "step": 2951 }, { "epoch": 228.0, "learning_rate": 0.0021760000000000004, "loss": 2.2748, "step": 2964 }, { "epoch": 229.0, "learning_rate": 0.0021680000000000002, "loss": 2.2974, "step": 2977 }, { "epoch": 230.0, "learning_rate": 0.00216, "loss": 2.2974, "step": 2990 }, { "epoch": 231.0, "learning_rate": 0.0021520000000000003, "loss": 2.2755, "step": 3003 }, { "epoch": 232.0, "learning_rate": 0.002144, "loss": 2.287, "step": 3016 }, { "epoch": 233.0, "learning_rate": 0.0021360000000000003, "loss": 2.2462, "step": 3029 }, { "epoch": 234.0, "learning_rate": 0.002128, "loss": 2.2528, "step": 3042 }, { "epoch": 235.0, "learning_rate": 0.0021200000000000004, "loss": 2.2052, "step": 3055 }, { "epoch": 236.0, "learning_rate": 0.002112, "loss": 2.2461, "step": 3068 }, { "epoch": 237.0, "learning_rate": 0.002104, "loss": 2.2099, "step": 3081 }, { "epoch": 238.0, "learning_rate": 0.002096, "loss": 2.1273, "step": 3094 }, { "epoch": 239.0, "learning_rate": 0.002088, "loss": 2.1668, "step": 3107 }, { "epoch": 240.0, "learning_rate": 0.0020800000000000003, "loss": 2.1719, "step": 3120 }, { "epoch": 241.0, "learning_rate": 0.002072, "loss": 2.171, "step": 3133 }, { "epoch": 242.0, "learning_rate": 0.0020640000000000003, "loss": 2.1436, "step": 3146 }, { "epoch": 243.0, "learning_rate": 0.002056, "loss": 2.1698, "step": 3159 }, { "epoch": 244.0, "learning_rate": 0.002048, "loss": 2.1576, "step": 3172 }, { "epoch": 245.0, "learning_rate": 0.00204, "loss": 2.1641, "step": 3185 }, { "epoch": 246.0, "learning_rate": 0.002032, "loss": 2.1721, "step": 3198 }, { "epoch": 247.0, "learning_rate": 0.002024, "loss": 2.1615, "step": 3211 }, { "epoch": 248.0, "learning_rate": 0.002016, "loss": 2.0983, "step": 3224 }, { "epoch": 249.0, "learning_rate": 0.0020080000000000002, "loss": 2.108, "step": 3237 }, { "epoch": 250.0, "learning_rate": 0.002, "loss": 2.1167, "step": 3250 }, { "epoch": 251.0, "learning_rate": 0.001992, "loss": 2.0951, "step": 3263 }, { "epoch": 252.0, "learning_rate": 0.001984, "loss": 2.0415, "step": 3276 }, { "epoch": 253.0, "learning_rate": 0.001976, "loss": 2.101, "step": 3289 }, { "epoch": 254.0, "learning_rate": 0.001968, "loss": 2.1233, "step": 3302 }, { "epoch": 255.0, "learning_rate": 0.00196, "loss": 2.0782, "step": 3315 }, { "epoch": 256.0, "learning_rate": 0.001952, "loss": 2.0033, "step": 3328 }, { "epoch": 257.0, "learning_rate": 0.001944, "loss": 2.051, "step": 3341 }, { "epoch": 258.0, "learning_rate": 0.001936, "loss": 2.0587, "step": 3354 }, { "epoch": 259.0, "learning_rate": 0.001928, "loss": 1.9981, "step": 3367 }, { "epoch": 260.0, "learning_rate": 0.00192, "loss": 2.0506, "step": 3380 }, { "epoch": 261.0, "learning_rate": 0.0019119999999999999, "loss": 2.0815, "step": 3393 }, { "epoch": 262.0, "learning_rate": 0.0019039999999999999, "loss": 2.0054, "step": 3406 }, { "epoch": 263.0, "learning_rate": 0.001896, "loss": 1.9923, "step": 3419 }, { "epoch": 264.0, "learning_rate": 0.001888, "loss": 1.9892, "step": 3432 }, { "epoch": 265.0, "learning_rate": 0.00188, "loss": 1.9406, "step": 3445 }, { "epoch": 266.0, "learning_rate": 0.0018720000000000002, "loss": 1.9295, "step": 3458 }, { "epoch": 267.0, "learning_rate": 0.0018640000000000002, "loss": 1.9791, "step": 3471 }, { "epoch": 268.0, "learning_rate": 0.0018560000000000002, "loss": 1.9413, "step": 3484 }, { "epoch": 269.0, "learning_rate": 0.001848, "loss": 1.9363, "step": 3497 }, { "epoch": 270.0, "learning_rate": 0.00184, "loss": 2.0056, "step": 3510 }, { "epoch": 271.0, "learning_rate": 0.001832, "loss": 1.9298, "step": 3523 }, { "epoch": 272.0, "learning_rate": 0.001824, "loss": 1.9045, "step": 3536 }, { "epoch": 273.0, "learning_rate": 0.0018160000000000001, "loss": 1.9165, "step": 3549 }, { "epoch": 274.0, "learning_rate": 0.0018080000000000001, "loss": 1.9214, "step": 3562 }, { "epoch": 275.0, "learning_rate": 0.0018000000000000002, "loss": 1.9063, "step": 3575 }, { "epoch": 276.0, "learning_rate": 0.001792, "loss": 1.9016, "step": 3588 }, { "epoch": 277.0, "learning_rate": 0.001784, "loss": 1.8091, "step": 3601 }, { "epoch": 278.0, "learning_rate": 0.001776, "loss": 1.8626, "step": 3614 }, { "epoch": 279.0, "learning_rate": 0.001768, "loss": 1.8663, "step": 3627 }, { "epoch": 280.0, "learning_rate": 0.00176, "loss": 1.9432, "step": 3640 }, { "epoch": 281.0, "learning_rate": 0.001752, "loss": 1.8664, "step": 3653 }, { "epoch": 282.0, "learning_rate": 0.001744, "loss": 1.8603, "step": 3666 }, { "epoch": 283.0, "learning_rate": 0.0017360000000000001, "loss": 1.8335, "step": 3679 }, { "epoch": 284.0, "learning_rate": 0.001728, "loss": 1.8625, "step": 3692 }, { "epoch": 285.0, "learning_rate": 0.00172, "loss": 1.8043, "step": 3705 }, { "epoch": 286.0, "learning_rate": 0.001712, "loss": 1.8061, "step": 3718 }, { "epoch": 287.0, "learning_rate": 0.001704, "loss": 1.835, "step": 3731 }, { "epoch": 288.0, "learning_rate": 0.001696, "loss": 1.7944, "step": 3744 }, { "epoch": 289.0, "learning_rate": 0.001688, "loss": 1.8492, "step": 3757 }, { "epoch": 290.0, "learning_rate": 0.00168, "loss": 1.812, "step": 3770 }, { "epoch": 291.0, "learning_rate": 0.0016719999999999999, "loss": 1.8175, "step": 3783 }, { "epoch": 292.0, "learning_rate": 0.001664, "loss": 1.7943, "step": 3796 }, { "epoch": 293.0, "learning_rate": 0.001656, "loss": 1.8063, "step": 3809 }, { "epoch": 294.0, "learning_rate": 0.001648, "loss": 1.7992, "step": 3822 }, { "epoch": 295.0, "learning_rate": 0.00164, "loss": 1.7959, "step": 3835 }, { "epoch": 296.0, "learning_rate": 0.001632, "loss": 1.7256, "step": 3848 }, { "epoch": 297.0, "learning_rate": 0.0016240000000000002, "loss": 1.7673, "step": 3861 }, { "epoch": 298.0, "learning_rate": 0.001616, "loss": 1.8299, "step": 3874 }, { "epoch": 299.0, "learning_rate": 0.001608, "loss": 1.8147, "step": 3887 }, { "epoch": 300.0, "learning_rate": 0.0016, "loss": 1.7495, "step": 3900 }, { "epoch": 301.0, "learning_rate": 0.001592, "loss": 1.8001, "step": 3913 }, { "epoch": 302.0, "learning_rate": 0.0015840000000000001, "loss": 1.7707, "step": 3926 }, { "epoch": 303.0, "learning_rate": 0.0015760000000000001, "loss": 1.7283, "step": 3939 }, { "epoch": 304.0, "learning_rate": 0.0015680000000000002, "loss": 1.7133, "step": 3952 }, { "epoch": 305.0, "learning_rate": 0.0015600000000000002, "loss": 1.71, "step": 3965 }, { "epoch": 306.0, "learning_rate": 0.001552, "loss": 1.6685, "step": 3978 }, { "epoch": 307.0, "learning_rate": 0.001544, "loss": 1.6526, "step": 3991 }, { "epoch": 308.0, "learning_rate": 0.001536, "loss": 1.6433, "step": 4004 }, { "epoch": 309.0, "learning_rate": 0.001528, "loss": 1.6823, "step": 4017 }, { "epoch": 310.0, "learning_rate": 0.00152, "loss": 1.6843, "step": 4030 }, { "epoch": 311.0, "learning_rate": 0.001512, "loss": 1.7029, "step": 4043 }, { "epoch": 312.0, "learning_rate": 0.0015040000000000001, "loss": 1.6362, "step": 4056 }, { "epoch": 313.0, "learning_rate": 0.001496, "loss": 1.6648, "step": 4069 }, { "epoch": 314.0, "learning_rate": 0.001488, "loss": 1.7202, "step": 4082 }, { "epoch": 315.0, "learning_rate": 0.00148, "loss": 1.677, "step": 4095 }, { "epoch": 316.0, "learning_rate": 0.001472, "loss": 1.6187, "step": 4108 }, { "epoch": 317.0, "learning_rate": 0.001464, "loss": 1.6398, "step": 4121 }, { "epoch": 318.0, "learning_rate": 0.001456, "loss": 1.6371, "step": 4134 }, { "epoch": 319.0, "learning_rate": 0.001448, "loss": 1.6081, "step": 4147 }, { "epoch": 320.0, "learning_rate": 0.0014399999999999999, "loss": 1.5936, "step": 4160 }, { "epoch": 321.0, "learning_rate": 0.001432, "loss": 1.6336, "step": 4173 }, { "epoch": 322.0, "learning_rate": 0.001424, "loss": 1.6022, "step": 4186 }, { "epoch": 323.0, "learning_rate": 0.001416, "loss": 1.6336, "step": 4199 }, { "epoch": 324.0, "learning_rate": 0.001408, "loss": 1.5898, "step": 4212 }, { "epoch": 325.0, "learning_rate": 0.0014, "loss": 1.5528, "step": 4225 }, { "epoch": 326.0, "learning_rate": 0.001392, "loss": 1.5734, "step": 4238 }, { "epoch": 327.0, "learning_rate": 0.001384, "loss": 1.618, "step": 4251 }, { "epoch": 328.0, "learning_rate": 0.0013759999999999998, "loss": 1.6529, "step": 4264 }, { "epoch": 329.0, "learning_rate": 0.001368, "loss": 1.5824, "step": 4277 }, { "epoch": 330.0, "learning_rate": 0.00136, "loss": 1.609, "step": 4290 }, { "epoch": 331.0, "learning_rate": 0.0013520000000000001, "loss": 1.5796, "step": 4303 }, { "epoch": 332.0, "learning_rate": 0.0013440000000000001, "loss": 1.5924, "step": 4316 }, { "epoch": 333.0, "learning_rate": 0.0013360000000000002, "loss": 1.5841, "step": 4329 }, { "epoch": 334.0, "learning_rate": 0.0013280000000000002, "loss": 1.5487, "step": 4342 }, { "epoch": 335.0, "learning_rate": 0.00132, "loss": 1.4625, "step": 4355 }, { "epoch": 336.0, "learning_rate": 0.001312, "loss": 1.5241, "step": 4368 }, { "epoch": 337.0, "learning_rate": 0.001304, "loss": 1.4823, "step": 4381 }, { "epoch": 338.0, "learning_rate": 0.001296, "loss": 1.5027, "step": 4394 }, { "epoch": 339.0, "learning_rate": 0.001288, "loss": 1.5211, "step": 4407 }, { "epoch": 340.0, "learning_rate": 0.00128, "loss": 1.4912, "step": 4420 }, { "epoch": 341.0, "learning_rate": 0.0012720000000000001, "loss": 1.4792, "step": 4433 }, { "epoch": 342.0, "learning_rate": 0.001264, "loss": 1.4932, "step": 4446 }, { "epoch": 343.0, "learning_rate": 0.001256, "loss": 1.4861, "step": 4459 }, { "epoch": 344.0, "learning_rate": 0.001248, "loss": 1.5171, "step": 4472 }, { "epoch": 345.0, "learning_rate": 0.00124, "loss": 1.494, "step": 4485 }, { "epoch": 346.0, "learning_rate": 0.001232, "loss": 1.4992, "step": 4498 }, { "epoch": 347.0, "learning_rate": 0.001224, "loss": 1.5033, "step": 4511 }, { "epoch": 348.0, "learning_rate": 0.001216, "loss": 1.5039, "step": 4524 }, { "epoch": 349.0, "learning_rate": 0.001208, "loss": 1.5341, "step": 4537 }, { "epoch": 350.0, "learning_rate": 0.0012, "loss": 1.5049, "step": 4550 }, { "epoch": 351.0, "learning_rate": 0.001192, "loss": 1.5104, "step": 4563 }, { "epoch": 352.0, "learning_rate": 0.001184, "loss": 1.4569, "step": 4576 }, { "epoch": 353.0, "learning_rate": 0.001176, "loss": 1.3996, "step": 4589 }, { "epoch": 354.0, "learning_rate": 0.001168, "loss": 1.4337, "step": 4602 }, { "epoch": 355.0, "learning_rate": 0.00116, "loss": 1.4572, "step": 4615 }, { "epoch": 356.0, "learning_rate": 0.001152, "loss": 1.4668, "step": 4628 }, { "epoch": 357.0, "learning_rate": 0.0011439999999999998, "loss": 1.4298, "step": 4641 }, { "epoch": 358.0, "learning_rate": 0.0011359999999999999, "loss": 1.4187, "step": 4654 }, { "epoch": 359.0, "learning_rate": 0.0011279999999999999, "loss": 1.4026, "step": 4667 }, { "epoch": 360.0, "learning_rate": 0.0011200000000000001, "loss": 1.4461, "step": 4680 }, { "epoch": 361.0, "learning_rate": 0.0011120000000000001, "loss": 1.4497, "step": 4693 }, { "epoch": 362.0, "learning_rate": 0.0011040000000000002, "loss": 1.3667, "step": 4706 }, { "epoch": 363.0, "learning_rate": 0.0010960000000000002, "loss": 1.4237, "step": 4719 }, { "epoch": 364.0, "learning_rate": 0.0010880000000000002, "loss": 1.485, "step": 4732 }, { "epoch": 365.0, "learning_rate": 0.00108, "loss": 1.4271, "step": 4745 }, { "epoch": 366.0, "learning_rate": 0.001072, "loss": 1.4046, "step": 4758 }, { "epoch": 367.0, "learning_rate": 0.001064, "loss": 1.3771, "step": 4771 }, { "epoch": 368.0, "learning_rate": 0.001056, "loss": 1.4054, "step": 4784 }, { "epoch": 369.0, "learning_rate": 0.001048, "loss": 1.3886, "step": 4797 }, { "epoch": 370.0, "learning_rate": 0.0010400000000000001, "loss": 1.3583, "step": 4810 }, { "epoch": 371.0, "learning_rate": 0.0010320000000000001, "loss": 1.3606, "step": 4823 }, { "epoch": 372.0, "learning_rate": 0.001024, "loss": 1.3619, "step": 4836 }, { "epoch": 373.0, "learning_rate": 0.001016, "loss": 1.3723, "step": 4849 }, { "epoch": 374.0, "learning_rate": 0.001008, "loss": 1.3604, "step": 4862 }, { "epoch": 375.0, "learning_rate": 0.001, "loss": 1.3745, "step": 4875 }, { "epoch": 376.0, "learning_rate": 0.000992, "loss": 1.393, "step": 4888 }, { "epoch": 377.0, "learning_rate": 0.000984, "loss": 1.3846, "step": 4901 }, { "epoch": 378.0, "learning_rate": 0.000976, "loss": 1.4033, "step": 4914 }, { "epoch": 379.0, "learning_rate": 0.000968, "loss": 1.3204, "step": 4927 }, { "epoch": 380.0, "learning_rate": 0.00096, "loss": 1.3257, "step": 4940 }, { "epoch": 381.0, "learning_rate": 0.0009519999999999999, "loss": 1.3274, "step": 4953 }, { "epoch": 382.0, "learning_rate": 0.000944, "loss": 1.3177, "step": 4966 }, { "epoch": 383.0, "learning_rate": 0.0009360000000000001, "loss": 1.3204, "step": 4979 }, { "epoch": 384.0, "learning_rate": 0.0009280000000000001, "loss": 1.3349, "step": 4992 }, { "epoch": 385.0, "learning_rate": 0.00092, "loss": 1.3149, "step": 5005 }, { "epoch": 386.0, "learning_rate": 0.000912, "loss": 1.2994, "step": 5018 }, { "epoch": 387.0, "learning_rate": 0.0009040000000000001, "loss": 1.3295, "step": 5031 }, { "epoch": 388.0, "learning_rate": 0.000896, "loss": 1.2975, "step": 5044 }, { "epoch": 389.0, "learning_rate": 0.000888, "loss": 1.3118, "step": 5057 }, { "epoch": 390.0, "learning_rate": 0.00088, "loss": 1.2712, "step": 5070 }, { "epoch": 391.0, "learning_rate": 0.000872, "loss": 1.3184, "step": 5083 }, { "epoch": 392.0, "learning_rate": 0.000864, "loss": 1.2687, "step": 5096 }, { "epoch": 393.0, "learning_rate": 0.000856, "loss": 1.2826, "step": 5109 }, { "epoch": 394.0, "learning_rate": 0.000848, "loss": 1.2766, "step": 5122 }, { "epoch": 395.0, "learning_rate": 0.00084, "loss": 1.2935, "step": 5135 }, { "epoch": 396.0, "learning_rate": 0.000832, "loss": 1.288, "step": 5148 }, { "epoch": 397.0, "learning_rate": 0.000824, "loss": 1.2617, "step": 5161 }, { "epoch": 398.0, "learning_rate": 0.000816, "loss": 1.2675, "step": 5174 }, { "epoch": 399.0, "learning_rate": 0.000808, "loss": 1.2895, "step": 5187 }, { "epoch": 400.0, "learning_rate": 0.0008, "loss": 1.2721, "step": 5200 }, { "epoch": 401.0, "learning_rate": 0.0007920000000000001, "loss": 1.2897, "step": 5213 }, { "epoch": 402.0, "learning_rate": 0.0007840000000000001, "loss": 1.2608, "step": 5226 }, { "epoch": 403.0, "learning_rate": 0.000776, "loss": 1.271, "step": 5239 }, { "epoch": 404.0, "learning_rate": 0.000768, "loss": 1.2581, "step": 5252 }, { "epoch": 405.0, "learning_rate": 0.00076, "loss": 1.2497, "step": 5265 }, { "epoch": 406.0, "learning_rate": 0.0007520000000000001, "loss": 1.2846, "step": 5278 }, { "epoch": 407.0, "learning_rate": 0.000744, "loss": 1.2718, "step": 5291 }, { "epoch": 408.0, "learning_rate": 0.000736, "loss": 1.2733, "step": 5304 }, { "epoch": 409.0, "learning_rate": 0.000728, "loss": 1.2918, "step": 5317 }, { "epoch": 410.0, "learning_rate": 0.0007199999999999999, "loss": 1.2659, "step": 5330 }, { "epoch": 411.0, "learning_rate": 0.000712, "loss": 1.2946, "step": 5343 }, { "epoch": 412.0, "learning_rate": 0.000704, "loss": 1.2425, "step": 5356 }, { "epoch": 413.0, "learning_rate": 0.000696, "loss": 1.2293, "step": 5369 }, { "epoch": 414.0, "learning_rate": 0.0006879999999999999, "loss": 1.2847, "step": 5382 }, { "epoch": 415.0, "learning_rate": 0.00068, "loss": 1.2318, "step": 5395 }, { "epoch": 416.0, "learning_rate": 0.0006720000000000001, "loss": 1.237, "step": 5408 }, { "epoch": 417.0, "learning_rate": 0.0006640000000000001, "loss": 1.1875, "step": 5421 }, { "epoch": 418.0, "learning_rate": 0.000656, "loss": 1.2204, "step": 5434 }, { "epoch": 419.0, "learning_rate": 0.000648, "loss": 1.1848, "step": 5447 }, { "epoch": 420.0, "learning_rate": 0.00064, "loss": 1.2146, "step": 5460 }, { "epoch": 421.0, "learning_rate": 0.000632, "loss": 1.1621, "step": 5473 }, { "epoch": 422.0, "learning_rate": 0.000624, "loss": 1.1883, "step": 5486 }, { "epoch": 423.0, "learning_rate": 0.000616, "loss": 1.183, "step": 5499 }, { "epoch": 424.0, "learning_rate": 0.000608, "loss": 1.1649, "step": 5512 }, { "epoch": 425.0, "learning_rate": 0.0006, "loss": 1.1824, "step": 5525 }, { "epoch": 426.0, "learning_rate": 0.000592, "loss": 1.2073, "step": 5538 }, { "epoch": 427.0, "learning_rate": 0.000584, "loss": 1.147, "step": 5551 }, { "epoch": 428.0, "learning_rate": 0.000576, "loss": 1.1798, "step": 5564 }, { "epoch": 429.0, "learning_rate": 0.0005679999999999999, "loss": 1.14, "step": 5577 }, { "epoch": 430.0, "learning_rate": 0.0005600000000000001, "loss": 1.1585, "step": 5590 }, { "epoch": 431.0, "learning_rate": 0.0005520000000000001, "loss": 1.1687, "step": 5603 }, { "epoch": 432.0, "learning_rate": 0.0005440000000000001, "loss": 1.1285, "step": 5616 }, { "epoch": 433.0, "learning_rate": 0.000536, "loss": 1.1472, "step": 5629 }, { "epoch": 434.0, "learning_rate": 0.000528, "loss": 1.1894, "step": 5642 }, { "epoch": 435.0, "learning_rate": 0.0005200000000000001, "loss": 1.1606, "step": 5655 }, { "epoch": 436.0, "learning_rate": 0.000512, "loss": 1.1294, "step": 5668 }, { "epoch": 437.0, "learning_rate": 0.000504, "loss": 1.1597, "step": 5681 }, { "epoch": 438.0, "learning_rate": 0.000496, "loss": 1.1772, "step": 5694 }, { "epoch": 439.0, "learning_rate": 0.000488, "loss": 1.2044, "step": 5707 }, { "epoch": 440.0, "learning_rate": 0.00048, "loss": 1.1543, "step": 5720 }, { "epoch": 441.0, "learning_rate": 0.000472, "loss": 1.1868, "step": 5733 }, { "epoch": 442.0, "learning_rate": 0.00046400000000000006, "loss": 1.1821, "step": 5746 }, { "epoch": 443.0, "learning_rate": 0.000456, "loss": 1.0897, "step": 5759 }, { "epoch": 444.0, "learning_rate": 0.000448, "loss": 1.0977, "step": 5772 }, { "epoch": 445.0, "learning_rate": 0.00044, "loss": 1.1695, "step": 5785 }, { "epoch": 446.0, "learning_rate": 0.000432, "loss": 1.1332, "step": 5798 }, { "epoch": 447.0, "learning_rate": 0.000424, "loss": 1.1321, "step": 5811 }, { "epoch": 448.0, "learning_rate": 0.000416, "loss": 1.1315, "step": 5824 }, { "epoch": 449.0, "learning_rate": 0.000408, "loss": 1.1178, "step": 5837 }, { "epoch": 450.0, "learning_rate": 0.0004, "loss": 1.1163, "step": 5850 }, { "epoch": 451.0, "learning_rate": 0.00039200000000000004, "loss": 1.1414, "step": 5863 }, { "epoch": 452.0, "learning_rate": 0.000384, "loss": 1.1274, "step": 5876 }, { "epoch": 453.0, "learning_rate": 0.00037600000000000003, "loss": 1.1067, "step": 5889 }, { "epoch": 454.0, "learning_rate": 0.000368, "loss": 1.0889, "step": 5902 }, { "epoch": 455.0, "learning_rate": 0.00035999999999999997, "loss": 1.0844, "step": 5915 }, { "epoch": 456.0, "learning_rate": 0.000352, "loss": 1.1341, "step": 5928 }, { "epoch": 457.0, "learning_rate": 0.00034399999999999996, "loss": 1.0644, "step": 5941 }, { "epoch": 458.0, "learning_rate": 0.00033600000000000004, "loss": 1.0991, "step": 5954 }, { "epoch": 459.0, "learning_rate": 0.000328, "loss": 1.1176, "step": 5967 }, { "epoch": 460.0, "learning_rate": 0.00032, "loss": 1.0997, "step": 5980 }, { "epoch": 461.0, "learning_rate": 0.000312, "loss": 1.0997, "step": 5993 }, { "epoch": 462.0, "learning_rate": 0.000304, "loss": 1.0763, "step": 6006 }, { "epoch": 463.0, "learning_rate": 0.000296, "loss": 1.1102, "step": 6019 }, { "epoch": 464.0, "learning_rate": 0.000288, "loss": 1.1236, "step": 6032 }, { "epoch": 465.0, "learning_rate": 0.00028000000000000003, "loss": 1.0941, "step": 6045 }, { "epoch": 466.0, "learning_rate": 0.00027200000000000005, "loss": 1.0976, "step": 6058 }, { "epoch": 467.0, "learning_rate": 0.000264, "loss": 1.0688, "step": 6071 }, { "epoch": 468.0, "learning_rate": 0.000256, "loss": 1.0591, "step": 6084 }, { "epoch": 469.0, "learning_rate": 0.000248, "loss": 1.0695, "step": 6097 }, { "epoch": 470.0, "learning_rate": 0.00024, "loss": 1.071, "step": 6110 }, { "epoch": 471.0, "learning_rate": 0.00023200000000000003, "loss": 1.0709, "step": 6123 }, { "epoch": 472.0, "learning_rate": 0.000224, "loss": 1.0767, "step": 6136 }, { "epoch": 473.0, "learning_rate": 0.000216, "loss": 1.0741, "step": 6149 }, { "epoch": 474.0, "learning_rate": 0.000208, "loss": 1.0644, "step": 6162 }, { "epoch": 475.0, "learning_rate": 0.0002, "loss": 1.0699, "step": 6175 }, { "epoch": 476.0, "learning_rate": 0.000192, "loss": 1.0727, "step": 6188 }, { "epoch": 477.0, "learning_rate": 0.000184, "loss": 1.06, "step": 6201 }, { "epoch": 478.0, "learning_rate": 0.000176, "loss": 1.0568, "step": 6214 }, { "epoch": 479.0, "learning_rate": 0.00016800000000000002, "loss": 1.0616, "step": 6227 }, { "epoch": 480.0, "learning_rate": 0.00016, "loss": 1.0491, "step": 6240 }, { "epoch": 481.0, "learning_rate": 0.000152, "loss": 1.0502, "step": 6253 }, { "epoch": 482.0, "learning_rate": 0.000144, "loss": 1.0742, "step": 6266 }, { "epoch": 483.0, "learning_rate": 0.00013600000000000003, "loss": 1.0582, "step": 6279 }, { "epoch": 484.0, "learning_rate": 0.000128, "loss": 1.0803, "step": 6292 }, { "epoch": 485.0, "learning_rate": 0.00012, "loss": 1.067, "step": 6305 }, { "epoch": 486.0, "learning_rate": 0.000112, "loss": 1.0397, "step": 6318 }, { "epoch": 487.0, "learning_rate": 0.000104, "loss": 1.0489, "step": 6331 }, { "epoch": 488.0, "learning_rate": 9.6e-05, "loss": 1.0378, "step": 6344 }, { "epoch": 489.0, "learning_rate": 8.8e-05, "loss": 1.0418, "step": 6357 }, { "epoch": 490.0, "learning_rate": 8e-05, "loss": 1.0344, "step": 6370 }, { "epoch": 491.0, "learning_rate": 7.2e-05, "loss": 1.0768, "step": 6383 }, { "epoch": 492.0, "learning_rate": 6.4e-05, "loss": 1.0296, "step": 6396 }, { "epoch": 493.0, "learning_rate": 5.6e-05, "loss": 1.0336, "step": 6409 }, { "epoch": 494.0, "learning_rate": 4.8e-05, "loss": 1.0568, "step": 6422 }, { "epoch": 495.0, "learning_rate": 4e-05, "loss": 1.0647, "step": 6435 }, { "epoch": 496.0, "learning_rate": 3.2e-05, "loss": 1.0448, "step": 6448 }, { "epoch": 497.0, "learning_rate": 2.4e-05, "loss": 1.0602, "step": 6461 }, { "epoch": 498.0, "learning_rate": 1.6e-05, "loss": 1.0615, "step": 6474 }, { "epoch": 499.0, "learning_rate": 8e-06, "loss": 1.0389, "step": 6487 }, { "epoch": 500.0, "learning_rate": 0.0, "loss": 1.0629, "step": 6500 }, { "epoch": 500.0, "step": 6500, "total_flos": 284798065115136.0, "train_loss": 2.748476623535156, "train_runtime": 71445.8185, "train_samples_per_second": 0.7, "train_steps_per_second": 0.091 }, { "epoch": 500.0, "step": 6500, "total_flos": 284798065115136.0, "train_loss": 0.0, "train_runtime": 1.3574, "train_samples_per_second": 36834.222, "train_steps_per_second": 4788.449 }, { "epoch": 501.0, "learning_rate": 0.003996, "loss": 1.4863, "step": 6513 }, { "epoch": 502.0, "learning_rate": 0.003992, "loss": 1.6645, "step": 6526 }, { "epoch": 503.0, "learning_rate": 0.003988, "loss": 1.7012, "step": 6539 }, { "epoch": 504.0, "learning_rate": 0.003984, "loss": 1.6185, "step": 6552 }, { "epoch": 505.0, "learning_rate": 0.00398, "loss": 1.5629, "step": 6565 }, { "epoch": 506.0, "learning_rate": 0.003976, "loss": 1.5867, "step": 6578 }, { "epoch": 507.0, "learning_rate": 0.003972, "loss": 1.6144, "step": 6591 }, { "epoch": 508.0, "learning_rate": 0.003968, "loss": 1.7844, "step": 6604 }, { "epoch": 509.0, "learning_rate": 0.003964, "loss": 1.7508, "step": 6617 }, { "epoch": 510.0, "learning_rate": 0.00396, "loss": 1.7693, "step": 6630 }, { "epoch": 511.0, "learning_rate": 0.003956, "loss": 1.8884, "step": 6643 }, { "epoch": 512.0, "learning_rate": 0.003952, "loss": 1.8287, "step": 6656 }, { "epoch": 513.0, "learning_rate": 0.003948, "loss": 1.8228, "step": 6669 }, { "epoch": 514.0, "learning_rate": 0.0039440000000000005, "loss": 1.7632, "step": 6682 }, { "epoch": 515.0, "learning_rate": 0.00394, "loss": 1.7943, "step": 6695 }, { "epoch": 516.0, "learning_rate": 0.003936, "loss": 1.7451, "step": 6708 }, { "epoch": 517.0, "learning_rate": 0.003932, "loss": 1.8542, "step": 6721 }, { "epoch": 518.0, "learning_rate": 0.003928, "loss": 2.0283, "step": 6734 }, { "epoch": 519.0, "learning_rate": 0.003924, "loss": 2.0074, "step": 6747 }, { "epoch": 520.0, "learning_rate": 0.00392, "loss": 2.1644, "step": 6760 }, { "epoch": 521.0, "learning_rate": 0.003916, "loss": 1.9558, "step": 6773 }, { "epoch": 522.0, "learning_rate": 0.003912, "loss": 1.9104, "step": 6786 }, { "epoch": 523.0, "learning_rate": 0.003908, "loss": 1.9961, "step": 6799 }, { "epoch": 524.0, "learning_rate": 0.003904, "loss": 2.0827, "step": 6812 }, { "epoch": 525.0, "learning_rate": 0.0039, "loss": 2.0293, "step": 6825 }, { "epoch": 526.0, "learning_rate": 0.003896, "loss": 1.9904, "step": 6838 }, { "epoch": 527.0, "learning_rate": 0.003892, "loss": 1.9175, "step": 6851 }, { "epoch": 528.0, "learning_rate": 0.003888, "loss": 1.8658, "step": 6864 }, { "epoch": 529.0, "learning_rate": 0.003884, "loss": 1.8219, "step": 6877 }, { "epoch": 530.0, "learning_rate": 0.0038799999999999998, "loss": 1.884, "step": 6890 }, { "epoch": 531.0, "learning_rate": 0.003876, "loss": 1.9361, "step": 6903 }, { "epoch": 532.0, "learning_rate": 0.003872, "loss": 1.8961, "step": 6916 }, { "epoch": 533.0, "learning_rate": 0.003868, "loss": 1.9082, "step": 6929 }, { "epoch": 534.0, "learning_rate": 0.003864, "loss": 2.0034, "step": 6942 }, { "epoch": 535.0, "learning_rate": 0.00386, "loss": 2.0058, "step": 6955 }, { "epoch": 536.0, "learning_rate": 0.003856, "loss": 1.9934, "step": 6968 }, { "epoch": 537.0, "learning_rate": 0.003852, "loss": 1.9674, "step": 6981 }, { "epoch": 538.0, "learning_rate": 0.003848, "loss": 1.9737, "step": 6994 }, { "epoch": 539.0, "learning_rate": 0.0038439999999999998, "loss": 1.9184, "step": 7007 }, { "epoch": 540.0, "learning_rate": 0.00384, "loss": 1.9147, "step": 7020 }, { "epoch": 541.0, "learning_rate": 0.003836, "loss": 1.9792, "step": 7033 }, { "epoch": 542.0, "learning_rate": 0.003832, "loss": 1.9448, "step": 7046 }, { "epoch": 543.0, "learning_rate": 0.003828, "loss": 1.8897, "step": 7059 }, { "epoch": 544.0, "learning_rate": 0.0038239999999999997, "loss": 1.9048, "step": 7072 }, { "epoch": 545.0, "learning_rate": 0.00382, "loss": 1.9577, "step": 7085 }, { "epoch": 546.0, "learning_rate": 0.003816, "loss": 1.9996, "step": 7098 }, { "epoch": 547.0, "learning_rate": 0.003812, "loss": 1.9895, "step": 7111 }, { "epoch": 548.0, "learning_rate": 0.0038079999999999998, "loss": 2.0381, "step": 7124 }, { "epoch": 549.0, "learning_rate": 0.003804, "loss": 1.9362, "step": 7137 }, { "epoch": 550.0, "learning_rate": 0.0038, "loss": 1.9544, "step": 7150 }, { "epoch": 551.0, "learning_rate": 0.003796, "loss": 1.9497, "step": 7163 }, { "epoch": 552.0, "learning_rate": 0.003792, "loss": 1.9105, "step": 7176 }, { "epoch": 553.0, "learning_rate": 0.0037879999999999997, "loss": 1.896, "step": 7189 }, { "epoch": 554.0, "learning_rate": 0.003784, "loss": 2.0015, "step": 7202 }, { "epoch": 555.0, "learning_rate": 0.00378, "loss": 1.8753, "step": 7215 }, { "epoch": 556.0, "learning_rate": 0.003776, "loss": 1.9087, "step": 7228 }, { "epoch": 557.0, "learning_rate": 0.0037719999999999997, "loss": 1.959, "step": 7241 }, { "epoch": 558.0, "learning_rate": 0.003768, "loss": 2.0541, "step": 7254 }, { "epoch": 559.0, "learning_rate": 0.003764, "loss": 2.0614, "step": 7267 }, { "epoch": 560.0, "learning_rate": 0.00376, "loss": 2.0349, "step": 7280 }, { "epoch": 561.0, "learning_rate": 0.003756, "loss": 1.9517, "step": 7293 }, { "epoch": 562.0, "learning_rate": 0.0037519999999999997, "loss": 2.0094, "step": 7306 }, { "epoch": 563.0, "learning_rate": 0.0037480000000000005, "loss": 2.0459, "step": 7319 }, { "epoch": 564.0, "learning_rate": 0.0037440000000000004, "loss": 2.172, "step": 7332 }, { "epoch": 565.0, "learning_rate": 0.0037400000000000003, "loss": 2.0798, "step": 7345 }, { "epoch": 566.0, "learning_rate": 0.003736, "loss": 2.0222, "step": 7358 }, { "epoch": 567.0, "learning_rate": 0.003732, "loss": 2.0051, "step": 7371 }, { "epoch": 568.0, "learning_rate": 0.0037280000000000004, "loss": 2.2096, "step": 7384 }, { "epoch": 569.0, "learning_rate": 0.0037240000000000003, "loss": 2.2197, "step": 7397 }, { "epoch": 570.0, "learning_rate": 0.00372, "loss": 2.1259, "step": 7410 }, { "epoch": 571.0, "learning_rate": 0.003716, "loss": 2.1098, "step": 7423 }, { "epoch": 572.0, "learning_rate": 0.0037120000000000005, "loss": 2.0734, "step": 7436 }, { "epoch": 573.0, "learning_rate": 0.0037080000000000004, "loss": 2.0822, "step": 7449 }, { "epoch": 574.0, "learning_rate": 0.0037040000000000003, "loss": 2.0771, "step": 7462 }, { "epoch": 575.0, "learning_rate": 0.0037, "loss": 2.0217, "step": 7475 }, { "epoch": 576.0, "learning_rate": 0.003696, "loss": 1.9762, "step": 7488 }, { "epoch": 577.0, "learning_rate": 0.0036920000000000004, "loss": 1.9341, "step": 7501 }, { "epoch": 578.0, "learning_rate": 0.0036880000000000003, "loss": 1.9837, "step": 7514 }, { "epoch": 579.0, "learning_rate": 0.003684, "loss": 1.9337, "step": 7527 }, { "epoch": 580.0, "learning_rate": 0.00368, "loss": 1.8968, "step": 7540 }, { "epoch": 581.0, "learning_rate": 0.0036760000000000004, "loss": 1.8705, "step": 7553 }, { "epoch": 582.0, "learning_rate": 0.0036720000000000004, "loss": 1.8261, "step": 7566 }, { "epoch": 583.0, "learning_rate": 0.0036680000000000003, "loss": 1.9411, "step": 7579 }, { "epoch": 584.0, "learning_rate": 0.003664, "loss": 1.9961, "step": 7592 }, { "epoch": 585.0, "learning_rate": 0.00366, "loss": 1.8865, "step": 7605 }, { "epoch": 586.0, "learning_rate": 0.0036560000000000004, "loss": 1.829, "step": 7618 }, { "epoch": 587.0, "learning_rate": 0.0036520000000000003, "loss": 1.8424, "step": 7631 }, { "epoch": 588.0, "learning_rate": 0.003648, "loss": 1.8463, "step": 7644 }, { "epoch": 589.0, "learning_rate": 0.003644, "loss": 1.8452, "step": 7657 }, { "epoch": 590.0, "learning_rate": 0.00364, "loss": 1.7974, "step": 7670 }, { "epoch": 591.0, "learning_rate": 0.0036360000000000003, "loss": 1.7995, "step": 7683 }, { "epoch": 592.0, "learning_rate": 0.0036320000000000002, "loss": 1.7664, "step": 7696 }, { "epoch": 593.0, "learning_rate": 0.003628, "loss": 1.7451, "step": 7709 }, { "epoch": 594.0, "learning_rate": 0.003624, "loss": 1.7978, "step": 7722 }, { "epoch": 595.0, "learning_rate": 0.0036200000000000004, "loss": 1.9067, "step": 7735 }, { "epoch": 596.0, "learning_rate": 0.0036160000000000003, "loss": 1.8932, "step": 7748 }, { "epoch": 597.0, "learning_rate": 0.003612, "loss": 1.9407, "step": 7761 }, { "epoch": 598.0, "learning_rate": 0.003608, "loss": 1.8776, "step": 7774 }, { "epoch": 599.0, "learning_rate": 0.003604, "loss": 1.8223, "step": 7787 }, { "epoch": 600.0, "learning_rate": 0.0036000000000000003, "loss": 1.7761, "step": 7800 }, { "epoch": 601.0, "learning_rate": 0.0035960000000000002, "loss": 1.7768, "step": 7813 }, { "epoch": 602.0, "learning_rate": 0.003592, "loss": 1.8109, "step": 7826 }, { "epoch": 603.0, "learning_rate": 0.003588, "loss": 1.7787, "step": 7839 }, { "epoch": 604.0, "learning_rate": 0.003584, "loss": 1.9842, "step": 7852 }, { "epoch": 605.0, "learning_rate": 0.0035800000000000003, "loss": 1.9262, "step": 7865 }, { "epoch": 606.0, "learning_rate": 0.003576, "loss": 1.9124, "step": 7878 }, { "epoch": 607.0, "learning_rate": 0.003572, "loss": 1.8407, "step": 7891 }, { "epoch": 608.0, "learning_rate": 0.003568, "loss": 1.8722, "step": 7904 }, { "epoch": 609.0, "learning_rate": 0.0035640000000000003, "loss": 1.7409, "step": 7917 }, { "epoch": 610.0, "learning_rate": 0.0035600000000000002, "loss": 1.712, "step": 7930 }, { "epoch": 611.0, "learning_rate": 0.003556, "loss": 1.6115, "step": 7943 }, { "epoch": 612.0, "learning_rate": 0.003552, "loss": 1.6805, "step": 7956 }, { "epoch": 613.0, "learning_rate": 0.003548, "loss": 1.7829, "step": 7969 }, { "epoch": 614.0, "learning_rate": 0.0035440000000000003, "loss": 1.7498, "step": 7982 }, { "epoch": 615.0, "learning_rate": 0.00354, "loss": 1.7536, "step": 7995 }, { "epoch": 616.0, "learning_rate": 0.003536, "loss": 1.7015, "step": 8008 }, { "epoch": 617.0, "learning_rate": 0.003532, "loss": 1.6556, "step": 8021 }, { "epoch": 618.0, "learning_rate": 0.003528, "loss": 1.7314, "step": 8034 }, { "epoch": 619.0, "learning_rate": 0.0035240000000000002, "loss": 1.6996, "step": 8047 }, { "epoch": 620.0, "learning_rate": 0.00352, "loss": 1.6819, "step": 8060 }, { "epoch": 621.0, "learning_rate": 0.003516, "loss": 1.6994, "step": 8073 }, { "epoch": 622.0, "learning_rate": 0.003512, "loss": 1.6657, "step": 8086 }, { "epoch": 623.0, "learning_rate": 0.0035080000000000003, "loss": 1.6558, "step": 8099 }, { "epoch": 624.0, "learning_rate": 0.003504, "loss": 1.6822, "step": 8112 }, { "epoch": 625.0, "learning_rate": 0.0035, "loss": 1.7245, "step": 8125 }, { "epoch": 626.0, "learning_rate": 0.003496, "loss": 1.8045, "step": 8138 }, { "epoch": 627.0, "learning_rate": 0.003492, "loss": 1.7307, "step": 8151 }, { "epoch": 628.0, "learning_rate": 0.003488, "loss": 1.7469, "step": 8164 }, { "epoch": 629.0, "learning_rate": 0.003484, "loss": 1.7047, "step": 8177 }, { "epoch": 630.0, "learning_rate": 0.00348, "loss": 1.6359, "step": 8190 }, { "epoch": 631.0, "learning_rate": 0.003476, "loss": 1.7324, "step": 8203 }, { "epoch": 632.0, "learning_rate": 0.0034720000000000003, "loss": 1.6107, "step": 8216 }, { "epoch": 633.0, "learning_rate": 0.003468, "loss": 1.5336, "step": 8229 }, { "epoch": 634.0, "learning_rate": 0.003464, "loss": 1.5587, "step": 8242 }, { "epoch": 635.0, "learning_rate": 0.00346, "loss": 1.581, "step": 8255 }, { "epoch": 636.0, "learning_rate": 0.003456, "loss": 1.5281, "step": 8268 }, { "epoch": 637.0, "learning_rate": 0.003452, "loss": 1.5198, "step": 8281 }, { "epoch": 638.0, "learning_rate": 0.003448, "loss": 1.5671, "step": 8294 }, { "epoch": 639.0, "learning_rate": 0.003444, "loss": 1.5257, "step": 8307 }, { "epoch": 640.0, "learning_rate": 0.00344, "loss": 1.5525, "step": 8320 }, { "epoch": 641.0, "learning_rate": 0.003436, "loss": 1.5005, "step": 8333 }, { "epoch": 642.0, "learning_rate": 0.003432, "loss": 1.4971, "step": 8346 }, { "epoch": 643.0, "learning_rate": 0.003428, "loss": 1.4738, "step": 8359 }, { "epoch": 644.0, "learning_rate": 0.003424, "loss": 1.5397, "step": 8372 }, { "epoch": 645.0, "learning_rate": 0.00342, "loss": 1.5092, "step": 8385 }, { "epoch": 646.0, "learning_rate": 0.003416, "loss": 1.5638, "step": 8398 }, { "epoch": 647.0, "learning_rate": 0.003412, "loss": 1.4813, "step": 8411 }, { "epoch": 648.0, "learning_rate": 0.003408, "loss": 1.4827, "step": 8424 }, { "epoch": 649.0, "learning_rate": 0.003404, "loss": 1.5285, "step": 8437 }, { "epoch": 650.0, "learning_rate": 0.0034, "loss": 1.5059, "step": 8450 }, { "epoch": 651.0, "learning_rate": 0.003396, "loss": 1.5452, "step": 8463 }, { "epoch": 652.0, "learning_rate": 0.003392, "loss": 1.6823, "step": 8476 }, { "epoch": 653.0, "learning_rate": 0.003388, "loss": 1.6004, "step": 8489 }, { "epoch": 654.0, "learning_rate": 0.003384, "loss": 1.648, "step": 8502 }, { "epoch": 655.0, "learning_rate": 0.0033799999999999998, "loss": 1.6614, "step": 8515 }, { "epoch": 656.0, "learning_rate": 0.003376, "loss": 1.6969, "step": 8528 }, { "epoch": 657.0, "learning_rate": 0.003372, "loss": 1.5948, "step": 8541 }, { "epoch": 658.0, "learning_rate": 0.003368, "loss": 1.5743, "step": 8554 }, { "epoch": 659.0, "learning_rate": 0.003364, "loss": 1.5521, "step": 8567 }, { "epoch": 660.0, "learning_rate": 0.00336, "loss": 1.5639, "step": 8580 }, { "epoch": 661.0, "learning_rate": 0.003356, "loss": 1.5155, "step": 8593 }, { "epoch": 662.0, "learning_rate": 0.003352, "loss": 1.4494, "step": 8606 }, { "epoch": 663.0, "learning_rate": 0.003348, "loss": 1.5803, "step": 8619 }, { "epoch": 664.0, "learning_rate": 0.0033439999999999998, "loss": 1.5896, "step": 8632 }, { "epoch": 665.0, "learning_rate": 0.00334, "loss": 1.6208, "step": 8645 }, { "epoch": 666.0, "learning_rate": 0.003336, "loss": 1.6345, "step": 8658 }, { "epoch": 667.0, "learning_rate": 0.003332, "loss": 1.6087, "step": 8671 }, { "epoch": 668.0, "learning_rate": 0.003328, "loss": 1.5938, "step": 8684 }, { "epoch": 669.0, "learning_rate": 0.0033239999999999997, "loss": 1.5391, "step": 8697 }, { "epoch": 670.0, "learning_rate": 0.00332, "loss": 1.5938, "step": 8710 }, { "epoch": 671.0, "learning_rate": 0.003316, "loss": 1.6235, "step": 8723 }, { "epoch": 672.0, "learning_rate": 0.003312, "loss": 1.6882, "step": 8736 }, { "epoch": 673.0, "learning_rate": 0.0033079999999999997, "loss": 1.5822, "step": 8749 }, { "epoch": 674.0, "learning_rate": 0.003304, "loss": 1.622, "step": 8762 }, { "epoch": 675.0, "learning_rate": 0.0033, "loss": 1.587, "step": 8775 }, { "epoch": 676.0, "learning_rate": 0.003296, "loss": 1.5119, "step": 8788 }, { "epoch": 677.0, "learning_rate": 0.003292, "loss": 1.467, "step": 8801 }, { "epoch": 678.0, "learning_rate": 0.0032879999999999997, "loss": 1.438, "step": 8814 }, { "epoch": 679.0, "learning_rate": 0.003284, "loss": 1.4497, "step": 8827 }, { "epoch": 680.0, "learning_rate": 0.00328, "loss": 1.4349, "step": 8840 }, { "epoch": 681.0, "learning_rate": 0.003276, "loss": 1.378, "step": 8853 }, { "epoch": 682.0, "learning_rate": 0.0032719999999999997, "loss": 1.3847, "step": 8866 }, { "epoch": 683.0, "learning_rate": 0.003268, "loss": 1.4556, "step": 8879 }, { "epoch": 684.0, "learning_rate": 0.003264, "loss": 1.4435, "step": 8892 }, { "epoch": 685.0, "learning_rate": 0.00326, "loss": 1.4494, "step": 8905 }, { "epoch": 686.0, "learning_rate": 0.0032559999999999998, "loss": 1.4065, "step": 8918 }, { "epoch": 687.0, "learning_rate": 0.0032519999999999997, "loss": 1.4323, "step": 8931 }, { "epoch": 688.0, "learning_rate": 0.0032480000000000005, "loss": 1.4698, "step": 8944 }, { "epoch": 689.0, "learning_rate": 0.0032440000000000004, "loss": 1.4293, "step": 8957 }, { "epoch": 690.0, "learning_rate": 0.0032400000000000003, "loss": 1.5205, "step": 8970 }, { "epoch": 691.0, "learning_rate": 0.003236, "loss": 1.5039, "step": 8983 }, { "epoch": 692.0, "learning_rate": 0.003232, "loss": 1.4715, "step": 8996 }, { "epoch": 693.0, "learning_rate": 0.0032280000000000004, "loss": 1.4448, "step": 9009 }, { "epoch": 694.0, "learning_rate": 0.0032240000000000003, "loss": 1.5031, "step": 9022 }, { "epoch": 695.0, "learning_rate": 0.00322, "loss": 1.5261, "step": 9035 }, { "epoch": 696.0, "learning_rate": 0.003216, "loss": 1.4389, "step": 9048 }, { "epoch": 697.0, "learning_rate": 0.0032120000000000004, "loss": 1.4792, "step": 9061 }, { "epoch": 698.0, "learning_rate": 0.0032080000000000003, "loss": 1.4317, "step": 9074 }, { "epoch": 699.0, "learning_rate": 0.0032040000000000003, "loss": 1.5776, "step": 9087 }, { "epoch": 700.0, "learning_rate": 0.0032, "loss": 1.5326, "step": 9100 }, { "epoch": 701.0, "learning_rate": 0.003196, "loss": 1.5345, "step": 9113 }, { "epoch": 702.0, "learning_rate": 0.0031920000000000004, "loss": 1.5269, "step": 9126 }, { "epoch": 703.0, "learning_rate": 0.0031880000000000003, "loss": 1.4819, "step": 9139 }, { "epoch": 704.0, "learning_rate": 0.003184, "loss": 1.5326, "step": 9152 }, { "epoch": 705.0, "learning_rate": 0.00318, "loss": 1.4257, "step": 9165 }, { "epoch": 706.0, "learning_rate": 0.0031760000000000004, "loss": 1.4306, "step": 9178 }, { "epoch": 707.0, "learning_rate": 0.0031720000000000003, "loss": 1.3884, "step": 9191 }, { "epoch": 708.0, "learning_rate": 0.0031680000000000002, "loss": 1.3421, "step": 9204 }, { "epoch": 709.0, "learning_rate": 0.003164, "loss": 1.394, "step": 9217 }, { "epoch": 710.0, "learning_rate": 0.00316, "loss": 1.3892, "step": 9230 }, { "epoch": 711.0, "learning_rate": 0.0031560000000000004, "loss": 1.4832, "step": 9243 }, { "epoch": 712.0, "learning_rate": 0.0031520000000000003, "loss": 1.4088, "step": 9256 }, { "epoch": 713.0, "learning_rate": 0.003148, "loss": 1.386, "step": 9269 }, { "epoch": 714.0, "learning_rate": 0.003144, "loss": 1.3992, "step": 9282 }, { "epoch": 715.0, "learning_rate": 0.00314, "loss": 1.381, "step": 9295 }, { "epoch": 716.0, "learning_rate": 0.0031360000000000003, "loss": 1.394, "step": 9308 }, { "epoch": 717.0, "learning_rate": 0.0031320000000000002, "loss": 1.4024, "step": 9321 }, { "epoch": 718.0, "learning_rate": 0.003128, "loss": 1.3334, "step": 9334 }, { "epoch": 719.0, "learning_rate": 0.003124, "loss": 1.3467, "step": 9347 }, { "epoch": 720.0, "learning_rate": 0.0031200000000000004, "loss": 1.284, "step": 9360 }, { "epoch": 721.0, "learning_rate": 0.0031160000000000003, "loss": 1.2705, "step": 9373 }, { "epoch": 722.0, "learning_rate": 0.003112, "loss": 1.2919, "step": 9386 }, { "epoch": 723.0, "learning_rate": 0.003108, "loss": 1.3071, "step": 9399 }, { "epoch": 724.0, "learning_rate": 0.003104, "loss": 1.3584, "step": 9412 }, { "epoch": 725.0, "learning_rate": 0.0031000000000000003, "loss": 1.4149, "step": 9425 }, { "epoch": 726.0, "learning_rate": 0.0030960000000000002, "loss": 1.3721, "step": 9438 }, { "epoch": 727.0, "learning_rate": 0.003092, "loss": 1.3719, "step": 9451 }, { "epoch": 728.0, "learning_rate": 0.003088, "loss": 1.3559, "step": 9464 }, { "epoch": 729.0, "learning_rate": 0.003084, "loss": 1.3108, "step": 9477 }, { "epoch": 730.0, "learning_rate": 0.0030800000000000003, "loss": 1.316, "step": 9490 }, { "epoch": 731.0, "learning_rate": 0.003076, "loss": 1.3154, "step": 9503 }, { "epoch": 732.0, "learning_rate": 0.003072, "loss": 1.327, "step": 9516 }, { "epoch": 733.0, "learning_rate": 0.003068, "loss": 1.2914, "step": 9529 }, { "epoch": 734.0, "learning_rate": 0.0030640000000000003, "loss": 1.2891, "step": 9542 }, { "epoch": 735.0, "learning_rate": 0.0030600000000000002, "loss": 1.2923, "step": 9555 }, { "epoch": 736.0, "learning_rate": 0.003056, "loss": 1.3608, "step": 9568 }, { "epoch": 737.0, "learning_rate": 0.003052, "loss": 1.3126, "step": 9581 }, { "epoch": 738.0, "learning_rate": 0.003048, "loss": 1.3673, "step": 9594 }, { "epoch": 739.0, "learning_rate": 0.0030440000000000003, "loss": 1.3951, "step": 9607 }, { "epoch": 740.0, "learning_rate": 0.00304, "loss": 1.3128, "step": 9620 }, { "epoch": 741.0, "learning_rate": 0.003036, "loss": 1.3117, "step": 9633 }, { "epoch": 742.0, "learning_rate": 0.003032, "loss": 1.2828, "step": 9646 }, { "epoch": 743.0, "learning_rate": 0.003028, "loss": 1.3054, "step": 9659 }, { "epoch": 744.0, "learning_rate": 0.003024, "loss": 1.289, "step": 9672 }, { "epoch": 745.0, "learning_rate": 0.00302, "loss": 1.3023, "step": 9685 }, { "epoch": 746.0, "learning_rate": 0.003016, "loss": 1.2972, "step": 9698 }, { "epoch": 747.0, "learning_rate": 0.003012, "loss": 1.281, "step": 9711 }, { "epoch": 748.0, "learning_rate": 0.0030080000000000003, "loss": 1.2475, "step": 9724 }, { "epoch": 749.0, "learning_rate": 0.003004, "loss": 1.2721, "step": 9737 }, { "epoch": 750.0, "learning_rate": 0.003, "loss": 1.3066, "step": 9750 }, { "epoch": 751.0, "learning_rate": 0.002996, "loss": 1.3229, "step": 9763 }, { "epoch": 752.0, "learning_rate": 0.002992, "loss": 1.2095, "step": 9776 }, { "epoch": 753.0, "learning_rate": 0.002988, "loss": 1.2389, "step": 9789 }, { "epoch": 754.0, "learning_rate": 0.002984, "loss": 1.2046, "step": 9802 }, { "epoch": 755.0, "learning_rate": 0.00298, "loss": 1.1953, "step": 9815 }, { "epoch": 756.0, "learning_rate": 0.002976, "loss": 1.1359, "step": 9828 }, { "epoch": 757.0, "learning_rate": 0.0029720000000000002, "loss": 1.13, "step": 9841 }, { "epoch": 758.0, "learning_rate": 0.002968, "loss": 1.1946, "step": 9854 }, { "epoch": 759.0, "learning_rate": 0.002964, "loss": 1.2325, "step": 9867 }, { "epoch": 760.0, "learning_rate": 0.00296, "loss": 1.2435, "step": 9880 }, { "epoch": 761.0, "learning_rate": 0.002956, "loss": 1.2878, "step": 9893 }, { "epoch": 762.0, "learning_rate": 0.002952, "loss": 1.2123, "step": 9906 }, { "epoch": 763.0, "learning_rate": 0.002948, "loss": 1.1953, "step": 9919 }, { "epoch": 764.0, "learning_rate": 0.002944, "loss": 1.2623, "step": 9932 }, { "epoch": 765.0, "learning_rate": 0.00294, "loss": 1.2676, "step": 9945 }, { "epoch": 766.0, "learning_rate": 0.002936, "loss": 1.1999, "step": 9958 }, { "epoch": 767.0, "learning_rate": 0.002932, "loss": 1.2521, "step": 9971 }, { "epoch": 768.0, "learning_rate": 0.002928, "loss": 1.4043, "step": 9984 }, { "epoch": 769.0, "learning_rate": 0.002924, "loss": 1.3043, "step": 9997 }, { "epoch": 770.0, "learning_rate": 0.00292, "loss": 1.1831, "step": 10010 }, { "epoch": 771.0, "learning_rate": 0.002916, "loss": 1.1813, "step": 10023 }, { "epoch": 772.0, "learning_rate": 0.002912, "loss": 1.1946, "step": 10036 }, { "epoch": 773.0, "learning_rate": 0.002908, "loss": 1.2182, "step": 10049 }, { "epoch": 774.0, "learning_rate": 0.002904, "loss": 1.2491, "step": 10062 }, { "epoch": 775.0, "learning_rate": 0.0029, "loss": 1.2422, "step": 10075 }, { "epoch": 776.0, "learning_rate": 0.002896, "loss": 1.2784, "step": 10088 }, { "epoch": 777.0, "learning_rate": 0.002892, "loss": 1.1924, "step": 10101 }, { "epoch": 778.0, "learning_rate": 0.002888, "loss": 1.1739, "step": 10114 }, { "epoch": 779.0, "learning_rate": 0.002884, "loss": 1.2357, "step": 10127 }, { "epoch": 780.0, "learning_rate": 0.0028799999999999997, "loss": 1.1845, "step": 10140 }, { "epoch": 781.0, "learning_rate": 0.002876, "loss": 1.1947, "step": 10153 }, { "epoch": 782.0, "learning_rate": 0.002872, "loss": 1.2135, "step": 10166 }, { "epoch": 783.0, "learning_rate": 0.002868, "loss": 1.1923, "step": 10179 }, { "epoch": 784.0, "learning_rate": 0.002864, "loss": 1.1954, "step": 10192 }, { "epoch": 785.0, "learning_rate": 0.00286, "loss": 1.2599, "step": 10205 }, { "epoch": 786.0, "learning_rate": 0.002856, "loss": 1.1905, "step": 10218 }, { "epoch": 787.0, "learning_rate": 0.002852, "loss": 1.1463, "step": 10231 }, { "epoch": 788.0, "learning_rate": 0.002848, "loss": 1.1417, "step": 10244 }, { "epoch": 789.0, "learning_rate": 0.0028439999999999997, "loss": 1.1696, "step": 10257 }, { "epoch": 790.0, "learning_rate": 0.00284, "loss": 1.1227, "step": 10270 }, { "epoch": 791.0, "learning_rate": 0.002836, "loss": 1.1858, "step": 10283 }, { "epoch": 792.0, "learning_rate": 0.002832, "loss": 1.1615, "step": 10296 }, { "epoch": 793.0, "learning_rate": 0.002828, "loss": 1.2113, "step": 10309 }, { "epoch": 794.0, "learning_rate": 0.0028239999999999997, "loss": 1.1995, "step": 10322 }, { "epoch": 795.0, "learning_rate": 0.00282, "loss": 1.2497, "step": 10335 }, { "epoch": 796.0, "learning_rate": 0.002816, "loss": 1.2255, "step": 10348 }, { "epoch": 797.0, "learning_rate": 0.002812, "loss": 1.2728, "step": 10361 }, { "epoch": 798.0, "learning_rate": 0.0028079999999999997, "loss": 1.2053, "step": 10374 }, { "epoch": 799.0, "learning_rate": 0.002804, "loss": 1.1941, "step": 10387 }, { "epoch": 800.0, "learning_rate": 0.0028, "loss": 1.184, "step": 10400 }, { "epoch": 801.0, "learning_rate": 0.002796, "loss": 1.1575, "step": 10413 }, { "epoch": 802.0, "learning_rate": 0.0027919999999999998, "loss": 1.1571, "step": 10426 }, { "epoch": 803.0, "learning_rate": 0.0027879999999999997, "loss": 1.1619, "step": 10439 }, { "epoch": 804.0, "learning_rate": 0.002784, "loss": 1.175, "step": 10452 }, { "epoch": 805.0, "learning_rate": 0.00278, "loss": 1.2, "step": 10465 }, { "epoch": 806.0, "learning_rate": 0.002776, "loss": 1.1761, "step": 10478 }, { "epoch": 807.0, "learning_rate": 0.0027719999999999997, "loss": 1.0651, "step": 10491 }, { "epoch": 808.0, "learning_rate": 0.002768, "loss": 1.0923, "step": 10504 }, { "epoch": 809.0, "learning_rate": 0.002764, "loss": 1.1537, "step": 10517 }, { "epoch": 810.0, "learning_rate": 0.00276, "loss": 1.1618, "step": 10530 }, { "epoch": 811.0, "learning_rate": 0.0027559999999999998, "loss": 1.2048, "step": 10543 }, { "epoch": 812.0, "learning_rate": 0.0027519999999999997, "loss": 1.1584, "step": 10556 }, { "epoch": 813.0, "learning_rate": 0.0027480000000000004, "loss": 1.1815, "step": 10569 }, { "epoch": 814.0, "learning_rate": 0.0027440000000000003, "loss": 1.1204, "step": 10582 }, { "epoch": 815.0, "learning_rate": 0.0027400000000000002, "loss": 1.1662, "step": 10595 }, { "epoch": 816.0, "learning_rate": 0.002736, "loss": 1.1275, "step": 10608 }, { "epoch": 817.0, "learning_rate": 0.002732, "loss": 1.1124, "step": 10621 }, { "epoch": 818.0, "learning_rate": 0.0027280000000000004, "loss": 1.0765, "step": 10634 }, { "epoch": 819.0, "learning_rate": 0.0027240000000000003, "loss": 1.1159, "step": 10647 }, { "epoch": 820.0, "learning_rate": 0.00272, "loss": 1.1124, "step": 10660 }, { "epoch": 821.0, "learning_rate": 0.002716, "loss": 1.1045, "step": 10673 }, { "epoch": 822.0, "learning_rate": 0.0027120000000000004, "loss": 1.1152, "step": 10686 }, { "epoch": 823.0, "learning_rate": 0.0027080000000000003, "loss": 1.0664, "step": 10699 }, { "epoch": 824.0, "learning_rate": 0.0027040000000000002, "loss": 1.0165, "step": 10712 }, { "epoch": 825.0, "learning_rate": 0.0027, "loss": 1.004, "step": 10725 }, { "epoch": 826.0, "learning_rate": 0.002696, "loss": 1.0194, "step": 10738 }, { "epoch": 827.0, "learning_rate": 0.0026920000000000004, "loss": 1.04, "step": 10751 }, { "epoch": 828.0, "learning_rate": 0.0026880000000000003, "loss": 1.0393, "step": 10764 }, { "epoch": 829.0, "learning_rate": 0.002684, "loss": 0.9552, "step": 10777 }, { "epoch": 830.0, "learning_rate": 0.00268, "loss": 0.9634, "step": 10790 }, { "epoch": 831.0, "learning_rate": 0.0026760000000000004, "loss": 0.9967, "step": 10803 }, { "epoch": 832.0, "learning_rate": 0.0026720000000000003, "loss": 0.994, "step": 10816 }, { "epoch": 833.0, "learning_rate": 0.0026680000000000002, "loss": 0.9906, "step": 10829 }, { "epoch": 834.0, "learning_rate": 0.002664, "loss": 1.0132, "step": 10842 }, { "epoch": 835.0, "learning_rate": 0.00266, "loss": 1.0088, "step": 10855 }, { "epoch": 836.0, "learning_rate": 0.0026560000000000004, "loss": 0.9914, "step": 10868 }, { "epoch": 837.0, "learning_rate": 0.0026520000000000003, "loss": 0.9947, "step": 10881 }, { "epoch": 838.0, "learning_rate": 0.002648, "loss": 1.0053, "step": 10894 }, { "epoch": 839.0, "learning_rate": 0.002644, "loss": 1.0138, "step": 10907 }, { "epoch": 840.0, "learning_rate": 0.00264, "loss": 0.9785, "step": 10920 }, { "epoch": 841.0, "learning_rate": 0.0026360000000000003, "loss": 1.0174, "step": 10933 }, { "epoch": 842.0, "learning_rate": 0.0026320000000000002, "loss": 0.9855, "step": 10946 }, { "epoch": 843.0, "learning_rate": 0.002628, "loss": 1.0005, "step": 10959 }, { "epoch": 844.0, "learning_rate": 0.002624, "loss": 0.9966, "step": 10972 }, { "epoch": 845.0, "learning_rate": 0.0026200000000000004, "loss": 0.9952, "step": 10985 }, { "epoch": 846.0, "learning_rate": 0.0026160000000000003, "loss": 1.001, "step": 10998 }, { "epoch": 847.0, "learning_rate": 0.002612, "loss": 1.0235, "step": 11011 }, { "epoch": 848.0, "learning_rate": 0.002608, "loss": 0.9835, "step": 11024 }, { "epoch": 849.0, "learning_rate": 0.002604, "loss": 0.9951, "step": 11037 }, { "epoch": 850.0, "learning_rate": 0.0026000000000000003, "loss": 1.0329, "step": 11050 }, { "epoch": 851.0, "learning_rate": 0.0025960000000000002, "loss": 1.0021, "step": 11063 }, { "epoch": 852.0, "learning_rate": 0.002592, "loss": 1.0391, "step": 11076 }, { "epoch": 853.0, "learning_rate": 0.002588, "loss": 1.0249, "step": 11089 }, { "epoch": 854.0, "learning_rate": 0.002584, "loss": 0.9974, "step": 11102 }, { "epoch": 855.0, "learning_rate": 0.0025800000000000003, "loss": 1.0149, "step": 11115 }, { "epoch": 856.0, "learning_rate": 0.002576, "loss": 1.0002, "step": 11128 }, { "epoch": 857.0, "learning_rate": 0.002572, "loss": 1.0379, "step": 11141 }, { "epoch": 858.0, "learning_rate": 0.002568, "loss": 1.0381, "step": 11154 }, { "epoch": 859.0, "learning_rate": 0.0025640000000000003, "loss": 0.9772, "step": 11167 }, { "epoch": 860.0, "learning_rate": 0.00256, "loss": 1.0263, "step": 11180 }, { "epoch": 861.0, "learning_rate": 0.002556, "loss": 0.982, "step": 11193 }, { "epoch": 862.0, "learning_rate": 0.002552, "loss": 0.9892, "step": 11206 }, { "epoch": 863.0, "learning_rate": 0.002548, "loss": 0.9708, "step": 11219 }, { "epoch": 864.0, "learning_rate": 0.0025440000000000003, "loss": 0.9883, "step": 11232 }, { "epoch": 865.0, "learning_rate": 0.00254, "loss": 0.9446, "step": 11245 }, { "epoch": 866.0, "learning_rate": 0.002536, "loss": 0.9686, "step": 11258 }, { "epoch": 867.0, "learning_rate": 0.002532, "loss": 1.0044, "step": 11271 }, { "epoch": 868.0, "learning_rate": 0.002528, "loss": 1.0128, "step": 11284 }, { "epoch": 869.0, "learning_rate": 0.002524, "loss": 0.9876, "step": 11297 }, { "epoch": 870.0, "learning_rate": 0.00252, "loss": 0.9992, "step": 11310 }, { "epoch": 871.0, "learning_rate": 0.002516, "loss": 1.1017, "step": 11323 }, { "epoch": 872.0, "learning_rate": 0.002512, "loss": 0.9853, "step": 11336 }, { "epoch": 873.0, "learning_rate": 0.0025080000000000002, "loss": 0.9495, "step": 11349 }, { "epoch": 874.0, "learning_rate": 0.002504, "loss": 0.9292, "step": 11362 }, { "epoch": 875.0, "learning_rate": 0.0025, "loss": 0.9339, "step": 11375 }, { "epoch": 876.0, "learning_rate": 0.002496, "loss": 0.9309, "step": 11388 }, { "epoch": 877.0, "learning_rate": 0.002492, "loss": 0.9303, "step": 11401 }, { "epoch": 878.0, "learning_rate": 0.002488, "loss": 0.8827, "step": 11414 }, { "epoch": 879.0, "learning_rate": 0.002484, "loss": 0.8898, "step": 11427 }, { "epoch": 880.0, "learning_rate": 0.00248, "loss": 0.8748, "step": 11440 }, { "epoch": 881.0, "learning_rate": 0.002476, "loss": 0.921, "step": 11453 }, { "epoch": 882.0, "learning_rate": 0.0024720000000000002, "loss": 0.912, "step": 11466 }, { "epoch": 883.0, "learning_rate": 0.002468, "loss": 0.9684, "step": 11479 }, { "epoch": 884.0, "learning_rate": 0.002464, "loss": 1.0113, "step": 11492 }, { "epoch": 885.0, "learning_rate": 0.00246, "loss": 1.0043, "step": 11505 }, { "epoch": 886.0, "learning_rate": 0.002456, "loss": 0.94, "step": 11518 }, { "epoch": 887.0, "learning_rate": 0.002452, "loss": 0.9166, "step": 11531 }, { "epoch": 888.0, "learning_rate": 0.002448, "loss": 0.9202, "step": 11544 }, { "epoch": 889.0, "learning_rate": 0.002444, "loss": 0.9179, "step": 11557 }, { "epoch": 890.0, "learning_rate": 0.00244, "loss": 0.8928, "step": 11570 }, { "epoch": 891.0, "learning_rate": 0.002436, "loss": 0.9021, "step": 11583 }, { "epoch": 892.0, "learning_rate": 0.002432, "loss": 0.9038, "step": 11596 }, { "epoch": 893.0, "learning_rate": 0.002428, "loss": 0.8446, "step": 11609 }, { "epoch": 894.0, "learning_rate": 0.002424, "loss": 0.9167, "step": 11622 }, { "epoch": 895.0, "learning_rate": 0.00242, "loss": 0.8897, "step": 11635 }, { "epoch": 896.0, "learning_rate": 0.002416, "loss": 0.9227, "step": 11648 }, { "epoch": 897.0, "learning_rate": 0.002412, "loss": 0.8956, "step": 11661 }, { "epoch": 898.0, "learning_rate": 0.002408, "loss": 0.8768, "step": 11674 }, { "epoch": 899.0, "learning_rate": 0.002404, "loss": 0.9134, "step": 11687 }, { "epoch": 900.0, "learning_rate": 0.0024, "loss": 0.8484, "step": 11700 }, { "epoch": 901.0, "learning_rate": 0.002396, "loss": 0.8616, "step": 11713 }, { "epoch": 902.0, "learning_rate": 0.002392, "loss": 0.8669, "step": 11726 }, { "epoch": 903.0, "learning_rate": 0.002388, "loss": 0.8529, "step": 11739 }, { "epoch": 904.0, "learning_rate": 0.002384, "loss": 0.8488, "step": 11752 }, { "epoch": 905.0, "learning_rate": 0.0023799999999999997, "loss": 0.8505, "step": 11765 }, { "epoch": 906.0, "learning_rate": 0.002376, "loss": 0.8264, "step": 11778 }, { "epoch": 907.0, "learning_rate": 0.002372, "loss": 0.8382, "step": 11791 }, { "epoch": 908.0, "learning_rate": 0.002368, "loss": 0.8176, "step": 11804 }, { "epoch": 909.0, "learning_rate": 0.0023639999999999998, "loss": 0.8122, "step": 11817 }, { "epoch": 910.0, "learning_rate": 0.00236, "loss": 0.8175, "step": 11830 }, { "epoch": 911.0, "learning_rate": 0.002356, "loss": 0.8345, "step": 11843 }, { "epoch": 912.0, "learning_rate": 0.002352, "loss": 0.8102, "step": 11856 }, { "epoch": 913.0, "learning_rate": 0.002348, "loss": 0.7818, "step": 11869 }, { "epoch": 914.0, "learning_rate": 0.0023439999999999997, "loss": 0.8027, "step": 11882 }, { "epoch": 915.0, "learning_rate": 0.00234, "loss": 0.7765, "step": 11895 }, { "epoch": 916.0, "learning_rate": 0.002336, "loss": 0.8225, "step": 11908 }, { "epoch": 917.0, "learning_rate": 0.002332, "loss": 0.7882, "step": 11921 }, { "epoch": 918.0, "learning_rate": 0.0023279999999999998, "loss": 0.7784, "step": 11934 }, { "epoch": 919.0, "learning_rate": 0.0023239999999999997, "loss": 0.7751, "step": 11947 }, { "epoch": 920.0, "learning_rate": 0.00232, "loss": 0.7837, "step": 11960 }, { "epoch": 921.0, "learning_rate": 0.002316, "loss": 0.7588, "step": 11973 }, { "epoch": 922.0, "learning_rate": 0.002312, "loss": 0.8106, "step": 11986 }, { "epoch": 923.0, "learning_rate": 0.0023079999999999997, "loss": 0.8359, "step": 11999 }, { "epoch": 924.0, "learning_rate": 0.002304, "loss": 0.7899, "step": 12012 }, { "epoch": 925.0, "learning_rate": 0.0023, "loss": 0.7766, "step": 12025 }, { "epoch": 926.0, "learning_rate": 0.002296, "loss": 0.7978, "step": 12038 }, { "epoch": 927.0, "learning_rate": 0.0022919999999999998, "loss": 0.8012, "step": 12051 }, { "epoch": 928.0, "learning_rate": 0.0022879999999999997, "loss": 0.8112, "step": 12064 }, { "epoch": 929.0, "learning_rate": 0.002284, "loss": 0.8725, "step": 12077 }, { "epoch": 930.0, "learning_rate": 0.00228, "loss": 0.8415, "step": 12090 }, { "epoch": 931.0, "learning_rate": 0.002276, "loss": 0.8444, "step": 12103 }, { "epoch": 932.0, "learning_rate": 0.0022719999999999997, "loss": 0.8459, "step": 12116 }, { "epoch": 933.0, "learning_rate": 0.002268, "loss": 0.7739, "step": 12129 }, { "epoch": 934.0, "learning_rate": 0.002264, "loss": 0.8236, "step": 12142 }, { "epoch": 935.0, "learning_rate": 0.00226, "loss": 0.7746, "step": 12155 }, { "epoch": 936.0, "learning_rate": 0.0022559999999999998, "loss": 0.807, "step": 12168 }, { "epoch": 937.0, "learning_rate": 0.0022519999999999997, "loss": 0.8016, "step": 12181 }, { "epoch": 938.0, "learning_rate": 0.0022480000000000004, "loss": 0.7812, "step": 12194 }, { "epoch": 939.0, "learning_rate": 0.0022440000000000003, "loss": 0.7796, "step": 12207 }, { "epoch": 940.0, "learning_rate": 0.0022400000000000002, "loss": 0.7743, "step": 12220 }, { "epoch": 941.0, "learning_rate": 0.002236, "loss": 0.8141, "step": 12233 }, { "epoch": 942.0, "learning_rate": 0.002232, "loss": 0.7666, "step": 12246 }, { "epoch": 943.0, "learning_rate": 0.0022280000000000004, "loss": 0.7668, "step": 12259 }, { "epoch": 944.0, "learning_rate": 0.0022240000000000003, "loss": 0.7469, "step": 12272 }, { "epoch": 945.0, "learning_rate": 0.00222, "loss": 0.8032, "step": 12285 }, { "epoch": 946.0, "learning_rate": 0.002216, "loss": 0.767, "step": 12298 }, { "epoch": 947.0, "learning_rate": 0.0022120000000000004, "loss": 0.7862, "step": 12311 }, { "epoch": 948.0, "learning_rate": 0.0022080000000000003, "loss": 0.762, "step": 12324 }, { "epoch": 949.0, "learning_rate": 0.0022040000000000002, "loss": 0.762, "step": 12337 }, { "epoch": 950.0, "learning_rate": 0.0022, "loss": 0.7546, "step": 12350 }, { "epoch": 951.0, "learning_rate": 0.002196, "loss": 0.721, "step": 12363 }, { "epoch": 952.0, "learning_rate": 0.0021920000000000004, "loss": 0.7442, "step": 12376 }, { "epoch": 953.0, "learning_rate": 0.0021880000000000003, "loss": 0.7331, "step": 12389 }, { "epoch": 954.0, "learning_rate": 0.002184, "loss": 0.7299, "step": 12402 }, { "epoch": 955.0, "learning_rate": 0.00218, "loss": 0.7114, "step": 12415 }, { "epoch": 956.0, "learning_rate": 0.0021760000000000004, "loss": 0.7443, "step": 12428 }, { "epoch": 957.0, "learning_rate": 0.0021720000000000003, "loss": 0.7247, "step": 12441 }, { "epoch": 958.0, "learning_rate": 0.0021680000000000002, "loss": 0.6941, "step": 12454 }, { "epoch": 959.0, "learning_rate": 0.002164, "loss": 0.6838, "step": 12467 }, { "epoch": 960.0, "learning_rate": 0.00216, "loss": 0.6838, "step": 12480 }, { "epoch": 961.0, "learning_rate": 0.0021560000000000004, "loss": 0.7048, "step": 12493 }, { "epoch": 962.0, "learning_rate": 0.0021520000000000003, "loss": 0.7083, "step": 12506 }, { "epoch": 963.0, "learning_rate": 0.002148, "loss": 0.7166, "step": 12519 }, { "epoch": 964.0, "learning_rate": 0.002144, "loss": 0.7128, "step": 12532 }, { "epoch": 965.0, "learning_rate": 0.00214, "loss": 0.7257, "step": 12545 }, { "epoch": 966.0, "learning_rate": 0.0021360000000000003, "loss": 0.7145, "step": 12558 }, { "epoch": 967.0, "learning_rate": 0.002132, "loss": 0.7173, "step": 12571 }, { "epoch": 968.0, "learning_rate": 0.002128, "loss": 0.7162, "step": 12584 }, { "epoch": 969.0, "learning_rate": 0.002124, "loss": 0.6849, "step": 12597 }, { "epoch": 970.0, "learning_rate": 0.0021200000000000004, "loss": 0.6859, "step": 12610 }, { "epoch": 971.0, "learning_rate": 0.0021160000000000003, "loss": 0.6802, "step": 12623 }, { "epoch": 972.0, "learning_rate": 0.002112, "loss": 0.6965, "step": 12636 }, { "epoch": 973.0, "learning_rate": 0.002108, "loss": 0.6941, "step": 12649 }, { "epoch": 974.0, "learning_rate": 0.002104, "loss": 0.6928, "step": 12662 }, { "epoch": 975.0, "learning_rate": 0.0021000000000000003, "loss": 0.6764, "step": 12675 }, { "epoch": 976.0, "learning_rate": 0.002096, "loss": 0.6569, "step": 12688 }, { "epoch": 977.0, "learning_rate": 0.002092, "loss": 0.6618, "step": 12701 }, { "epoch": 978.0, "learning_rate": 0.002088, "loss": 0.6719, "step": 12714 }, { "epoch": 979.0, "learning_rate": 0.002084, "loss": 0.6584, "step": 12727 }, { "epoch": 980.0, "learning_rate": 0.0020800000000000003, "loss": 0.6911, "step": 12740 }, { "epoch": 981.0, "learning_rate": 0.002076, "loss": 0.688, "step": 12753 }, { "epoch": 982.0, "learning_rate": 0.002072, "loss": 0.6741, "step": 12766 }, { "epoch": 983.0, "learning_rate": 0.002068, "loss": 0.6962, "step": 12779 }, { "epoch": 984.0, "learning_rate": 0.0020640000000000003, "loss": 0.6811, "step": 12792 }, { "epoch": 985.0, "learning_rate": 0.00206, "loss": 0.6717, "step": 12805 }, { "epoch": 986.0, "learning_rate": 0.002056, "loss": 0.6733, "step": 12818 }, { "epoch": 987.0, "learning_rate": 0.002052, "loss": 0.6813, "step": 12831 }, { "epoch": 988.0, "learning_rate": 0.002048, "loss": 0.6472, "step": 12844 }, { "epoch": 989.0, "learning_rate": 0.0020440000000000002, "loss": 0.6508, "step": 12857 }, { "epoch": 990.0, "learning_rate": 0.00204, "loss": 0.6576, "step": 12870 }, { "epoch": 991.0, "learning_rate": 0.002036, "loss": 0.6428, "step": 12883 }, { "epoch": 992.0, "learning_rate": 0.002032, "loss": 0.6505, "step": 12896 }, { "epoch": 993.0, "learning_rate": 0.002028, "loss": 0.6578, "step": 12909 }, { "epoch": 994.0, "learning_rate": 0.002024, "loss": 0.6689, "step": 12922 }, { "epoch": 995.0, "learning_rate": 0.00202, "loss": 0.6625, "step": 12935 }, { "epoch": 996.0, "learning_rate": 0.002016, "loss": 0.6894, "step": 12948 }, { "epoch": 997.0, "learning_rate": 0.002012, "loss": 0.6669, "step": 12961 }, { "epoch": 998.0, "learning_rate": 0.0020080000000000002, "loss": 0.6698, "step": 12974 }, { "epoch": 999.0, "learning_rate": 0.002004, "loss": 0.6861, "step": 12987 }, { "epoch": 1000.0, "learning_rate": 0.002, "loss": 0.7089, "step": 13000 }, { "epoch": 1000.0, "step": 13000, "total_flos": 569573692465152.0, "train_loss": 0.655489089525663, "train_runtime": 67792.7328, "train_samples_per_second": 1.475, "train_steps_per_second": 0.192 } ], "logging_steps": 500, "max_steps": 13000, "num_train_epochs": 1000, "save_steps": 500, "total_flos": 569573692465152.0, "trial_name": null, "trial_params": null }