diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,55969 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.999396900066341, + "global_step": 9324, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 7.142857142857144e-08, + "loss": 0.8121, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.4285714285714287e-07, + "loss": 0.7395, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 2.142857142857143e-07, + "loss": 0.7592, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 2.8571428571428575e-07, + "loss": 0.7375, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 3.5714285714285716e-07, + "loss": 0.7514, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 4.285714285714286e-07, + "loss": 0.7513, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 5.000000000000001e-07, + "loss": 0.7281, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 5.714285714285715e-07, + "loss": 0.7293, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 6.428571428571428e-07, + "loss": 0.8903, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 7.142857142857143e-07, + "loss": 0.7312, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 7.857142857142857e-07, + "loss": 0.7163, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 8.571428571428572e-07, + "loss": 0.7149, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 9.285714285714287e-07, + "loss": 0.7133, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.6977, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 1.0714285714285714e-06, + "loss": 0.6685, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 1.142857142857143e-06, + "loss": 0.6675, + "step": 16 + }, + { + "epoch": 0.01, + "learning_rate": 1.2142857142857144e-06, + "loss": 0.6095, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 1.2857142857142856e-06, + "loss": 0.5569, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 1.3571428571428572e-06, + "loss": 0.6008, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 1.4285714285714286e-06, + "loss": 0.5878, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 1.5e-06, + "loss": 0.503, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 1.5714285714285714e-06, + "loss": 0.475, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 1.642857142857143e-06, + "loss": 0.4418, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 1.7142857142857145e-06, + "loss": 0.3964, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 1.7857142857142859e-06, + "loss": 0.4181, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 1.8571428571428573e-06, + "loss": 0.4335, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 1.928571428571429e-06, + "loss": 0.3693, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.3931, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 2.0714285714285717e-06, + "loss": 0.3669, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 2.1428571428571427e-06, + "loss": 0.3427, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 2.2142857142857146e-06, + "loss": 0.3502, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 2.285714285714286e-06, + "loss": 0.3406, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 2.3571428571428574e-06, + "loss": 0.3514, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 2.428571428571429e-06, + "loss": 0.3166, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-06, + "loss": 0.3152, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 2.571428571428571e-06, + "loss": 0.3146, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 2.642857142857143e-06, + "loss": 0.3663, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 2.7142857142857144e-06, + "loss": 0.3124, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 2.785714285714286e-06, + "loss": 0.3286, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 2.8571428571428573e-06, + "loss": 0.3195, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 2.928571428571429e-06, + "loss": 0.3204, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 3e-06, + "loss": 0.2832, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 3.071428571428572e-06, + "loss": 0.3401, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 3.142857142857143e-06, + "loss": 0.2743, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 3.2142857142857147e-06, + "loss": 0.3032, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 3.285714285714286e-06, + "loss": 0.2723, + "step": 46 + }, + { + "epoch": 0.02, + "learning_rate": 3.357142857142857e-06, + "loss": 0.288, + "step": 47 + }, + { + "epoch": 0.02, + "learning_rate": 3.428571428571429e-06, + "loss": 0.2849, + "step": 48 + }, + { + "epoch": 0.02, + "learning_rate": 3.5e-06, + "loss": 0.2688, + "step": 49 + }, + { + "epoch": 0.02, + "learning_rate": 3.5714285714285718e-06, + "loss": 0.2745, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 3.642857142857143e-06, + "loss": 0.2929, + "step": 51 + }, + { + "epoch": 0.02, + "learning_rate": 3.7142857142857146e-06, + "loss": 0.2916, + "step": 52 + }, + { + "epoch": 0.02, + "learning_rate": 3.785714285714286e-06, + "loss": 0.3044, + "step": 53 + }, + { + "epoch": 0.02, + "learning_rate": 3.857142857142858e-06, + "loss": 0.2995, + "step": 54 + }, + { + "epoch": 0.02, + "learning_rate": 3.928571428571429e-06, + "loss": 0.2793, + "step": 55 + }, + { + "epoch": 0.02, + "learning_rate": 4.000000000000001e-06, + "loss": 0.2757, + "step": 56 + }, + { + "epoch": 0.02, + "learning_rate": 4.071428571428572e-06, + "loss": 0.2613, + "step": 57 + }, + { + "epoch": 0.02, + "learning_rate": 4.1428571428571435e-06, + "loss": 0.2793, + "step": 58 + }, + { + "epoch": 0.02, + "learning_rate": 4.2142857142857145e-06, + "loss": 0.2984, + "step": 59 + }, + { + "epoch": 0.02, + "learning_rate": 4.2857142857142855e-06, + "loss": 0.2756, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 4.357142857142857e-06, + "loss": 0.2526, + "step": 61 + }, + { + "epoch": 0.02, + "learning_rate": 4.428571428571429e-06, + "loss": 0.2586, + "step": 62 + }, + { + "epoch": 0.02, + "learning_rate": 4.5e-06, + "loss": 0.2546, + "step": 63 + }, + { + "epoch": 0.02, + "learning_rate": 4.571428571428572e-06, + "loss": 0.2922, + "step": 64 + }, + { + "epoch": 0.02, + "learning_rate": 4.642857142857144e-06, + "loss": 0.2592, + "step": 65 + }, + { + "epoch": 0.02, + "learning_rate": 4.714285714285715e-06, + "loss": 0.3144, + "step": 66 + }, + { + "epoch": 0.02, + "learning_rate": 4.785714285714287e-06, + "loss": 0.2447, + "step": 67 + }, + { + "epoch": 0.02, + "learning_rate": 4.857142857142858e-06, + "loss": 0.2493, + "step": 68 + }, + { + "epoch": 0.02, + "learning_rate": 4.928571428571429e-06, + "loss": 0.2864, + "step": 69 + }, + { + "epoch": 0.02, + "learning_rate": 5e-06, + "loss": 0.2408, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 5.071428571428571e-06, + "loss": 0.2459, + "step": 71 + }, + { + "epoch": 0.02, + "learning_rate": 5.142857142857142e-06, + "loss": 0.2248, + "step": 72 + }, + { + "epoch": 0.02, + "learning_rate": 5.214285714285715e-06, + "loss": 0.2372, + "step": 73 + }, + { + "epoch": 0.02, + "learning_rate": 5.285714285714286e-06, + "loss": 0.2564, + "step": 74 + }, + { + "epoch": 0.02, + "learning_rate": 5.357142857142857e-06, + "loss": 0.2386, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 5.428571428571429e-06, + "loss": 0.2542, + "step": 76 + }, + { + "epoch": 0.02, + "learning_rate": 5.500000000000001e-06, + "loss": 0.2339, + "step": 77 + }, + { + "epoch": 0.03, + "learning_rate": 5.571428571428572e-06, + "loss": 0.2386, + "step": 78 + }, + { + "epoch": 0.03, + "learning_rate": 5.6428571428571435e-06, + "loss": 0.2462, + "step": 79 + }, + { + "epoch": 0.03, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.2359, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 5.785714285714286e-06, + "loss": 0.2509, + "step": 81 + }, + { + "epoch": 0.03, + "learning_rate": 5.857142857142858e-06, + "loss": 0.2248, + "step": 82 + }, + { + "epoch": 0.03, + "learning_rate": 5.928571428571429e-06, + "loss": 0.2542, + "step": 83 + }, + { + "epoch": 0.03, + "learning_rate": 6e-06, + "loss": 0.2571, + "step": 84 + }, + { + "epoch": 0.03, + "learning_rate": 6.071428571428571e-06, + "loss": 0.2713, + "step": 85 + }, + { + "epoch": 0.03, + "learning_rate": 6.142857142857144e-06, + "loss": 0.2462, + "step": 86 + }, + { + "epoch": 0.03, + "learning_rate": 6.214285714285715e-06, + "loss": 0.2582, + "step": 87 + }, + { + "epoch": 0.03, + "learning_rate": 6.285714285714286e-06, + "loss": 0.2509, + "step": 88 + }, + { + "epoch": 0.03, + "learning_rate": 6.357142857142858e-06, + "loss": 0.2467, + "step": 89 + }, + { + "epoch": 0.03, + "learning_rate": 6.4285714285714295e-06, + "loss": 0.2707, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 6.5000000000000004e-06, + "loss": 0.2297, + "step": 91 + }, + { + "epoch": 0.03, + "learning_rate": 6.571428571428572e-06, + "loss": 0.2437, + "step": 92 + }, + { + "epoch": 0.03, + "learning_rate": 6.642857142857143e-06, + "loss": 0.2325, + "step": 93 + }, + { + "epoch": 0.03, + "learning_rate": 6.714285714285714e-06, + "loss": 0.2501, + "step": 94 + }, + { + "epoch": 0.03, + "learning_rate": 6.785714285714287e-06, + "loss": 0.2327, + "step": 95 + }, + { + "epoch": 0.03, + "learning_rate": 6.857142857142858e-06, + "loss": 0.2219, + "step": 96 + }, + { + "epoch": 0.03, + "learning_rate": 6.928571428571429e-06, + "loss": 0.2341, + "step": 97 + }, + { + "epoch": 0.03, + "learning_rate": 7e-06, + "loss": 0.236, + "step": 98 + }, + { + "epoch": 0.03, + "learning_rate": 7.0714285714285726e-06, + "loss": 0.2568, + "step": 99 + }, + { + "epoch": 0.03, + "learning_rate": 7.1428571428571436e-06, + "loss": 0.2524, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 7.2142857142857145e-06, + "loss": 0.2298, + "step": 101 + }, + { + "epoch": 0.03, + "learning_rate": 7.285714285714286e-06, + "loss": 0.2221, + "step": 102 + }, + { + "epoch": 0.03, + "learning_rate": 7.357142857142858e-06, + "loss": 0.2371, + "step": 103 + }, + { + "epoch": 0.03, + "learning_rate": 7.428571428571429e-06, + "loss": 0.2387, + "step": 104 + }, + { + "epoch": 0.03, + "learning_rate": 7.500000000000001e-06, + "loss": 0.2645, + "step": 105 + }, + { + "epoch": 0.03, + "learning_rate": 7.571428571428572e-06, + "loss": 0.2156, + "step": 106 + }, + { + "epoch": 0.03, + "learning_rate": 7.642857142857143e-06, + "loss": 0.2253, + "step": 107 + }, + { + "epoch": 0.03, + "learning_rate": 7.714285714285716e-06, + "loss": 0.2256, + "step": 108 + }, + { + "epoch": 0.04, + "learning_rate": 7.785714285714287e-06, + "loss": 0.2488, + "step": 109 + }, + { + "epoch": 0.04, + "learning_rate": 7.857142857142858e-06, + "loss": 0.2682, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 7.928571428571429e-06, + "loss": 0.2575, + "step": 111 + }, + { + "epoch": 0.04, + "learning_rate": 8.000000000000001e-06, + "loss": 0.239, + "step": 112 + }, + { + "epoch": 0.04, + "learning_rate": 8.071428571428572e-06, + "loss": 0.2383, + "step": 113 + }, + { + "epoch": 0.04, + "learning_rate": 8.142857142857143e-06, + "loss": 0.2462, + "step": 114 + }, + { + "epoch": 0.04, + "learning_rate": 8.214285714285714e-06, + "loss": 0.2534, + "step": 115 + }, + { + "epoch": 0.04, + "learning_rate": 8.285714285714287e-06, + "loss": 0.2395, + "step": 116 + }, + { + "epoch": 0.04, + "learning_rate": 8.357142857142858e-06, + "loss": 0.2285, + "step": 117 + }, + { + "epoch": 0.04, + "learning_rate": 8.428571428571429e-06, + "loss": 0.2476, + "step": 118 + }, + { + "epoch": 0.04, + "learning_rate": 8.5e-06, + "loss": 0.2563, + "step": 119 + }, + { + "epoch": 0.04, + "learning_rate": 8.571428571428571e-06, + "loss": 0.2237, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 8.642857142857144e-06, + "loss": 0.2459, + "step": 121 + }, + { + "epoch": 0.04, + "learning_rate": 8.714285714285715e-06, + "loss": 0.2192, + "step": 122 + }, + { + "epoch": 0.04, + "learning_rate": 8.785714285714286e-06, + "loss": 0.2344, + "step": 123 + }, + { + "epoch": 0.04, + "learning_rate": 8.857142857142858e-06, + "loss": 0.251, + "step": 124 + }, + { + "epoch": 0.04, + "learning_rate": 8.92857142857143e-06, + "loss": 0.2118, + "step": 125 + }, + { + "epoch": 0.04, + "learning_rate": 9e-06, + "loss": 0.2413, + "step": 126 + }, + { + "epoch": 0.04, + "learning_rate": 9.071428571428573e-06, + "loss": 0.2397, + "step": 127 + }, + { + "epoch": 0.04, + "learning_rate": 9.142857142857144e-06, + "loss": 0.2246, + "step": 128 + }, + { + "epoch": 0.04, + "learning_rate": 9.214285714285715e-06, + "loss": 0.2447, + "step": 129 + }, + { + "epoch": 0.04, + "learning_rate": 9.285714285714288e-06, + "loss": 0.2312, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 9.357142857142859e-06, + "loss": 0.2028, + "step": 131 + }, + { + "epoch": 0.04, + "learning_rate": 9.42857142857143e-06, + "loss": 0.2431, + "step": 132 + }, + { + "epoch": 0.04, + "learning_rate": 9.5e-06, + "loss": 0.236, + "step": 133 + }, + { + "epoch": 0.04, + "learning_rate": 9.571428571428573e-06, + "loss": 0.2298, + "step": 134 + }, + { + "epoch": 0.04, + "learning_rate": 9.642857142857144e-06, + "loss": 0.2352, + "step": 135 + }, + { + "epoch": 0.04, + "learning_rate": 9.714285714285715e-06, + "loss": 0.2507, + "step": 136 + }, + { + "epoch": 0.04, + "learning_rate": 9.785714285714286e-06, + "loss": 0.2299, + "step": 137 + }, + { + "epoch": 0.04, + "learning_rate": 9.857142857142859e-06, + "loss": 0.2256, + "step": 138 + }, + { + "epoch": 0.04, + "learning_rate": 9.92857142857143e-06, + "loss": 0.2297, + "step": 139 + }, + { + "epoch": 0.05, + "learning_rate": 1e-05, + "loss": 0.2513, + "step": 140 + }, + { + "epoch": 0.05, + "learning_rate": 1.0071428571428572e-05, + "loss": 0.2041, + "step": 141 + }, + { + "epoch": 0.05, + "learning_rate": 1.0142857142857143e-05, + "loss": 0.2145, + "step": 142 + }, + { + "epoch": 0.05, + "learning_rate": 1.0214285714285714e-05, + "loss": 0.2278, + "step": 143 + }, + { + "epoch": 0.05, + "learning_rate": 1.0285714285714285e-05, + "loss": 0.2136, + "step": 144 + }, + { + "epoch": 0.05, + "learning_rate": 1.0357142857142859e-05, + "loss": 0.2212, + "step": 145 + }, + { + "epoch": 0.05, + "learning_rate": 1.042857142857143e-05, + "loss": 0.2095, + "step": 146 + }, + { + "epoch": 0.05, + "learning_rate": 1.0500000000000001e-05, + "loss": 0.2159, + "step": 147 + }, + { + "epoch": 0.05, + "learning_rate": 1.0571428571428572e-05, + "loss": 0.2257, + "step": 148 + }, + { + "epoch": 0.05, + "learning_rate": 1.0642857142857143e-05, + "loss": 0.2115, + "step": 149 + }, + { + "epoch": 0.05, + "learning_rate": 1.0714285714285714e-05, + "loss": 0.214, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 1.0785714285714287e-05, + "loss": 0.2428, + "step": 151 + }, + { + "epoch": 0.05, + "learning_rate": 1.0857142857142858e-05, + "loss": 0.2309, + "step": 152 + }, + { + "epoch": 0.05, + "learning_rate": 1.0928571428571429e-05, + "loss": 0.2265, + "step": 153 + }, + { + "epoch": 0.05, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.2151, + "step": 154 + }, + { + "epoch": 0.05, + "learning_rate": 1.1071428571428572e-05, + "loss": 0.2039, + "step": 155 + }, + { + "epoch": 0.05, + "learning_rate": 1.1142857142857143e-05, + "loss": 0.2261, + "step": 156 + }, + { + "epoch": 0.05, + "learning_rate": 1.1214285714285716e-05, + "loss": 0.2236, + "step": 157 + }, + { + "epoch": 0.05, + "learning_rate": 1.1285714285714287e-05, + "loss": 0.2197, + "step": 158 + }, + { + "epoch": 0.05, + "learning_rate": 1.1357142857142858e-05, + "loss": 0.234, + "step": 159 + }, + { + "epoch": 0.05, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.2417, + "step": 160 + }, + { + "epoch": 0.05, + "learning_rate": 1.15e-05, + "loss": 0.2339, + "step": 161 + }, + { + "epoch": 0.05, + "learning_rate": 1.1571428571428573e-05, + "loss": 0.2287, + "step": 162 + }, + { + "epoch": 0.05, + "learning_rate": 1.1642857142857145e-05, + "loss": 0.2292, + "step": 163 + }, + { + "epoch": 0.05, + "learning_rate": 1.1714285714285716e-05, + "loss": 0.2088, + "step": 164 + }, + { + "epoch": 0.05, + "learning_rate": 1.1785714285714287e-05, + "loss": 0.2294, + "step": 165 + }, + { + "epoch": 0.05, + "learning_rate": 1.1857142857142858e-05, + "loss": 0.2149, + "step": 166 + }, + { + "epoch": 0.05, + "learning_rate": 1.192857142857143e-05, + "loss": 0.2307, + "step": 167 + }, + { + "epoch": 0.05, + "learning_rate": 1.2e-05, + "loss": 0.2167, + "step": 168 + }, + { + "epoch": 0.05, + "learning_rate": 1.2071428571428571e-05, + "loss": 0.2313, + "step": 169 + }, + { + "epoch": 0.05, + "learning_rate": 1.2142857142857142e-05, + "loss": 0.208, + "step": 170 + }, + { + "epoch": 0.06, + "learning_rate": 1.2214285714285717e-05, + "loss": 0.2296, + "step": 171 + }, + { + "epoch": 0.06, + "learning_rate": 1.2285714285714288e-05, + "loss": 0.198, + "step": 172 + }, + { + "epoch": 0.06, + "learning_rate": 1.2357142857142859e-05, + "loss": 0.2332, + "step": 173 + }, + { + "epoch": 0.06, + "learning_rate": 1.242857142857143e-05, + "loss": 0.2092, + "step": 174 + }, + { + "epoch": 0.06, + "learning_rate": 1.25e-05, + "loss": 0.2469, + "step": 175 + }, + { + "epoch": 0.06, + "learning_rate": 1.2571428571428572e-05, + "loss": 0.2251, + "step": 176 + }, + { + "epoch": 0.06, + "learning_rate": 1.2642857142857143e-05, + "loss": 0.2194, + "step": 177 + }, + { + "epoch": 0.06, + "learning_rate": 1.2714285714285715e-05, + "loss": 0.2167, + "step": 178 + }, + { + "epoch": 0.06, + "learning_rate": 1.2785714285714286e-05, + "loss": 0.2108, + "step": 179 + }, + { + "epoch": 0.06, + "learning_rate": 1.2857142857142859e-05, + "loss": 0.2068, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 1.292857142857143e-05, + "loss": 0.2598, + "step": 181 + }, + { + "epoch": 0.06, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.2145, + "step": 182 + }, + { + "epoch": 0.06, + "learning_rate": 1.3071428571428572e-05, + "loss": 0.2333, + "step": 183 + }, + { + "epoch": 0.06, + "learning_rate": 1.3142857142857145e-05, + "loss": 0.2233, + "step": 184 + }, + { + "epoch": 0.06, + "learning_rate": 1.3214285714285716e-05, + "loss": 0.2341, + "step": 185 + }, + { + "epoch": 0.06, + "learning_rate": 1.3285714285714287e-05, + "loss": 0.214, + "step": 186 + }, + { + "epoch": 0.06, + "learning_rate": 1.3357142857142858e-05, + "loss": 0.2071, + "step": 187 + }, + { + "epoch": 0.06, + "learning_rate": 1.3428571428571429e-05, + "loss": 0.2211, + "step": 188 + }, + { + "epoch": 0.06, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.2081, + "step": 189 + }, + { + "epoch": 0.06, + "learning_rate": 1.3571428571428574e-05, + "loss": 0.2217, + "step": 190 + }, + { + "epoch": 0.06, + "learning_rate": 1.3642857142857145e-05, + "loss": 0.23, + "step": 191 + }, + { + "epoch": 0.06, + "learning_rate": 1.3714285714285716e-05, + "loss": 0.223, + "step": 192 + }, + { + "epoch": 0.06, + "learning_rate": 1.3785714285714287e-05, + "loss": 0.2079, + "step": 193 + }, + { + "epoch": 0.06, + "learning_rate": 1.3857142857142858e-05, + "loss": 0.209, + "step": 194 + }, + { + "epoch": 0.06, + "learning_rate": 1.3928571428571429e-05, + "loss": 0.2102, + "step": 195 + }, + { + "epoch": 0.06, + "learning_rate": 1.4e-05, + "loss": 0.2258, + "step": 196 + }, + { + "epoch": 0.06, + "learning_rate": 1.4071428571428574e-05, + "loss": 0.2201, + "step": 197 + }, + { + "epoch": 0.06, + "learning_rate": 1.4142857142857145e-05, + "loss": 0.2293, + "step": 198 + }, + { + "epoch": 0.06, + "learning_rate": 1.4214285714285716e-05, + "loss": 0.2173, + "step": 199 + }, + { + "epoch": 0.06, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.2442, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 1.4357142857142858e-05, + "loss": 0.2191, + "step": 201 + }, + { + "epoch": 0.06, + "learning_rate": 1.4428571428571429e-05, + "loss": 0.2181, + "step": 202 + }, + { + "epoch": 0.07, + "learning_rate": 1.45e-05, + "loss": 0.217, + "step": 203 + }, + { + "epoch": 0.07, + "learning_rate": 1.4571428571428573e-05, + "loss": 0.2159, + "step": 204 + }, + { + "epoch": 0.07, + "learning_rate": 1.4642857142857144e-05, + "loss": 0.1943, + "step": 205 + }, + { + "epoch": 0.07, + "learning_rate": 1.4714285714285716e-05, + "loss": 0.2273, + "step": 206 + }, + { + "epoch": 0.07, + "learning_rate": 1.4785714285714287e-05, + "loss": 0.218, + "step": 207 + }, + { + "epoch": 0.07, + "learning_rate": 1.4857142857142858e-05, + "loss": 0.2173, + "step": 208 + }, + { + "epoch": 0.07, + "learning_rate": 1.492857142857143e-05, + "loss": 0.2357, + "step": 209 + }, + { + "epoch": 0.07, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.2286, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 1.5071428571428573e-05, + "loss": 0.1963, + "step": 211 + }, + { + "epoch": 0.07, + "learning_rate": 1.5142857142857144e-05, + "loss": 0.2165, + "step": 212 + }, + { + "epoch": 0.07, + "learning_rate": 1.5214285714285715e-05, + "loss": 0.2302, + "step": 213 + }, + { + "epoch": 0.07, + "learning_rate": 1.5285714285714286e-05, + "loss": 0.2037, + "step": 214 + }, + { + "epoch": 0.07, + "learning_rate": 1.535714285714286e-05, + "loss": 0.1957, + "step": 215 + }, + { + "epoch": 0.07, + "learning_rate": 1.542857142857143e-05, + "loss": 0.22, + "step": 216 + }, + { + "epoch": 0.07, + "learning_rate": 1.55e-05, + "loss": 0.2032, + "step": 217 + }, + { + "epoch": 0.07, + "learning_rate": 1.5571428571428573e-05, + "loss": 0.2211, + "step": 218 + }, + { + "epoch": 0.07, + "learning_rate": 1.5642857142857143e-05, + "loss": 0.2052, + "step": 219 + }, + { + "epoch": 0.07, + "learning_rate": 1.5714285714285715e-05, + "loss": 0.207, + "step": 220 + }, + { + "epoch": 0.07, + "learning_rate": 1.5785714285714288e-05, + "loss": 0.2027, + "step": 221 + }, + { + "epoch": 0.07, + "learning_rate": 1.5857142857142857e-05, + "loss": 0.2044, + "step": 222 + }, + { + "epoch": 0.07, + "learning_rate": 1.592857142857143e-05, + "loss": 0.2256, + "step": 223 + }, + { + "epoch": 0.07, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.2089, + "step": 224 + }, + { + "epoch": 0.07, + "learning_rate": 1.6071428571428572e-05, + "loss": 0.2209, + "step": 225 + }, + { + "epoch": 0.07, + "learning_rate": 1.6142857142857145e-05, + "loss": 0.2207, + "step": 226 + }, + { + "epoch": 0.07, + "learning_rate": 1.6214285714285717e-05, + "loss": 0.2027, + "step": 227 + }, + { + "epoch": 0.07, + "learning_rate": 1.6285714285714287e-05, + "loss": 0.2357, + "step": 228 + }, + { + "epoch": 0.07, + "learning_rate": 1.635714285714286e-05, + "loss": 0.2208, + "step": 229 + }, + { + "epoch": 0.07, + "learning_rate": 1.642857142857143e-05, + "loss": 0.2233, + "step": 230 + }, + { + "epoch": 0.07, + "learning_rate": 1.65e-05, + "loss": 0.2117, + "step": 231 + }, + { + "epoch": 0.07, + "learning_rate": 1.6571428571428574e-05, + "loss": 0.1904, + "step": 232 + }, + { + "epoch": 0.07, + "learning_rate": 1.6642857142857147e-05, + "loss": 0.2232, + "step": 233 + }, + { + "epoch": 0.08, + "learning_rate": 1.6714285714285716e-05, + "loss": 0.2121, + "step": 234 + }, + { + "epoch": 0.08, + "learning_rate": 1.678571428571429e-05, + "loss": 0.2195, + "step": 235 + }, + { + "epoch": 0.08, + "learning_rate": 1.6857142857142858e-05, + "loss": 0.1985, + "step": 236 + }, + { + "epoch": 0.08, + "learning_rate": 1.692857142857143e-05, + "loss": 0.2141, + "step": 237 + }, + { + "epoch": 0.08, + "learning_rate": 1.7e-05, + "loss": 0.2196, + "step": 238 + }, + { + "epoch": 0.08, + "learning_rate": 1.7071428571428573e-05, + "loss": 0.2016, + "step": 239 + }, + { + "epoch": 0.08, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.2012, + "step": 240 + }, + { + "epoch": 0.08, + "learning_rate": 1.7214285714285718e-05, + "loss": 0.2223, + "step": 241 + }, + { + "epoch": 0.08, + "learning_rate": 1.7285714285714287e-05, + "loss": 0.2349, + "step": 242 + }, + { + "epoch": 0.08, + "learning_rate": 1.735714285714286e-05, + "loss": 0.2088, + "step": 243 + }, + { + "epoch": 0.08, + "learning_rate": 1.742857142857143e-05, + "loss": 0.2099, + "step": 244 + }, + { + "epoch": 0.08, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.2029, + "step": 245 + }, + { + "epoch": 0.08, + "learning_rate": 1.757142857142857e-05, + "loss": 0.2091, + "step": 246 + }, + { + "epoch": 0.08, + "learning_rate": 1.7642857142857144e-05, + "loss": 0.2114, + "step": 247 + }, + { + "epoch": 0.08, + "learning_rate": 1.7714285714285717e-05, + "loss": 0.2238, + "step": 248 + }, + { + "epoch": 0.08, + "learning_rate": 1.7785714285714286e-05, + "loss": 0.2225, + "step": 249 + }, + { + "epoch": 0.08, + "learning_rate": 1.785714285714286e-05, + "loss": 0.2381, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 1.792857142857143e-05, + "loss": 0.2222, + "step": 251 + }, + { + "epoch": 0.08, + "learning_rate": 1.8e-05, + "loss": 0.2474, + "step": 252 + }, + { + "epoch": 0.08, + "learning_rate": 1.8071428571428573e-05, + "loss": 0.2092, + "step": 253 + }, + { + "epoch": 0.08, + "learning_rate": 1.8142857142857146e-05, + "loss": 0.2201, + "step": 254 + }, + { + "epoch": 0.08, + "learning_rate": 1.8214285714285715e-05, + "loss": 0.2094, + "step": 255 + }, + { + "epoch": 0.08, + "learning_rate": 1.8285714285714288e-05, + "loss": 0.2312, + "step": 256 + }, + { + "epoch": 0.08, + "learning_rate": 1.8357142857142857e-05, + "loss": 0.2157, + "step": 257 + }, + { + "epoch": 0.08, + "learning_rate": 1.842857142857143e-05, + "loss": 0.253, + "step": 258 + }, + { + "epoch": 0.08, + "learning_rate": 1.8500000000000002e-05, + "loss": 0.2185, + "step": 259 + }, + { + "epoch": 0.08, + "learning_rate": 1.8571428571428575e-05, + "loss": 0.211, + "step": 260 + }, + { + "epoch": 0.08, + "learning_rate": 1.8642857142857144e-05, + "loss": 0.1989, + "step": 261 + }, + { + "epoch": 0.08, + "learning_rate": 1.8714285714285717e-05, + "loss": 0.2054, + "step": 262 + }, + { + "epoch": 0.08, + "learning_rate": 1.8785714285714286e-05, + "loss": 0.232, + "step": 263 + }, + { + "epoch": 0.08, + "learning_rate": 1.885714285714286e-05, + "loss": 0.2102, + "step": 264 + }, + { + "epoch": 0.09, + "learning_rate": 1.892857142857143e-05, + "loss": 0.203, + "step": 265 + }, + { + "epoch": 0.09, + "learning_rate": 1.9e-05, + "loss": 0.2074, + "step": 266 + }, + { + "epoch": 0.09, + "learning_rate": 1.9071428571428574e-05, + "loss": 0.212, + "step": 267 + }, + { + "epoch": 0.09, + "learning_rate": 1.9142857142857146e-05, + "loss": 0.21, + "step": 268 + }, + { + "epoch": 0.09, + "learning_rate": 1.9214285714285716e-05, + "loss": 0.2236, + "step": 269 + }, + { + "epoch": 0.09, + "learning_rate": 1.928571428571429e-05, + "loss": 0.1964, + "step": 270 + }, + { + "epoch": 0.09, + "learning_rate": 1.9357142857142858e-05, + "loss": 0.1871, + "step": 271 + }, + { + "epoch": 0.09, + "learning_rate": 1.942857142857143e-05, + "loss": 0.2242, + "step": 272 + }, + { + "epoch": 0.09, + "learning_rate": 1.95e-05, + "loss": 0.2289, + "step": 273 + }, + { + "epoch": 0.09, + "learning_rate": 1.9571428571428572e-05, + "loss": 0.1956, + "step": 274 + }, + { + "epoch": 0.09, + "learning_rate": 1.9642857142857145e-05, + "loss": 0.217, + "step": 275 + }, + { + "epoch": 0.09, + "learning_rate": 1.9714285714285718e-05, + "loss": 0.2076, + "step": 276 + }, + { + "epoch": 0.09, + "learning_rate": 1.9785714285714287e-05, + "loss": 0.216, + "step": 277 + }, + { + "epoch": 0.09, + "learning_rate": 1.985714285714286e-05, + "loss": 0.2099, + "step": 278 + }, + { + "epoch": 0.09, + "learning_rate": 1.992857142857143e-05, + "loss": 0.2001, + "step": 279 + }, + { + "epoch": 0.09, + "learning_rate": 2e-05, + "loss": 0.2116, + "step": 280 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999999396678728e-05, + "loss": 0.2415, + "step": 281 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999997586714986e-05, + "loss": 0.206, + "step": 282 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999994570108988e-05, + "loss": 0.2233, + "step": 283 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999990346861104e-05, + "loss": 0.202, + "step": 284 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999984916971842e-05, + "loss": 0.2113, + "step": 285 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999978280441853e-05, + "loss": 0.2114, + "step": 286 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999970437271943e-05, + "loss": 0.2311, + "step": 287 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999961387463053e-05, + "loss": 0.2145, + "step": 288 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999951131016282e-05, + "loss": 0.2268, + "step": 289 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999939667932863e-05, + "loss": 0.2306, + "step": 290 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999926998214175e-05, + "loss": 0.2136, + "step": 291 + }, + { + "epoch": 0.09, + "learning_rate": 1.999991312186176e-05, + "loss": 0.1984, + "step": 292 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999898038877277e-05, + "loss": 0.1812, + "step": 293 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999881749262556e-05, + "loss": 0.2118, + "step": 294 + }, + { + "epoch": 0.09, + "learning_rate": 1.999986425301956e-05, + "loss": 0.216, + "step": 295 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999845550150397e-05, + "loss": 0.2277, + "step": 296 + }, + { + "epoch": 0.1, + "learning_rate": 1.999982564065733e-05, + "loss": 0.212, + "step": 297 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999804524542756e-05, + "loss": 0.207, + "step": 298 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999782201809227e-05, + "loss": 0.2048, + "step": 299 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999758672459433e-05, + "loss": 0.2074, + "step": 300 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999733936496213e-05, + "loss": 0.2131, + "step": 301 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999707993922555e-05, + "loss": 0.2578, + "step": 302 + }, + { + "epoch": 0.1, + "learning_rate": 1.999968084474159e-05, + "loss": 0.2086, + "step": 303 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999652488956585e-05, + "loss": 0.2094, + "step": 304 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999622926570975e-05, + "loss": 0.2217, + "step": 305 + }, + { + "epoch": 0.1, + "learning_rate": 1.999959215758832e-05, + "loss": 0.2174, + "step": 306 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999560182012328e-05, + "loss": 0.2274, + "step": 307 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999526999846864e-05, + "loss": 0.2113, + "step": 308 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999492611095933e-05, + "loss": 0.2065, + "step": 309 + }, + { + "epoch": 0.1, + "learning_rate": 1.999945701576368e-05, + "loss": 0.2182, + "step": 310 + }, + { + "epoch": 0.1, + "learning_rate": 1.99994202138544e-05, + "loss": 0.2177, + "step": 311 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999382205372538e-05, + "loss": 0.2136, + "step": 312 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999342990322677e-05, + "loss": 0.2176, + "step": 313 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999302568709548e-05, + "loss": 0.1984, + "step": 314 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999260940538033e-05, + "loss": 0.205, + "step": 315 + }, + { + "epoch": 0.1, + "learning_rate": 1.999921810581315e-05, + "loss": 0.2117, + "step": 316 + }, + { + "epoch": 0.1, + "learning_rate": 1.999917406454007e-05, + "loss": 0.2029, + "step": 317 + }, + { + "epoch": 0.1, + "learning_rate": 1.999912881672411e-05, + "loss": 0.2131, + "step": 318 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999082362370724e-05, + "loss": 0.2069, + "step": 319 + }, + { + "epoch": 0.1, + "learning_rate": 1.999903470148552e-05, + "loss": 0.2006, + "step": 320 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998985834074246e-05, + "loss": 0.2125, + "step": 321 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998935760142805e-05, + "loss": 0.196, + "step": 322 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998884479697237e-05, + "loss": 0.1998, + "step": 323 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998831992743726e-05, + "loss": 0.2068, + "step": 324 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998778299288608e-05, + "loss": 0.2107, + "step": 325 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998723399338364e-05, + "loss": 0.2194, + "step": 326 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998667292899614e-05, + "loss": 0.2411, + "step": 327 + }, + { + "epoch": 0.11, + "learning_rate": 1.999860997997913e-05, + "loss": 0.2054, + "step": 328 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998551460583826e-05, + "loss": 0.2174, + "step": 329 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998491734720767e-05, + "loss": 0.2005, + "step": 330 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998430802397157e-05, + "loss": 0.2046, + "step": 331 + }, + { + "epoch": 0.11, + "learning_rate": 1.999836866362035e-05, + "loss": 0.2134, + "step": 332 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998305318397843e-05, + "loss": 0.2078, + "step": 333 + }, + { + "epoch": 0.11, + "learning_rate": 1.999824076673728e-05, + "loss": 0.2271, + "step": 334 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998175008646447e-05, + "loss": 0.1935, + "step": 335 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998108044133286e-05, + "loss": 0.2115, + "step": 336 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998039873205866e-05, + "loss": 0.2315, + "step": 337 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997970495872425e-05, + "loss": 0.2131, + "step": 338 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997899912141325e-05, + "loss": 0.1914, + "step": 339 + }, + { + "epoch": 0.11, + "learning_rate": 1.999782812202109e-05, + "loss": 0.2205, + "step": 340 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997755125520376e-05, + "loss": 0.2224, + "step": 341 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997680922647994e-05, + "loss": 0.2474, + "step": 342 + }, + { + "epoch": 0.11, + "learning_rate": 1.99976055134129e-05, + "loss": 0.2163, + "step": 343 + }, + { + "epoch": 0.11, + "learning_rate": 1.999752889782419e-05, + "loss": 0.1953, + "step": 344 + }, + { + "epoch": 0.11, + "learning_rate": 1.999745107589111e-05, + "loss": 0.2049, + "step": 345 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997372047623048e-05, + "loss": 0.2109, + "step": 346 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997291813029546e-05, + "loss": 0.2086, + "step": 347 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997210372120276e-05, + "loss": 0.2173, + "step": 348 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997127724905073e-05, + "loss": 0.1966, + "step": 349 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997043871393906e-05, + "loss": 0.1959, + "step": 350 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996958811596897e-05, + "loss": 0.2109, + "step": 351 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996872545524304e-05, + "loss": 0.1923, + "step": 352 + }, + { + "epoch": 0.11, + "learning_rate": 1.999678507318654e-05, + "loss": 0.213, + "step": 353 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996696394594158e-05, + "loss": 0.215, + "step": 354 + }, + { + "epoch": 0.11, + "learning_rate": 1.999660650975786e-05, + "loss": 0.1899, + "step": 355 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996515418688493e-05, + "loss": 0.2024, + "step": 356 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996423121397043e-05, + "loss": 0.1992, + "step": 357 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996329617894648e-05, + "loss": 0.2106, + "step": 358 + }, + { + "epoch": 0.12, + "learning_rate": 1.99962349081926e-05, + "loss": 0.1979, + "step": 359 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996138992302314e-05, + "loss": 0.1982, + "step": 360 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996041870235374e-05, + "loss": 0.1802, + "step": 361 + }, + { + "epoch": 0.12, + "learning_rate": 1.999594354200349e-05, + "loss": 0.2096, + "step": 362 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995844007618536e-05, + "loss": 0.2014, + "step": 363 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995743267092514e-05, + "loss": 0.234, + "step": 364 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995641320437586e-05, + "loss": 0.2277, + "step": 365 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995538167666048e-05, + "loss": 0.2069, + "step": 366 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995433808790354e-05, + "loss": 0.2192, + "step": 367 + }, + { + "epoch": 0.12, + "learning_rate": 1.999532824382309e-05, + "loss": 0.1964, + "step": 368 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995221472776995e-05, + "loss": 0.1963, + "step": 369 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995113495664954e-05, + "loss": 0.198, + "step": 370 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995004312499997e-05, + "loss": 0.2106, + "step": 371 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994893923295296e-05, + "loss": 0.2326, + "step": 372 + }, + { + "epoch": 0.12, + "learning_rate": 1.999478232806417e-05, + "loss": 0.1855, + "step": 373 + }, + { + "epoch": 0.12, + "learning_rate": 1.999466952682009e-05, + "loss": 0.211, + "step": 374 + }, + { + "epoch": 0.12, + "learning_rate": 1.999455551957666e-05, + "loss": 0.2013, + "step": 375 + }, + { + "epoch": 0.12, + "learning_rate": 1.999444030634764e-05, + "loss": 0.1995, + "step": 376 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994323887146936e-05, + "loss": 0.2187, + "step": 377 + }, + { + "epoch": 0.12, + "learning_rate": 1.999420626198859e-05, + "loss": 0.1971, + "step": 378 + }, + { + "epoch": 0.12, + "learning_rate": 1.99940874308868e-05, + "loss": 0.2036, + "step": 379 + }, + { + "epoch": 0.12, + "learning_rate": 1.99939673938559e-05, + "loss": 0.1944, + "step": 380 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993846150910372e-05, + "loss": 0.185, + "step": 381 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993723702064852e-05, + "loss": 0.1925, + "step": 382 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993600047334113e-05, + "loss": 0.1939, + "step": 383 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993475186733078e-05, + "loss": 0.1998, + "step": 384 + }, + { + "epoch": 0.12, + "learning_rate": 1.999334912027681e-05, + "loss": 0.2102, + "step": 385 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993221847980517e-05, + "loss": 0.1909, + "step": 386 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993093369859563e-05, + "loss": 0.2202, + "step": 387 + }, + { + "epoch": 0.12, + "learning_rate": 1.999296368592945e-05, + "loss": 0.1825, + "step": 388 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992832796205824e-05, + "loss": 0.1907, + "step": 389 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992700700704478e-05, + "loss": 0.187, + "step": 390 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992567399441353e-05, + "loss": 0.1852, + "step": 391 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992432892432534e-05, + "loss": 0.1962, + "step": 392 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992297179694252e-05, + "loss": 0.1724, + "step": 393 + }, + { + "epoch": 0.13, + "learning_rate": 1.999216026124288e-05, + "loss": 0.1836, + "step": 394 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992022137094937e-05, + "loss": 0.1827, + "step": 395 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991882807267097e-05, + "loss": 0.202, + "step": 396 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991742271776164e-05, + "loss": 0.1966, + "step": 397 + }, + { + "epoch": 0.13, + "learning_rate": 1.99916005306391e-05, + "loss": 0.189, + "step": 398 + }, + { + "epoch": 0.13, + "learning_rate": 1.999145758387301e-05, + "loss": 0.1995, + "step": 399 + }, + { + "epoch": 0.13, + "learning_rate": 1.999131343149514e-05, + "loss": 0.1933, + "step": 400 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991168073522885e-05, + "loss": 0.179, + "step": 401 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991021509973783e-05, + "loss": 0.2071, + "step": 402 + }, + { + "epoch": 0.13, + "learning_rate": 1.999087374086552e-05, + "loss": 0.1988, + "step": 403 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990724766215925e-05, + "loss": 0.2076, + "step": 404 + }, + { + "epoch": 0.13, + "learning_rate": 1.999057458604298e-05, + "loss": 0.1883, + "step": 405 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990423200364794e-05, + "loss": 0.1757, + "step": 406 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990270609199647e-05, + "loss": 0.1972, + "step": 407 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990116812565944e-05, + "loss": 0.1982, + "step": 408 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989961810482246e-05, + "loss": 0.1893, + "step": 409 + }, + { + "epoch": 0.13, + "learning_rate": 1.998980560296725e-05, + "loss": 0.1827, + "step": 410 + }, + { + "epoch": 0.13, + "learning_rate": 1.998964819003981e-05, + "loss": 0.1929, + "step": 411 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989489571718924e-05, + "loss": 0.1953, + "step": 412 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989329748023728e-05, + "loss": 0.2008, + "step": 413 + }, + { + "epoch": 0.13, + "learning_rate": 1.99891687189735e-05, + "loss": 0.198, + "step": 414 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989006484587682e-05, + "loss": 0.1837, + "step": 415 + }, + { + "epoch": 0.13, + "learning_rate": 1.998884304488584e-05, + "loss": 0.1982, + "step": 416 + }, + { + "epoch": 0.13, + "learning_rate": 1.99886783998877e-05, + "loss": 0.2002, + "step": 417 + }, + { + "epoch": 0.13, + "learning_rate": 1.998851254961313e-05, + "loss": 0.1965, + "step": 418 + }, + { + "epoch": 0.13, + "learning_rate": 1.998834549408214e-05, + "loss": 0.1688, + "step": 419 + }, + { + "epoch": 0.14, + "learning_rate": 1.998817723331489e-05, + "loss": 0.1781, + "step": 420 + }, + { + "epoch": 0.14, + "learning_rate": 1.998800776733168e-05, + "loss": 0.2042, + "step": 421 + }, + { + "epoch": 0.14, + "learning_rate": 1.998783709615296e-05, + "loss": 0.1921, + "step": 422 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987665219799323e-05, + "loss": 0.195, + "step": 423 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987492138291508e-05, + "loss": 0.1926, + "step": 424 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987317851650402e-05, + "loss": 0.1775, + "step": 425 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987142359897037e-05, + "loss": 0.1965, + "step": 426 + }, + { + "epoch": 0.14, + "learning_rate": 1.998696566305258e-05, + "loss": 0.2096, + "step": 427 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986787761138363e-05, + "loss": 0.1764, + "step": 428 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986608654175842e-05, + "loss": 0.2042, + "step": 429 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986428342186636e-05, + "loss": 0.2149, + "step": 430 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986246825192502e-05, + "loss": 0.1907, + "step": 431 + }, + { + "epoch": 0.14, + "learning_rate": 1.998606410321534e-05, + "loss": 0.1858, + "step": 432 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985880176277195e-05, + "loss": 0.2059, + "step": 433 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985695044400265e-05, + "loss": 0.2012, + "step": 434 + }, + { + "epoch": 0.14, + "learning_rate": 1.998550870760689e-05, + "loss": 0.1944, + "step": 435 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985321165919552e-05, + "loss": 0.2001, + "step": 436 + }, + { + "epoch": 0.14, + "learning_rate": 1.998513241936088e-05, + "loss": 0.2072, + "step": 437 + }, + { + "epoch": 0.14, + "learning_rate": 1.998494246795365e-05, + "loss": 0.192, + "step": 438 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984751311720777e-05, + "loss": 0.2205, + "step": 439 + }, + { + "epoch": 0.14, + "learning_rate": 1.998455895068534e-05, + "loss": 0.1982, + "step": 440 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984365384870537e-05, + "loss": 0.1915, + "step": 441 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984170614299727e-05, + "loss": 0.1841, + "step": 442 + }, + { + "epoch": 0.14, + "learning_rate": 1.9983974638996417e-05, + "loss": 0.1859, + "step": 443 + }, + { + "epoch": 0.14, + "learning_rate": 1.998377745898425e-05, + "loss": 0.21, + "step": 444 + }, + { + "epoch": 0.14, + "learning_rate": 1.9983579074287024e-05, + "loss": 0.1948, + "step": 445 + }, + { + "epoch": 0.14, + "learning_rate": 1.9983379484928668e-05, + "loss": 0.2013, + "step": 446 + }, + { + "epoch": 0.14, + "learning_rate": 1.998317869093327e-05, + "loss": 0.2236, + "step": 447 + }, + { + "epoch": 0.14, + "learning_rate": 1.998297669232506e-05, + "loss": 0.1958, + "step": 448 + }, + { + "epoch": 0.14, + "learning_rate": 1.9982773489128412e-05, + "loss": 0.2068, + "step": 449 + }, + { + "epoch": 0.14, + "learning_rate": 1.9982569081367844e-05, + "loss": 0.1865, + "step": 450 + }, + { + "epoch": 0.15, + "learning_rate": 1.998236346906802e-05, + "loss": 0.2056, + "step": 451 + }, + { + "epoch": 0.15, + "learning_rate": 1.998215665225375e-05, + "loss": 0.1947, + "step": 452 + }, + { + "epoch": 0.15, + "learning_rate": 1.998194863094999e-05, + "loss": 0.1784, + "step": 453 + }, + { + "epoch": 0.15, + "learning_rate": 1.9981739405181845e-05, + "loss": 0.2001, + "step": 454 + }, + { + "epoch": 0.15, + "learning_rate": 1.9981528974974555e-05, + "loss": 0.1756, + "step": 455 + }, + { + "epoch": 0.15, + "learning_rate": 1.998131734035351e-05, + "loss": 0.1772, + "step": 456 + }, + { + "epoch": 0.15, + "learning_rate": 1.998110450134425e-05, + "loss": 0.1924, + "step": 457 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980890457972462e-05, + "loss": 0.2048, + "step": 458 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980675210263967e-05, + "loss": 0.1999, + "step": 459 + }, + { + "epoch": 0.15, + "learning_rate": 1.998045875824474e-05, + "loss": 0.1919, + "step": 460 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980241101940897e-05, + "loss": 0.1898, + "step": 461 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980022241378703e-05, + "loss": 0.1784, + "step": 462 + }, + { + "epoch": 0.15, + "learning_rate": 1.9979802176584564e-05, + "loss": 0.1801, + "step": 463 + }, + { + "epoch": 0.15, + "learning_rate": 1.9979580907585042e-05, + "loss": 0.1961, + "step": 464 + }, + { + "epoch": 0.15, + "learning_rate": 1.9979358434406826e-05, + "loss": 0.189, + "step": 465 + }, + { + "epoch": 0.15, + "learning_rate": 1.9979134757076767e-05, + "loss": 0.1796, + "step": 466 + }, + { + "epoch": 0.15, + "learning_rate": 1.997890987562185e-05, + "loss": 0.2074, + "step": 467 + }, + { + "epoch": 0.15, + "learning_rate": 1.9978683790069215e-05, + "loss": 0.1881, + "step": 468 + }, + { + "epoch": 0.15, + "learning_rate": 1.9978456500446142e-05, + "loss": 0.1865, + "step": 469 + }, + { + "epoch": 0.15, + "learning_rate": 1.9978228006780056e-05, + "loss": 0.1918, + "step": 470 + }, + { + "epoch": 0.15, + "learning_rate": 1.9977998309098527e-05, + "loss": 0.172, + "step": 471 + }, + { + "epoch": 0.15, + "learning_rate": 1.9977767407429268e-05, + "loss": 0.1736, + "step": 472 + }, + { + "epoch": 0.15, + "learning_rate": 1.9977535301800145e-05, + "loss": 0.1897, + "step": 473 + }, + { + "epoch": 0.15, + "learning_rate": 1.9977301992239167e-05, + "loss": 0.1809, + "step": 474 + }, + { + "epoch": 0.15, + "learning_rate": 1.9977067478774483e-05, + "loss": 0.2078, + "step": 475 + }, + { + "epoch": 0.15, + "learning_rate": 1.997683176143439e-05, + "loss": 0.2124, + "step": 476 + }, + { + "epoch": 0.15, + "learning_rate": 1.997659484024733e-05, + "loss": 0.196, + "step": 477 + }, + { + "epoch": 0.15, + "learning_rate": 1.9976356715241892e-05, + "loss": 0.1863, + "step": 478 + }, + { + "epoch": 0.15, + "learning_rate": 1.9976117386446812e-05, + "loss": 0.2035, + "step": 479 + }, + { + "epoch": 0.15, + "learning_rate": 1.997587685389096e-05, + "loss": 0.202, + "step": 480 + }, + { + "epoch": 0.15, + "learning_rate": 1.997563511760337e-05, + "loss": 0.1755, + "step": 481 + }, + { + "epoch": 0.16, + "learning_rate": 1.9975392177613207e-05, + "loss": 0.2002, + "step": 482 + }, + { + "epoch": 0.16, + "learning_rate": 1.997514803394978e-05, + "loss": 0.1945, + "step": 483 + }, + { + "epoch": 0.16, + "learning_rate": 1.997490268664256e-05, + "loss": 0.1974, + "step": 484 + }, + { + "epoch": 0.16, + "learning_rate": 1.997465613572114e-05, + "loss": 0.1964, + "step": 485 + }, + { + "epoch": 0.16, + "learning_rate": 1.9974408381215277e-05, + "loss": 0.2335, + "step": 486 + }, + { + "epoch": 0.16, + "learning_rate": 1.997415942315486e-05, + "loss": 0.1836, + "step": 487 + }, + { + "epoch": 0.16, + "learning_rate": 1.9973909261569934e-05, + "loss": 0.1963, + "step": 488 + }, + { + "epoch": 0.16, + "learning_rate": 1.9973657896490687e-05, + "loss": 0.1856, + "step": 489 + }, + { + "epoch": 0.16, + "learning_rate": 1.9973405327947445e-05, + "loss": 0.1649, + "step": 490 + }, + { + "epoch": 0.16, + "learning_rate": 1.997315155597068e-05, + "loss": 0.1861, + "step": 491 + }, + { + "epoch": 0.16, + "learning_rate": 1.9972896580591025e-05, + "loss": 0.204, + "step": 492 + }, + { + "epoch": 0.16, + "learning_rate": 1.9972640401839235e-05, + "loss": 0.1766, + "step": 493 + }, + { + "epoch": 0.16, + "learning_rate": 1.997238301974623e-05, + "loss": 0.1924, + "step": 494 + }, + { + "epoch": 0.16, + "learning_rate": 1.997212443434306e-05, + "loss": 0.172, + "step": 495 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971864645660933e-05, + "loss": 0.1876, + "step": 496 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971603653731194e-05, + "loss": 0.1829, + "step": 497 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971341458585334e-05, + "loss": 0.1871, + "step": 498 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971078060254992e-05, + "loss": 0.1701, + "step": 499 + }, + { + "epoch": 0.16, + "learning_rate": 1.997081345877195e-05, + "loss": 0.189, + "step": 500 + }, + { + "epoch": 0.16, + "learning_rate": 1.9970547654168136e-05, + "loss": 0.211, + "step": 501 + }, + { + "epoch": 0.16, + "learning_rate": 1.9970280646475623e-05, + "loss": 0.1922, + "step": 502 + }, + { + "epoch": 0.16, + "learning_rate": 1.9970012435726633e-05, + "loss": 0.2048, + "step": 503 + }, + { + "epoch": 0.16, + "learning_rate": 1.996974302195352e-05, + "loss": 0.2118, + "step": 504 + }, + { + "epoch": 0.16, + "learning_rate": 1.99694724051888e-05, + "loss": 0.1875, + "step": 505 + }, + { + "epoch": 0.16, + "learning_rate": 1.996920058546513e-05, + "loss": 0.1984, + "step": 506 + }, + { + "epoch": 0.16, + "learning_rate": 1.9968927562815302e-05, + "loss": 0.2087, + "step": 507 + }, + { + "epoch": 0.16, + "learning_rate": 1.9968653337272262e-05, + "loss": 0.2012, + "step": 508 + }, + { + "epoch": 0.16, + "learning_rate": 1.99683779088691e-05, + "loss": 0.1933, + "step": 509 + }, + { + "epoch": 0.16, + "learning_rate": 1.996810127763905e-05, + "loss": 0.1783, + "step": 510 + }, + { + "epoch": 0.16, + "learning_rate": 1.996782344361549e-05, + "loss": 0.1902, + "step": 511 + }, + { + "epoch": 0.16, + "learning_rate": 1.996754440683195e-05, + "loss": 0.1873, + "step": 512 + }, + { + "epoch": 0.17, + "learning_rate": 1.996726416732209e-05, + "loss": 0.1921, + "step": 513 + }, + { + "epoch": 0.17, + "learning_rate": 1.9966982725119737e-05, + "loss": 0.1849, + "step": 514 + }, + { + "epoch": 0.17, + "learning_rate": 1.996670008025884e-05, + "loss": 0.1965, + "step": 515 + }, + { + "epoch": 0.17, + "learning_rate": 1.9966416232773513e-05, + "loss": 0.1958, + "step": 516 + }, + { + "epoch": 0.17, + "learning_rate": 1.9966131182698e-05, + "loss": 0.185, + "step": 517 + }, + { + "epoch": 0.17, + "learning_rate": 1.99658449300667e-05, + "loss": 0.2054, + "step": 518 + }, + { + "epoch": 0.17, + "learning_rate": 1.996555747491415e-05, + "loss": 0.196, + "step": 519 + }, + { + "epoch": 0.17, + "learning_rate": 1.996526881727504e-05, + "loss": 0.1728, + "step": 520 + }, + { + "epoch": 0.17, + "learning_rate": 1.9964978957184198e-05, + "loss": 0.1826, + "step": 521 + }, + { + "epoch": 0.17, + "learning_rate": 1.99646878946766e-05, + "loss": 0.1841, + "step": 522 + }, + { + "epoch": 0.17, + "learning_rate": 1.9964395629787364e-05, + "loss": 0.1892, + "step": 523 + }, + { + "epoch": 0.17, + "learning_rate": 1.996410216255176e-05, + "loss": 0.1991, + "step": 524 + }, + { + "epoch": 0.17, + "learning_rate": 1.99638074930052e-05, + "loss": 0.1889, + "step": 525 + }, + { + "epoch": 0.17, + "learning_rate": 1.9963511621183236e-05, + "loss": 0.201, + "step": 526 + }, + { + "epoch": 0.17, + "learning_rate": 1.9963214547121573e-05, + "loss": 0.1908, + "step": 527 + }, + { + "epoch": 0.17, + "learning_rate": 1.9962916270856053e-05, + "loss": 0.1856, + "step": 528 + }, + { + "epoch": 0.17, + "learning_rate": 1.996261679242267e-05, + "loss": 0.1799, + "step": 529 + }, + { + "epoch": 0.17, + "learning_rate": 1.996231611185756e-05, + "loss": 0.2193, + "step": 530 + }, + { + "epoch": 0.17, + "learning_rate": 1.9962014229197003e-05, + "loss": 0.1859, + "step": 531 + }, + { + "epoch": 0.17, + "learning_rate": 1.996171114447743e-05, + "loss": 0.1789, + "step": 532 + }, + { + "epoch": 0.17, + "learning_rate": 1.9961406857735406e-05, + "loss": 0.1989, + "step": 533 + }, + { + "epoch": 0.17, + "learning_rate": 1.9961101369007654e-05, + "loss": 0.1857, + "step": 534 + }, + { + "epoch": 0.17, + "learning_rate": 1.996079467833103e-05, + "loss": 0.1954, + "step": 535 + }, + { + "epoch": 0.17, + "learning_rate": 1.9960486785742543e-05, + "loss": 0.1875, + "step": 536 + }, + { + "epoch": 0.17, + "learning_rate": 1.9960177691279346e-05, + "loss": 0.1937, + "step": 537 + }, + { + "epoch": 0.17, + "learning_rate": 1.9959867394978732e-05, + "loss": 0.1965, + "step": 538 + }, + { + "epoch": 0.17, + "learning_rate": 1.9959555896878147e-05, + "loss": 0.2058, + "step": 539 + }, + { + "epoch": 0.17, + "learning_rate": 1.9959243197015172e-05, + "loss": 0.1706, + "step": 540 + }, + { + "epoch": 0.17, + "learning_rate": 1.9958929295427544e-05, + "loss": 0.1929, + "step": 541 + }, + { + "epoch": 0.17, + "learning_rate": 1.9958614192153138e-05, + "loss": 0.1816, + "step": 542 + }, + { + "epoch": 0.17, + "learning_rate": 1.9958297887229977e-05, + "loss": 0.1863, + "step": 543 + }, + { + "epoch": 0.17, + "learning_rate": 1.9957980380696227e-05, + "loss": 0.1751, + "step": 544 + }, + { + "epoch": 0.18, + "learning_rate": 1.99576616725902e-05, + "loss": 0.1747, + "step": 545 + }, + { + "epoch": 0.18, + "learning_rate": 1.9957341762950346e-05, + "loss": 0.18, + "step": 546 + }, + { + "epoch": 0.18, + "learning_rate": 1.9957020651815275e-05, + "loss": 0.1694, + "step": 547 + }, + { + "epoch": 0.18, + "learning_rate": 1.9956698339223735e-05, + "loss": 0.1918, + "step": 548 + }, + { + "epoch": 0.18, + "learning_rate": 1.9956374825214608e-05, + "loss": 0.1955, + "step": 549 + }, + { + "epoch": 0.18, + "learning_rate": 1.995605010982694e-05, + "loss": 0.1896, + "step": 550 + }, + { + "epoch": 0.18, + "learning_rate": 1.9955724193099905e-05, + "loss": 0.1836, + "step": 551 + }, + { + "epoch": 0.18, + "learning_rate": 1.995539707507284e-05, + "loss": 0.2087, + "step": 552 + }, + { + "epoch": 0.18, + "learning_rate": 1.9955068755785202e-05, + "loss": 0.1736, + "step": 553 + }, + { + "epoch": 0.18, + "learning_rate": 1.995473923527662e-05, + "loss": 0.1665, + "step": 554 + }, + { + "epoch": 0.18, + "learning_rate": 1.9954408513586845e-05, + "loss": 0.1831, + "step": 555 + }, + { + "epoch": 0.18, + "learning_rate": 1.9954076590755794e-05, + "loss": 0.1853, + "step": 556 + }, + { + "epoch": 0.18, + "learning_rate": 1.9953743466823513e-05, + "loss": 0.1857, + "step": 557 + }, + { + "epoch": 0.18, + "learning_rate": 1.9953409141830194e-05, + "loss": 0.1879, + "step": 558 + }, + { + "epoch": 0.18, + "learning_rate": 1.9953073615816185e-05, + "loss": 0.1747, + "step": 559 + }, + { + "epoch": 0.18, + "learning_rate": 1.995273688882197e-05, + "loss": 0.2001, + "step": 560 + }, + { + "epoch": 0.18, + "learning_rate": 1.995239896088818e-05, + "loss": 0.1965, + "step": 561 + }, + { + "epoch": 0.18, + "learning_rate": 1.9952059832055585e-05, + "loss": 0.2, + "step": 562 + }, + { + "epoch": 0.18, + "learning_rate": 1.9951719502365113e-05, + "loss": 0.1814, + "step": 563 + }, + { + "epoch": 0.18, + "learning_rate": 1.995137797185783e-05, + "loss": 0.1865, + "step": 564 + }, + { + "epoch": 0.18, + "learning_rate": 1.995103524057494e-05, + "loss": 0.2209, + "step": 565 + }, + { + "epoch": 0.18, + "learning_rate": 1.9950691308557806e-05, + "loss": 0.1823, + "step": 566 + }, + { + "epoch": 0.18, + "learning_rate": 1.995034617584792e-05, + "loss": 0.1859, + "step": 567 + }, + { + "epoch": 0.18, + "learning_rate": 1.9949999842486933e-05, + "loss": 0.1585, + "step": 568 + }, + { + "epoch": 0.18, + "learning_rate": 1.9949652308516635e-05, + "loss": 0.1714, + "step": 569 + }, + { + "epoch": 0.18, + "learning_rate": 1.9949303573978958e-05, + "loss": 0.1843, + "step": 570 + }, + { + "epoch": 0.18, + "learning_rate": 1.9948953638915986e-05, + "loss": 0.1935, + "step": 571 + }, + { + "epoch": 0.18, + "learning_rate": 1.9948602503369942e-05, + "loss": 0.1942, + "step": 572 + }, + { + "epoch": 0.18, + "learning_rate": 1.994825016738319e-05, + "loss": 0.1782, + "step": 573 + }, + { + "epoch": 0.18, + "learning_rate": 1.9947896630998253e-05, + "loss": 0.1957, + "step": 574 + }, + { + "epoch": 0.18, + "learning_rate": 1.9947541894257782e-05, + "loss": 0.1777, + "step": 575 + }, + { + "epoch": 0.19, + "learning_rate": 1.9947185957204588e-05, + "loss": 0.1871, + "step": 576 + }, + { + "epoch": 0.19, + "learning_rate": 1.9946828819881618e-05, + "loss": 0.1774, + "step": 577 + }, + { + "epoch": 0.19, + "learning_rate": 1.9946470482331962e-05, + "loss": 0.1829, + "step": 578 + }, + { + "epoch": 0.19, + "learning_rate": 1.9946110944598864e-05, + "loss": 0.1911, + "step": 579 + }, + { + "epoch": 0.19, + "learning_rate": 1.9945750206725704e-05, + "loss": 0.2101, + "step": 580 + }, + { + "epoch": 0.19, + "learning_rate": 1.994538826875601e-05, + "loss": 0.1873, + "step": 581 + }, + { + "epoch": 0.19, + "learning_rate": 1.994502513073346e-05, + "loss": 0.1896, + "step": 582 + }, + { + "epoch": 0.19, + "learning_rate": 1.994466079270186e-05, + "loss": 0.1947, + "step": 583 + }, + { + "epoch": 0.19, + "learning_rate": 1.9944295254705187e-05, + "loss": 0.1782, + "step": 584 + }, + { + "epoch": 0.19, + "learning_rate": 1.9943928516787538e-05, + "loss": 0.194, + "step": 585 + }, + { + "epoch": 0.19, + "learning_rate": 1.994356057899317e-05, + "loss": 0.1987, + "step": 586 + }, + { + "epoch": 0.19, + "learning_rate": 1.9943191441366478e-05, + "loss": 0.2076, + "step": 587 + }, + { + "epoch": 0.19, + "learning_rate": 1.9942821103952002e-05, + "loss": 0.1972, + "step": 588 + }, + { + "epoch": 0.19, + "learning_rate": 1.9942449566794436e-05, + "loss": 0.1958, + "step": 589 + }, + { + "epoch": 0.19, + "learning_rate": 1.99420768299386e-05, + "loss": 0.185, + "step": 590 + }, + { + "epoch": 0.19, + "learning_rate": 1.9941702893429483e-05, + "loss": 0.1884, + "step": 591 + }, + { + "epoch": 0.19, + "learning_rate": 1.9941327757312194e-05, + "loss": 0.1841, + "step": 592 + }, + { + "epoch": 0.19, + "learning_rate": 1.9940951421632005e-05, + "loss": 0.1689, + "step": 593 + }, + { + "epoch": 0.19, + "learning_rate": 1.9940573886434324e-05, + "loss": 0.1911, + "step": 594 + }, + { + "epoch": 0.19, + "learning_rate": 1.9940195151764708e-05, + "loss": 0.1692, + "step": 595 + }, + { + "epoch": 0.19, + "learning_rate": 1.9939815217668856e-05, + "loss": 0.1961, + "step": 596 + }, + { + "epoch": 0.19, + "learning_rate": 1.993943408419261e-05, + "loss": 0.1932, + "step": 597 + }, + { + "epoch": 0.19, + "learning_rate": 1.993905175138196e-05, + "loss": 0.1889, + "step": 598 + }, + { + "epoch": 0.19, + "learning_rate": 1.9938668219283042e-05, + "loss": 0.184, + "step": 599 + }, + { + "epoch": 0.19, + "learning_rate": 1.9938283487942137e-05, + "loss": 0.1677, + "step": 600 + }, + { + "epoch": 0.19, + "learning_rate": 1.993789755740566e-05, + "loss": 0.1691, + "step": 601 + }, + { + "epoch": 0.19, + "learning_rate": 1.9937510427720187e-05, + "loss": 0.1904, + "step": 602 + }, + { + "epoch": 0.19, + "learning_rate": 1.9937122098932428e-05, + "loss": 0.1897, + "step": 603 + }, + { + "epoch": 0.19, + "learning_rate": 1.993673257108924e-05, + "loss": 0.1778, + "step": 604 + }, + { + "epoch": 0.19, + "learning_rate": 1.9936341844237623e-05, + "loss": 0.2014, + "step": 605 + }, + { + "epoch": 0.19, + "learning_rate": 1.9935949918424727e-05, + "loss": 0.2116, + "step": 606 + }, + { + "epoch": 0.2, + "learning_rate": 1.993555679369784e-05, + "loss": 0.1957, + "step": 607 + }, + { + "epoch": 0.2, + "learning_rate": 1.99351624701044e-05, + "loss": 0.1959, + "step": 608 + }, + { + "epoch": 0.2, + "learning_rate": 1.9934766947691996e-05, + "loss": 0.1951, + "step": 609 + }, + { + "epoch": 0.2, + "learning_rate": 1.9934370226508338e-05, + "loss": 0.2061, + "step": 610 + }, + { + "epoch": 0.2, + "learning_rate": 1.9933972306601308e-05, + "loss": 0.1944, + "step": 611 + }, + { + "epoch": 0.2, + "learning_rate": 1.9933573188018913e-05, + "loss": 0.1976, + "step": 612 + }, + { + "epoch": 0.2, + "learning_rate": 1.993317287080932e-05, + "loss": 0.1811, + "step": 613 + }, + { + "epoch": 0.2, + "learning_rate": 1.9932771355020826e-05, + "loss": 0.1812, + "step": 614 + }, + { + "epoch": 0.2, + "learning_rate": 1.993236864070188e-05, + "loss": 0.1966, + "step": 615 + }, + { + "epoch": 0.2, + "learning_rate": 1.993196472790108e-05, + "loss": 0.1948, + "step": 616 + }, + { + "epoch": 0.2, + "learning_rate": 1.9931559616667164e-05, + "loss": 0.1988, + "step": 617 + }, + { + "epoch": 0.2, + "learning_rate": 1.9931153307049008e-05, + "loss": 0.1737, + "step": 618 + }, + { + "epoch": 0.2, + "learning_rate": 1.9930745799095645e-05, + "loss": 0.1974, + "step": 619 + }, + { + "epoch": 0.2, + "learning_rate": 1.9930337092856243e-05, + "loss": 0.1924, + "step": 620 + }, + { + "epoch": 0.2, + "learning_rate": 1.992992718838012e-05, + "loss": 0.1745, + "step": 621 + }, + { + "epoch": 0.2, + "learning_rate": 1.9929516085716736e-05, + "loss": 0.1805, + "step": 622 + }, + { + "epoch": 0.2, + "learning_rate": 1.9929103784915698e-05, + "loss": 0.1835, + "step": 623 + }, + { + "epoch": 0.2, + "learning_rate": 1.9928690286026754e-05, + "loss": 0.2055, + "step": 624 + }, + { + "epoch": 0.2, + "learning_rate": 1.9928275589099802e-05, + "loss": 0.2124, + "step": 625 + }, + { + "epoch": 0.2, + "learning_rate": 1.9927859694184873e-05, + "loss": 0.174, + "step": 626 + }, + { + "epoch": 0.2, + "learning_rate": 1.9927442601332162e-05, + "loss": 0.1907, + "step": 627 + }, + { + "epoch": 0.2, + "learning_rate": 1.9927024310591992e-05, + "loss": 0.1756, + "step": 628 + }, + { + "epoch": 0.2, + "learning_rate": 1.9926604822014834e-05, + "loss": 0.2038, + "step": 629 + }, + { + "epoch": 0.2, + "learning_rate": 1.9926184135651302e-05, + "loss": 0.1673, + "step": 630 + }, + { + "epoch": 0.2, + "learning_rate": 1.9925762251552164e-05, + "loss": 0.2008, + "step": 631 + }, + { + "epoch": 0.2, + "learning_rate": 1.9925339169768327e-05, + "loss": 0.1848, + "step": 632 + }, + { + "epoch": 0.2, + "learning_rate": 1.992491489035084e-05, + "loss": 0.1897, + "step": 633 + }, + { + "epoch": 0.2, + "learning_rate": 1.9924489413350894e-05, + "loss": 0.1641, + "step": 634 + }, + { + "epoch": 0.2, + "learning_rate": 1.9924062738819834e-05, + "loss": 0.1734, + "step": 635 + }, + { + "epoch": 0.2, + "learning_rate": 1.9923634866809144e-05, + "loss": 0.1832, + "step": 636 + }, + { + "epoch": 0.2, + "learning_rate": 1.992320579737045e-05, + "loss": 0.1751, + "step": 637 + }, + { + "epoch": 0.21, + "learning_rate": 1.9922775530555533e-05, + "loss": 0.1719, + "step": 638 + }, + { + "epoch": 0.21, + "learning_rate": 1.9922344066416298e-05, + "loss": 0.1795, + "step": 639 + }, + { + "epoch": 0.21, + "learning_rate": 1.9921911405004816e-05, + "loss": 0.205, + "step": 640 + }, + { + "epoch": 0.21, + "learning_rate": 1.9921477546373296e-05, + "loss": 0.1679, + "step": 641 + }, + { + "epoch": 0.21, + "learning_rate": 1.992104249057408e-05, + "loss": 0.1655, + "step": 642 + }, + { + "epoch": 0.21, + "learning_rate": 1.9920606237659674e-05, + "loss": 0.1712, + "step": 643 + }, + { + "epoch": 0.21, + "learning_rate": 1.992016878768271e-05, + "loss": 0.1747, + "step": 644 + }, + { + "epoch": 0.21, + "learning_rate": 1.9919730140695978e-05, + "loss": 0.1746, + "step": 645 + }, + { + "epoch": 0.21, + "learning_rate": 1.99192902967524e-05, + "loss": 0.1689, + "step": 646 + }, + { + "epoch": 0.21, + "learning_rate": 1.991884925590506e-05, + "loss": 0.1758, + "step": 647 + }, + { + "epoch": 0.21, + "learning_rate": 1.9918407018207168e-05, + "loss": 0.1616, + "step": 648 + }, + { + "epoch": 0.21, + "learning_rate": 1.9917963583712085e-05, + "loss": 0.1966, + "step": 649 + }, + { + "epoch": 0.21, + "learning_rate": 1.9917518952473326e-05, + "loss": 0.1931, + "step": 650 + }, + { + "epoch": 0.21, + "learning_rate": 1.9917073124544535e-05, + "loss": 0.1884, + "step": 651 + }, + { + "epoch": 0.21, + "learning_rate": 1.991662609997951e-05, + "loss": 0.1625, + "step": 652 + }, + { + "epoch": 0.21, + "learning_rate": 1.9916177878832185e-05, + "loss": 0.1861, + "step": 653 + }, + { + "epoch": 0.21, + "learning_rate": 1.991572846115666e-05, + "loss": 0.1708, + "step": 654 + }, + { + "epoch": 0.21, + "learning_rate": 1.9915277847007143e-05, + "loss": 0.1738, + "step": 655 + }, + { + "epoch": 0.21, + "learning_rate": 1.9914826036438025e-05, + "loss": 0.1735, + "step": 656 + }, + { + "epoch": 0.21, + "learning_rate": 1.9914373029503814e-05, + "loss": 0.2124, + "step": 657 + }, + { + "epoch": 0.21, + "learning_rate": 1.991391882625917e-05, + "loss": 0.1914, + "step": 658 + }, + { + "epoch": 0.21, + "learning_rate": 1.9913463426758906e-05, + "loss": 0.1958, + "step": 659 + }, + { + "epoch": 0.21, + "learning_rate": 1.9913006831057967e-05, + "loss": 0.158, + "step": 660 + }, + { + "epoch": 0.21, + "learning_rate": 1.9912549039211453e-05, + "loss": 0.1928, + "step": 661 + }, + { + "epoch": 0.21, + "learning_rate": 1.99120900512746e-05, + "loss": 0.1833, + "step": 662 + }, + { + "epoch": 0.21, + "learning_rate": 1.991162986730279e-05, + "loss": 0.1955, + "step": 663 + }, + { + "epoch": 0.21, + "learning_rate": 1.9911168487351552e-05, + "loss": 0.1996, + "step": 664 + }, + { + "epoch": 0.21, + "learning_rate": 1.9910705911476558e-05, + "loss": 0.1987, + "step": 665 + }, + { + "epoch": 0.21, + "learning_rate": 1.9910242139733625e-05, + "loss": 0.1917, + "step": 666 + }, + { + "epoch": 0.21, + "learning_rate": 1.9909777172178712e-05, + "loss": 0.1733, + "step": 667 + }, + { + "epoch": 0.21, + "learning_rate": 1.9909311008867926e-05, + "loss": 0.1862, + "step": 668 + }, + { + "epoch": 0.22, + "learning_rate": 1.9908843649857517e-05, + "loss": 0.1824, + "step": 669 + }, + { + "epoch": 0.22, + "learning_rate": 1.9908375095203876e-05, + "loss": 0.1672, + "step": 670 + }, + { + "epoch": 0.22, + "learning_rate": 1.990790534496354e-05, + "loss": 0.2258, + "step": 671 + }, + { + "epoch": 0.22, + "learning_rate": 1.9907434399193196e-05, + "loss": 0.1873, + "step": 672 + }, + { + "epoch": 0.22, + "learning_rate": 1.9906962257949668e-05, + "loss": 0.1858, + "step": 673 + }, + { + "epoch": 0.22, + "learning_rate": 1.990648892128992e-05, + "loss": 0.1767, + "step": 674 + }, + { + "epoch": 0.22, + "learning_rate": 1.9906014389271078e-05, + "loss": 0.1845, + "step": 675 + }, + { + "epoch": 0.22, + "learning_rate": 1.9905538661950395e-05, + "loss": 0.189, + "step": 676 + }, + { + "epoch": 0.22, + "learning_rate": 1.990506173938527e-05, + "loss": 0.1779, + "step": 677 + }, + { + "epoch": 0.22, + "learning_rate": 1.990458362163326e-05, + "loss": 0.1786, + "step": 678 + }, + { + "epoch": 0.22, + "learning_rate": 1.9904104308752053e-05, + "loss": 0.1965, + "step": 679 + }, + { + "epoch": 0.22, + "learning_rate": 1.9903623800799483e-05, + "loss": 0.1728, + "step": 680 + }, + { + "epoch": 0.22, + "learning_rate": 1.990314209783353e-05, + "loss": 0.1905, + "step": 681 + }, + { + "epoch": 0.22, + "learning_rate": 1.990265919991232e-05, + "loss": 0.1923, + "step": 682 + }, + { + "epoch": 0.22, + "learning_rate": 1.990217510709412e-05, + "loss": 0.1768, + "step": 683 + }, + { + "epoch": 0.22, + "learning_rate": 1.9901689819437345e-05, + "loss": 0.1947, + "step": 684 + }, + { + "epoch": 0.22, + "learning_rate": 1.9901203337000556e-05, + "loss": 0.1871, + "step": 685 + }, + { + "epoch": 0.22, + "learning_rate": 1.990071565984244e-05, + "loss": 0.1754, + "step": 686 + }, + { + "epoch": 0.22, + "learning_rate": 1.9900226788021853e-05, + "loss": 0.1833, + "step": 687 + }, + { + "epoch": 0.22, + "learning_rate": 1.9899736721597787e-05, + "loss": 0.2002, + "step": 688 + }, + { + "epoch": 0.22, + "learning_rate": 1.9899245460629365e-05, + "loss": 0.1618, + "step": 689 + }, + { + "epoch": 0.22, + "learning_rate": 1.9898753005175874e-05, + "loss": 0.1782, + "step": 690 + }, + { + "epoch": 0.22, + "learning_rate": 1.9898259355296728e-05, + "loss": 0.1924, + "step": 691 + }, + { + "epoch": 0.22, + "learning_rate": 1.9897764511051504e-05, + "loss": 0.2008, + "step": 692 + }, + { + "epoch": 0.22, + "learning_rate": 1.98972684724999e-05, + "loss": 0.1827, + "step": 693 + }, + { + "epoch": 0.22, + "learning_rate": 1.9896771239701772e-05, + "loss": 0.1908, + "step": 694 + }, + { + "epoch": 0.22, + "learning_rate": 1.9896272812717127e-05, + "loss": 0.1579, + "step": 695 + }, + { + "epoch": 0.22, + "learning_rate": 1.9895773191606103e-05, + "loss": 0.1814, + "step": 696 + }, + { + "epoch": 0.22, + "learning_rate": 1.9895272376428983e-05, + "loss": 0.1742, + "step": 697 + }, + { + "epoch": 0.22, + "learning_rate": 1.9894770367246197e-05, + "loss": 0.1677, + "step": 698 + }, + { + "epoch": 0.22, + "learning_rate": 1.9894267164118322e-05, + "loss": 0.1654, + "step": 699 + }, + { + "epoch": 0.23, + "learning_rate": 1.989376276710608e-05, + "loss": 0.1761, + "step": 700 + }, + { + "epoch": 0.23, + "learning_rate": 1.989325717627033e-05, + "loss": 0.1956, + "step": 701 + }, + { + "epoch": 0.23, + "learning_rate": 1.989275039167208e-05, + "loss": 0.1938, + "step": 702 + }, + { + "epoch": 0.23, + "learning_rate": 1.989224241337248e-05, + "loss": 0.1797, + "step": 703 + }, + { + "epoch": 0.23, + "learning_rate": 1.989173324143282e-05, + "loss": 0.1734, + "step": 704 + }, + { + "epoch": 0.23, + "learning_rate": 1.989122287591455e-05, + "loss": 0.171, + "step": 705 + }, + { + "epoch": 0.23, + "learning_rate": 1.9890711316879243e-05, + "loss": 0.1768, + "step": 706 + }, + { + "epoch": 0.23, + "learning_rate": 1.989019856438863e-05, + "loss": 0.1896, + "step": 707 + }, + { + "epoch": 0.23, + "learning_rate": 1.988968461850458e-05, + "loss": 0.1757, + "step": 708 + }, + { + "epoch": 0.23, + "learning_rate": 1.9889169479289112e-05, + "loss": 0.1564, + "step": 709 + }, + { + "epoch": 0.23, + "learning_rate": 1.988865314680438e-05, + "loss": 0.2034, + "step": 710 + }, + { + "epoch": 0.23, + "learning_rate": 1.988813562111269e-05, + "loss": 0.1907, + "step": 711 + }, + { + "epoch": 0.23, + "learning_rate": 1.988761690227649e-05, + "loss": 0.1859, + "step": 712 + }, + { + "epoch": 0.23, + "learning_rate": 1.9887096990358366e-05, + "loss": 0.1615, + "step": 713 + }, + { + "epoch": 0.23, + "learning_rate": 1.9886575885421055e-05, + "loss": 0.1737, + "step": 714 + }, + { + "epoch": 0.23, + "learning_rate": 1.988605358752744e-05, + "loss": 0.1927, + "step": 715 + }, + { + "epoch": 0.23, + "learning_rate": 1.9885530096740538e-05, + "loss": 0.18, + "step": 716 + }, + { + "epoch": 0.23, + "learning_rate": 1.9885005413123515e-05, + "loss": 0.1822, + "step": 717 + }, + { + "epoch": 0.23, + "learning_rate": 1.9884479536739688e-05, + "loss": 0.1769, + "step": 718 + }, + { + "epoch": 0.23, + "learning_rate": 1.9883952467652504e-05, + "loss": 0.1731, + "step": 719 + }, + { + "epoch": 0.23, + "learning_rate": 1.9883424205925567e-05, + "loss": 0.1969, + "step": 720 + }, + { + "epoch": 0.23, + "learning_rate": 1.9882894751622613e-05, + "loss": 0.1666, + "step": 721 + }, + { + "epoch": 0.23, + "learning_rate": 1.9882364104807536e-05, + "loss": 0.1715, + "step": 722 + }, + { + "epoch": 0.23, + "learning_rate": 1.9881832265544366e-05, + "loss": 0.1808, + "step": 723 + }, + { + "epoch": 0.23, + "learning_rate": 1.988129923389727e-05, + "loss": 0.1813, + "step": 724 + }, + { + "epoch": 0.23, + "learning_rate": 1.988076500993057e-05, + "loss": 0.1745, + "step": 725 + }, + { + "epoch": 0.23, + "learning_rate": 1.9880229593708726e-05, + "loss": 0.181, + "step": 726 + }, + { + "epoch": 0.23, + "learning_rate": 1.9879692985296345e-05, + "loss": 0.1749, + "step": 727 + }, + { + "epoch": 0.23, + "learning_rate": 1.9879155184758175e-05, + "loss": 0.1809, + "step": 728 + }, + { + "epoch": 0.23, + "learning_rate": 1.9878616192159118e-05, + "loss": 0.1686, + "step": 729 + }, + { + "epoch": 0.23, + "learning_rate": 1.9878076007564197e-05, + "loss": 0.1787, + "step": 730 + }, + { + "epoch": 0.24, + "learning_rate": 1.98775346310386e-05, + "loss": 0.1682, + "step": 731 + }, + { + "epoch": 0.24, + "learning_rate": 1.9876992062647652e-05, + "loss": 0.1525, + "step": 732 + }, + { + "epoch": 0.24, + "learning_rate": 1.987644830245682e-05, + "loss": 0.1786, + "step": 733 + }, + { + "epoch": 0.24, + "learning_rate": 1.987590335053172e-05, + "loss": 0.1813, + "step": 734 + }, + { + "epoch": 0.24, + "learning_rate": 1.9875357206938103e-05, + "loss": 0.1904, + "step": 735 + }, + { + "epoch": 0.24, + "learning_rate": 1.9874809871741877e-05, + "loss": 0.1973, + "step": 736 + }, + { + "epoch": 0.24, + "learning_rate": 1.9874261345009076e-05, + "loss": 0.1661, + "step": 737 + }, + { + "epoch": 0.24, + "learning_rate": 1.987371162680589e-05, + "loss": 0.1915, + "step": 738 + }, + { + "epoch": 0.24, + "learning_rate": 1.9873160717198655e-05, + "loss": 0.1676, + "step": 739 + }, + { + "epoch": 0.24, + "learning_rate": 1.9872608616253846e-05, + "loss": 0.1751, + "step": 740 + }, + { + "epoch": 0.24, + "learning_rate": 1.9872055324038078e-05, + "loss": 0.1697, + "step": 741 + }, + { + "epoch": 0.24, + "learning_rate": 1.987150084061811e-05, + "loss": 0.1677, + "step": 742 + }, + { + "epoch": 0.24, + "learning_rate": 1.987094516606086e-05, + "loss": 0.1787, + "step": 743 + }, + { + "epoch": 0.24, + "learning_rate": 1.9870388300433366e-05, + "loss": 0.1796, + "step": 744 + }, + { + "epoch": 0.24, + "learning_rate": 1.986983024380283e-05, + "loss": 0.1867, + "step": 745 + }, + { + "epoch": 0.24, + "learning_rate": 1.9869270996236586e-05, + "loss": 0.1742, + "step": 746 + }, + { + "epoch": 0.24, + "learning_rate": 1.9868710557802115e-05, + "loss": 0.1504, + "step": 747 + }, + { + "epoch": 0.24, + "learning_rate": 1.9868148928567046e-05, + "loss": 0.1967, + "step": 748 + }, + { + "epoch": 0.24, + "learning_rate": 1.9867586108599142e-05, + "loss": 0.1625, + "step": 749 + }, + { + "epoch": 0.24, + "learning_rate": 1.9867022097966314e-05, + "loss": 0.1553, + "step": 750 + }, + { + "epoch": 0.24, + "learning_rate": 1.9866456896736627e-05, + "loss": 0.1768, + "step": 751 + }, + { + "epoch": 0.24, + "learning_rate": 1.9865890504978273e-05, + "loss": 0.1746, + "step": 752 + }, + { + "epoch": 0.24, + "learning_rate": 1.9865322922759595e-05, + "loss": 0.1806, + "step": 753 + }, + { + "epoch": 0.24, + "learning_rate": 1.9864754150149086e-05, + "loss": 0.1923, + "step": 754 + }, + { + "epoch": 0.24, + "learning_rate": 1.986418418721537e-05, + "loss": 0.1828, + "step": 755 + }, + { + "epoch": 0.24, + "learning_rate": 1.9863613034027224e-05, + "loss": 0.1829, + "step": 756 + }, + { + "epoch": 0.24, + "learning_rate": 1.9863040690653567e-05, + "loss": 0.1687, + "step": 757 + }, + { + "epoch": 0.24, + "learning_rate": 1.9862467157163458e-05, + "loss": 0.1754, + "step": 758 + }, + { + "epoch": 0.24, + "learning_rate": 1.98618924336261e-05, + "loss": 0.1958, + "step": 759 + }, + { + "epoch": 0.24, + "learning_rate": 1.986131652011085e-05, + "loss": 0.1769, + "step": 760 + }, + { + "epoch": 0.24, + "learning_rate": 1.9860739416687194e-05, + "loss": 0.1833, + "step": 761 + }, + { + "epoch": 0.25, + "learning_rate": 1.9860161123424766e-05, + "loss": 0.183, + "step": 762 + }, + { + "epoch": 0.25, + "learning_rate": 1.9859581640393348e-05, + "loss": 0.1683, + "step": 763 + }, + { + "epoch": 0.25, + "learning_rate": 1.9859000967662864e-05, + "loss": 0.1816, + "step": 764 + }, + { + "epoch": 0.25, + "learning_rate": 1.9858419105303378e-05, + "loss": 0.1807, + "step": 765 + }, + { + "epoch": 0.25, + "learning_rate": 1.98578360533851e-05, + "loss": 0.1897, + "step": 766 + }, + { + "epoch": 0.25, + "learning_rate": 1.9857251811978387e-05, + "loss": 0.1729, + "step": 767 + }, + { + "epoch": 0.25, + "learning_rate": 1.9856666381153733e-05, + "loss": 0.1709, + "step": 768 + }, + { + "epoch": 0.25, + "learning_rate": 1.985607976098178e-05, + "loss": 0.1855, + "step": 769 + }, + { + "epoch": 0.25, + "learning_rate": 1.9855491951533308e-05, + "loss": 0.1689, + "step": 770 + }, + { + "epoch": 0.25, + "learning_rate": 1.985490295287925e-05, + "loss": 0.1744, + "step": 771 + }, + { + "epoch": 0.25, + "learning_rate": 1.9854312765090675e-05, + "loss": 0.1832, + "step": 772 + }, + { + "epoch": 0.25, + "learning_rate": 1.98537213882388e-05, + "loss": 0.1857, + "step": 773 + }, + { + "epoch": 0.25, + "learning_rate": 1.9853128822394976e-05, + "loss": 0.1832, + "step": 774 + }, + { + "epoch": 0.25, + "learning_rate": 1.985253506763071e-05, + "loss": 0.1799, + "step": 775 + }, + { + "epoch": 0.25, + "learning_rate": 1.985194012401765e-05, + "loss": 0.1802, + "step": 776 + }, + { + "epoch": 0.25, + "learning_rate": 1.9851343991627575e-05, + "loss": 0.1942, + "step": 777 + }, + { + "epoch": 0.25, + "learning_rate": 1.985074667053243e-05, + "loss": 0.1728, + "step": 778 + }, + { + "epoch": 0.25, + "learning_rate": 1.9850148160804275e-05, + "loss": 0.1823, + "step": 779 + }, + { + "epoch": 0.25, + "learning_rate": 1.984954846251534e-05, + "loss": 0.173, + "step": 780 + }, + { + "epoch": 0.25, + "learning_rate": 1.9848947575737982e-05, + "loss": 0.1645, + "step": 781 + }, + { + "epoch": 0.25, + "learning_rate": 1.9848345500544712e-05, + "loss": 0.1929, + "step": 782 + }, + { + "epoch": 0.25, + "learning_rate": 1.984774223700817e-05, + "loss": 0.1618, + "step": 783 + }, + { + "epoch": 0.25, + "learning_rate": 1.9847137785201158e-05, + "loss": 0.1743, + "step": 784 + }, + { + "epoch": 0.25, + "learning_rate": 1.9846532145196607e-05, + "loss": 0.1691, + "step": 785 + }, + { + "epoch": 0.25, + "learning_rate": 1.9845925317067594e-05, + "loss": 0.1871, + "step": 786 + }, + { + "epoch": 0.25, + "learning_rate": 1.984531730088734e-05, + "loss": 0.179, + "step": 787 + }, + { + "epoch": 0.25, + "learning_rate": 1.9844708096729223e-05, + "loss": 0.1997, + "step": 788 + }, + { + "epoch": 0.25, + "learning_rate": 1.984409770466674e-05, + "loss": 0.1615, + "step": 789 + }, + { + "epoch": 0.25, + "learning_rate": 1.9843486124773546e-05, + "loss": 0.1636, + "step": 790 + }, + { + "epoch": 0.25, + "learning_rate": 1.9842873357123438e-05, + "loss": 0.1761, + "step": 791 + }, + { + "epoch": 0.25, + "learning_rate": 1.9842259401790356e-05, + "loss": 0.1687, + "step": 792 + }, + { + "epoch": 0.26, + "learning_rate": 1.984164425884838e-05, + "loss": 0.1668, + "step": 793 + }, + { + "epoch": 0.26, + "learning_rate": 1.9841027928371738e-05, + "loss": 0.1789, + "step": 794 + }, + { + "epoch": 0.26, + "learning_rate": 1.9840410410434803e-05, + "loss": 0.1934, + "step": 795 + }, + { + "epoch": 0.26, + "learning_rate": 1.9839791705112077e-05, + "loss": 0.177, + "step": 796 + }, + { + "epoch": 0.26, + "learning_rate": 1.9839171812478227e-05, + "loss": 0.1891, + "step": 797 + }, + { + "epoch": 0.26, + "learning_rate": 1.9838550732608042e-05, + "loss": 0.1789, + "step": 798 + }, + { + "epoch": 0.26, + "learning_rate": 1.983792846557647e-05, + "loss": 0.1689, + "step": 799 + }, + { + "epoch": 0.26, + "learning_rate": 1.9837305011458595e-05, + "loss": 0.1757, + "step": 800 + }, + { + "epoch": 0.26, + "learning_rate": 1.9836680370329643e-05, + "loss": 0.1833, + "step": 801 + }, + { + "epoch": 0.26, + "learning_rate": 1.983605454226499e-05, + "loss": 0.1541, + "step": 802 + }, + { + "epoch": 0.26, + "learning_rate": 1.9835427527340152e-05, + "loss": 0.17, + "step": 803 + }, + { + "epoch": 0.26, + "learning_rate": 1.9834799325630784e-05, + "loss": 0.1749, + "step": 804 + }, + { + "epoch": 0.26, + "learning_rate": 1.9834169937212685e-05, + "loss": 0.1686, + "step": 805 + }, + { + "epoch": 0.26, + "learning_rate": 1.9833539362161804e-05, + "loss": 0.1703, + "step": 806 + }, + { + "epoch": 0.26, + "learning_rate": 1.983290760055423e-05, + "loss": 0.1873, + "step": 807 + }, + { + "epoch": 0.26, + "learning_rate": 1.983227465246619e-05, + "loss": 0.1876, + "step": 808 + }, + { + "epoch": 0.26, + "learning_rate": 1.983164051797406e-05, + "loss": 0.1642, + "step": 809 + }, + { + "epoch": 0.26, + "learning_rate": 1.9831005197154356e-05, + "loss": 0.1787, + "step": 810 + }, + { + "epoch": 0.26, + "learning_rate": 1.983036869008374e-05, + "loss": 0.163, + "step": 811 + }, + { + "epoch": 0.26, + "learning_rate": 1.982973099683902e-05, + "loss": 0.182, + "step": 812 + }, + { + "epoch": 0.26, + "learning_rate": 1.9829092117497135e-05, + "loss": 0.1916, + "step": 813 + }, + { + "epoch": 0.26, + "learning_rate": 1.982845205213518e-05, + "loss": 0.1552, + "step": 814 + }, + { + "epoch": 0.26, + "learning_rate": 1.9827810800830384e-05, + "loss": 0.1827, + "step": 815 + }, + { + "epoch": 0.26, + "learning_rate": 1.9827168363660126e-05, + "loss": 0.1783, + "step": 816 + }, + { + "epoch": 0.26, + "learning_rate": 1.9826524740701926e-05, + "loss": 0.1897, + "step": 817 + }, + { + "epoch": 0.26, + "learning_rate": 1.9825879932033444e-05, + "loss": 0.1652, + "step": 818 + }, + { + "epoch": 0.26, + "learning_rate": 1.9825233937732485e-05, + "loss": 0.1769, + "step": 819 + }, + { + "epoch": 0.26, + "learning_rate": 1.9824586757876995e-05, + "loss": 0.1769, + "step": 820 + }, + { + "epoch": 0.26, + "learning_rate": 1.9823938392545075e-05, + "loss": 0.1675, + "step": 821 + }, + { + "epoch": 0.26, + "learning_rate": 1.982328884181495e-05, + "loss": 0.1732, + "step": 822 + }, + { + "epoch": 0.26, + "learning_rate": 1.9822638105765006e-05, + "loss": 0.1806, + "step": 823 + }, + { + "epoch": 0.27, + "learning_rate": 1.9821986184473757e-05, + "loss": 0.172, + "step": 824 + }, + { + "epoch": 0.27, + "learning_rate": 1.9821333078019866e-05, + "loss": 0.1706, + "step": 825 + }, + { + "epoch": 0.27, + "learning_rate": 1.982067878648214e-05, + "loss": 0.1927, + "step": 826 + }, + { + "epoch": 0.27, + "learning_rate": 1.9820023309939536e-05, + "loss": 0.1874, + "step": 827 + }, + { + "epoch": 0.27, + "learning_rate": 1.981936664847114e-05, + "loss": 0.1845, + "step": 828 + }, + { + "epoch": 0.27, + "learning_rate": 1.9818708802156185e-05, + "loss": 0.188, + "step": 829 + }, + { + "epoch": 0.27, + "learning_rate": 1.9818049771074058e-05, + "loss": 0.1949, + "step": 830 + }, + { + "epoch": 0.27, + "learning_rate": 1.9817389555304274e-05, + "loss": 0.1754, + "step": 831 + }, + { + "epoch": 0.27, + "learning_rate": 1.9816728154926496e-05, + "loss": 0.1902, + "step": 832 + }, + { + "epoch": 0.27, + "learning_rate": 1.9816065570020538e-05, + "loss": 0.1766, + "step": 833 + }, + { + "epoch": 0.27, + "learning_rate": 1.9815401800666346e-05, + "loss": 0.1567, + "step": 834 + }, + { + "epoch": 0.27, + "learning_rate": 1.9814736846944013e-05, + "loss": 0.1907, + "step": 835 + }, + { + "epoch": 0.27, + "learning_rate": 1.9814070708933778e-05, + "loss": 0.155, + "step": 836 + }, + { + "epoch": 0.27, + "learning_rate": 1.981340338671602e-05, + "loss": 0.1574, + "step": 837 + }, + { + "epoch": 0.27, + "learning_rate": 1.9812734880371253e-05, + "loss": 0.1669, + "step": 838 + }, + { + "epoch": 0.27, + "learning_rate": 1.9812065189980153e-05, + "loss": 0.1731, + "step": 839 + }, + { + "epoch": 0.27, + "learning_rate": 1.9811394315623523e-05, + "loss": 0.1715, + "step": 840 + }, + { + "epoch": 0.27, + "learning_rate": 1.981072225738231e-05, + "loss": 0.1913, + "step": 841 + }, + { + "epoch": 0.27, + "learning_rate": 1.9810049015337614e-05, + "loss": 0.1712, + "step": 842 + }, + { + "epoch": 0.27, + "learning_rate": 1.9809374589570665e-05, + "loss": 0.1817, + "step": 843 + }, + { + "epoch": 0.27, + "learning_rate": 1.9808698980162847e-05, + "loss": 0.1622, + "step": 844 + }, + { + "epoch": 0.27, + "learning_rate": 1.9808022187195678e-05, + "loss": 0.1572, + "step": 845 + }, + { + "epoch": 0.27, + "learning_rate": 1.9807344210750826e-05, + "loss": 0.1907, + "step": 846 + }, + { + "epoch": 0.27, + "learning_rate": 1.9806665050910098e-05, + "loss": 0.1833, + "step": 847 + }, + { + "epoch": 0.27, + "learning_rate": 1.9805984707755444e-05, + "loss": 0.1749, + "step": 848 + }, + { + "epoch": 0.27, + "learning_rate": 1.9805303181368954e-05, + "loss": 0.1652, + "step": 849 + }, + { + "epoch": 0.27, + "learning_rate": 1.980462047183287e-05, + "loss": 0.1914, + "step": 850 + }, + { + "epoch": 0.27, + "learning_rate": 1.9803936579229563e-05, + "loss": 0.155, + "step": 851 + }, + { + "epoch": 0.27, + "learning_rate": 1.9803251503641562e-05, + "loss": 0.1685, + "step": 852 + }, + { + "epoch": 0.27, + "learning_rate": 1.9802565245151525e-05, + "loss": 0.1794, + "step": 853 + }, + { + "epoch": 0.27, + "learning_rate": 1.980187780384226e-05, + "loss": 0.1898, + "step": 854 + }, + { + "epoch": 0.28, + "learning_rate": 1.9801189179796722e-05, + "loss": 0.1907, + "step": 855 + }, + { + "epoch": 0.28, + "learning_rate": 1.9800499373097998e-05, + "loss": 0.1726, + "step": 856 + }, + { + "epoch": 0.28, + "learning_rate": 1.9799808383829326e-05, + "loss": 0.1758, + "step": 857 + }, + { + "epoch": 0.28, + "learning_rate": 1.9799116212074077e-05, + "loss": 0.1673, + "step": 858 + }, + { + "epoch": 0.28, + "learning_rate": 1.9798422857915783e-05, + "loss": 0.1705, + "step": 859 + }, + { + "epoch": 0.28, + "learning_rate": 1.9797728321438096e-05, + "loss": 0.1586, + "step": 860 + }, + { + "epoch": 0.28, + "learning_rate": 1.9797032602724828e-05, + "loss": 0.1877, + "step": 861 + }, + { + "epoch": 0.28, + "learning_rate": 1.9796335701859924e-05, + "loss": 0.1748, + "step": 862 + }, + { + "epoch": 0.28, + "learning_rate": 1.9795637618927478e-05, + "loss": 0.1566, + "step": 863 + }, + { + "epoch": 0.28, + "learning_rate": 1.9794938354011725e-05, + "loss": 0.1696, + "step": 864 + }, + { + "epoch": 0.28, + "learning_rate": 1.9794237907197032e-05, + "loss": 0.1683, + "step": 865 + }, + { + "epoch": 0.28, + "learning_rate": 1.979353627856793e-05, + "loss": 0.1859, + "step": 866 + }, + { + "epoch": 0.28, + "learning_rate": 1.9792833468209074e-05, + "loss": 0.1885, + "step": 867 + }, + { + "epoch": 0.28, + "learning_rate": 1.979212947620527e-05, + "loss": 0.2076, + "step": 868 + }, + { + "epoch": 0.28, + "learning_rate": 1.979142430264146e-05, + "loss": 0.1728, + "step": 869 + }, + { + "epoch": 0.28, + "learning_rate": 1.979071794760274e-05, + "loss": 0.1748, + "step": 870 + }, + { + "epoch": 0.28, + "learning_rate": 1.979001041117434e-05, + "loss": 0.194, + "step": 871 + }, + { + "epoch": 0.28, + "learning_rate": 1.978930169344163e-05, + "loss": 0.1818, + "step": 872 + }, + { + "epoch": 0.28, + "learning_rate": 1.9788591794490132e-05, + "loss": 0.1692, + "step": 873 + }, + { + "epoch": 0.28, + "learning_rate": 1.9787880714405503e-05, + "loss": 0.1624, + "step": 874 + }, + { + "epoch": 0.28, + "learning_rate": 1.9787168453273546e-05, + "loss": 0.1719, + "step": 875 + }, + { + "epoch": 0.28, + "learning_rate": 1.9786455011180203e-05, + "loss": 0.1736, + "step": 876 + }, + { + "epoch": 0.28, + "learning_rate": 1.9785740388211567e-05, + "loss": 0.1766, + "step": 877 + }, + { + "epoch": 0.28, + "learning_rate": 1.978502458445386e-05, + "loss": 0.1709, + "step": 878 + }, + { + "epoch": 0.28, + "learning_rate": 1.9784307599993456e-05, + "loss": 0.1639, + "step": 879 + }, + { + "epoch": 0.28, + "learning_rate": 1.978358943491687e-05, + "loss": 0.187, + "step": 880 + }, + { + "epoch": 0.28, + "learning_rate": 1.9782870089310766e-05, + "loss": 0.1794, + "step": 881 + }, + { + "epoch": 0.28, + "learning_rate": 1.9782149563261933e-05, + "loss": 0.1645, + "step": 882 + }, + { + "epoch": 0.28, + "learning_rate": 1.9781427856857317e-05, + "loss": 0.179, + "step": 883 + }, + { + "epoch": 0.28, + "learning_rate": 1.9780704970184e-05, + "loss": 0.1552, + "step": 884 + }, + { + "epoch": 0.28, + "learning_rate": 1.977998090332921e-05, + "loss": 0.1751, + "step": 885 + }, + { + "epoch": 0.28, + "learning_rate": 1.9779255656380318e-05, + "loss": 0.1853, + "step": 886 + }, + { + "epoch": 0.29, + "learning_rate": 1.9778529229424833e-05, + "loss": 0.1727, + "step": 887 + }, + { + "epoch": 0.29, + "learning_rate": 1.977780162255041e-05, + "loss": 0.1796, + "step": 888 + }, + { + "epoch": 0.29, + "learning_rate": 1.9777072835844842e-05, + "loss": 0.1766, + "step": 889 + }, + { + "epoch": 0.29, + "learning_rate": 1.9776342869396075e-05, + "loss": 0.1673, + "step": 890 + }, + { + "epoch": 0.29, + "learning_rate": 1.9775611723292182e-05, + "loss": 0.1738, + "step": 891 + }, + { + "epoch": 0.29, + "learning_rate": 1.9774879397621387e-05, + "loss": 0.1687, + "step": 892 + }, + { + "epoch": 0.29, + "learning_rate": 1.977414589247206e-05, + "loss": 0.1615, + "step": 893 + }, + { + "epoch": 0.29, + "learning_rate": 1.9773411207932707e-05, + "loss": 0.1647, + "step": 894 + }, + { + "epoch": 0.29, + "learning_rate": 1.9772675344091976e-05, + "loss": 0.1704, + "step": 895 + }, + { + "epoch": 0.29, + "learning_rate": 1.9771938301038664e-05, + "loss": 0.1646, + "step": 896 + }, + { + "epoch": 0.29, + "learning_rate": 1.97712000788617e-05, + "loss": 0.1801, + "step": 897 + }, + { + "epoch": 0.29, + "learning_rate": 1.9770460677650167e-05, + "loss": 0.1768, + "step": 898 + }, + { + "epoch": 0.29, + "learning_rate": 1.9769720097493278e-05, + "loss": 0.1904, + "step": 899 + }, + { + "epoch": 0.29, + "learning_rate": 1.9768978338480396e-05, + "loss": 0.1721, + "step": 900 + }, + { + "epoch": 0.29, + "learning_rate": 1.976823540070103e-05, + "loss": 0.1692, + "step": 901 + }, + { + "epoch": 0.29, + "learning_rate": 1.976749128424482e-05, + "loss": 0.1877, + "step": 902 + }, + { + "epoch": 0.29, + "learning_rate": 1.976674598920156e-05, + "loss": 0.1787, + "step": 903 + }, + { + "epoch": 0.29, + "learning_rate": 1.9765999515661177e-05, + "loss": 0.1677, + "step": 904 + }, + { + "epoch": 0.29, + "learning_rate": 1.976525186371374e-05, + "loss": 0.1771, + "step": 905 + }, + { + "epoch": 0.29, + "learning_rate": 1.9764503033449475e-05, + "loss": 0.1766, + "step": 906 + }, + { + "epoch": 0.29, + "learning_rate": 1.9763753024958724e-05, + "loss": 0.1738, + "step": 907 + }, + { + "epoch": 0.29, + "learning_rate": 1.9763001838331998e-05, + "loss": 0.165, + "step": 908 + }, + { + "epoch": 0.29, + "learning_rate": 1.9762249473659936e-05, + "loss": 0.1724, + "step": 909 + }, + { + "epoch": 0.29, + "learning_rate": 1.976149593103332e-05, + "loss": 0.1814, + "step": 910 + }, + { + "epoch": 0.29, + "learning_rate": 1.9760741210543074e-05, + "loss": 0.1635, + "step": 911 + }, + { + "epoch": 0.29, + "learning_rate": 1.9759985312280264e-05, + "loss": 0.1597, + "step": 912 + }, + { + "epoch": 0.29, + "learning_rate": 1.9759228236336106e-05, + "loss": 0.1769, + "step": 913 + }, + { + "epoch": 0.29, + "learning_rate": 1.975846998280195e-05, + "loss": 0.169, + "step": 914 + }, + { + "epoch": 0.29, + "learning_rate": 1.9757710551769293e-05, + "loss": 0.1647, + "step": 915 + }, + { + "epoch": 0.29, + "learning_rate": 1.9756949943329763e-05, + "loss": 0.153, + "step": 916 + }, + { + "epoch": 0.29, + "learning_rate": 1.975618815757514e-05, + "loss": 0.1812, + "step": 917 + }, + { + "epoch": 0.3, + "learning_rate": 1.975542519459735e-05, + "loss": 0.1741, + "step": 918 + }, + { + "epoch": 0.3, + "learning_rate": 1.9754661054488456e-05, + "loss": 0.1854, + "step": 919 + }, + { + "epoch": 0.3, + "learning_rate": 1.975389573734066e-05, + "loss": 0.187, + "step": 920 + }, + { + "epoch": 0.3, + "learning_rate": 1.9753129243246302e-05, + "loss": 0.1891, + "step": 921 + }, + { + "epoch": 0.3, + "learning_rate": 1.975236157229788e-05, + "loss": 0.1519, + "step": 922 + }, + { + "epoch": 0.3, + "learning_rate": 1.9751592724588016e-05, + "loss": 0.1727, + "step": 923 + }, + { + "epoch": 0.3, + "learning_rate": 1.975082270020949e-05, + "loss": 0.1703, + "step": 924 + }, + { + "epoch": 0.3, + "learning_rate": 1.975005149925521e-05, + "loss": 0.1666, + "step": 925 + }, + { + "epoch": 0.3, + "learning_rate": 1.9749279121818235e-05, + "loss": 0.184, + "step": 926 + }, + { + "epoch": 0.3, + "learning_rate": 1.974850556799177e-05, + "loss": 0.1632, + "step": 927 + }, + { + "epoch": 0.3, + "learning_rate": 1.9747730837869145e-05, + "loss": 0.1754, + "step": 928 + }, + { + "epoch": 0.3, + "learning_rate": 1.9746954931543847e-05, + "loss": 0.1852, + "step": 929 + }, + { + "epoch": 0.3, + "learning_rate": 1.9746177849109497e-05, + "loss": 0.1627, + "step": 930 + }, + { + "epoch": 0.3, + "learning_rate": 1.9745399590659865e-05, + "loss": 0.1857, + "step": 931 + }, + { + "epoch": 0.3, + "learning_rate": 1.974462015628886e-05, + "loss": 0.1727, + "step": 932 + }, + { + "epoch": 0.3, + "learning_rate": 1.974383954609053e-05, + "loss": 0.1696, + "step": 933 + }, + { + "epoch": 0.3, + "learning_rate": 1.9743057760159063e-05, + "loss": 0.1643, + "step": 934 + }, + { + "epoch": 0.3, + "learning_rate": 1.9742274798588793e-05, + "loss": 0.1609, + "step": 935 + }, + { + "epoch": 0.3, + "learning_rate": 1.9741490661474202e-05, + "loss": 0.1861, + "step": 936 + }, + { + "epoch": 0.3, + "learning_rate": 1.9740705348909906e-05, + "loss": 0.1706, + "step": 937 + }, + { + "epoch": 0.3, + "learning_rate": 1.9739918860990657e-05, + "loss": 0.1779, + "step": 938 + }, + { + "epoch": 0.3, + "learning_rate": 1.9739131197811365e-05, + "loss": 0.1579, + "step": 939 + }, + { + "epoch": 0.3, + "learning_rate": 1.9738342359467066e-05, + "loss": 0.157, + "step": 940 + }, + { + "epoch": 0.3, + "learning_rate": 1.973755234605295e-05, + "loss": 0.1952, + "step": 941 + }, + { + "epoch": 0.3, + "learning_rate": 1.9736761157664338e-05, + "loss": 0.1719, + "step": 942 + }, + { + "epoch": 0.3, + "learning_rate": 1.9735968794396698e-05, + "loss": 0.173, + "step": 943 + }, + { + "epoch": 0.3, + "learning_rate": 1.9735175256345647e-05, + "loss": 0.1726, + "step": 944 + }, + { + "epoch": 0.3, + "learning_rate": 1.9734380543606932e-05, + "loss": 0.1984, + "step": 945 + }, + { + "epoch": 0.3, + "learning_rate": 1.9733584656276445e-05, + "loss": 0.1574, + "step": 946 + }, + { + "epoch": 0.3, + "learning_rate": 1.9732787594450222e-05, + "loss": 0.1564, + "step": 947 + }, + { + "epoch": 0.3, + "learning_rate": 1.973198935822444e-05, + "loss": 0.165, + "step": 948 + }, + { + "epoch": 0.31, + "learning_rate": 1.973118994769542e-05, + "loss": 0.1805, + "step": 949 + }, + { + "epoch": 0.31, + "learning_rate": 1.973038936295962e-05, + "loss": 0.1948, + "step": 950 + }, + { + "epoch": 0.31, + "learning_rate": 1.9729587604113645e-05, + "loss": 0.1778, + "step": 951 + }, + { + "epoch": 0.31, + "learning_rate": 1.9728784671254237e-05, + "loss": 0.1653, + "step": 952 + }, + { + "epoch": 0.31, + "learning_rate": 1.9727980564478274e-05, + "loss": 0.1672, + "step": 953 + }, + { + "epoch": 0.31, + "learning_rate": 1.9727175283882792e-05, + "loss": 0.1714, + "step": 954 + }, + { + "epoch": 0.31, + "learning_rate": 1.972636882956496e-05, + "loss": 0.1657, + "step": 955 + }, + { + "epoch": 0.31, + "learning_rate": 1.9725561201622085e-05, + "loss": 0.1622, + "step": 956 + }, + { + "epoch": 0.31, + "learning_rate": 1.9724752400151615e-05, + "loss": 0.16, + "step": 957 + }, + { + "epoch": 0.31, + "learning_rate": 1.9723942425251147e-05, + "loss": 0.1749, + "step": 958 + }, + { + "epoch": 0.31, + "learning_rate": 1.972313127701842e-05, + "loss": 0.1625, + "step": 959 + }, + { + "epoch": 0.31, + "learning_rate": 1.9722318955551307e-05, + "loss": 0.2156, + "step": 960 + }, + { + "epoch": 0.31, + "learning_rate": 1.9721505460947826e-05, + "loss": 0.1757, + "step": 961 + }, + { + "epoch": 0.31, + "learning_rate": 1.9720690793306137e-05, + "loss": 0.1621, + "step": 962 + }, + { + "epoch": 0.31, + "learning_rate": 1.971987495272454e-05, + "loss": 0.1718, + "step": 963 + }, + { + "epoch": 0.31, + "learning_rate": 1.9719057939301477e-05, + "loss": 0.1668, + "step": 964 + }, + { + "epoch": 0.31, + "learning_rate": 1.9718239753135537e-05, + "loss": 0.1607, + "step": 965 + }, + { + "epoch": 0.31, + "learning_rate": 1.9717420394325445e-05, + "loss": 0.1755, + "step": 966 + }, + { + "epoch": 0.31, + "learning_rate": 1.9716599862970063e-05, + "loss": 0.1631, + "step": 967 + }, + { + "epoch": 0.31, + "learning_rate": 1.9715778159168402e-05, + "loss": 0.1891, + "step": 968 + }, + { + "epoch": 0.31, + "learning_rate": 1.971495528301962e-05, + "loss": 0.1621, + "step": 969 + }, + { + "epoch": 0.31, + "learning_rate": 1.9714131234622996e-05, + "loss": 0.1838, + "step": 970 + }, + { + "epoch": 0.31, + "learning_rate": 1.9713306014077972e-05, + "loss": 0.1575, + "step": 971 + }, + { + "epoch": 0.31, + "learning_rate": 1.9712479621484117e-05, + "loss": 0.1474, + "step": 972 + }, + { + "epoch": 0.31, + "learning_rate": 1.971165205694115e-05, + "loss": 0.1576, + "step": 973 + }, + { + "epoch": 0.31, + "learning_rate": 1.9710823320548937e-05, + "loss": 0.1673, + "step": 974 + }, + { + "epoch": 0.31, + "learning_rate": 1.970999341240746e-05, + "loss": 0.1625, + "step": 975 + }, + { + "epoch": 0.31, + "learning_rate": 1.9709162332616872e-05, + "loss": 0.1691, + "step": 976 + }, + { + "epoch": 0.31, + "learning_rate": 1.9708330081277447e-05, + "loss": 0.1702, + "step": 977 + }, + { + "epoch": 0.31, + "learning_rate": 1.9707496658489613e-05, + "loss": 0.1559, + "step": 978 + }, + { + "epoch": 0.31, + "learning_rate": 1.9706662064353933e-05, + "loss": 0.1827, + "step": 979 + }, + { + "epoch": 0.32, + "learning_rate": 1.9705826298971112e-05, + "loss": 0.1841, + "step": 980 + }, + { + "epoch": 0.32, + "learning_rate": 1.9704989362441998e-05, + "loss": 0.1749, + "step": 981 + }, + { + "epoch": 0.32, + "learning_rate": 1.9704151254867575e-05, + "loss": 0.1623, + "step": 982 + }, + { + "epoch": 0.32, + "learning_rate": 1.970331197634898e-05, + "loss": 0.1566, + "step": 983 + }, + { + "epoch": 0.32, + "learning_rate": 1.970247152698748e-05, + "loss": 0.1821, + "step": 984 + }, + { + "epoch": 0.32, + "learning_rate": 1.9701629906884486e-05, + "loss": 0.1554, + "step": 985 + }, + { + "epoch": 0.32, + "learning_rate": 1.9700787116141553e-05, + "loss": 0.1724, + "step": 986 + }, + { + "epoch": 0.32, + "learning_rate": 1.9699943154860376e-05, + "loss": 0.1921, + "step": 987 + }, + { + "epoch": 0.32, + "learning_rate": 1.969909802314279e-05, + "loss": 0.169, + "step": 988 + }, + { + "epoch": 0.32, + "learning_rate": 1.9698251721090776e-05, + "loss": 0.1587, + "step": 989 + }, + { + "epoch": 0.32, + "learning_rate": 1.9697404248806445e-05, + "loss": 0.1691, + "step": 990 + }, + { + "epoch": 0.32, + "learning_rate": 1.9696555606392063e-05, + "loss": 0.1731, + "step": 991 + }, + { + "epoch": 0.32, + "learning_rate": 1.9695705793950025e-05, + "loss": 0.1561, + "step": 992 + }, + { + "epoch": 0.32, + "learning_rate": 1.969485481158288e-05, + "loss": 0.1828, + "step": 993 + }, + { + "epoch": 0.32, + "learning_rate": 1.9694002659393306e-05, + "loss": 0.1769, + "step": 994 + }, + { + "epoch": 0.32, + "learning_rate": 1.9693149337484127e-05, + "loss": 0.1823, + "step": 995 + }, + { + "epoch": 0.32, + "learning_rate": 1.9692294845958315e-05, + "loss": 0.1731, + "step": 996 + }, + { + "epoch": 0.32, + "learning_rate": 1.969143918491897e-05, + "loss": 0.1887, + "step": 997 + }, + { + "epoch": 0.32, + "learning_rate": 1.969058235446934e-05, + "loss": 0.1581, + "step": 998 + }, + { + "epoch": 0.32, + "learning_rate": 1.968972435471282e-05, + "loss": 0.1599, + "step": 999 + }, + { + "epoch": 0.32, + "learning_rate": 1.968886518575293e-05, + "loss": 0.152, + "step": 1000 + }, + { + "epoch": 0.32, + "learning_rate": 1.9688004847693348e-05, + "loss": 0.1687, + "step": 1001 + }, + { + "epoch": 0.32, + "learning_rate": 1.9687143340637885e-05, + "loss": 0.1738, + "step": 1002 + }, + { + "epoch": 0.32, + "learning_rate": 1.9686280664690495e-05, + "loss": 0.2016, + "step": 1003 + }, + { + "epoch": 0.32, + "learning_rate": 1.968541681995527e-05, + "loss": 0.1695, + "step": 1004 + }, + { + "epoch": 0.32, + "learning_rate": 1.9684551806536448e-05, + "loss": 0.185, + "step": 1005 + }, + { + "epoch": 0.32, + "learning_rate": 1.96836856245384e-05, + "loss": 0.1767, + "step": 1006 + }, + { + "epoch": 0.32, + "learning_rate": 1.9682818274065645e-05, + "loss": 0.182, + "step": 1007 + }, + { + "epoch": 0.32, + "learning_rate": 1.9681949755222846e-05, + "loss": 0.1712, + "step": 1008 + }, + { + "epoch": 0.32, + "learning_rate": 1.9681080068114794e-05, + "loss": 0.1767, + "step": 1009 + }, + { + "epoch": 0.32, + "learning_rate": 1.9680209212846437e-05, + "loss": 0.189, + "step": 1010 + }, + { + "epoch": 0.33, + "learning_rate": 1.9679337189522854e-05, + "loss": 0.1707, + "step": 1011 + }, + { + "epoch": 0.33, + "learning_rate": 1.9678463998249264e-05, + "loss": 0.1739, + "step": 1012 + }, + { + "epoch": 0.33, + "learning_rate": 1.967758963913103e-05, + "loss": 0.1826, + "step": 1013 + }, + { + "epoch": 0.33, + "learning_rate": 1.9676714112273664e-05, + "loss": 0.1719, + "step": 1014 + }, + { + "epoch": 0.33, + "learning_rate": 1.96758374177828e-05, + "loss": 0.1821, + "step": 1015 + }, + { + "epoch": 0.33, + "learning_rate": 1.9674959555764227e-05, + "loss": 0.1571, + "step": 1016 + }, + { + "epoch": 0.33, + "learning_rate": 1.967408052632388e-05, + "loss": 0.1696, + "step": 1017 + }, + { + "epoch": 0.33, + "learning_rate": 1.967320032956781e-05, + "loss": 0.1763, + "step": 1018 + }, + { + "epoch": 0.33, + "learning_rate": 1.967231896560224e-05, + "loss": 0.1674, + "step": 1019 + }, + { + "epoch": 0.33, + "learning_rate": 1.9671436434533514e-05, + "loss": 0.1736, + "step": 1020 + }, + { + "epoch": 0.33, + "learning_rate": 1.9670552736468117e-05, + "loss": 0.1691, + "step": 1021 + }, + { + "epoch": 0.33, + "learning_rate": 1.966966787151269e-05, + "loss": 0.1667, + "step": 1022 + }, + { + "epoch": 0.33, + "learning_rate": 1.9668781839773995e-05, + "loss": 0.1574, + "step": 1023 + }, + { + "epoch": 0.33, + "learning_rate": 1.966789464135895e-05, + "loss": 0.1641, + "step": 1024 + }, + { + "epoch": 0.33, + "learning_rate": 1.9667006276374606e-05, + "loss": 0.1789, + "step": 1025 + }, + { + "epoch": 0.33, + "learning_rate": 1.966611674492816e-05, + "loss": 0.1535, + "step": 1026 + }, + { + "epoch": 0.33, + "learning_rate": 1.9665226047126942e-05, + "loss": 0.1499, + "step": 1027 + }, + { + "epoch": 0.33, + "learning_rate": 1.966433418307843e-05, + "loss": 0.1549, + "step": 1028 + }, + { + "epoch": 0.33, + "learning_rate": 1.9663441152890237e-05, + "loss": 0.1777, + "step": 1029 + }, + { + "epoch": 0.33, + "learning_rate": 1.9662546956670123e-05, + "loss": 0.1701, + "step": 1030 + }, + { + "epoch": 0.33, + "learning_rate": 1.9661651594525986e-05, + "loss": 0.1609, + "step": 1031 + }, + { + "epoch": 0.33, + "learning_rate": 1.9660755066565863e-05, + "loss": 0.1765, + "step": 1032 + }, + { + "epoch": 0.33, + "learning_rate": 1.9659857372897934e-05, + "loss": 0.1773, + "step": 1033 + }, + { + "epoch": 0.33, + "learning_rate": 1.9658958513630518e-05, + "loss": 0.1761, + "step": 1034 + }, + { + "epoch": 0.33, + "learning_rate": 1.965805848887207e-05, + "loss": 0.1796, + "step": 1035 + }, + { + "epoch": 0.33, + "learning_rate": 1.96571572987312e-05, + "loss": 0.1702, + "step": 1036 + }, + { + "epoch": 0.33, + "learning_rate": 1.9656254943316644e-05, + "loss": 0.1496, + "step": 1037 + }, + { + "epoch": 0.33, + "learning_rate": 1.9655351422737285e-05, + "loss": 0.1575, + "step": 1038 + }, + { + "epoch": 0.33, + "learning_rate": 1.9654446737102147e-05, + "loss": 0.1843, + "step": 1039 + }, + { + "epoch": 0.33, + "learning_rate": 1.9653540886520387e-05, + "loss": 0.1771, + "step": 1040 + }, + { + "epoch": 0.33, + "learning_rate": 1.965263387110132e-05, + "loss": 0.1723, + "step": 1041 + }, + { + "epoch": 0.34, + "learning_rate": 1.965172569095438e-05, + "loss": 0.1936, + "step": 1042 + }, + { + "epoch": 0.34, + "learning_rate": 1.9650816346189154e-05, + "loss": 0.1718, + "step": 1043 + }, + { + "epoch": 0.34, + "learning_rate": 1.9649905836915373e-05, + "loss": 0.1667, + "step": 1044 + }, + { + "epoch": 0.34, + "learning_rate": 1.9648994163242898e-05, + "loss": 0.1516, + "step": 1045 + }, + { + "epoch": 0.34, + "learning_rate": 1.9648081325281737e-05, + "loss": 0.1795, + "step": 1046 + }, + { + "epoch": 0.34, + "learning_rate": 1.9647167323142033e-05, + "loss": 0.1796, + "step": 1047 + }, + { + "epoch": 0.34, + "learning_rate": 1.964625215693408e-05, + "loss": 0.1824, + "step": 1048 + }, + { + "epoch": 0.34, + "learning_rate": 1.9645335826768303e-05, + "loss": 0.1634, + "step": 1049 + }, + { + "epoch": 0.34, + "learning_rate": 1.964441833275527e-05, + "loss": 0.1769, + "step": 1050 + }, + { + "epoch": 0.34, + "learning_rate": 1.9643499675005692e-05, + "loss": 0.1498, + "step": 1051 + }, + { + "epoch": 0.34, + "learning_rate": 1.964257985363041e-05, + "loss": 0.1715, + "step": 1052 + }, + { + "epoch": 0.34, + "learning_rate": 1.9641658868740423e-05, + "loss": 0.1596, + "step": 1053 + }, + { + "epoch": 0.34, + "learning_rate": 1.9640736720446857e-05, + "loss": 0.1759, + "step": 1054 + }, + { + "epoch": 0.34, + "learning_rate": 1.9639813408860984e-05, + "loss": 0.1682, + "step": 1055 + }, + { + "epoch": 0.34, + "learning_rate": 1.9638888934094212e-05, + "loss": 0.1715, + "step": 1056 + }, + { + "epoch": 0.34, + "learning_rate": 1.9637963296258094e-05, + "loss": 0.1529, + "step": 1057 + }, + { + "epoch": 0.34, + "learning_rate": 1.963703649546432e-05, + "loss": 0.1563, + "step": 1058 + }, + { + "epoch": 0.34, + "learning_rate": 1.9636108531824725e-05, + "loss": 0.1686, + "step": 1059 + }, + { + "epoch": 0.34, + "learning_rate": 1.9635179405451277e-05, + "loss": 0.1807, + "step": 1060 + }, + { + "epoch": 0.34, + "learning_rate": 1.9634249116456093e-05, + "loss": 0.1594, + "step": 1061 + }, + { + "epoch": 0.34, + "learning_rate": 1.9633317664951418e-05, + "loss": 0.1882, + "step": 1062 + }, + { + "epoch": 0.34, + "learning_rate": 1.963238505104965e-05, + "loss": 0.1865, + "step": 1063 + }, + { + "epoch": 0.34, + "learning_rate": 1.9631451274863326e-05, + "loss": 0.1796, + "step": 1064 + }, + { + "epoch": 0.34, + "learning_rate": 1.963051633650511e-05, + "loss": 0.1615, + "step": 1065 + }, + { + "epoch": 0.34, + "learning_rate": 1.9629580236087823e-05, + "loss": 0.1646, + "step": 1066 + }, + { + "epoch": 0.34, + "learning_rate": 1.9628642973724416e-05, + "loss": 0.1839, + "step": 1067 + }, + { + "epoch": 0.34, + "learning_rate": 1.9627704549527986e-05, + "loss": 0.1876, + "step": 1068 + }, + { + "epoch": 0.34, + "learning_rate": 1.9626764963611764e-05, + "loss": 0.1624, + "step": 1069 + }, + { + "epoch": 0.34, + "learning_rate": 1.9625824216089123e-05, + "loss": 0.146, + "step": 1070 + }, + { + "epoch": 0.34, + "learning_rate": 1.9624882307073584e-05, + "loss": 0.1697, + "step": 1071 + }, + { + "epoch": 0.34, + "learning_rate": 1.9623939236678793e-05, + "loss": 0.1558, + "step": 1072 + }, + { + "epoch": 0.35, + "learning_rate": 1.9622995005018556e-05, + "loss": 0.1929, + "step": 1073 + }, + { + "epoch": 0.35, + "learning_rate": 1.9622049612206795e-05, + "loss": 0.1761, + "step": 1074 + }, + { + "epoch": 0.35, + "learning_rate": 1.9621103058357594e-05, + "loss": 0.1711, + "step": 1075 + }, + { + "epoch": 0.35, + "learning_rate": 1.962015534358517e-05, + "loss": 0.1718, + "step": 1076 + }, + { + "epoch": 0.35, + "learning_rate": 1.9619206468003867e-05, + "loss": 0.1696, + "step": 1077 + }, + { + "epoch": 0.35, + "learning_rate": 1.961825643172819e-05, + "loss": 0.1497, + "step": 1078 + }, + { + "epoch": 0.35, + "learning_rate": 1.9617305234872773e-05, + "loss": 0.1971, + "step": 1079 + }, + { + "epoch": 0.35, + "learning_rate": 1.9616352877552394e-05, + "loss": 0.1628, + "step": 1080 + }, + { + "epoch": 0.35, + "learning_rate": 1.961539935988196e-05, + "loss": 0.1754, + "step": 1081 + }, + { + "epoch": 0.35, + "learning_rate": 1.9614444681976533e-05, + "loss": 0.1739, + "step": 1082 + }, + { + "epoch": 0.35, + "learning_rate": 1.961348884395131e-05, + "loss": 0.1576, + "step": 1083 + }, + { + "epoch": 0.35, + "learning_rate": 1.961253184592162e-05, + "loss": 0.2156, + "step": 1084 + }, + { + "epoch": 0.35, + "learning_rate": 1.961157368800294e-05, + "loss": 0.1575, + "step": 1085 + }, + { + "epoch": 0.35, + "learning_rate": 1.9610614370310892e-05, + "loss": 0.1606, + "step": 1086 + }, + { + "epoch": 0.35, + "learning_rate": 1.9609653892961226e-05, + "loss": 0.1655, + "step": 1087 + }, + { + "epoch": 0.35, + "learning_rate": 1.9608692256069836e-05, + "loss": 0.1512, + "step": 1088 + }, + { + "epoch": 0.35, + "learning_rate": 1.960772945975276e-05, + "loss": 0.1903, + "step": 1089 + }, + { + "epoch": 0.35, + "learning_rate": 1.9606765504126174e-05, + "loss": 0.1581, + "step": 1090 + }, + { + "epoch": 0.35, + "learning_rate": 1.9605800389306386e-05, + "loss": 0.1871, + "step": 1091 + }, + { + "epoch": 0.35, + "learning_rate": 1.960483411540986e-05, + "loss": 0.1557, + "step": 1092 + }, + { + "epoch": 0.35, + "learning_rate": 1.9603866682553187e-05, + "loss": 0.1588, + "step": 1093 + }, + { + "epoch": 0.35, + "learning_rate": 1.96028980908531e-05, + "loss": 0.173, + "step": 1094 + }, + { + "epoch": 0.35, + "learning_rate": 1.9601928340426478e-05, + "loss": 0.1617, + "step": 1095 + }, + { + "epoch": 0.35, + "learning_rate": 1.960095743139033e-05, + "loss": 0.1832, + "step": 1096 + }, + { + "epoch": 0.35, + "learning_rate": 1.959998536386181e-05, + "loss": 0.1578, + "step": 1097 + }, + { + "epoch": 0.35, + "learning_rate": 1.9599012137958214e-05, + "loss": 0.1572, + "step": 1098 + }, + { + "epoch": 0.35, + "learning_rate": 1.959803775379698e-05, + "loss": 0.1759, + "step": 1099 + }, + { + "epoch": 0.35, + "learning_rate": 1.959706221149567e-05, + "loss": 0.1749, + "step": 1100 + }, + { + "epoch": 0.35, + "learning_rate": 1.9596085511172006e-05, + "loss": 0.1678, + "step": 1101 + }, + { + "epoch": 0.35, + "learning_rate": 1.9595107652943842e-05, + "loss": 0.1694, + "step": 1102 + }, + { + "epoch": 0.35, + "learning_rate": 1.9594128636929165e-05, + "loss": 0.185, + "step": 1103 + }, + { + "epoch": 0.36, + "learning_rate": 1.959314846324611e-05, + "loss": 0.176, + "step": 1104 + }, + { + "epoch": 0.36, + "learning_rate": 1.9592167132012946e-05, + "loss": 0.1658, + "step": 1105 + }, + { + "epoch": 0.36, + "learning_rate": 1.9591184643348092e-05, + "loss": 0.173, + "step": 1106 + }, + { + "epoch": 0.36, + "learning_rate": 1.9590200997370092e-05, + "loss": 0.1644, + "step": 1107 + }, + { + "epoch": 0.36, + "learning_rate": 1.9589216194197637e-05, + "loss": 0.1529, + "step": 1108 + }, + { + "epoch": 0.36, + "learning_rate": 1.958823023394956e-05, + "loss": 0.1875, + "step": 1109 + }, + { + "epoch": 0.36, + "learning_rate": 1.9587243116744832e-05, + "loss": 0.1617, + "step": 1110 + }, + { + "epoch": 0.36, + "learning_rate": 1.9586254842702562e-05, + "loss": 0.1756, + "step": 1111 + }, + { + "epoch": 0.36, + "learning_rate": 1.9585265411941997e-05, + "loss": 0.1783, + "step": 1112 + }, + { + "epoch": 0.36, + "learning_rate": 1.958427482458253e-05, + "loss": 0.1374, + "step": 1113 + }, + { + "epoch": 0.36, + "learning_rate": 1.9583283080743686e-05, + "loss": 0.1893, + "step": 1114 + }, + { + "epoch": 0.36, + "learning_rate": 1.9582290180545134e-05, + "loss": 0.1538, + "step": 1115 + }, + { + "epoch": 0.36, + "learning_rate": 1.9581296124106682e-05, + "loss": 0.1661, + "step": 1116 + }, + { + "epoch": 0.36, + "learning_rate": 1.958030091154828e-05, + "loss": 0.1835, + "step": 1117 + }, + { + "epoch": 0.36, + "learning_rate": 1.9579304542990005e-05, + "loss": 0.1424, + "step": 1118 + }, + { + "epoch": 0.36, + "learning_rate": 1.9578307018552096e-05, + "loss": 0.1732, + "step": 1119 + }, + { + "epoch": 0.36, + "learning_rate": 1.957730833835491e-05, + "loss": 0.1735, + "step": 1120 + }, + { + "epoch": 0.36, + "learning_rate": 1.957630850251895e-05, + "loss": 0.1722, + "step": 1121 + }, + { + "epoch": 0.36, + "learning_rate": 1.9575307511164873e-05, + "loss": 0.1525, + "step": 1122 + }, + { + "epoch": 0.36, + "learning_rate": 1.957430536441345e-05, + "loss": 0.1794, + "step": 1123 + }, + { + "epoch": 0.36, + "learning_rate": 1.957330206238561e-05, + "loss": 0.1845, + "step": 1124 + }, + { + "epoch": 0.36, + "learning_rate": 1.9572297605202412e-05, + "loss": 0.1654, + "step": 1125 + }, + { + "epoch": 0.36, + "learning_rate": 1.9571291992985064e-05, + "loss": 0.1619, + "step": 1126 + }, + { + "epoch": 0.36, + "learning_rate": 1.95702852258549e-05, + "loss": 0.1638, + "step": 1127 + }, + { + "epoch": 0.36, + "learning_rate": 1.956927730393341e-05, + "loss": 0.1741, + "step": 1128 + }, + { + "epoch": 0.36, + "learning_rate": 1.9568268227342203e-05, + "loss": 0.1703, + "step": 1129 + }, + { + "epoch": 0.36, + "learning_rate": 1.956725799620305e-05, + "loss": 0.1837, + "step": 1130 + }, + { + "epoch": 0.36, + "learning_rate": 1.956624661063784e-05, + "loss": 0.1731, + "step": 1131 + }, + { + "epoch": 0.36, + "learning_rate": 1.9565234070768617e-05, + "loss": 0.1927, + "step": 1132 + }, + { + "epoch": 0.36, + "learning_rate": 1.9564220376717557e-05, + "loss": 0.1524, + "step": 1133 + }, + { + "epoch": 0.36, + "learning_rate": 1.9563205528606977e-05, + "loss": 0.1595, + "step": 1134 + }, + { + "epoch": 0.37, + "learning_rate": 1.9562189526559333e-05, + "loss": 0.1481, + "step": 1135 + }, + { + "epoch": 0.37, + "learning_rate": 1.9561172370697215e-05, + "loss": 0.1793, + "step": 1136 + }, + { + "epoch": 0.37, + "learning_rate": 1.9560154061143365e-05, + "loss": 0.1484, + "step": 1137 + }, + { + "epoch": 0.37, + "learning_rate": 1.9559134598020653e-05, + "loss": 0.1905, + "step": 1138 + }, + { + "epoch": 0.37, + "learning_rate": 1.9558113981452093e-05, + "loss": 0.1702, + "step": 1139 + }, + { + "epoch": 0.37, + "learning_rate": 1.955709221156083e-05, + "loss": 0.1855, + "step": 1140 + }, + { + "epoch": 0.37, + "learning_rate": 1.9556069288470168e-05, + "loss": 0.162, + "step": 1141 + }, + { + "epoch": 0.37, + "learning_rate": 1.9555045212303528e-05, + "loss": 0.1731, + "step": 1142 + }, + { + "epoch": 0.37, + "learning_rate": 1.9554019983184483e-05, + "loss": 0.1632, + "step": 1143 + }, + { + "epoch": 0.37, + "learning_rate": 1.9552993601236737e-05, + "loss": 0.1424, + "step": 1144 + }, + { + "epoch": 0.37, + "learning_rate": 1.9551966066584144e-05, + "loss": 0.1797, + "step": 1145 + }, + { + "epoch": 0.37, + "learning_rate": 1.9550937379350686e-05, + "loss": 0.1549, + "step": 1146 + }, + { + "epoch": 0.37, + "learning_rate": 1.954990753966049e-05, + "loss": 0.1774, + "step": 1147 + }, + { + "epoch": 0.37, + "learning_rate": 1.954887654763782e-05, + "loss": 0.1661, + "step": 1148 + }, + { + "epoch": 0.37, + "learning_rate": 1.9547844403407085e-05, + "loss": 0.1919, + "step": 1149 + }, + { + "epoch": 0.37, + "learning_rate": 1.9546811107092822e-05, + "loss": 0.1699, + "step": 1150 + }, + { + "epoch": 0.37, + "learning_rate": 1.9545776658819715e-05, + "loss": 0.1642, + "step": 1151 + }, + { + "epoch": 0.37, + "learning_rate": 1.9544741058712585e-05, + "loss": 0.1658, + "step": 1152 + }, + { + "epoch": 0.37, + "learning_rate": 1.9543704306896393e-05, + "loss": 0.1788, + "step": 1153 + }, + { + "epoch": 0.37, + "learning_rate": 1.9542666403496232e-05, + "loss": 0.1601, + "step": 1154 + }, + { + "epoch": 0.37, + "learning_rate": 1.9541627348637345e-05, + "loss": 0.1909, + "step": 1155 + }, + { + "epoch": 0.37, + "learning_rate": 1.9540587142445112e-05, + "loss": 0.1769, + "step": 1156 + }, + { + "epoch": 0.37, + "learning_rate": 1.9539545785045045e-05, + "loss": 0.1579, + "step": 1157 + }, + { + "epoch": 0.37, + "learning_rate": 1.9538503276562797e-05, + "loss": 0.176, + "step": 1158 + }, + { + "epoch": 0.37, + "learning_rate": 1.953745961712416e-05, + "loss": 0.1595, + "step": 1159 + }, + { + "epoch": 0.37, + "learning_rate": 1.9536414806855074e-05, + "loss": 0.1438, + "step": 1160 + }, + { + "epoch": 0.37, + "learning_rate": 1.9535368845881604e-05, + "loss": 0.1505, + "step": 1161 + }, + { + "epoch": 0.37, + "learning_rate": 1.953432173432996e-05, + "loss": 0.1718, + "step": 1162 + }, + { + "epoch": 0.37, + "learning_rate": 1.9533273472326497e-05, + "loss": 0.1596, + "step": 1163 + }, + { + "epoch": 0.37, + "learning_rate": 1.9532224059997693e-05, + "loss": 0.1558, + "step": 1164 + }, + { + "epoch": 0.37, + "learning_rate": 1.9531173497470185e-05, + "loss": 0.1614, + "step": 1165 + }, + { + "epoch": 0.38, + "learning_rate": 1.953012178487073e-05, + "loss": 0.1508, + "step": 1166 + }, + { + "epoch": 0.38, + "learning_rate": 1.9529068922326236e-05, + "loss": 0.1722, + "step": 1167 + }, + { + "epoch": 0.38, + "learning_rate": 1.952801490996375e-05, + "loss": 0.1778, + "step": 1168 + }, + { + "epoch": 0.38, + "learning_rate": 1.9526959747910444e-05, + "loss": 0.1897, + "step": 1169 + }, + { + "epoch": 0.38, + "learning_rate": 1.9525903436293644e-05, + "loss": 0.1796, + "step": 1170 + }, + { + "epoch": 0.38, + "learning_rate": 1.9524845975240806e-05, + "loss": 0.1541, + "step": 1171 + }, + { + "epoch": 0.38, + "learning_rate": 1.9523787364879532e-05, + "loss": 0.156, + "step": 1172 + }, + { + "epoch": 0.38, + "learning_rate": 1.952272760533756e-05, + "loss": 0.1539, + "step": 1173 + }, + { + "epoch": 0.38, + "learning_rate": 1.952166669674276e-05, + "loss": 0.1767, + "step": 1174 + }, + { + "epoch": 0.38, + "learning_rate": 1.9520604639223146e-05, + "loss": 0.1801, + "step": 1175 + }, + { + "epoch": 0.38, + "learning_rate": 1.951954143290687e-05, + "loss": 0.154, + "step": 1176 + }, + { + "epoch": 0.38, + "learning_rate": 1.951847707792223e-05, + "loss": 0.1847, + "step": 1177 + }, + { + "epoch": 0.38, + "learning_rate": 1.9517411574397647e-05, + "loss": 0.1665, + "step": 1178 + }, + { + "epoch": 0.38, + "learning_rate": 1.9516344922461694e-05, + "loss": 0.1691, + "step": 1179 + }, + { + "epoch": 0.38, + "learning_rate": 1.9515277122243076e-05, + "loss": 0.1654, + "step": 1180 + }, + { + "epoch": 0.38, + "learning_rate": 1.9514208173870638e-05, + "loss": 0.1714, + "step": 1181 + }, + { + "epoch": 0.38, + "learning_rate": 1.9513138077473362e-05, + "loss": 0.1676, + "step": 1182 + }, + { + "epoch": 0.38, + "learning_rate": 1.951206683318038e-05, + "loss": 0.1543, + "step": 1183 + }, + { + "epoch": 0.38, + "learning_rate": 1.951099444112094e-05, + "loss": 0.1661, + "step": 1184 + }, + { + "epoch": 0.38, + "learning_rate": 1.9509920901424445e-05, + "loss": 0.162, + "step": 1185 + }, + { + "epoch": 0.38, + "learning_rate": 1.950884621422044e-05, + "loss": 0.1678, + "step": 1186 + }, + { + "epoch": 0.38, + "learning_rate": 1.9507770379638596e-05, + "loss": 0.1642, + "step": 1187 + }, + { + "epoch": 0.38, + "learning_rate": 1.9506693397808724e-05, + "loss": 0.1649, + "step": 1188 + }, + { + "epoch": 0.38, + "learning_rate": 1.9505615268860786e-05, + "loss": 0.176, + "step": 1189 + }, + { + "epoch": 0.38, + "learning_rate": 1.950453599292487e-05, + "loss": 0.1752, + "step": 1190 + }, + { + "epoch": 0.38, + "learning_rate": 1.95034555701312e-05, + "loss": 0.1692, + "step": 1191 + }, + { + "epoch": 0.38, + "learning_rate": 1.9502374000610152e-05, + "loss": 0.1611, + "step": 1192 + }, + { + "epoch": 0.38, + "learning_rate": 1.950129128449223e-05, + "loss": 0.1771, + "step": 1193 + }, + { + "epoch": 0.38, + "learning_rate": 1.950020742190808e-05, + "loss": 0.1664, + "step": 1194 + }, + { + "epoch": 0.38, + "learning_rate": 1.9499122412988482e-05, + "loss": 0.1703, + "step": 1195 + }, + { + "epoch": 0.38, + "learning_rate": 1.9498036257864365e-05, + "loss": 0.1614, + "step": 1196 + }, + { + "epoch": 0.39, + "learning_rate": 1.949694895666678e-05, + "loss": 0.1696, + "step": 1197 + }, + { + "epoch": 0.39, + "learning_rate": 1.9495860509526935e-05, + "loss": 0.1514, + "step": 1198 + }, + { + "epoch": 0.39, + "learning_rate": 1.9494770916576158e-05, + "loss": 0.1661, + "step": 1199 + }, + { + "epoch": 0.39, + "learning_rate": 1.949368017794593e-05, + "loss": 0.1746, + "step": 1200 + }, + { + "epoch": 0.39, + "learning_rate": 1.949258829376786e-05, + "loss": 0.1715, + "step": 1201 + }, + { + "epoch": 0.39, + "learning_rate": 1.94914952641737e-05, + "loss": 0.1889, + "step": 1202 + }, + { + "epoch": 0.39, + "learning_rate": 1.949040108929534e-05, + "loss": 0.1665, + "step": 1203 + }, + { + "epoch": 0.39, + "learning_rate": 1.9489305769264813e-05, + "loss": 0.1742, + "step": 1204 + }, + { + "epoch": 0.39, + "learning_rate": 1.9488209304214277e-05, + "loss": 0.1653, + "step": 1205 + }, + { + "epoch": 0.39, + "learning_rate": 1.9487111694276043e-05, + "loss": 0.1613, + "step": 1206 + }, + { + "epoch": 0.39, + "learning_rate": 1.9486012939582548e-05, + "loss": 0.1681, + "step": 1207 + }, + { + "epoch": 0.39, + "learning_rate": 1.9484913040266377e-05, + "loss": 0.1666, + "step": 1208 + }, + { + "epoch": 0.39, + "learning_rate": 1.9483811996460243e-05, + "loss": 0.1601, + "step": 1209 + }, + { + "epoch": 0.39, + "learning_rate": 1.9482709808297006e-05, + "loss": 0.1599, + "step": 1210 + }, + { + "epoch": 0.39, + "learning_rate": 1.948160647590966e-05, + "loss": 0.1773, + "step": 1211 + }, + { + "epoch": 0.39, + "learning_rate": 1.948050199943134e-05, + "loss": 0.1771, + "step": 1212 + }, + { + "epoch": 0.39, + "learning_rate": 1.947939637899531e-05, + "loss": 0.1562, + "step": 1213 + }, + { + "epoch": 0.39, + "learning_rate": 1.947828961473499e-05, + "loss": 0.1657, + "step": 1214 + }, + { + "epoch": 0.39, + "learning_rate": 1.9477181706783917e-05, + "loss": 0.1706, + "step": 1215 + }, + { + "epoch": 0.39, + "learning_rate": 1.9476072655275782e-05, + "loss": 0.1473, + "step": 1216 + }, + { + "epoch": 0.39, + "learning_rate": 1.9474962460344404e-05, + "loss": 0.1679, + "step": 1217 + }, + { + "epoch": 0.39, + "learning_rate": 1.9473851122123743e-05, + "loss": 0.1867, + "step": 1218 + }, + { + "epoch": 0.39, + "learning_rate": 1.9472738640747907e-05, + "loss": 0.1613, + "step": 1219 + }, + { + "epoch": 0.39, + "learning_rate": 1.9471625016351117e-05, + "loss": 0.172, + "step": 1220 + }, + { + "epoch": 0.39, + "learning_rate": 1.947051024906776e-05, + "loss": 0.1792, + "step": 1221 + }, + { + "epoch": 0.39, + "learning_rate": 1.9469394339032348e-05, + "loss": 0.1595, + "step": 1222 + }, + { + "epoch": 0.39, + "learning_rate": 1.9468277286379527e-05, + "loss": 0.159, + "step": 1223 + }, + { + "epoch": 0.39, + "learning_rate": 1.9467159091244087e-05, + "loss": 0.1655, + "step": 1224 + }, + { + "epoch": 0.39, + "learning_rate": 1.946603975376095e-05, + "loss": 0.1411, + "step": 1225 + }, + { + "epoch": 0.39, + "learning_rate": 1.9464919274065187e-05, + "loss": 0.1561, + "step": 1226 + }, + { + "epoch": 0.39, + "learning_rate": 1.9463797652291995e-05, + "loss": 0.1957, + "step": 1227 + }, + { + "epoch": 0.39, + "learning_rate": 1.946267488857672e-05, + "loss": 0.1593, + "step": 1228 + }, + { + "epoch": 0.4, + "learning_rate": 1.946155098305483e-05, + "loss": 0.1656, + "step": 1229 + }, + { + "epoch": 0.4, + "learning_rate": 1.946042593586195e-05, + "loss": 0.1769, + "step": 1230 + }, + { + "epoch": 0.4, + "learning_rate": 1.9459299747133823e-05, + "loss": 0.1514, + "step": 1231 + }, + { + "epoch": 0.4, + "learning_rate": 1.9458172417006347e-05, + "loss": 0.1677, + "step": 1232 + }, + { + "epoch": 0.4, + "learning_rate": 1.945704394561555e-05, + "loss": 0.1775, + "step": 1233 + }, + { + "epoch": 0.4, + "learning_rate": 1.9455914333097597e-05, + "loss": 0.1589, + "step": 1234 + }, + { + "epoch": 0.4, + "learning_rate": 1.9454783579588788e-05, + "loss": 0.1734, + "step": 1235 + }, + { + "epoch": 0.4, + "learning_rate": 1.9453651685225567e-05, + "loss": 0.1664, + "step": 1236 + }, + { + "epoch": 0.4, + "learning_rate": 1.945251865014452e-05, + "loss": 0.1678, + "step": 1237 + }, + { + "epoch": 0.4, + "learning_rate": 1.9451384474482352e-05, + "loss": 0.1449, + "step": 1238 + }, + { + "epoch": 0.4, + "learning_rate": 1.9450249158375926e-05, + "loss": 0.1559, + "step": 1239 + }, + { + "epoch": 0.4, + "learning_rate": 1.944911270196223e-05, + "loss": 0.1498, + "step": 1240 + }, + { + "epoch": 0.4, + "learning_rate": 1.9447975105378396e-05, + "loss": 0.1712, + "step": 1241 + }, + { + "epoch": 0.4, + "learning_rate": 1.9446836368761692e-05, + "loss": 0.1693, + "step": 1242 + }, + { + "epoch": 0.4, + "learning_rate": 1.944569649224952e-05, + "loss": 0.1571, + "step": 1243 + }, + { + "epoch": 0.4, + "learning_rate": 1.9444555475979425e-05, + "loss": 0.1595, + "step": 1244 + }, + { + "epoch": 0.4, + "learning_rate": 1.9443413320089082e-05, + "loss": 0.152, + "step": 1245 + }, + { + "epoch": 0.4, + "learning_rate": 1.9442270024716313e-05, + "loss": 0.151, + "step": 1246 + }, + { + "epoch": 0.4, + "learning_rate": 1.944112558999907e-05, + "loss": 0.1644, + "step": 1247 + }, + { + "epoch": 0.4, + "learning_rate": 1.9439980016075447e-05, + "loss": 0.1727, + "step": 1248 + }, + { + "epoch": 0.4, + "learning_rate": 1.9438833303083677e-05, + "loss": 0.1592, + "step": 1249 + }, + { + "epoch": 0.4, + "learning_rate": 1.9437685451162122e-05, + "loss": 0.161, + "step": 1250 + }, + { + "epoch": 0.4, + "learning_rate": 1.943653646044929e-05, + "loss": 0.1569, + "step": 1251 + }, + { + "epoch": 0.4, + "learning_rate": 1.9435386331083822e-05, + "loss": 0.1649, + "step": 1252 + }, + { + "epoch": 0.4, + "learning_rate": 1.9434235063204497e-05, + "loss": 0.1557, + "step": 1253 + }, + { + "epoch": 0.4, + "learning_rate": 1.9433082656950232e-05, + "loss": 0.1668, + "step": 1254 + }, + { + "epoch": 0.4, + "learning_rate": 1.943192911246008e-05, + "loss": 0.1514, + "step": 1255 + }, + { + "epoch": 0.4, + "learning_rate": 1.9430774429873235e-05, + "loss": 0.1668, + "step": 1256 + }, + { + "epoch": 0.4, + "learning_rate": 1.9429618609329028e-05, + "loss": 0.1766, + "step": 1257 + }, + { + "epoch": 0.4, + "learning_rate": 1.942846165096692e-05, + "loss": 0.1717, + "step": 1258 + }, + { + "epoch": 0.4, + "learning_rate": 1.9427303554926515e-05, + "loss": 0.1539, + "step": 1259 + }, + { + "epoch": 0.41, + "learning_rate": 1.9426144321347557e-05, + "loss": 0.1496, + "step": 1260 + }, + { + "epoch": 0.41, + "learning_rate": 1.942498395036992e-05, + "loss": 0.1723, + "step": 1261 + }, + { + "epoch": 0.41, + "learning_rate": 1.942382244213363e-05, + "loss": 0.1578, + "step": 1262 + }, + { + "epoch": 0.41, + "learning_rate": 1.9422659796778825e-05, + "loss": 0.1809, + "step": 1263 + }, + { + "epoch": 0.41, + "learning_rate": 1.94214960144458e-05, + "loss": 0.1799, + "step": 1264 + }, + { + "epoch": 0.41, + "learning_rate": 1.9420331095274987e-05, + "loss": 0.176, + "step": 1265 + }, + { + "epoch": 0.41, + "learning_rate": 1.941916503940694e-05, + "loss": 0.1696, + "step": 1266 + }, + { + "epoch": 0.41, + "learning_rate": 1.9417997846982374e-05, + "loss": 0.1604, + "step": 1267 + }, + { + "epoch": 0.41, + "learning_rate": 1.941682951814212e-05, + "loss": 0.1407, + "step": 1268 + }, + { + "epoch": 0.41, + "learning_rate": 1.9415660053027148e-05, + "loss": 0.1453, + "step": 1269 + }, + { + "epoch": 0.41, + "learning_rate": 1.9414489451778577e-05, + "loss": 0.1659, + "step": 1270 + }, + { + "epoch": 0.41, + "learning_rate": 1.9413317714537657e-05, + "loss": 0.1633, + "step": 1271 + }, + { + "epoch": 0.41, + "learning_rate": 1.9412144841445776e-05, + "loss": 0.1703, + "step": 1272 + }, + { + "epoch": 0.41, + "learning_rate": 1.941097083264445e-05, + "loss": 0.1679, + "step": 1273 + }, + { + "epoch": 0.41, + "learning_rate": 1.940979568827535e-05, + "loss": 0.161, + "step": 1274 + }, + { + "epoch": 0.41, + "learning_rate": 1.9408619408480268e-05, + "loss": 0.1509, + "step": 1275 + }, + { + "epoch": 0.41, + "learning_rate": 1.9407441993401137e-05, + "loss": 0.158, + "step": 1276 + }, + { + "epoch": 0.41, + "learning_rate": 1.9406263443180035e-05, + "loss": 0.1649, + "step": 1277 + }, + { + "epoch": 0.41, + "learning_rate": 1.9405083757959168e-05, + "loss": 0.1543, + "step": 1278 + }, + { + "epoch": 0.41, + "learning_rate": 1.940390293788088e-05, + "loss": 0.1771, + "step": 1279 + }, + { + "epoch": 0.41, + "learning_rate": 1.9402720983087656e-05, + "loss": 0.1512, + "step": 1280 + }, + { + "epoch": 0.41, + "learning_rate": 1.9401537893722117e-05, + "loss": 0.1513, + "step": 1281 + }, + { + "epoch": 0.41, + "learning_rate": 1.9400353669927018e-05, + "loss": 0.1607, + "step": 1282 + }, + { + "epoch": 0.41, + "learning_rate": 1.939916831184525e-05, + "loss": 0.1627, + "step": 1283 + }, + { + "epoch": 0.41, + "learning_rate": 1.939798181961985e-05, + "loss": 0.1687, + "step": 1284 + }, + { + "epoch": 0.41, + "learning_rate": 1.9396794193393974e-05, + "loss": 0.1521, + "step": 1285 + }, + { + "epoch": 0.41, + "learning_rate": 1.9395605433310937e-05, + "loss": 0.1629, + "step": 1286 + }, + { + "epoch": 0.41, + "learning_rate": 1.9394415539514176e-05, + "loss": 0.2007, + "step": 1287 + }, + { + "epoch": 0.41, + "learning_rate": 1.939322451214727e-05, + "loss": 0.1629, + "step": 1288 + }, + { + "epoch": 0.41, + "learning_rate": 1.939203235135393e-05, + "loss": 0.1563, + "step": 1289 + }, + { + "epoch": 0.41, + "learning_rate": 1.9390839057278005e-05, + "loss": 0.1673, + "step": 1290 + }, + { + "epoch": 0.42, + "learning_rate": 1.9389644630063495e-05, + "loss": 0.168, + "step": 1291 + }, + { + "epoch": 0.42, + "learning_rate": 1.938844906985451e-05, + "loss": 0.1575, + "step": 1292 + }, + { + "epoch": 0.42, + "learning_rate": 1.938725237679532e-05, + "loss": 0.1573, + "step": 1293 + }, + { + "epoch": 0.42, + "learning_rate": 1.9386054551030323e-05, + "loss": 0.1591, + "step": 1294 + }, + { + "epoch": 0.42, + "learning_rate": 1.938485559270405e-05, + "loss": 0.1633, + "step": 1295 + }, + { + "epoch": 0.42, + "learning_rate": 1.9383655501961173e-05, + "loss": 0.1515, + "step": 1296 + }, + { + "epoch": 0.42, + "learning_rate": 1.9382454278946503e-05, + "loss": 0.1646, + "step": 1297 + }, + { + "epoch": 0.42, + "learning_rate": 1.938125192380498e-05, + "loss": 0.1513, + "step": 1298 + }, + { + "epoch": 0.42, + "learning_rate": 1.9380048436681695e-05, + "loss": 0.1731, + "step": 1299 + }, + { + "epoch": 0.42, + "learning_rate": 1.9378843817721856e-05, + "loss": 0.1572, + "step": 1300 + }, + { + "epoch": 0.42, + "learning_rate": 1.937763806707082e-05, + "loss": 0.1722, + "step": 1301 + }, + { + "epoch": 0.42, + "learning_rate": 1.937643118487408e-05, + "loss": 0.1536, + "step": 1302 + }, + { + "epoch": 0.42, + "learning_rate": 1.937522317127726e-05, + "loss": 0.1672, + "step": 1303 + }, + { + "epoch": 0.42, + "learning_rate": 1.9374014026426126e-05, + "loss": 0.1699, + "step": 1304 + }, + { + "epoch": 0.42, + "learning_rate": 1.9372803750466577e-05, + "loss": 0.1893, + "step": 1305 + }, + { + "epoch": 0.42, + "learning_rate": 1.9371592343544655e-05, + "loss": 0.1553, + "step": 1306 + }, + { + "epoch": 0.42, + "learning_rate": 1.9370379805806528e-05, + "loss": 0.1776, + "step": 1307 + }, + { + "epoch": 0.42, + "learning_rate": 1.9369166137398513e-05, + "loss": 0.1482, + "step": 1308 + }, + { + "epoch": 0.42, + "learning_rate": 1.936795133846705e-05, + "loss": 0.1704, + "step": 1309 + }, + { + "epoch": 0.42, + "learning_rate": 1.936673540915872e-05, + "loss": 0.1582, + "step": 1310 + }, + { + "epoch": 0.42, + "learning_rate": 1.936551834962025e-05, + "loss": 0.1509, + "step": 1311 + }, + { + "epoch": 0.42, + "learning_rate": 1.936430015999849e-05, + "loss": 0.1526, + "step": 1312 + }, + { + "epoch": 0.42, + "learning_rate": 1.9363080840440432e-05, + "loss": 0.1611, + "step": 1313 + }, + { + "epoch": 0.42, + "learning_rate": 1.9361860391093207e-05, + "loss": 0.1586, + "step": 1314 + }, + { + "epoch": 0.42, + "learning_rate": 1.9360638812104073e-05, + "loss": 0.168, + "step": 1315 + }, + { + "epoch": 0.42, + "learning_rate": 1.9359416103620444e-05, + "loss": 0.1652, + "step": 1316 + }, + { + "epoch": 0.42, + "learning_rate": 1.9358192265789844e-05, + "loss": 0.1738, + "step": 1317 + }, + { + "epoch": 0.42, + "learning_rate": 1.9356967298759953e-05, + "loss": 0.1512, + "step": 1318 + }, + { + "epoch": 0.42, + "learning_rate": 1.935574120267858e-05, + "loss": 0.1519, + "step": 1319 + }, + { + "epoch": 0.42, + "learning_rate": 1.935451397769367e-05, + "loss": 0.1691, + "step": 1320 + }, + { + "epoch": 0.42, + "learning_rate": 1.9353285623953304e-05, + "loss": 0.1753, + "step": 1321 + }, + { + "epoch": 0.43, + "learning_rate": 1.9352056141605705e-05, + "loss": 0.1557, + "step": 1322 + }, + { + "epoch": 0.43, + "learning_rate": 1.9350825530799223e-05, + "loss": 0.1639, + "step": 1323 + }, + { + "epoch": 0.43, + "learning_rate": 1.934959379168235e-05, + "loss": 0.1435, + "step": 1324 + }, + { + "epoch": 0.43, + "learning_rate": 1.934836092440371e-05, + "loss": 0.1769, + "step": 1325 + }, + { + "epoch": 0.43, + "learning_rate": 1.9347126929112077e-05, + "loss": 0.1553, + "step": 1326 + }, + { + "epoch": 0.43, + "learning_rate": 1.9345891805956334e-05, + "loss": 0.1473, + "step": 1327 + }, + { + "epoch": 0.43, + "learning_rate": 1.934465555508553e-05, + "loss": 0.1663, + "step": 1328 + }, + { + "epoch": 0.43, + "learning_rate": 1.934341817664883e-05, + "loss": 0.1644, + "step": 1329 + }, + { + "epoch": 0.43, + "learning_rate": 1.9342179670795544e-05, + "loss": 0.1702, + "step": 1330 + }, + { + "epoch": 0.43, + "learning_rate": 1.934094003767511e-05, + "loss": 0.1698, + "step": 1331 + }, + { + "epoch": 0.43, + "learning_rate": 1.933969927743711e-05, + "loss": 0.1655, + "step": 1332 + }, + { + "epoch": 0.43, + "learning_rate": 1.9338457390231267e-05, + "loss": 0.1732, + "step": 1333 + }, + { + "epoch": 0.43, + "learning_rate": 1.9337214376207417e-05, + "loss": 0.1574, + "step": 1334 + }, + { + "epoch": 0.43, + "learning_rate": 1.9335970235515563e-05, + "loss": 0.16, + "step": 1335 + }, + { + "epoch": 0.43, + "learning_rate": 1.9334724968305818e-05, + "loss": 0.1604, + "step": 1336 + }, + { + "epoch": 0.43, + "learning_rate": 1.9333478574728447e-05, + "loss": 0.1539, + "step": 1337 + }, + { + "epoch": 0.43, + "learning_rate": 1.933223105493384e-05, + "loss": 0.1653, + "step": 1338 + }, + { + "epoch": 0.43, + "learning_rate": 1.9330982409072535e-05, + "loss": 0.1742, + "step": 1339 + }, + { + "epoch": 0.43, + "learning_rate": 1.932973263729519e-05, + "loss": 0.1556, + "step": 1340 + }, + { + "epoch": 0.43, + "learning_rate": 1.9328481739752614e-05, + "loss": 0.1575, + "step": 1341 + }, + { + "epoch": 0.43, + "learning_rate": 1.9327229716595745e-05, + "loss": 0.1565, + "step": 1342 + }, + { + "epoch": 0.43, + "learning_rate": 1.9325976567975658e-05, + "loss": 0.1683, + "step": 1343 + }, + { + "epoch": 0.43, + "learning_rate": 1.932472229404356e-05, + "loss": 0.1598, + "step": 1344 + }, + { + "epoch": 0.43, + "learning_rate": 1.9323466894950796e-05, + "loss": 0.1677, + "step": 1345 + }, + { + "epoch": 0.43, + "learning_rate": 1.9322210370848856e-05, + "loss": 0.1643, + "step": 1346 + }, + { + "epoch": 0.43, + "learning_rate": 1.932095272188935e-05, + "loss": 0.158, + "step": 1347 + }, + { + "epoch": 0.43, + "learning_rate": 1.9319693948224035e-05, + "loss": 0.1562, + "step": 1348 + }, + { + "epoch": 0.43, + "learning_rate": 1.9318434050004798e-05, + "loss": 0.1731, + "step": 1349 + }, + { + "epoch": 0.43, + "learning_rate": 1.9317173027383662e-05, + "loss": 0.1605, + "step": 1350 + }, + { + "epoch": 0.43, + "learning_rate": 1.9315910880512792e-05, + "loss": 0.1753, + "step": 1351 + }, + { + "epoch": 0.43, + "learning_rate": 1.931464760954448e-05, + "loss": 0.1545, + "step": 1352 + }, + { + "epoch": 0.44, + "learning_rate": 1.931338321463116e-05, + "loss": 0.1629, + "step": 1353 + }, + { + "epoch": 0.44, + "learning_rate": 1.9312117695925398e-05, + "loss": 0.1437, + "step": 1354 + }, + { + "epoch": 0.44, + "learning_rate": 1.9310851053579897e-05, + "loss": 0.1782, + "step": 1355 + }, + { + "epoch": 0.44, + "learning_rate": 1.9309583287747496e-05, + "loss": 0.1518, + "step": 1356 + }, + { + "epoch": 0.44, + "learning_rate": 1.930831439858117e-05, + "loss": 0.1679, + "step": 1357 + }, + { + "epoch": 0.44, + "learning_rate": 1.9307044386234027e-05, + "loss": 0.1558, + "step": 1358 + }, + { + "epoch": 0.44, + "learning_rate": 1.930577325085931e-05, + "loss": 0.1868, + "step": 1359 + }, + { + "epoch": 0.44, + "learning_rate": 1.9304500992610404e-05, + "loss": 0.1575, + "step": 1360 + }, + { + "epoch": 0.44, + "learning_rate": 1.9303227611640823e-05, + "loss": 0.1553, + "step": 1361 + }, + { + "epoch": 0.44, + "learning_rate": 1.930195310810422e-05, + "loss": 0.173, + "step": 1362 + }, + { + "epoch": 0.44, + "learning_rate": 1.930067748215438e-05, + "loss": 0.1627, + "step": 1363 + }, + { + "epoch": 0.44, + "learning_rate": 1.9299400733945226e-05, + "loss": 0.1608, + "step": 1364 + }, + { + "epoch": 0.44, + "learning_rate": 1.9298122863630816e-05, + "loss": 0.1667, + "step": 1365 + }, + { + "epoch": 0.44, + "learning_rate": 1.9296843871365343e-05, + "loss": 0.1637, + "step": 1366 + }, + { + "epoch": 0.44, + "learning_rate": 1.9295563757303136e-05, + "loss": 0.164, + "step": 1367 + }, + { + "epoch": 0.44, + "learning_rate": 1.929428252159866e-05, + "loss": 0.159, + "step": 1368 + }, + { + "epoch": 0.44, + "learning_rate": 1.9293000164406513e-05, + "loss": 0.1496, + "step": 1369 + }, + { + "epoch": 0.44, + "learning_rate": 1.929171668588143e-05, + "loss": 0.1799, + "step": 1370 + }, + { + "epoch": 0.44, + "learning_rate": 1.9290432086178282e-05, + "loss": 0.1604, + "step": 1371 + }, + { + "epoch": 0.44, + "learning_rate": 1.928914636545207e-05, + "loss": 0.1541, + "step": 1372 + }, + { + "epoch": 0.44, + "learning_rate": 1.9287859523857938e-05, + "loss": 0.1642, + "step": 1373 + }, + { + "epoch": 0.44, + "learning_rate": 1.9286571561551165e-05, + "loss": 0.1495, + "step": 1374 + }, + { + "epoch": 0.44, + "learning_rate": 1.9285282478687155e-05, + "loss": 0.1773, + "step": 1375 + }, + { + "epoch": 0.44, + "learning_rate": 1.9283992275421462e-05, + "loss": 0.1795, + "step": 1376 + }, + { + "epoch": 0.44, + "learning_rate": 1.9282700951909762e-05, + "loss": 0.1639, + "step": 1377 + }, + { + "epoch": 0.44, + "learning_rate": 1.928140850830787e-05, + "loss": 0.1621, + "step": 1378 + }, + { + "epoch": 0.44, + "learning_rate": 1.928011494477174e-05, + "loss": 0.1716, + "step": 1379 + }, + { + "epoch": 0.44, + "learning_rate": 1.9278820261457465e-05, + "loss": 0.1847, + "step": 1380 + }, + { + "epoch": 0.44, + "learning_rate": 1.9277524458521257e-05, + "loss": 0.1658, + "step": 1381 + }, + { + "epoch": 0.44, + "learning_rate": 1.927622753611948e-05, + "loss": 0.1605, + "step": 1382 + }, + { + "epoch": 0.44, + "learning_rate": 1.9274929494408622e-05, + "loss": 0.1543, + "step": 1383 + }, + { + "epoch": 0.45, + "learning_rate": 1.927363033354531e-05, + "loss": 0.1584, + "step": 1384 + }, + { + "epoch": 0.45, + "learning_rate": 1.927233005368631e-05, + "loss": 0.1554, + "step": 1385 + }, + { + "epoch": 0.45, + "learning_rate": 1.927102865498852e-05, + "loss": 0.141, + "step": 1386 + }, + { + "epoch": 0.45, + "learning_rate": 1.926972613760897e-05, + "loss": 0.1629, + "step": 1387 + }, + { + "epoch": 0.45, + "learning_rate": 1.9268422501704825e-05, + "loss": 0.167, + "step": 1388 + }, + { + "epoch": 0.45, + "learning_rate": 1.926711774743339e-05, + "loss": 0.1933, + "step": 1389 + }, + { + "epoch": 0.45, + "learning_rate": 1.92658118749521e-05, + "loss": 0.162, + "step": 1390 + }, + { + "epoch": 0.45, + "learning_rate": 1.9264504884418528e-05, + "loss": 0.1907, + "step": 1391 + }, + { + "epoch": 0.45, + "learning_rate": 1.9263196775990388e-05, + "loss": 0.1655, + "step": 1392 + }, + { + "epoch": 0.45, + "learning_rate": 1.926188754982551e-05, + "loss": 0.1471, + "step": 1393 + }, + { + "epoch": 0.45, + "learning_rate": 1.9260577206081876e-05, + "loss": 0.1531, + "step": 1394 + }, + { + "epoch": 0.45, + "learning_rate": 1.92592657449176e-05, + "loss": 0.1474, + "step": 1395 + }, + { + "epoch": 0.45, + "learning_rate": 1.9257953166490928e-05, + "loss": 0.1668, + "step": 1396 + }, + { + "epoch": 0.45, + "learning_rate": 1.9256639470960237e-05, + "loss": 0.1627, + "step": 1397 + }, + { + "epoch": 0.45, + "learning_rate": 1.9255324658484048e-05, + "loss": 0.1708, + "step": 1398 + }, + { + "epoch": 0.45, + "learning_rate": 1.925400872922101e-05, + "loss": 0.1665, + "step": 1399 + }, + { + "epoch": 0.45, + "learning_rate": 1.9252691683329907e-05, + "loss": 0.1708, + "step": 1400 + }, + { + "epoch": 0.45, + "learning_rate": 1.925137352096966e-05, + "loss": 0.1683, + "step": 1401 + }, + { + "epoch": 0.45, + "learning_rate": 1.925005424229933e-05, + "loss": 0.1796, + "step": 1402 + }, + { + "epoch": 0.45, + "learning_rate": 1.9248733847478095e-05, + "loss": 0.1833, + "step": 1403 + }, + { + "epoch": 0.45, + "learning_rate": 1.924741233666529e-05, + "loss": 0.1548, + "step": 1404 + }, + { + "epoch": 0.45, + "learning_rate": 1.924608971002037e-05, + "loss": 0.1668, + "step": 1405 + }, + { + "epoch": 0.45, + "learning_rate": 1.9244765967702928e-05, + "loss": 0.1715, + "step": 1406 + }, + { + "epoch": 0.45, + "learning_rate": 1.9243441109872694e-05, + "loss": 0.1681, + "step": 1407 + }, + { + "epoch": 0.45, + "learning_rate": 1.924211513668953e-05, + "loss": 0.1722, + "step": 1408 + }, + { + "epoch": 0.45, + "learning_rate": 1.9240788048313436e-05, + "loss": 0.1681, + "step": 1409 + }, + { + "epoch": 0.45, + "learning_rate": 1.923945984490454e-05, + "loss": 0.1577, + "step": 1410 + }, + { + "epoch": 0.45, + "learning_rate": 1.923813052662311e-05, + "loss": 0.1863, + "step": 1411 + }, + { + "epoch": 0.45, + "learning_rate": 1.923680009362955e-05, + "loss": 0.1821, + "step": 1412 + }, + { + "epoch": 0.45, + "learning_rate": 1.9235468546084392e-05, + "loss": 0.1754, + "step": 1413 + }, + { + "epoch": 0.45, + "learning_rate": 1.923413588414831e-05, + "loss": 0.1602, + "step": 1414 + }, + { + "epoch": 0.46, + "learning_rate": 1.9232802107982103e-05, + "loss": 0.1449, + "step": 1415 + }, + { + "epoch": 0.46, + "learning_rate": 1.923146721774672e-05, + "loss": 0.1615, + "step": 1416 + }, + { + "epoch": 0.46, + "learning_rate": 1.923013121360322e-05, + "loss": 0.1761, + "step": 1417 + }, + { + "epoch": 0.46, + "learning_rate": 1.9228794095712824e-05, + "loss": 0.154, + "step": 1418 + }, + { + "epoch": 0.46, + "learning_rate": 1.922745586423687e-05, + "loss": 0.1717, + "step": 1419 + }, + { + "epoch": 0.46, + "learning_rate": 1.922611651933683e-05, + "loss": 0.1524, + "step": 1420 + }, + { + "epoch": 0.46, + "learning_rate": 1.9224776061174322e-05, + "loss": 0.1647, + "step": 1421 + }, + { + "epoch": 0.46, + "learning_rate": 1.922343448991109e-05, + "loss": 0.1665, + "step": 1422 + }, + { + "epoch": 0.46, + "learning_rate": 1.922209180570901e-05, + "loss": 0.1569, + "step": 1423 + }, + { + "epoch": 0.46, + "learning_rate": 1.9220748008730096e-05, + "loss": 0.167, + "step": 1424 + }, + { + "epoch": 0.46, + "learning_rate": 1.92194030991365e-05, + "loss": 0.1636, + "step": 1425 + }, + { + "epoch": 0.46, + "learning_rate": 1.921805707709051e-05, + "loss": 0.1754, + "step": 1426 + }, + { + "epoch": 0.46, + "learning_rate": 1.921670994275453e-05, + "loss": 0.1711, + "step": 1427 + }, + { + "epoch": 0.46, + "learning_rate": 1.9215361696291114e-05, + "loss": 0.1707, + "step": 1428 + }, + { + "epoch": 0.46, + "learning_rate": 1.9214012337862952e-05, + "loss": 0.1694, + "step": 1429 + }, + { + "epoch": 0.46, + "learning_rate": 1.9212661867632866e-05, + "loss": 0.1736, + "step": 1430 + }, + { + "epoch": 0.46, + "learning_rate": 1.92113102857638e-05, + "loss": 0.1652, + "step": 1431 + }, + { + "epoch": 0.46, + "learning_rate": 1.9209957592418848e-05, + "loss": 0.1649, + "step": 1432 + }, + { + "epoch": 0.46, + "learning_rate": 1.920860378776123e-05, + "loss": 0.1734, + "step": 1433 + }, + { + "epoch": 0.46, + "learning_rate": 1.92072488719543e-05, + "loss": 0.1527, + "step": 1434 + }, + { + "epoch": 0.46, + "learning_rate": 1.920589284516155e-05, + "loss": 0.1691, + "step": 1435 + }, + { + "epoch": 0.46, + "learning_rate": 1.9204535707546602e-05, + "loss": 0.1767, + "step": 1436 + }, + { + "epoch": 0.46, + "learning_rate": 1.920317745927322e-05, + "loss": 0.1824, + "step": 1437 + }, + { + "epoch": 0.46, + "learning_rate": 1.920181810050529e-05, + "loss": 0.1778, + "step": 1438 + }, + { + "epoch": 0.46, + "learning_rate": 1.9200457631406842e-05, + "loss": 0.1618, + "step": 1439 + }, + { + "epoch": 0.46, + "learning_rate": 1.919909605214203e-05, + "loss": 0.1641, + "step": 1440 + }, + { + "epoch": 0.46, + "learning_rate": 1.9197733362875153e-05, + "loss": 0.1596, + "step": 1441 + }, + { + "epoch": 0.46, + "learning_rate": 1.919636956377064e-05, + "loss": 0.1631, + "step": 1442 + }, + { + "epoch": 0.46, + "learning_rate": 1.9195004654993047e-05, + "loss": 0.1906, + "step": 1443 + }, + { + "epoch": 0.46, + "learning_rate": 1.9193638636707076e-05, + "loss": 0.1642, + "step": 1444 + }, + { + "epoch": 0.46, + "learning_rate": 1.9192271509077552e-05, + "loss": 0.1621, + "step": 1445 + }, + { + "epoch": 0.47, + "learning_rate": 1.9190903272269438e-05, + "loss": 0.153, + "step": 1446 + }, + { + "epoch": 0.47, + "learning_rate": 1.9189533926447836e-05, + "loss": 0.163, + "step": 1447 + }, + { + "epoch": 0.47, + "learning_rate": 1.9188163471777974e-05, + "loss": 0.178, + "step": 1448 + }, + { + "epoch": 0.47, + "learning_rate": 1.918679190842522e-05, + "loss": 0.1631, + "step": 1449 + }, + { + "epoch": 0.47, + "learning_rate": 1.918541923655507e-05, + "loss": 0.1586, + "step": 1450 + }, + { + "epoch": 0.47, + "learning_rate": 1.918404545633315e-05, + "loss": 0.1706, + "step": 1451 + }, + { + "epoch": 0.47, + "learning_rate": 1.9182670567925237e-05, + "loss": 0.1792, + "step": 1452 + }, + { + "epoch": 0.47, + "learning_rate": 1.9181294571497228e-05, + "loss": 0.1785, + "step": 1453 + }, + { + "epoch": 0.47, + "learning_rate": 1.9179917467215153e-05, + "loss": 0.1626, + "step": 1454 + }, + { + "epoch": 0.47, + "learning_rate": 1.9178539255245182e-05, + "loss": 0.1803, + "step": 1455 + }, + { + "epoch": 0.47, + "learning_rate": 1.9177159935753612e-05, + "loss": 0.1647, + "step": 1456 + }, + { + "epoch": 0.47, + "learning_rate": 1.9175779508906888e-05, + "loss": 0.1619, + "step": 1457 + }, + { + "epoch": 0.47, + "learning_rate": 1.9174397974871563e-05, + "loss": 0.1703, + "step": 1458 + }, + { + "epoch": 0.47, + "learning_rate": 1.917301533381435e-05, + "loss": 0.1537, + "step": 1459 + }, + { + "epoch": 0.47, + "learning_rate": 1.9171631585902084e-05, + "loss": 0.1698, + "step": 1460 + }, + { + "epoch": 0.47, + "learning_rate": 1.917024673130173e-05, + "loss": 0.1413, + "step": 1461 + }, + { + "epoch": 0.47, + "learning_rate": 1.916886077018039e-05, + "loss": 0.1598, + "step": 1462 + }, + { + "epoch": 0.47, + "learning_rate": 1.91674737027053e-05, + "loss": 0.1609, + "step": 1463 + }, + { + "epoch": 0.47, + "learning_rate": 1.9166085529043834e-05, + "loss": 0.1481, + "step": 1464 + }, + { + "epoch": 0.47, + "learning_rate": 1.916469624936349e-05, + "loss": 0.148, + "step": 1465 + }, + { + "epoch": 0.47, + "learning_rate": 1.9163305863831908e-05, + "loss": 0.1506, + "step": 1466 + }, + { + "epoch": 0.47, + "learning_rate": 1.9161914372616853e-05, + "loss": 0.154, + "step": 1467 + }, + { + "epoch": 0.47, + "learning_rate": 1.9160521775886237e-05, + "loss": 0.1863, + "step": 1468 + }, + { + "epoch": 0.47, + "learning_rate": 1.9159128073808087e-05, + "loss": 0.1594, + "step": 1469 + }, + { + "epoch": 0.47, + "learning_rate": 1.9157733266550577e-05, + "loss": 0.1587, + "step": 1470 + }, + { + "epoch": 0.47, + "learning_rate": 1.915633735428201e-05, + "loss": 0.1602, + "step": 1471 + }, + { + "epoch": 0.47, + "learning_rate": 1.9154940337170824e-05, + "loss": 0.1483, + "step": 1472 + }, + { + "epoch": 0.47, + "learning_rate": 1.9153542215385588e-05, + "loss": 0.1831, + "step": 1473 + }, + { + "epoch": 0.47, + "learning_rate": 1.9152142989095007e-05, + "loss": 0.145, + "step": 1474 + }, + { + "epoch": 0.47, + "learning_rate": 1.9150742658467914e-05, + "loss": 0.1595, + "step": 1475 + }, + { + "epoch": 0.47, + "learning_rate": 1.9149341223673282e-05, + "loss": 0.1779, + "step": 1476 + }, + { + "epoch": 0.48, + "learning_rate": 1.9147938684880213e-05, + "loss": 0.1585, + "step": 1477 + }, + { + "epoch": 0.48, + "learning_rate": 1.914653504225794e-05, + "loss": 0.1601, + "step": 1478 + }, + { + "epoch": 0.48, + "learning_rate": 1.914513029597584e-05, + "loss": 0.1476, + "step": 1479 + }, + { + "epoch": 0.48, + "learning_rate": 1.9143724446203407e-05, + "loss": 0.1739, + "step": 1480 + }, + { + "epoch": 0.48, + "learning_rate": 1.9142317493110287e-05, + "loss": 0.1559, + "step": 1481 + }, + { + "epoch": 0.48, + "learning_rate": 1.914090943686624e-05, + "loss": 0.1531, + "step": 1482 + }, + { + "epoch": 0.48, + "learning_rate": 1.9139500277641173e-05, + "loss": 0.1598, + "step": 1483 + }, + { + "epoch": 0.48, + "learning_rate": 1.9138090015605117e-05, + "loss": 0.1332, + "step": 1484 + }, + { + "epoch": 0.48, + "learning_rate": 1.9136678650928244e-05, + "loss": 0.155, + "step": 1485 + }, + { + "epoch": 0.48, + "learning_rate": 1.9135266183780854e-05, + "loss": 0.1519, + "step": 1486 + }, + { + "epoch": 0.48, + "learning_rate": 1.913385261433338e-05, + "loss": 0.1829, + "step": 1487 + }, + { + "epoch": 0.48, + "learning_rate": 1.9132437942756393e-05, + "loss": 0.1694, + "step": 1488 + }, + { + "epoch": 0.48, + "learning_rate": 1.9131022169220588e-05, + "loss": 0.1558, + "step": 1489 + }, + { + "epoch": 0.48, + "learning_rate": 1.91296052938968e-05, + "loss": 0.1845, + "step": 1490 + }, + { + "epoch": 0.48, + "learning_rate": 1.9128187316956e-05, + "loss": 0.1679, + "step": 1491 + }, + { + "epoch": 0.48, + "learning_rate": 1.9126768238569283e-05, + "loss": 0.1539, + "step": 1492 + }, + { + "epoch": 0.48, + "learning_rate": 1.912534805890788e-05, + "loss": 0.1645, + "step": 1493 + }, + { + "epoch": 0.48, + "learning_rate": 1.912392677814316e-05, + "loss": 0.1501, + "step": 1494 + }, + { + "epoch": 0.48, + "learning_rate": 1.9122504396446615e-05, + "loss": 0.1588, + "step": 1495 + }, + { + "epoch": 0.48, + "learning_rate": 1.912108091398988e-05, + "loss": 0.1571, + "step": 1496 + }, + { + "epoch": 0.48, + "learning_rate": 1.9119656330944716e-05, + "loss": 0.1671, + "step": 1497 + }, + { + "epoch": 0.48, + "learning_rate": 1.911823064748302e-05, + "loss": 0.1484, + "step": 1498 + }, + { + "epoch": 0.48, + "learning_rate": 1.9116803863776825e-05, + "loss": 0.1526, + "step": 1499 + }, + { + "epoch": 0.48, + "learning_rate": 1.9115375979998284e-05, + "loss": 0.1624, + "step": 1500 + }, + { + "epoch": 0.48, + "learning_rate": 1.91139469963197e-05, + "loss": 0.1552, + "step": 1501 + }, + { + "epoch": 0.48, + "learning_rate": 1.9112516912913497e-05, + "loss": 0.172, + "step": 1502 + }, + { + "epoch": 0.48, + "learning_rate": 1.9111085729952235e-05, + "loss": 0.1556, + "step": 1503 + }, + { + "epoch": 0.48, + "learning_rate": 1.9109653447608607e-05, + "loss": 0.1577, + "step": 1504 + }, + { + "epoch": 0.48, + "learning_rate": 1.910822006605544e-05, + "loss": 0.1646, + "step": 1505 + }, + { + "epoch": 0.48, + "learning_rate": 1.9106785585465687e-05, + "loss": 0.1593, + "step": 1506 + }, + { + "epoch": 0.48, + "learning_rate": 1.9105350006012438e-05, + "loss": 0.1968, + "step": 1507 + }, + { + "epoch": 0.49, + "learning_rate": 1.9103913327868924e-05, + "loss": 0.1629, + "step": 1508 + }, + { + "epoch": 0.49, + "learning_rate": 1.9102475551208493e-05, + "loss": 0.164, + "step": 1509 + }, + { + "epoch": 0.49, + "learning_rate": 1.910103667620464e-05, + "loss": 0.146, + "step": 1510 + }, + { + "epoch": 0.49, + "learning_rate": 1.909959670303098e-05, + "loss": 0.182, + "step": 1511 + }, + { + "epoch": 0.49, + "learning_rate": 1.9098155631861272e-05, + "loss": 0.1521, + "step": 1512 + }, + { + "epoch": 0.49, + "learning_rate": 1.9096713462869392e-05, + "loss": 0.1731, + "step": 1513 + }, + { + "epoch": 0.49, + "learning_rate": 1.909527019622937e-05, + "loss": 0.1636, + "step": 1514 + }, + { + "epoch": 0.49, + "learning_rate": 1.909382583211535e-05, + "loss": 0.1591, + "step": 1515 + }, + { + "epoch": 0.49, + "learning_rate": 1.9092380370701617e-05, + "loss": 0.1712, + "step": 1516 + }, + { + "epoch": 0.49, + "learning_rate": 1.9090933812162584e-05, + "loss": 0.1556, + "step": 1517 + }, + { + "epoch": 0.49, + "learning_rate": 1.90894861566728e-05, + "loss": 0.1857, + "step": 1518 + }, + { + "epoch": 0.49, + "learning_rate": 1.9088037404406948e-05, + "loss": 0.1444, + "step": 1519 + }, + { + "epoch": 0.49, + "learning_rate": 1.908658755553984e-05, + "loss": 0.1879, + "step": 1520 + }, + { + "epoch": 0.49, + "learning_rate": 1.9085136610246415e-05, + "loss": 0.1872, + "step": 1521 + }, + { + "epoch": 0.49, + "learning_rate": 1.9083684568701757e-05, + "loss": 0.1532, + "step": 1522 + }, + { + "epoch": 0.49, + "learning_rate": 1.9082231431081074e-05, + "loss": 0.1625, + "step": 1523 + }, + { + "epoch": 0.49, + "learning_rate": 1.9080777197559707e-05, + "loss": 0.1652, + "step": 1524 + }, + { + "epoch": 0.49, + "learning_rate": 1.907932186831313e-05, + "loss": 0.1807, + "step": 1525 + }, + { + "epoch": 0.49, + "learning_rate": 1.9077865443516948e-05, + "loss": 0.1487, + "step": 1526 + }, + { + "epoch": 0.49, + "learning_rate": 1.90764079233469e-05, + "loss": 0.1639, + "step": 1527 + }, + { + "epoch": 0.49, + "learning_rate": 1.9074949307978862e-05, + "loss": 0.1834, + "step": 1528 + }, + { + "epoch": 0.49, + "learning_rate": 1.907348959758883e-05, + "loss": 0.1585, + "step": 1529 + }, + { + "epoch": 0.49, + "learning_rate": 1.9072028792352938e-05, + "loss": 0.1669, + "step": 1530 + }, + { + "epoch": 0.49, + "learning_rate": 1.9070566892447456e-05, + "loss": 0.1452, + "step": 1531 + }, + { + "epoch": 0.49, + "learning_rate": 1.9069103898048788e-05, + "loss": 0.1845, + "step": 1532 + }, + { + "epoch": 0.49, + "learning_rate": 1.9067639809333456e-05, + "loss": 0.1678, + "step": 1533 + }, + { + "epoch": 0.49, + "learning_rate": 1.906617462647813e-05, + "loss": 0.16, + "step": 1534 + }, + { + "epoch": 0.49, + "learning_rate": 1.9064708349659598e-05, + "loss": 0.1712, + "step": 1535 + }, + { + "epoch": 0.49, + "learning_rate": 1.9063240979054796e-05, + "loss": 0.1591, + "step": 1536 + }, + { + "epoch": 0.49, + "learning_rate": 1.9061772514840775e-05, + "loss": 0.1368, + "step": 1537 + }, + { + "epoch": 0.49, + "learning_rate": 1.9060302957194732e-05, + "loss": 0.1529, + "step": 1538 + }, + { + "epoch": 0.5, + "learning_rate": 1.9058832306293987e-05, + "loss": 0.1638, + "step": 1539 + }, + { + "epoch": 0.5, + "learning_rate": 1.9057360562315997e-05, + "loss": 0.1467, + "step": 1540 + }, + { + "epoch": 0.5, + "learning_rate": 1.9055887725438348e-05, + "loss": 0.1536, + "step": 1541 + }, + { + "epoch": 0.5, + "learning_rate": 1.905441379583876e-05, + "loss": 0.1611, + "step": 1542 + }, + { + "epoch": 0.5, + "learning_rate": 1.9052938773695082e-05, + "loss": 0.1864, + "step": 1543 + }, + { + "epoch": 0.5, + "learning_rate": 1.9051462659185293e-05, + "loss": 0.1597, + "step": 1544 + }, + { + "epoch": 0.5, + "learning_rate": 1.9049985452487516e-05, + "loss": 0.1735, + "step": 1545 + }, + { + "epoch": 0.5, + "learning_rate": 1.904850715377999e-05, + "loss": 0.1411, + "step": 1546 + }, + { + "epoch": 0.5, + "learning_rate": 1.9047027763241093e-05, + "loss": 0.1574, + "step": 1547 + }, + { + "epoch": 0.5, + "learning_rate": 1.9045547281049338e-05, + "loss": 0.1565, + "step": 1548 + }, + { + "epoch": 0.5, + "learning_rate": 1.9044065707383364e-05, + "loss": 0.1678, + "step": 1549 + }, + { + "epoch": 0.5, + "learning_rate": 1.9042583042421947e-05, + "loss": 0.1577, + "step": 1550 + }, + { + "epoch": 0.5, + "learning_rate": 1.9041099286343988e-05, + "loss": 0.1465, + "step": 1551 + }, + { + "epoch": 0.5, + "learning_rate": 1.9039614439328524e-05, + "loss": 0.1831, + "step": 1552 + }, + { + "epoch": 0.5, + "learning_rate": 1.9038128501554723e-05, + "loss": 0.1454, + "step": 1553 + }, + { + "epoch": 0.5, + "learning_rate": 1.9036641473201887e-05, + "loss": 0.157, + "step": 1554 + }, + { + "epoch": 0.5, + "learning_rate": 1.9035153354449446e-05, + "loss": 0.1881, + "step": 1555 + }, + { + "epoch": 0.5, + "learning_rate": 1.903366414547696e-05, + "loss": 0.163, + "step": 1556 + }, + { + "epoch": 0.5, + "learning_rate": 1.9032173846464124e-05, + "loss": 0.1606, + "step": 1557 + }, + { + "epoch": 0.5, + "learning_rate": 1.903068245759077e-05, + "loss": 0.1728, + "step": 1558 + }, + { + "epoch": 0.5, + "learning_rate": 1.9029189979036846e-05, + "loss": 0.1659, + "step": 1559 + }, + { + "epoch": 0.5, + "learning_rate": 1.902769641098245e-05, + "loss": 0.1577, + "step": 1560 + }, + { + "epoch": 0.5, + "learning_rate": 1.9026201753607792e-05, + "loss": 0.1548, + "step": 1561 + }, + { + "epoch": 0.5, + "learning_rate": 1.9024706007093234e-05, + "loss": 0.1614, + "step": 1562 + }, + { + "epoch": 0.5, + "learning_rate": 1.9023209171619252e-05, + "loss": 0.1655, + "step": 1563 + }, + { + "epoch": 0.5, + "learning_rate": 1.9021711247366463e-05, + "loss": 0.158, + "step": 1564 + }, + { + "epoch": 0.5, + "learning_rate": 1.9020212234515614e-05, + "loss": 0.1629, + "step": 1565 + }, + { + "epoch": 0.5, + "learning_rate": 1.9018712133247577e-05, + "loss": 0.1509, + "step": 1566 + }, + { + "epoch": 0.5, + "learning_rate": 1.9017210943743373e-05, + "loss": 0.1579, + "step": 1567 + }, + { + "epoch": 0.5, + "learning_rate": 1.901570866618413e-05, + "loss": 0.1607, + "step": 1568 + }, + { + "epoch": 0.5, + "learning_rate": 1.9014205300751122e-05, + "loss": 0.1707, + "step": 1569 + }, + { + "epoch": 0.5, + "learning_rate": 1.901270084762575e-05, + "loss": 0.16, + "step": 1570 + }, + { + "epoch": 0.51, + "learning_rate": 1.9011195306989553e-05, + "loss": 0.155, + "step": 1571 + }, + { + "epoch": 0.51, + "learning_rate": 1.900968867902419e-05, + "loss": 0.1538, + "step": 1572 + }, + { + "epoch": 0.51, + "learning_rate": 1.9008180963911463e-05, + "loss": 0.192, + "step": 1573 + }, + { + "epoch": 0.51, + "learning_rate": 1.90066721618333e-05, + "loss": 0.1811, + "step": 1574 + }, + { + "epoch": 0.51, + "learning_rate": 1.900516227297175e-05, + "loss": 0.1566, + "step": 1575 + }, + { + "epoch": 0.51, + "learning_rate": 1.900365129750901e-05, + "loss": 0.1526, + "step": 1576 + }, + { + "epoch": 0.51, + "learning_rate": 1.90021392356274e-05, + "loss": 0.155, + "step": 1577 + }, + { + "epoch": 0.51, + "learning_rate": 1.9000626087509376e-05, + "loss": 0.1778, + "step": 1578 + }, + { + "epoch": 0.51, + "learning_rate": 1.899911185333751e-05, + "loss": 0.1663, + "step": 1579 + }, + { + "epoch": 0.51, + "learning_rate": 1.8997596533294524e-05, + "loss": 0.1643, + "step": 1580 + }, + { + "epoch": 0.51, + "learning_rate": 1.8996080127563258e-05, + "loss": 0.1532, + "step": 1581 + }, + { + "epoch": 0.51, + "learning_rate": 1.8994562636326694e-05, + "loss": 0.1509, + "step": 1582 + }, + { + "epoch": 0.51, + "learning_rate": 1.8993044059767935e-05, + "loss": 0.1544, + "step": 1583 + }, + { + "epoch": 0.51, + "learning_rate": 1.899152439807022e-05, + "loss": 0.151, + "step": 1584 + }, + { + "epoch": 0.51, + "learning_rate": 1.8990003651416916e-05, + "loss": 0.1292, + "step": 1585 + }, + { + "epoch": 0.51, + "learning_rate": 1.8988481819991526e-05, + "loss": 0.164, + "step": 1586 + }, + { + "epoch": 0.51, + "learning_rate": 1.898695890397768e-05, + "loss": 0.1776, + "step": 1587 + }, + { + "epoch": 0.51, + "learning_rate": 1.8985434903559138e-05, + "loss": 0.1728, + "step": 1588 + }, + { + "epoch": 0.51, + "learning_rate": 1.898390981891979e-05, + "loss": 0.1465, + "step": 1589 + }, + { + "epoch": 0.51, + "learning_rate": 1.8982383650243666e-05, + "loss": 0.1549, + "step": 1590 + }, + { + "epoch": 0.51, + "learning_rate": 1.8980856397714914e-05, + "loss": 0.1579, + "step": 1591 + }, + { + "epoch": 0.51, + "learning_rate": 1.8979328061517817e-05, + "loss": 0.1548, + "step": 1592 + }, + { + "epoch": 0.51, + "learning_rate": 1.89777986418368e-05, + "loss": 0.1783, + "step": 1593 + }, + { + "epoch": 0.51, + "learning_rate": 1.8976268138856404e-05, + "loss": 0.1667, + "step": 1594 + }, + { + "epoch": 0.51, + "learning_rate": 1.8974736552761306e-05, + "loss": 0.1709, + "step": 1595 + }, + { + "epoch": 0.51, + "learning_rate": 1.897320388373631e-05, + "loss": 0.1555, + "step": 1596 + }, + { + "epoch": 0.51, + "learning_rate": 1.897167013196636e-05, + "loss": 0.1605, + "step": 1597 + }, + { + "epoch": 0.51, + "learning_rate": 1.897013529763652e-05, + "loss": 0.1732, + "step": 1598 + }, + { + "epoch": 0.51, + "learning_rate": 1.8968599380931994e-05, + "loss": 0.1546, + "step": 1599 + }, + { + "epoch": 0.51, + "learning_rate": 1.8967062382038113e-05, + "loss": 0.1695, + "step": 1600 + }, + { + "epoch": 0.51, + "learning_rate": 1.8965524301140334e-05, + "loss": 0.1337, + "step": 1601 + }, + { + "epoch": 0.52, + "learning_rate": 1.896398513842425e-05, + "loss": 0.1615, + "step": 1602 + }, + { + "epoch": 0.52, + "learning_rate": 1.8962444894075582e-05, + "loss": 0.1511, + "step": 1603 + }, + { + "epoch": 0.52, + "learning_rate": 1.8960903568280186e-05, + "loss": 0.1543, + "step": 1604 + }, + { + "epoch": 0.52, + "learning_rate": 1.8959361161224038e-05, + "loss": 0.1738, + "step": 1605 + }, + { + "epoch": 0.52, + "learning_rate": 1.8957817673093258e-05, + "loss": 0.16, + "step": 1606 + }, + { + "epoch": 0.52, + "learning_rate": 1.8956273104074084e-05, + "loss": 0.1689, + "step": 1607 + }, + { + "epoch": 0.52, + "learning_rate": 1.89547274543529e-05, + "loss": 0.1648, + "step": 1608 + }, + { + "epoch": 0.52, + "learning_rate": 1.89531807241162e-05, + "loss": 0.1697, + "step": 1609 + }, + { + "epoch": 0.52, + "learning_rate": 1.8951632913550625e-05, + "loss": 0.1497, + "step": 1610 + }, + { + "epoch": 0.52, + "learning_rate": 1.895008402284294e-05, + "loss": 0.1549, + "step": 1611 + }, + { + "epoch": 0.52, + "learning_rate": 1.8948534052180038e-05, + "loss": 0.1597, + "step": 1612 + }, + { + "epoch": 0.52, + "learning_rate": 1.8946983001748944e-05, + "loss": 0.1438, + "step": 1613 + }, + { + "epoch": 0.52, + "learning_rate": 1.8945430871736818e-05, + "loss": 0.1386, + "step": 1614 + }, + { + "epoch": 0.52, + "learning_rate": 1.894387766233095e-05, + "loss": 0.1688, + "step": 1615 + }, + { + "epoch": 0.52, + "learning_rate": 1.894232337371875e-05, + "loss": 0.1669, + "step": 1616 + }, + { + "epoch": 0.52, + "learning_rate": 1.8940768006087764e-05, + "loss": 0.145, + "step": 1617 + }, + { + "epoch": 0.52, + "learning_rate": 1.8939211559625676e-05, + "loss": 0.1649, + "step": 1618 + }, + { + "epoch": 0.52, + "learning_rate": 1.8937654034520293e-05, + "loss": 0.1574, + "step": 1619 + }, + { + "epoch": 0.52, + "learning_rate": 1.8936095430959545e-05, + "loss": 0.204, + "step": 1620 + }, + { + "epoch": 0.52, + "learning_rate": 1.8934535749131506e-05, + "loss": 0.1631, + "step": 1621 + }, + { + "epoch": 0.52, + "learning_rate": 1.8932974989224374e-05, + "loss": 0.1699, + "step": 1622 + }, + { + "epoch": 0.52, + "learning_rate": 1.893141315142647e-05, + "loss": 0.1623, + "step": 1623 + }, + { + "epoch": 0.52, + "learning_rate": 1.8929850235926265e-05, + "loss": 0.147, + "step": 1624 + }, + { + "epoch": 0.52, + "learning_rate": 1.8928286242912337e-05, + "loss": 0.1674, + "step": 1625 + }, + { + "epoch": 0.52, + "learning_rate": 1.8926721172573405e-05, + "loss": 0.1691, + "step": 1626 + }, + { + "epoch": 0.52, + "learning_rate": 1.8925155025098318e-05, + "loss": 0.1548, + "step": 1627 + }, + { + "epoch": 0.52, + "learning_rate": 1.8923587800676054e-05, + "loss": 0.1654, + "step": 1628 + }, + { + "epoch": 0.52, + "learning_rate": 1.8922019499495727e-05, + "loss": 0.1536, + "step": 1629 + }, + { + "epoch": 0.52, + "learning_rate": 1.8920450121746562e-05, + "loss": 0.1968, + "step": 1630 + }, + { + "epoch": 0.52, + "learning_rate": 1.891887966761794e-05, + "loss": 0.1764, + "step": 1631 + }, + { + "epoch": 0.52, + "learning_rate": 1.891730813729935e-05, + "loss": 0.1512, + "step": 1632 + }, + { + "epoch": 0.53, + "learning_rate": 1.891573553098042e-05, + "loss": 0.1614, + "step": 1633 + }, + { + "epoch": 0.53, + "learning_rate": 1.8914161848850913e-05, + "loss": 0.1432, + "step": 1634 + }, + { + "epoch": 0.53, + "learning_rate": 1.8912587091100715e-05, + "loss": 0.1627, + "step": 1635 + }, + { + "epoch": 0.53, + "learning_rate": 1.8911011257919834e-05, + "loss": 0.1527, + "step": 1636 + }, + { + "epoch": 0.53, + "learning_rate": 1.890943434949843e-05, + "loss": 0.1531, + "step": 1637 + }, + { + "epoch": 0.53, + "learning_rate": 1.890785636602677e-05, + "loss": 0.1568, + "step": 1638 + }, + { + "epoch": 0.53, + "learning_rate": 1.890627730769526e-05, + "loss": 0.1572, + "step": 1639 + }, + { + "epoch": 0.53, + "learning_rate": 1.8904697174694447e-05, + "loss": 0.1574, + "step": 1640 + }, + { + "epoch": 0.53, + "learning_rate": 1.8903115967214986e-05, + "loss": 0.1645, + "step": 1641 + }, + { + "epoch": 0.53, + "learning_rate": 1.8901533685447672e-05, + "loss": 0.1647, + "step": 1642 + }, + { + "epoch": 0.53, + "learning_rate": 1.8899950329583435e-05, + "loss": 0.1541, + "step": 1643 + }, + { + "epoch": 0.53, + "learning_rate": 1.8898365899813328e-05, + "loss": 0.1634, + "step": 1644 + }, + { + "epoch": 0.53, + "learning_rate": 1.8896780396328532e-05, + "loss": 0.1462, + "step": 1645 + }, + { + "epoch": 0.53, + "learning_rate": 1.8895193819320366e-05, + "loss": 0.1606, + "step": 1646 + }, + { + "epoch": 0.53, + "learning_rate": 1.889360616898027e-05, + "loss": 0.165, + "step": 1647 + }, + { + "epoch": 0.53, + "learning_rate": 1.8892017445499812e-05, + "loss": 0.1663, + "step": 1648 + }, + { + "epoch": 0.53, + "learning_rate": 1.88904276490707e-05, + "loss": 0.1676, + "step": 1649 + }, + { + "epoch": 0.53, + "learning_rate": 1.888883677988477e-05, + "loss": 0.1621, + "step": 1650 + }, + { + "epoch": 0.53, + "learning_rate": 1.8887244838133973e-05, + "loss": 0.1628, + "step": 1651 + }, + { + "epoch": 0.53, + "learning_rate": 1.8885651824010406e-05, + "loss": 0.1723, + "step": 1652 + }, + { + "epoch": 0.53, + "learning_rate": 1.8884057737706286e-05, + "loss": 0.171, + "step": 1653 + }, + { + "epoch": 0.53, + "learning_rate": 1.8882462579413962e-05, + "loss": 0.1546, + "step": 1654 + }, + { + "epoch": 0.53, + "learning_rate": 1.8880866349325916e-05, + "loss": 0.1598, + "step": 1655 + }, + { + "epoch": 0.53, + "learning_rate": 1.887926904763475e-05, + "loss": 0.153, + "step": 1656 + }, + { + "epoch": 0.53, + "learning_rate": 1.8877670674533205e-05, + "loss": 0.1799, + "step": 1657 + }, + { + "epoch": 0.53, + "learning_rate": 1.887607123021415e-05, + "loss": 0.1601, + "step": 1658 + }, + { + "epoch": 0.53, + "learning_rate": 1.8874470714870578e-05, + "loss": 0.1458, + "step": 1659 + }, + { + "epoch": 0.53, + "learning_rate": 1.887286912869561e-05, + "loss": 0.1608, + "step": 1660 + }, + { + "epoch": 0.53, + "learning_rate": 1.887126647188251e-05, + "loss": 0.1626, + "step": 1661 + }, + { + "epoch": 0.53, + "learning_rate": 1.886966274462465e-05, + "loss": 0.1644, + "step": 1662 + }, + { + "epoch": 0.53, + "learning_rate": 1.8868057947115554e-05, + "loss": 0.1391, + "step": 1663 + }, + { + "epoch": 0.54, + "learning_rate": 1.886645207954885e-05, + "loss": 0.1443, + "step": 1664 + }, + { + "epoch": 0.54, + "learning_rate": 1.886484514211832e-05, + "loss": 0.1663, + "step": 1665 + }, + { + "epoch": 0.54, + "learning_rate": 1.8863237135017865e-05, + "loss": 0.1655, + "step": 1666 + }, + { + "epoch": 0.54, + "learning_rate": 1.8861628058441505e-05, + "loss": 0.154, + "step": 1667 + }, + { + "epoch": 0.54, + "learning_rate": 1.8860017912583406e-05, + "loss": 0.1471, + "step": 1668 + }, + { + "epoch": 0.54, + "learning_rate": 1.885840669763785e-05, + "loss": 0.1449, + "step": 1669 + }, + { + "epoch": 0.54, + "learning_rate": 1.8856794413799253e-05, + "loss": 0.142, + "step": 1670 + }, + { + "epoch": 0.54, + "learning_rate": 1.8855181061262163e-05, + "loss": 0.1397, + "step": 1671 + }, + { + "epoch": 0.54, + "learning_rate": 1.8853566640221253e-05, + "loss": 0.1566, + "step": 1672 + }, + { + "epoch": 0.54, + "learning_rate": 1.8851951150871324e-05, + "loss": 0.1609, + "step": 1673 + }, + { + "epoch": 0.54, + "learning_rate": 1.885033459340731e-05, + "loss": 0.1531, + "step": 1674 + }, + { + "epoch": 0.54, + "learning_rate": 1.8848716968024274e-05, + "loss": 0.1688, + "step": 1675 + }, + { + "epoch": 0.54, + "learning_rate": 1.8847098274917397e-05, + "loss": 0.1773, + "step": 1676 + }, + { + "epoch": 0.54, + "learning_rate": 1.8845478514282008e-05, + "loss": 0.15, + "step": 1677 + }, + { + "epoch": 0.54, + "learning_rate": 1.8843857686313548e-05, + "loss": 0.159, + "step": 1678 + }, + { + "epoch": 0.54, + "learning_rate": 1.8842235791207594e-05, + "loss": 0.1556, + "step": 1679 + }, + { + "epoch": 0.54, + "learning_rate": 1.884061282915985e-05, + "loss": 0.1762, + "step": 1680 + }, + { + "epoch": 0.54, + "learning_rate": 1.8838988800366152e-05, + "loss": 0.1556, + "step": 1681 + }, + { + "epoch": 0.54, + "learning_rate": 1.883736370502246e-05, + "loss": 0.1666, + "step": 1682 + }, + { + "epoch": 0.54, + "learning_rate": 1.8835737543324867e-05, + "loss": 0.1462, + "step": 1683 + }, + { + "epoch": 0.54, + "learning_rate": 1.883411031546959e-05, + "loss": 0.1443, + "step": 1684 + }, + { + "epoch": 0.54, + "learning_rate": 1.8832482021652975e-05, + "loss": 0.1697, + "step": 1685 + }, + { + "epoch": 0.54, + "learning_rate": 1.8830852662071507e-05, + "loss": 0.1557, + "step": 1686 + }, + { + "epoch": 0.54, + "learning_rate": 1.8829222236921786e-05, + "loss": 0.1507, + "step": 1687 + }, + { + "epoch": 0.54, + "learning_rate": 1.882759074640055e-05, + "loss": 0.1576, + "step": 1688 + }, + { + "epoch": 0.54, + "learning_rate": 1.882595819070465e-05, + "loss": 0.1506, + "step": 1689 + }, + { + "epoch": 0.54, + "learning_rate": 1.8824324570031094e-05, + "loss": 0.164, + "step": 1690 + }, + { + "epoch": 0.54, + "learning_rate": 1.8822689884576987e-05, + "loss": 0.1452, + "step": 1691 + }, + { + "epoch": 0.54, + "learning_rate": 1.882105413453959e-05, + "loss": 0.1609, + "step": 1692 + }, + { + "epoch": 0.54, + "learning_rate": 1.8819417320116266e-05, + "loss": 0.1479, + "step": 1693 + }, + { + "epoch": 0.54, + "learning_rate": 1.881777944150453e-05, + "loss": 0.1672, + "step": 1694 + }, + { + "epoch": 0.55, + "learning_rate": 1.8816140498902013e-05, + "loss": 0.1435, + "step": 1695 + }, + { + "epoch": 0.55, + "learning_rate": 1.8814500492506475e-05, + "loss": 0.1613, + "step": 1696 + }, + { + "epoch": 0.55, + "learning_rate": 1.8812859422515804e-05, + "loss": 0.1695, + "step": 1697 + }, + { + "epoch": 0.55, + "learning_rate": 1.881121728912803e-05, + "loss": 0.1537, + "step": 1698 + }, + { + "epoch": 0.55, + "learning_rate": 1.8809574092541286e-05, + "loss": 0.1568, + "step": 1699 + }, + { + "epoch": 0.55, + "learning_rate": 1.880792983295385e-05, + "loss": 0.1536, + "step": 1700 + }, + { + "epoch": 0.55, + "learning_rate": 1.880628451056413e-05, + "loss": 0.1518, + "step": 1701 + }, + { + "epoch": 0.55, + "learning_rate": 1.880463812557066e-05, + "loss": 0.1563, + "step": 1702 + }, + { + "epoch": 0.55, + "learning_rate": 1.880299067817209e-05, + "loss": 0.1564, + "step": 1703 + }, + { + "epoch": 0.55, + "learning_rate": 1.8801342168567214e-05, + "loss": 0.1446, + "step": 1704 + }, + { + "epoch": 0.55, + "learning_rate": 1.8799692596954947e-05, + "loss": 0.1631, + "step": 1705 + }, + { + "epoch": 0.55, + "learning_rate": 1.8798041963534337e-05, + "loss": 0.1707, + "step": 1706 + }, + { + "epoch": 0.55, + "learning_rate": 1.879639026850455e-05, + "loss": 0.1672, + "step": 1707 + }, + { + "epoch": 0.55, + "learning_rate": 1.879473751206489e-05, + "loss": 0.1473, + "step": 1708 + }, + { + "epoch": 0.55, + "learning_rate": 1.879308369441479e-05, + "loss": 0.1398, + "step": 1709 + }, + { + "epoch": 0.55, + "learning_rate": 1.8791428815753797e-05, + "loss": 0.1543, + "step": 1710 + }, + { + "epoch": 0.55, + "learning_rate": 1.8789772876281606e-05, + "loss": 0.162, + "step": 1711 + }, + { + "epoch": 0.55, + "learning_rate": 1.8788115876198018e-05, + "loss": 0.1536, + "step": 1712 + }, + { + "epoch": 0.55, + "learning_rate": 1.8786457815702987e-05, + "loss": 0.1505, + "step": 1713 + }, + { + "epoch": 0.55, + "learning_rate": 1.878479869499657e-05, + "loss": 0.1644, + "step": 1714 + }, + { + "epoch": 0.55, + "learning_rate": 1.8783138514278968e-05, + "loss": 0.1566, + "step": 1715 + }, + { + "epoch": 0.55, + "learning_rate": 1.8781477273750508e-05, + "loss": 0.1472, + "step": 1716 + }, + { + "epoch": 0.55, + "learning_rate": 1.877981497361164e-05, + "loss": 0.1844, + "step": 1717 + }, + { + "epoch": 0.55, + "learning_rate": 1.8778151614062944e-05, + "loss": 0.1502, + "step": 1718 + }, + { + "epoch": 0.55, + "learning_rate": 1.877648719530513e-05, + "loss": 0.152, + "step": 1719 + }, + { + "epoch": 0.55, + "learning_rate": 1.877482171753903e-05, + "loss": 0.145, + "step": 1720 + }, + { + "epoch": 0.55, + "learning_rate": 1.8773155180965613e-05, + "loss": 0.162, + "step": 1721 + }, + { + "epoch": 0.55, + "learning_rate": 1.8771487585785966e-05, + "loss": 0.1561, + "step": 1722 + }, + { + "epoch": 0.55, + "learning_rate": 1.8769818932201307e-05, + "loss": 0.1685, + "step": 1723 + }, + { + "epoch": 0.55, + "learning_rate": 1.876814922041299e-05, + "loss": 0.1576, + "step": 1724 + }, + { + "epoch": 0.55, + "learning_rate": 1.876647845062248e-05, + "loss": 0.1498, + "step": 1725 + }, + { + "epoch": 0.56, + "learning_rate": 1.8764806623031388e-05, + "loss": 0.1459, + "step": 1726 + }, + { + "epoch": 0.56, + "learning_rate": 1.8763133737841436e-05, + "loss": 0.1749, + "step": 1727 + }, + { + "epoch": 0.56, + "learning_rate": 1.876145979525449e-05, + "loss": 0.1379, + "step": 1728 + }, + { + "epoch": 0.56, + "learning_rate": 1.8759784795472526e-05, + "loss": 0.1536, + "step": 1729 + }, + { + "epoch": 0.56, + "learning_rate": 1.875810873869766e-05, + "loss": 0.1622, + "step": 1730 + }, + { + "epoch": 0.56, + "learning_rate": 1.875643162513214e-05, + "loss": 0.1566, + "step": 1731 + }, + { + "epoch": 0.56, + "learning_rate": 1.875475345497832e-05, + "loss": 0.1554, + "step": 1732 + }, + { + "epoch": 0.56, + "learning_rate": 1.8753074228438705e-05, + "loss": 0.1683, + "step": 1733 + }, + { + "epoch": 0.56, + "learning_rate": 1.8751393945715913e-05, + "loss": 0.1316, + "step": 1734 + }, + { + "epoch": 0.56, + "learning_rate": 1.87497126070127e-05, + "loss": 0.1527, + "step": 1735 + }, + { + "epoch": 0.56, + "learning_rate": 1.8748030212531936e-05, + "loss": 0.1476, + "step": 1736 + }, + { + "epoch": 0.56, + "learning_rate": 1.874634676247663e-05, + "loss": 0.1421, + "step": 1737 + }, + { + "epoch": 0.56, + "learning_rate": 1.8744662257049908e-05, + "loss": 0.1472, + "step": 1738 + }, + { + "epoch": 0.56, + "learning_rate": 1.874297669645504e-05, + "loss": 0.1603, + "step": 1739 + }, + { + "epoch": 0.56, + "learning_rate": 1.8741290080895406e-05, + "loss": 0.1554, + "step": 1740 + }, + { + "epoch": 0.56, + "learning_rate": 1.8739602410574523e-05, + "loss": 0.158, + "step": 1741 + }, + { + "epoch": 0.56, + "learning_rate": 1.873791368569603e-05, + "loss": 0.149, + "step": 1742 + }, + { + "epoch": 0.56, + "learning_rate": 1.8736223906463698e-05, + "loss": 0.1522, + "step": 1743 + }, + { + "epoch": 0.56, + "learning_rate": 1.873453307308142e-05, + "loss": 0.1415, + "step": 1744 + }, + { + "epoch": 0.56, + "learning_rate": 1.8732841185753224e-05, + "loss": 0.1545, + "step": 1745 + }, + { + "epoch": 0.56, + "learning_rate": 1.8731148244683253e-05, + "loss": 0.1633, + "step": 1746 + }, + { + "epoch": 0.56, + "learning_rate": 1.8729454250075797e-05, + "loss": 0.1603, + "step": 1747 + }, + { + "epoch": 0.56, + "learning_rate": 1.8727759202135246e-05, + "loss": 0.1471, + "step": 1748 + }, + { + "epoch": 0.56, + "learning_rate": 1.872606310106614e-05, + "loss": 0.15, + "step": 1749 + }, + { + "epoch": 0.56, + "learning_rate": 1.8724365947073136e-05, + "loss": 0.1544, + "step": 1750 + }, + { + "epoch": 0.56, + "learning_rate": 1.8722667740361022e-05, + "loss": 0.1492, + "step": 1751 + }, + { + "epoch": 0.56, + "learning_rate": 1.8720968481134706e-05, + "loss": 0.157, + "step": 1752 + }, + { + "epoch": 0.56, + "learning_rate": 1.8719268169599233e-05, + "loss": 0.1845, + "step": 1753 + }, + { + "epoch": 0.56, + "learning_rate": 1.8717566805959766e-05, + "loss": 0.1573, + "step": 1754 + }, + { + "epoch": 0.56, + "learning_rate": 1.87158643904216e-05, + "loss": 0.1581, + "step": 1755 + }, + { + "epoch": 0.56, + "learning_rate": 1.8714160923190156e-05, + "loss": 0.1468, + "step": 1756 + }, + { + "epoch": 0.57, + "learning_rate": 1.8712456404470982e-05, + "loss": 0.163, + "step": 1757 + }, + { + "epoch": 0.57, + "learning_rate": 1.871075083446975e-05, + "loss": 0.1567, + "step": 1758 + }, + { + "epoch": 0.57, + "learning_rate": 1.8709044213392265e-05, + "loss": 0.1671, + "step": 1759 + }, + { + "epoch": 0.57, + "learning_rate": 1.8707336541444454e-05, + "loss": 0.1613, + "step": 1760 + }, + { + "epoch": 0.57, + "learning_rate": 1.870562781883237e-05, + "loss": 0.1463, + "step": 1761 + }, + { + "epoch": 0.57, + "learning_rate": 1.8703918045762197e-05, + "loss": 0.1542, + "step": 1762 + }, + { + "epoch": 0.57, + "learning_rate": 1.870220722244024e-05, + "loss": 0.1593, + "step": 1763 + }, + { + "epoch": 0.57, + "learning_rate": 1.870049534907294e-05, + "loss": 0.1566, + "step": 1764 + }, + { + "epoch": 0.57, + "learning_rate": 1.8698782425866857e-05, + "loss": 0.1595, + "step": 1765 + }, + { + "epoch": 0.57, + "learning_rate": 1.8697068453028677e-05, + "loss": 0.1517, + "step": 1766 + }, + { + "epoch": 0.57, + "learning_rate": 1.8695353430765214e-05, + "loss": 0.1563, + "step": 1767 + }, + { + "epoch": 0.57, + "learning_rate": 1.869363735928342e-05, + "loss": 0.1549, + "step": 1768 + }, + { + "epoch": 0.57, + "learning_rate": 1.869192023879035e-05, + "loss": 0.1698, + "step": 1769 + }, + { + "epoch": 0.57, + "learning_rate": 1.8690202069493204e-05, + "loss": 0.1739, + "step": 1770 + }, + { + "epoch": 0.57, + "learning_rate": 1.868848285159931e-05, + "loss": 0.1642, + "step": 1771 + }, + { + "epoch": 0.57, + "learning_rate": 1.8686762585316102e-05, + "loss": 0.1663, + "step": 1772 + }, + { + "epoch": 0.57, + "learning_rate": 1.868504127085117e-05, + "loss": 0.1448, + "step": 1773 + }, + { + "epoch": 0.57, + "learning_rate": 1.8683318908412208e-05, + "loss": 0.1426, + "step": 1774 + }, + { + "epoch": 0.57, + "learning_rate": 1.8681595498207047e-05, + "loss": 0.1598, + "step": 1775 + }, + { + "epoch": 0.57, + "learning_rate": 1.8679871040443632e-05, + "loss": 0.1491, + "step": 1776 + }, + { + "epoch": 0.57, + "learning_rate": 1.8678145535330054e-05, + "loss": 0.1408, + "step": 1777 + }, + { + "epoch": 0.57, + "learning_rate": 1.8676418983074515e-05, + "loss": 0.1679, + "step": 1778 + }, + { + "epoch": 0.57, + "learning_rate": 1.8674691383885345e-05, + "loss": 0.1553, + "step": 1779 + }, + { + "epoch": 0.57, + "learning_rate": 1.867296273797101e-05, + "loss": 0.1569, + "step": 1780 + }, + { + "epoch": 0.57, + "learning_rate": 1.867123304554009e-05, + "loss": 0.1548, + "step": 1781 + }, + { + "epoch": 0.57, + "learning_rate": 1.8669502306801304e-05, + "loss": 0.1653, + "step": 1782 + }, + { + "epoch": 0.57, + "learning_rate": 1.8667770521963487e-05, + "loss": 0.153, + "step": 1783 + }, + { + "epoch": 0.57, + "learning_rate": 1.8666037691235597e-05, + "loss": 0.1514, + "step": 1784 + }, + { + "epoch": 0.57, + "learning_rate": 1.8664303814826734e-05, + "loss": 0.1527, + "step": 1785 + }, + { + "epoch": 0.57, + "learning_rate": 1.866256889294611e-05, + "loss": 0.1455, + "step": 1786 + }, + { + "epoch": 0.57, + "learning_rate": 1.866083292580307e-05, + "loss": 0.167, + "step": 1787 + }, + { + "epoch": 0.58, + "learning_rate": 1.8659095913607083e-05, + "loss": 0.1544, + "step": 1788 + }, + { + "epoch": 0.58, + "learning_rate": 1.8657357856567744e-05, + "loss": 0.1842, + "step": 1789 + }, + { + "epoch": 0.58, + "learning_rate": 1.8655618754894774e-05, + "loss": 0.1448, + "step": 1790 + }, + { + "epoch": 0.58, + "learning_rate": 1.865387860879802e-05, + "loss": 0.1863, + "step": 1791 + }, + { + "epoch": 0.58, + "learning_rate": 1.8652137418487454e-05, + "loss": 0.1545, + "step": 1792 + }, + { + "epoch": 0.58, + "learning_rate": 1.865039518417318e-05, + "loss": 0.1426, + "step": 1793 + }, + { + "epoch": 0.58, + "learning_rate": 1.8648651906065424e-05, + "loss": 0.1487, + "step": 1794 + }, + { + "epoch": 0.58, + "learning_rate": 1.864690758437453e-05, + "loss": 0.1455, + "step": 1795 + }, + { + "epoch": 0.58, + "learning_rate": 1.8645162219310977e-05, + "loss": 0.1551, + "step": 1796 + }, + { + "epoch": 0.58, + "learning_rate": 1.8643415811085373e-05, + "loss": 0.151, + "step": 1797 + }, + { + "epoch": 0.58, + "learning_rate": 1.8641668359908447e-05, + "loss": 0.1527, + "step": 1798 + }, + { + "epoch": 0.58, + "learning_rate": 1.863991986599105e-05, + "loss": 0.1439, + "step": 1799 + }, + { + "epoch": 0.58, + "learning_rate": 1.8638170329544164e-05, + "loss": 0.1529, + "step": 1800 + }, + { + "epoch": 0.58, + "learning_rate": 1.8636419750778897e-05, + "loss": 0.1498, + "step": 1801 + }, + { + "epoch": 0.58, + "learning_rate": 1.8634668129906477e-05, + "loss": 0.1591, + "step": 1802 + }, + { + "epoch": 0.58, + "learning_rate": 1.8632915467138267e-05, + "loss": 0.1686, + "step": 1803 + }, + { + "epoch": 0.58, + "learning_rate": 1.863116176268575e-05, + "loss": 0.1539, + "step": 1804 + }, + { + "epoch": 0.58, + "learning_rate": 1.8629407016760533e-05, + "loss": 0.1674, + "step": 1805 + }, + { + "epoch": 0.58, + "learning_rate": 1.8627651229574356e-05, + "loss": 0.1734, + "step": 1806 + }, + { + "epoch": 0.58, + "learning_rate": 1.862589440133907e-05, + "loss": 0.1677, + "step": 1807 + }, + { + "epoch": 0.58, + "learning_rate": 1.8624136532266673e-05, + "loss": 0.1607, + "step": 1808 + }, + { + "epoch": 0.58, + "learning_rate": 1.8622377622569267e-05, + "loss": 0.145, + "step": 1809 + }, + { + "epoch": 0.58, + "learning_rate": 1.8620617672459097e-05, + "loss": 0.1585, + "step": 1810 + }, + { + "epoch": 0.58, + "learning_rate": 1.861885668214852e-05, + "loss": 0.1469, + "step": 1811 + }, + { + "epoch": 0.58, + "learning_rate": 1.8617094651850032e-05, + "loss": 0.1566, + "step": 1812 + }, + { + "epoch": 0.58, + "learning_rate": 1.861533158177624e-05, + "loss": 0.1539, + "step": 1813 + }, + { + "epoch": 0.58, + "learning_rate": 1.861356747213989e-05, + "loss": 0.157, + "step": 1814 + }, + { + "epoch": 0.58, + "learning_rate": 1.861180232315384e-05, + "loss": 0.1577, + "step": 1815 + }, + { + "epoch": 0.58, + "learning_rate": 1.8610036135031084e-05, + "loss": 0.1778, + "step": 1816 + }, + { + "epoch": 0.58, + "learning_rate": 1.8608268907984735e-05, + "loss": 0.1585, + "step": 1817 + }, + { + "epoch": 0.58, + "learning_rate": 1.8606500642228037e-05, + "loss": 0.1561, + "step": 1818 + }, + { + "epoch": 0.59, + "learning_rate": 1.860473133797436e-05, + "loss": 0.1464, + "step": 1819 + }, + { + "epoch": 0.59, + "learning_rate": 1.8602960995437187e-05, + "loss": 0.1556, + "step": 1820 + }, + { + "epoch": 0.59, + "learning_rate": 1.860118961483014e-05, + "loss": 0.1504, + "step": 1821 + }, + { + "epoch": 0.59, + "learning_rate": 1.8599417196366967e-05, + "loss": 0.1539, + "step": 1822 + }, + { + "epoch": 0.59, + "learning_rate": 1.8597643740261528e-05, + "loss": 0.1501, + "step": 1823 + }, + { + "epoch": 0.59, + "learning_rate": 1.8595869246727814e-05, + "loss": 0.1558, + "step": 1824 + }, + { + "epoch": 0.59, + "learning_rate": 1.8594093715979948e-05, + "loss": 0.132, + "step": 1825 + }, + { + "epoch": 0.59, + "learning_rate": 1.859231714823217e-05, + "loss": 0.1586, + "step": 1826 + }, + { + "epoch": 0.59, + "learning_rate": 1.8590539543698852e-05, + "loss": 0.1494, + "step": 1827 + }, + { + "epoch": 0.59, + "learning_rate": 1.8588760902594485e-05, + "loss": 0.152, + "step": 1828 + }, + { + "epoch": 0.59, + "learning_rate": 1.8586981225133688e-05, + "loss": 0.1413, + "step": 1829 + }, + { + "epoch": 0.59, + "learning_rate": 1.8585200511531203e-05, + "loss": 0.1567, + "step": 1830 + }, + { + "epoch": 0.59, + "learning_rate": 1.85834187620019e-05, + "loss": 0.1673, + "step": 1831 + }, + { + "epoch": 0.59, + "learning_rate": 1.858163597676077e-05, + "loss": 0.1551, + "step": 1832 + }, + { + "epoch": 0.59, + "learning_rate": 1.8579852156022934e-05, + "loss": 0.156, + "step": 1833 + }, + { + "epoch": 0.59, + "learning_rate": 1.8578067300003634e-05, + "loss": 0.1458, + "step": 1834 + }, + { + "epoch": 0.59, + "learning_rate": 1.8576281408918242e-05, + "loss": 0.1511, + "step": 1835 + }, + { + "epoch": 0.59, + "learning_rate": 1.857449448298225e-05, + "loss": 0.1547, + "step": 1836 + }, + { + "epoch": 0.59, + "learning_rate": 1.857270652241127e-05, + "loss": 0.1637, + "step": 1837 + }, + { + "epoch": 0.59, + "learning_rate": 1.857091752742105e-05, + "loss": 0.1502, + "step": 1838 + }, + { + "epoch": 0.59, + "learning_rate": 1.8569127498227458e-05, + "loss": 0.154, + "step": 1839 + }, + { + "epoch": 0.59, + "learning_rate": 1.8567336435046482e-05, + "loss": 0.1471, + "step": 1840 + }, + { + "epoch": 0.59, + "learning_rate": 1.8565544338094245e-05, + "loss": 0.1584, + "step": 1841 + }, + { + "epoch": 0.59, + "learning_rate": 1.856375120758699e-05, + "loss": 0.1455, + "step": 1842 + }, + { + "epoch": 0.59, + "learning_rate": 1.8561957043741078e-05, + "loss": 0.1661, + "step": 1843 + }, + { + "epoch": 0.59, + "learning_rate": 1.8560161846773002e-05, + "loss": 0.1476, + "step": 1844 + }, + { + "epoch": 0.59, + "learning_rate": 1.855836561689938e-05, + "loss": 0.146, + "step": 1845 + }, + { + "epoch": 0.59, + "learning_rate": 1.8556568354336955e-05, + "loss": 0.155, + "step": 1846 + }, + { + "epoch": 0.59, + "learning_rate": 1.8554770059302588e-05, + "loss": 0.149, + "step": 1847 + }, + { + "epoch": 0.59, + "learning_rate": 1.8552970732013267e-05, + "loss": 0.159, + "step": 1848 + }, + { + "epoch": 0.59, + "learning_rate": 1.8551170372686112e-05, + "loss": 0.1487, + "step": 1849 + }, + { + "epoch": 0.6, + "learning_rate": 1.854936898153836e-05, + "loss": 0.1443, + "step": 1850 + }, + { + "epoch": 0.6, + "learning_rate": 1.8547566558787373e-05, + "loss": 0.1505, + "step": 1851 + }, + { + "epoch": 0.6, + "learning_rate": 1.8545763104650643e-05, + "loss": 0.1623, + "step": 1852 + }, + { + "epoch": 0.6, + "learning_rate": 1.8543958619345777e-05, + "loss": 0.1447, + "step": 1853 + }, + { + "epoch": 0.6, + "learning_rate": 1.8542153103090515e-05, + "loss": 0.1581, + "step": 1854 + }, + { + "epoch": 0.6, + "learning_rate": 1.854034655610272e-05, + "loss": 0.1529, + "step": 1855 + }, + { + "epoch": 0.6, + "learning_rate": 1.8538538978600372e-05, + "loss": 0.1579, + "step": 1856 + }, + { + "epoch": 0.6, + "learning_rate": 1.8536730370801585e-05, + "loss": 0.1516, + "step": 1857 + }, + { + "epoch": 0.6, + "learning_rate": 1.8534920732924594e-05, + "loss": 0.1554, + "step": 1858 + }, + { + "epoch": 0.6, + "learning_rate": 1.8533110065187755e-05, + "loss": 0.1582, + "step": 1859 + }, + { + "epoch": 0.6, + "learning_rate": 1.8531298367809557e-05, + "loss": 0.1521, + "step": 1860 + }, + { + "epoch": 0.6, + "learning_rate": 1.85294856410086e-05, + "loss": 0.1473, + "step": 1861 + }, + { + "epoch": 0.6, + "learning_rate": 1.8527671885003614e-05, + "loss": 0.1622, + "step": 1862 + }, + { + "epoch": 0.6, + "learning_rate": 1.8525857100013463e-05, + "loss": 0.1519, + "step": 1863 + }, + { + "epoch": 0.6, + "learning_rate": 1.852404128625712e-05, + "loss": 0.1788, + "step": 1864 + }, + { + "epoch": 0.6, + "learning_rate": 1.852222444395369e-05, + "loss": 0.1525, + "step": 1865 + }, + { + "epoch": 0.6, + "learning_rate": 1.85204065733224e-05, + "loss": 0.1559, + "step": 1866 + }, + { + "epoch": 0.6, + "learning_rate": 1.851858767458261e-05, + "loss": 0.1553, + "step": 1867 + }, + { + "epoch": 0.6, + "learning_rate": 1.8516767747953784e-05, + "loss": 0.1451, + "step": 1868 + }, + { + "epoch": 0.6, + "learning_rate": 1.851494679365553e-05, + "loss": 0.1543, + "step": 1869 + }, + { + "epoch": 0.6, + "learning_rate": 1.8513124811907568e-05, + "loss": 0.1809, + "step": 1870 + }, + { + "epoch": 0.6, + "learning_rate": 1.8511301802929747e-05, + "loss": 0.1398, + "step": 1871 + }, + { + "epoch": 0.6, + "learning_rate": 1.8509477766942045e-05, + "loss": 0.1482, + "step": 1872 + }, + { + "epoch": 0.6, + "learning_rate": 1.850765270416455e-05, + "loss": 0.1358, + "step": 1873 + }, + { + "epoch": 0.6, + "learning_rate": 1.8505826614817485e-05, + "loss": 0.1619, + "step": 1874 + }, + { + "epoch": 0.6, + "learning_rate": 1.8503999499121193e-05, + "loss": 0.1388, + "step": 1875 + }, + { + "epoch": 0.6, + "learning_rate": 1.8502171357296144e-05, + "loss": 0.168, + "step": 1876 + }, + { + "epoch": 0.6, + "learning_rate": 1.8500342189562926e-05, + "loss": 0.1561, + "step": 1877 + }, + { + "epoch": 0.6, + "learning_rate": 1.8498511996142255e-05, + "loss": 0.1544, + "step": 1878 + }, + { + "epoch": 0.6, + "learning_rate": 1.849668077725497e-05, + "loss": 0.1484, + "step": 1879 + }, + { + "epoch": 0.6, + "learning_rate": 1.8494848533122035e-05, + "loss": 0.1733, + "step": 1880 + }, + { + "epoch": 0.61, + "learning_rate": 1.8493015263964535e-05, + "loss": 0.1446, + "step": 1881 + }, + { + "epoch": 0.61, + "learning_rate": 1.849118097000368e-05, + "loss": 0.1509, + "step": 1882 + }, + { + "epoch": 0.61, + "learning_rate": 1.8489345651460804e-05, + "loss": 0.1649, + "step": 1883 + }, + { + "epoch": 0.61, + "learning_rate": 1.848750930855737e-05, + "loss": 0.1687, + "step": 1884 + }, + { + "epoch": 0.61, + "learning_rate": 1.8485671941514948e-05, + "loss": 0.1696, + "step": 1885 + }, + { + "epoch": 0.61, + "learning_rate": 1.8483833550555252e-05, + "loss": 0.1538, + "step": 1886 + }, + { + "epoch": 0.61, + "learning_rate": 1.8481994135900104e-05, + "loss": 0.1979, + "step": 1887 + }, + { + "epoch": 0.61, + "learning_rate": 1.8480153697771455e-05, + "loss": 0.1395, + "step": 1888 + }, + { + "epoch": 0.61, + "learning_rate": 1.8478312236391386e-05, + "loss": 0.1593, + "step": 1889 + }, + { + "epoch": 0.61, + "learning_rate": 1.8476469751982095e-05, + "loss": 0.1486, + "step": 1890 + }, + { + "epoch": 0.61, + "learning_rate": 1.8474626244765897e-05, + "loss": 0.1537, + "step": 1891 + }, + { + "epoch": 0.61, + "learning_rate": 1.8472781714965242e-05, + "loss": 0.1569, + "step": 1892 + }, + { + "epoch": 0.61, + "learning_rate": 1.8470936162802702e-05, + "loss": 0.1413, + "step": 1893 + }, + { + "epoch": 0.61, + "learning_rate": 1.8469089588500964e-05, + "loss": 0.142, + "step": 1894 + }, + { + "epoch": 0.61, + "learning_rate": 1.8467241992282842e-05, + "loss": 0.1484, + "step": 1895 + }, + { + "epoch": 0.61, + "learning_rate": 1.8465393374371282e-05, + "loss": 0.1387, + "step": 1896 + }, + { + "epoch": 0.61, + "learning_rate": 1.846354373498934e-05, + "loss": 0.1708, + "step": 1897 + }, + { + "epoch": 0.61, + "learning_rate": 1.8461693074360207e-05, + "loss": 0.1588, + "step": 1898 + }, + { + "epoch": 0.61, + "learning_rate": 1.8459841392707186e-05, + "loss": 0.1609, + "step": 1899 + }, + { + "epoch": 0.61, + "learning_rate": 1.845798869025371e-05, + "loss": 0.1892, + "step": 1900 + }, + { + "epoch": 0.61, + "learning_rate": 1.845613496722334e-05, + "loss": 0.1441, + "step": 1901 + }, + { + "epoch": 0.61, + "learning_rate": 1.8454280223839745e-05, + "loss": 0.1529, + "step": 1902 + }, + { + "epoch": 0.61, + "learning_rate": 1.845242446032673e-05, + "loss": 0.1494, + "step": 1903 + }, + { + "epoch": 0.61, + "learning_rate": 1.8450567676908222e-05, + "loss": 0.1658, + "step": 1904 + }, + { + "epoch": 0.61, + "learning_rate": 1.8448709873808265e-05, + "loss": 0.1515, + "step": 1905 + }, + { + "epoch": 0.61, + "learning_rate": 1.8446851051251027e-05, + "loss": 0.1469, + "step": 1906 + }, + { + "epoch": 0.61, + "learning_rate": 1.8444991209460808e-05, + "loss": 0.1571, + "step": 1907 + }, + { + "epoch": 0.61, + "learning_rate": 1.8443130348662022e-05, + "loss": 0.1727, + "step": 1908 + }, + { + "epoch": 0.61, + "learning_rate": 1.8441268469079204e-05, + "loss": 0.1554, + "step": 1909 + }, + { + "epoch": 0.61, + "learning_rate": 1.8439405570937027e-05, + "loss": 0.1505, + "step": 1910 + }, + { + "epoch": 0.61, + "learning_rate": 1.8437541654460264e-05, + "loss": 0.1454, + "step": 1911 + }, + { + "epoch": 0.62, + "learning_rate": 1.8435676719873828e-05, + "loss": 0.1576, + "step": 1912 + }, + { + "epoch": 0.62, + "learning_rate": 1.843381076740275e-05, + "loss": 0.1425, + "step": 1913 + }, + { + "epoch": 0.62, + "learning_rate": 1.8431943797272187e-05, + "loss": 0.1536, + "step": 1914 + }, + { + "epoch": 0.62, + "learning_rate": 1.843007580970741e-05, + "loss": 0.1456, + "step": 1915 + }, + { + "epoch": 0.62, + "learning_rate": 1.8428206804933825e-05, + "loss": 0.1527, + "step": 1916 + }, + { + "epoch": 0.62, + "learning_rate": 1.8426336783176945e-05, + "loss": 0.1503, + "step": 1917 + }, + { + "epoch": 0.62, + "learning_rate": 1.842446574466242e-05, + "loss": 0.1709, + "step": 1918 + }, + { + "epoch": 0.62, + "learning_rate": 1.842259368961602e-05, + "loss": 0.1565, + "step": 1919 + }, + { + "epoch": 0.62, + "learning_rate": 1.8420720618263632e-05, + "loss": 0.1613, + "step": 1920 + }, + { + "epoch": 0.62, + "learning_rate": 1.841884653083127e-05, + "loss": 0.168, + "step": 1921 + }, + { + "epoch": 0.62, + "learning_rate": 1.8416971427545065e-05, + "loss": 0.1565, + "step": 1922 + }, + { + "epoch": 0.62, + "learning_rate": 1.841509530863128e-05, + "loss": 0.1611, + "step": 1923 + }, + { + "epoch": 0.62, + "learning_rate": 1.8413218174316295e-05, + "loss": 0.1586, + "step": 1924 + }, + { + "epoch": 0.62, + "learning_rate": 1.841134002482661e-05, + "loss": 0.163, + "step": 1925 + }, + { + "epoch": 0.62, + "learning_rate": 1.8409460860388852e-05, + "loss": 0.1512, + "step": 1926 + }, + { + "epoch": 0.62, + "learning_rate": 1.8407580681229773e-05, + "loss": 0.1552, + "step": 1927 + }, + { + "epoch": 0.62, + "learning_rate": 1.8405699487576237e-05, + "loss": 0.1499, + "step": 1928 + }, + { + "epoch": 0.62, + "learning_rate": 1.8403817279655237e-05, + "loss": 0.1446, + "step": 1929 + }, + { + "epoch": 0.62, + "learning_rate": 1.8401934057693895e-05, + "loss": 0.1417, + "step": 1930 + }, + { + "epoch": 0.62, + "learning_rate": 1.8400049821919444e-05, + "loss": 0.1567, + "step": 1931 + }, + { + "epoch": 0.62, + "learning_rate": 1.8398164572559243e-05, + "loss": 0.1497, + "step": 1932 + }, + { + "epoch": 0.62, + "learning_rate": 1.8396278309840778e-05, + "loss": 0.1712, + "step": 1933 + }, + { + "epoch": 0.62, + "learning_rate": 1.839439103399165e-05, + "loss": 0.1509, + "step": 1934 + }, + { + "epoch": 0.62, + "learning_rate": 1.8392502745239586e-05, + "loss": 0.1523, + "step": 1935 + }, + { + "epoch": 0.62, + "learning_rate": 1.8390613443812432e-05, + "loss": 0.1433, + "step": 1936 + }, + { + "epoch": 0.62, + "learning_rate": 1.838872312993817e-05, + "loss": 0.1421, + "step": 1937 + }, + { + "epoch": 0.62, + "learning_rate": 1.838683180384488e-05, + "loss": 0.1479, + "step": 1938 + }, + { + "epoch": 0.62, + "learning_rate": 1.8384939465760786e-05, + "loss": 0.153, + "step": 1939 + }, + { + "epoch": 0.62, + "learning_rate": 1.8383046115914225e-05, + "loss": 0.1549, + "step": 1940 + }, + { + "epoch": 0.62, + "learning_rate": 1.8381151754533655e-05, + "loss": 0.1555, + "step": 1941 + }, + { + "epoch": 0.62, + "learning_rate": 1.8379256381847655e-05, + "loss": 0.1624, + "step": 1942 + }, + { + "epoch": 0.62, + "learning_rate": 1.837735999808493e-05, + "loss": 0.1369, + "step": 1943 + }, + { + "epoch": 0.63, + "learning_rate": 1.837546260347431e-05, + "loss": 0.1437, + "step": 1944 + }, + { + "epoch": 0.63, + "learning_rate": 1.837356419824474e-05, + "loss": 0.1335, + "step": 1945 + }, + { + "epoch": 0.63, + "learning_rate": 1.8371664782625287e-05, + "loss": 0.1499, + "step": 1946 + }, + { + "epoch": 0.63, + "learning_rate": 1.8369764356845148e-05, + "loss": 0.1392, + "step": 1947 + }, + { + "epoch": 0.63, + "learning_rate": 1.836786292113363e-05, + "loss": 0.1502, + "step": 1948 + }, + { + "epoch": 0.63, + "learning_rate": 1.8365960475720172e-05, + "loss": 0.1486, + "step": 1949 + }, + { + "epoch": 0.63, + "learning_rate": 1.8364057020834336e-05, + "loss": 0.1608, + "step": 1950 + }, + { + "epoch": 0.63, + "learning_rate": 1.8362152556705788e-05, + "loss": 0.1545, + "step": 1951 + }, + { + "epoch": 0.63, + "learning_rate": 1.8360247083564343e-05, + "loss": 0.1561, + "step": 1952 + }, + { + "epoch": 0.63, + "learning_rate": 1.8358340601639912e-05, + "loss": 0.1637, + "step": 1953 + }, + { + "epoch": 0.63, + "learning_rate": 1.8356433111162547e-05, + "loss": 0.158, + "step": 1954 + }, + { + "epoch": 0.63, + "learning_rate": 1.835452461236241e-05, + "loss": 0.1463, + "step": 1955 + }, + { + "epoch": 0.63, + "learning_rate": 1.835261510546979e-05, + "loss": 0.1505, + "step": 1956 + }, + { + "epoch": 0.63, + "learning_rate": 1.8350704590715097e-05, + "loss": 0.1835, + "step": 1957 + }, + { + "epoch": 0.63, + "learning_rate": 1.8348793068328864e-05, + "loss": 0.1558, + "step": 1958 + }, + { + "epoch": 0.63, + "learning_rate": 1.8346880538541736e-05, + "loss": 0.1381, + "step": 1959 + }, + { + "epoch": 0.63, + "learning_rate": 1.834496700158449e-05, + "loss": 0.1623, + "step": 1960 + }, + { + "epoch": 0.63, + "learning_rate": 1.8343052457688025e-05, + "loss": 0.135, + "step": 1961 + }, + { + "epoch": 0.63, + "learning_rate": 1.8341136907083356e-05, + "loss": 0.1563, + "step": 1962 + }, + { + "epoch": 0.63, + "learning_rate": 1.8339220350001624e-05, + "loss": 0.1566, + "step": 1963 + }, + { + "epoch": 0.63, + "learning_rate": 1.833730278667408e-05, + "loss": 0.1276, + "step": 1964 + }, + { + "epoch": 0.63, + "learning_rate": 1.8335384217332114e-05, + "loss": 0.1493, + "step": 1965 + }, + { + "epoch": 0.63, + "learning_rate": 1.833346464220723e-05, + "loss": 0.137, + "step": 1966 + }, + { + "epoch": 0.63, + "learning_rate": 1.8331544061531044e-05, + "loss": 0.1507, + "step": 1967 + }, + { + "epoch": 0.63, + "learning_rate": 1.8329622475535306e-05, + "loss": 0.1723, + "step": 1968 + }, + { + "epoch": 0.63, + "learning_rate": 1.8327699884451883e-05, + "loss": 0.1538, + "step": 1969 + }, + { + "epoch": 0.63, + "learning_rate": 1.8325776288512765e-05, + "loss": 0.1562, + "step": 1970 + }, + { + "epoch": 0.63, + "learning_rate": 1.8323851687950055e-05, + "loss": 0.173, + "step": 1971 + }, + { + "epoch": 0.63, + "learning_rate": 1.832192608299599e-05, + "loss": 0.1808, + "step": 1972 + }, + { + "epoch": 0.63, + "learning_rate": 1.831999947388292e-05, + "loss": 0.1471, + "step": 1973 + }, + { + "epoch": 0.63, + "learning_rate": 1.8318071860843315e-05, + "loss": 0.1558, + "step": 1974 + }, + { + "epoch": 0.64, + "learning_rate": 1.8316143244109773e-05, + "loss": 0.1334, + "step": 1975 + }, + { + "epoch": 0.64, + "learning_rate": 1.8314213623915003e-05, + "loss": 0.1833, + "step": 1976 + }, + { + "epoch": 0.64, + "learning_rate": 1.8312283000491846e-05, + "loss": 0.1631, + "step": 1977 + }, + { + "epoch": 0.64, + "learning_rate": 1.8310351374073262e-05, + "loss": 0.1622, + "step": 1978 + }, + { + "epoch": 0.64, + "learning_rate": 1.8308418744892322e-05, + "loss": 0.1587, + "step": 1979 + }, + { + "epoch": 0.64, + "learning_rate": 1.830648511318223e-05, + "loss": 0.1607, + "step": 1980 + }, + { + "epoch": 0.64, + "learning_rate": 1.8304550479176307e-05, + "loss": 0.1554, + "step": 1981 + }, + { + "epoch": 0.64, + "learning_rate": 1.830261484310799e-05, + "loss": 0.1463, + "step": 1982 + }, + { + "epoch": 0.64, + "learning_rate": 1.8300678205210844e-05, + "loss": 0.1455, + "step": 1983 + }, + { + "epoch": 0.64, + "learning_rate": 1.829874056571855e-05, + "loss": 0.161, + "step": 1984 + }, + { + "epoch": 0.64, + "learning_rate": 1.8296801924864914e-05, + "loss": 0.158, + "step": 1985 + }, + { + "epoch": 0.64, + "learning_rate": 1.8294862282883864e-05, + "loss": 0.1592, + "step": 1986 + }, + { + "epoch": 0.64, + "learning_rate": 1.8292921640009438e-05, + "loss": 0.1738, + "step": 1987 + }, + { + "epoch": 0.64, + "learning_rate": 1.8290979996475804e-05, + "loss": 0.185, + "step": 1988 + }, + { + "epoch": 0.64, + "learning_rate": 1.8289037352517252e-05, + "loss": 0.1414, + "step": 1989 + }, + { + "epoch": 0.64, + "learning_rate": 1.828709370836819e-05, + "loss": 0.1491, + "step": 1990 + }, + { + "epoch": 0.64, + "learning_rate": 1.828514906426314e-05, + "loss": 0.1508, + "step": 1991 + }, + { + "epoch": 0.64, + "learning_rate": 1.828320342043676e-05, + "loss": 0.1509, + "step": 1992 + }, + { + "epoch": 0.64, + "learning_rate": 1.828125677712381e-05, + "loss": 0.1685, + "step": 1993 + }, + { + "epoch": 0.64, + "learning_rate": 1.8279309134559187e-05, + "loss": 0.165, + "step": 1994 + }, + { + "epoch": 0.64, + "learning_rate": 1.8277360492977908e-05, + "loss": 0.1555, + "step": 1995 + }, + { + "epoch": 0.64, + "learning_rate": 1.8275410852615086e-05, + "loss": 0.1489, + "step": 1996 + }, + { + "epoch": 0.64, + "learning_rate": 1.827346021370599e-05, + "loss": 0.1495, + "step": 1997 + }, + { + "epoch": 0.64, + "learning_rate": 1.8271508576485987e-05, + "loss": 0.143, + "step": 1998 + }, + { + "epoch": 0.64, + "learning_rate": 1.8269555941190565e-05, + "loss": 0.1652, + "step": 1999 + }, + { + "epoch": 0.64, + "learning_rate": 1.8267602308055342e-05, + "loss": 0.1718, + "step": 2000 + }, + { + "epoch": 0.64, + "learning_rate": 1.826564767731605e-05, + "loss": 0.1521, + "step": 2001 + }, + { + "epoch": 0.64, + "learning_rate": 1.826369204920855e-05, + "loss": 0.1804, + "step": 2002 + }, + { + "epoch": 0.64, + "learning_rate": 1.8261735423968804e-05, + "loss": 0.1706, + "step": 2003 + }, + { + "epoch": 0.64, + "learning_rate": 1.8259777801832916e-05, + "loss": 0.148, + "step": 2004 + }, + { + "epoch": 0.64, + "learning_rate": 1.8257819183037093e-05, + "loss": 0.1369, + "step": 2005 + }, + { + "epoch": 0.65, + "learning_rate": 1.825585956781768e-05, + "loss": 0.1426, + "step": 2006 + }, + { + "epoch": 0.65, + "learning_rate": 1.8253898956411124e-05, + "loss": 0.1523, + "step": 2007 + }, + { + "epoch": 0.65, + "learning_rate": 1.825193734905401e-05, + "loss": 0.14, + "step": 2008 + }, + { + "epoch": 0.65, + "learning_rate": 1.8249974745983023e-05, + "loss": 0.1445, + "step": 2009 + }, + { + "epoch": 0.65, + "learning_rate": 1.8248011147434987e-05, + "loss": 0.1684, + "step": 2010 + }, + { + "epoch": 0.65, + "learning_rate": 1.824604655364684e-05, + "loss": 0.154, + "step": 2011 + }, + { + "epoch": 0.65, + "learning_rate": 1.824408096485563e-05, + "loss": 0.172, + "step": 2012 + }, + { + "epoch": 0.65, + "learning_rate": 1.8242114381298533e-05, + "loss": 0.1513, + "step": 2013 + }, + { + "epoch": 0.65, + "learning_rate": 1.8240146803212854e-05, + "loss": 0.1357, + "step": 2014 + }, + { + "epoch": 0.65, + "learning_rate": 1.8238178230836005e-05, + "loss": 0.1371, + "step": 2015 + }, + { + "epoch": 0.65, + "learning_rate": 1.823620866440552e-05, + "loss": 0.1575, + "step": 2016 + }, + { + "epoch": 0.65, + "learning_rate": 1.823423810415906e-05, + "loss": 0.151, + "step": 2017 + }, + { + "epoch": 0.65, + "learning_rate": 1.8232266550334398e-05, + "loss": 0.1449, + "step": 2018 + }, + { + "epoch": 0.65, + "learning_rate": 1.823029400316943e-05, + "loss": 0.1474, + "step": 2019 + }, + { + "epoch": 0.65, + "learning_rate": 1.8228320462902172e-05, + "loss": 0.1444, + "step": 2020 + }, + { + "epoch": 0.65, + "learning_rate": 1.8226345929770758e-05, + "loss": 0.1487, + "step": 2021 + }, + { + "epoch": 0.65, + "learning_rate": 1.822437040401345e-05, + "loss": 0.1436, + "step": 2022 + }, + { + "epoch": 0.65, + "learning_rate": 1.822239388586862e-05, + "loss": 0.1501, + "step": 2023 + }, + { + "epoch": 0.65, + "learning_rate": 1.8220416375574757e-05, + "loss": 0.141, + "step": 2024 + }, + { + "epoch": 0.65, + "learning_rate": 1.8218437873370488e-05, + "loss": 0.1842, + "step": 2025 + }, + { + "epoch": 0.65, + "learning_rate": 1.8216458379494538e-05, + "loss": 0.142, + "step": 2026 + }, + { + "epoch": 0.65, + "learning_rate": 1.8214477894185764e-05, + "loss": 0.1627, + "step": 2027 + }, + { + "epoch": 0.65, + "learning_rate": 1.8212496417683135e-05, + "loss": 0.1458, + "step": 2028 + }, + { + "epoch": 0.65, + "learning_rate": 1.8210513950225755e-05, + "loss": 0.1524, + "step": 2029 + }, + { + "epoch": 0.65, + "learning_rate": 1.820853049205283e-05, + "loss": 0.1469, + "step": 2030 + }, + { + "epoch": 0.65, + "learning_rate": 1.820654604340369e-05, + "loss": 0.1686, + "step": 2031 + }, + { + "epoch": 0.65, + "learning_rate": 1.8204560604517795e-05, + "loss": 0.1405, + "step": 2032 + }, + { + "epoch": 0.65, + "learning_rate": 1.8202574175634707e-05, + "loss": 0.1636, + "step": 2033 + }, + { + "epoch": 0.65, + "learning_rate": 1.8200586756994126e-05, + "loss": 0.1289, + "step": 2034 + }, + { + "epoch": 0.65, + "learning_rate": 1.8198598348835856e-05, + "loss": 0.148, + "step": 2035 + }, + { + "epoch": 0.65, + "learning_rate": 1.8196608951399834e-05, + "loss": 0.1481, + "step": 2036 + }, + { + "epoch": 0.66, + "learning_rate": 1.81946185649261e-05, + "loss": 0.1516, + "step": 2037 + }, + { + "epoch": 0.66, + "learning_rate": 1.8192627189654827e-05, + "loss": 0.1505, + "step": 2038 + }, + { + "epoch": 0.66, + "learning_rate": 1.8190634825826303e-05, + "loss": 0.1576, + "step": 2039 + }, + { + "epoch": 0.66, + "learning_rate": 1.8188641473680936e-05, + "loss": 0.1986, + "step": 2040 + }, + { + "epoch": 0.66, + "learning_rate": 1.8186647133459248e-05, + "loss": 0.1524, + "step": 2041 + }, + { + "epoch": 0.66, + "learning_rate": 1.818465180540189e-05, + "loss": 0.1473, + "step": 2042 + }, + { + "epoch": 0.66, + "learning_rate": 1.818265548974962e-05, + "loss": 0.1474, + "step": 2043 + }, + { + "epoch": 0.66, + "learning_rate": 1.8180658186743333e-05, + "loss": 0.158, + "step": 2044 + }, + { + "epoch": 0.66, + "learning_rate": 1.8178659896624023e-05, + "loss": 0.1508, + "step": 2045 + }, + { + "epoch": 0.66, + "learning_rate": 1.817666061963281e-05, + "loss": 0.1678, + "step": 2046 + }, + { + "epoch": 0.66, + "learning_rate": 1.8174660356010944e-05, + "loss": 0.1682, + "step": 2047 + }, + { + "epoch": 0.66, + "learning_rate": 1.817265910599978e-05, + "loss": 0.1563, + "step": 2048 + }, + { + "epoch": 0.66, + "learning_rate": 1.81706568698408e-05, + "loss": 0.1496, + "step": 2049 + }, + { + "epoch": 0.66, + "learning_rate": 1.81686536477756e-05, + "loss": 0.1478, + "step": 2050 + }, + { + "epoch": 0.66, + "learning_rate": 1.8166649440045902e-05, + "loss": 0.1463, + "step": 2051 + }, + { + "epoch": 0.66, + "learning_rate": 1.8164644246893532e-05, + "loss": 0.1567, + "step": 2052 + }, + { + "epoch": 0.66, + "learning_rate": 1.8162638068560455e-05, + "loss": 0.1527, + "step": 2053 + }, + { + "epoch": 0.66, + "learning_rate": 1.816063090528874e-05, + "loss": 0.1429, + "step": 2054 + }, + { + "epoch": 0.66, + "learning_rate": 1.8158622757320584e-05, + "loss": 0.1504, + "step": 2055 + }, + { + "epoch": 0.66, + "learning_rate": 1.8156613624898293e-05, + "loss": 0.1535, + "step": 2056 + }, + { + "epoch": 0.66, + "learning_rate": 1.81546035082643e-05, + "loss": 0.1626, + "step": 2057 + }, + { + "epoch": 0.66, + "learning_rate": 1.815259240766116e-05, + "loss": 0.1671, + "step": 2058 + }, + { + "epoch": 0.66, + "learning_rate": 1.815058032333153e-05, + "loss": 0.1744, + "step": 2059 + }, + { + "epoch": 0.66, + "learning_rate": 1.8148567255518203e-05, + "loss": 0.1742, + "step": 2060 + }, + { + "epoch": 0.66, + "learning_rate": 1.814655320446409e-05, + "loss": 0.1456, + "step": 2061 + }, + { + "epoch": 0.66, + "learning_rate": 1.81445381704122e-05, + "loss": 0.1492, + "step": 2062 + }, + { + "epoch": 0.66, + "learning_rate": 1.814252215360569e-05, + "loss": 0.1518, + "step": 2063 + }, + { + "epoch": 0.66, + "learning_rate": 1.814050515428781e-05, + "loss": 0.1594, + "step": 2064 + }, + { + "epoch": 0.66, + "learning_rate": 1.813848717270195e-05, + "loss": 0.1426, + "step": 2065 + }, + { + "epoch": 0.66, + "learning_rate": 1.81364682090916e-05, + "loss": 0.1656, + "step": 2066 + }, + { + "epoch": 0.66, + "learning_rate": 1.8134448263700383e-05, + "loss": 0.1518, + "step": 2067 + }, + { + "epoch": 0.67, + "learning_rate": 1.8132427336772033e-05, + "loss": 0.1606, + "step": 2068 + }, + { + "epoch": 0.67, + "learning_rate": 1.81304054285504e-05, + "loss": 0.1568, + "step": 2069 + }, + { + "epoch": 0.67, + "learning_rate": 1.8128382539279455e-05, + "loss": 0.1588, + "step": 2070 + }, + { + "epoch": 0.67, + "learning_rate": 1.8126358669203298e-05, + "loss": 0.1388, + "step": 2071 + }, + { + "epoch": 0.67, + "learning_rate": 1.8124333818566124e-05, + "loss": 0.1507, + "step": 2072 + }, + { + "epoch": 0.67, + "learning_rate": 1.812230798761227e-05, + "loss": 0.1457, + "step": 2073 + }, + { + "epoch": 0.67, + "learning_rate": 1.8120281176586178e-05, + "loss": 0.145, + "step": 2074 + }, + { + "epoch": 0.67, + "learning_rate": 1.8118253385732415e-05, + "loss": 0.1454, + "step": 2075 + }, + { + "epoch": 0.67, + "learning_rate": 1.811622461529566e-05, + "loss": 0.1496, + "step": 2076 + }, + { + "epoch": 0.67, + "learning_rate": 1.8114194865520712e-05, + "loss": 0.1574, + "step": 2077 + }, + { + "epoch": 0.67, + "learning_rate": 1.8112164136652487e-05, + "loss": 0.1502, + "step": 2078 + }, + { + "epoch": 0.67, + "learning_rate": 1.811013242893603e-05, + "loss": 0.1675, + "step": 2079 + }, + { + "epoch": 0.67, + "learning_rate": 1.8108099742616486e-05, + "loss": 0.1488, + "step": 2080 + }, + { + "epoch": 0.67, + "learning_rate": 1.8106066077939137e-05, + "loss": 0.1423, + "step": 2081 + }, + { + "epoch": 0.67, + "learning_rate": 1.8104031435149366e-05, + "loss": 0.1676, + "step": 2082 + }, + { + "epoch": 0.67, + "learning_rate": 1.8101995814492683e-05, + "loss": 0.1569, + "step": 2083 + }, + { + "epoch": 0.67, + "learning_rate": 1.809995921621472e-05, + "loss": 0.1455, + "step": 2084 + }, + { + "epoch": 0.67, + "learning_rate": 1.809792164056121e-05, + "loss": 0.1444, + "step": 2085 + }, + { + "epoch": 0.67, + "learning_rate": 1.809588308777803e-05, + "loss": 0.1525, + "step": 2086 + }, + { + "epoch": 0.67, + "learning_rate": 1.809384355811115e-05, + "loss": 0.1471, + "step": 2087 + }, + { + "epoch": 0.67, + "learning_rate": 1.8091803051806676e-05, + "loss": 0.1513, + "step": 2088 + }, + { + "epoch": 0.67, + "learning_rate": 1.8089761569110816e-05, + "loss": 0.147, + "step": 2089 + }, + { + "epoch": 0.67, + "learning_rate": 1.808771911026991e-05, + "loss": 0.1863, + "step": 2090 + }, + { + "epoch": 0.67, + "learning_rate": 1.808567567553041e-05, + "loss": 0.1497, + "step": 2091 + }, + { + "epoch": 0.67, + "learning_rate": 1.8083631265138875e-05, + "loss": 0.1565, + "step": 2092 + }, + { + "epoch": 0.67, + "learning_rate": 1.8081585879342008e-05, + "loss": 0.1469, + "step": 2093 + }, + { + "epoch": 0.67, + "learning_rate": 1.8079539518386605e-05, + "loss": 0.1648, + "step": 2094 + }, + { + "epoch": 0.67, + "learning_rate": 1.8077492182519587e-05, + "loss": 0.1506, + "step": 2095 + }, + { + "epoch": 0.67, + "learning_rate": 1.8075443871988e-05, + "loss": 0.168, + "step": 2096 + }, + { + "epoch": 0.67, + "learning_rate": 1.8073394587039e-05, + "loss": 0.1476, + "step": 2097 + }, + { + "epoch": 0.67, + "learning_rate": 1.807134432791986e-05, + "loss": 0.1575, + "step": 2098 + }, + { + "epoch": 0.68, + "learning_rate": 1.8069293094877974e-05, + "loss": 0.1414, + "step": 2099 + }, + { + "epoch": 0.68, + "learning_rate": 1.8067240888160854e-05, + "loss": 0.1505, + "step": 2100 + }, + { + "epoch": 0.68, + "learning_rate": 1.806518770801613e-05, + "loss": 0.151, + "step": 2101 + }, + { + "epoch": 0.68, + "learning_rate": 1.806313355469154e-05, + "loss": 0.1502, + "step": 2102 + }, + { + "epoch": 0.68, + "learning_rate": 1.8061078428434956e-05, + "loss": 0.1503, + "step": 2103 + }, + { + "epoch": 0.68, + "learning_rate": 1.805902232949435e-05, + "loss": 0.1351, + "step": 2104 + }, + { + "epoch": 0.68, + "learning_rate": 1.8056965258117825e-05, + "loss": 0.1612, + "step": 2105 + }, + { + "epoch": 0.68, + "learning_rate": 1.8054907214553593e-05, + "loss": 0.1492, + "step": 2106 + }, + { + "epoch": 0.68, + "learning_rate": 1.805284819904999e-05, + "loss": 0.1435, + "step": 2107 + }, + { + "epoch": 0.68, + "learning_rate": 1.8050788211855463e-05, + "loss": 0.1756, + "step": 2108 + }, + { + "epoch": 0.68, + "learning_rate": 1.8048727253218578e-05, + "loss": 0.1579, + "step": 2109 + }, + { + "epoch": 0.68, + "learning_rate": 1.8046665323388023e-05, + "loss": 0.1557, + "step": 2110 + }, + { + "epoch": 0.68, + "learning_rate": 1.8044602422612593e-05, + "loss": 0.1447, + "step": 2111 + }, + { + "epoch": 0.68, + "learning_rate": 1.8042538551141207e-05, + "loss": 0.1718, + "step": 2112 + }, + { + "epoch": 0.68, + "learning_rate": 1.804047370922291e-05, + "loss": 0.15, + "step": 2113 + }, + { + "epoch": 0.68, + "learning_rate": 1.8038407897106844e-05, + "loss": 0.1448, + "step": 2114 + }, + { + "epoch": 0.68, + "learning_rate": 1.8036341115042284e-05, + "loss": 0.1467, + "step": 2115 + }, + { + "epoch": 0.68, + "learning_rate": 1.8034273363278615e-05, + "loss": 0.1485, + "step": 2116 + }, + { + "epoch": 0.68, + "learning_rate": 1.8032204642065337e-05, + "loss": 0.1514, + "step": 2117 + }, + { + "epoch": 0.68, + "learning_rate": 1.8030134951652082e-05, + "loss": 0.1504, + "step": 2118 + }, + { + "epoch": 0.68, + "learning_rate": 1.8028064292288576e-05, + "loss": 0.1518, + "step": 2119 + }, + { + "epoch": 0.68, + "learning_rate": 1.8025992664224675e-05, + "loss": 0.1785, + "step": 2120 + }, + { + "epoch": 0.68, + "learning_rate": 1.802392006771036e-05, + "loss": 0.1414, + "step": 2121 + }, + { + "epoch": 0.68, + "learning_rate": 1.802184650299571e-05, + "loss": 0.1592, + "step": 2122 + }, + { + "epoch": 0.68, + "learning_rate": 1.801977197033093e-05, + "loss": 0.1502, + "step": 2123 + }, + { + "epoch": 0.68, + "learning_rate": 1.801769646996635e-05, + "loss": 0.1528, + "step": 2124 + }, + { + "epoch": 0.68, + "learning_rate": 1.8015620002152398e-05, + "loss": 0.1541, + "step": 2125 + }, + { + "epoch": 0.68, + "learning_rate": 1.8013542567139635e-05, + "loss": 0.1551, + "step": 2126 + }, + { + "epoch": 0.68, + "learning_rate": 1.8011464165178736e-05, + "loss": 0.1503, + "step": 2127 + }, + { + "epoch": 0.68, + "learning_rate": 1.8009384796520487e-05, + "loss": 0.1598, + "step": 2128 + }, + { + "epoch": 0.68, + "learning_rate": 1.8007304461415794e-05, + "loss": 0.1721, + "step": 2129 + }, + { + "epoch": 0.69, + "learning_rate": 1.8005223160115676e-05, + "loss": 0.1659, + "step": 2130 + }, + { + "epoch": 0.69, + "learning_rate": 1.8003140892871277e-05, + "loss": 0.1595, + "step": 2131 + }, + { + "epoch": 0.69, + "learning_rate": 1.8001057659933844e-05, + "loss": 0.1661, + "step": 2132 + }, + { + "epoch": 0.69, + "learning_rate": 1.799897346155476e-05, + "loss": 0.1483, + "step": 2133 + }, + { + "epoch": 0.69, + "learning_rate": 1.7996888297985504e-05, + "loss": 0.145, + "step": 2134 + }, + { + "epoch": 0.69, + "learning_rate": 1.7994802169477684e-05, + "loss": 0.1477, + "step": 2135 + }, + { + "epoch": 0.69, + "learning_rate": 1.7992715076283024e-05, + "loss": 0.1723, + "step": 2136 + }, + { + "epoch": 0.69, + "learning_rate": 1.7990627018653353e-05, + "loss": 0.1571, + "step": 2137 + }, + { + "epoch": 0.69, + "learning_rate": 1.7988537996840634e-05, + "loss": 0.1711, + "step": 2138 + }, + { + "epoch": 0.69, + "learning_rate": 1.7986448011096937e-05, + "loss": 0.1451, + "step": 2139 + }, + { + "epoch": 0.69, + "learning_rate": 1.7984357061674443e-05, + "loss": 0.1467, + "step": 2140 + }, + { + "epoch": 0.69, + "learning_rate": 1.7982265148825454e-05, + "loss": 0.1531, + "step": 2141 + }, + { + "epoch": 0.69, + "learning_rate": 1.7980172272802398e-05, + "loss": 0.1601, + "step": 2142 + }, + { + "epoch": 0.69, + "learning_rate": 1.79780784338578e-05, + "loss": 0.1502, + "step": 2143 + }, + { + "epoch": 0.69, + "learning_rate": 1.7975983632244317e-05, + "loss": 0.1415, + "step": 2144 + }, + { + "epoch": 0.69, + "learning_rate": 1.797388786821472e-05, + "loss": 0.155, + "step": 2145 + }, + { + "epoch": 0.69, + "learning_rate": 1.7971791142021887e-05, + "loss": 0.1346, + "step": 2146 + }, + { + "epoch": 0.69, + "learning_rate": 1.7969693453918816e-05, + "loss": 0.1414, + "step": 2147 + }, + { + "epoch": 0.69, + "learning_rate": 1.796759480415863e-05, + "loss": 0.1453, + "step": 2148 + }, + { + "epoch": 0.69, + "learning_rate": 1.7965495192994555e-05, + "loss": 0.1666, + "step": 2149 + }, + { + "epoch": 0.69, + "learning_rate": 1.7963394620679945e-05, + "loss": 0.138, + "step": 2150 + }, + { + "epoch": 0.69, + "learning_rate": 1.7961293087468255e-05, + "loss": 0.1438, + "step": 2151 + }, + { + "epoch": 0.69, + "learning_rate": 1.7959190593613075e-05, + "loss": 0.1381, + "step": 2152 + }, + { + "epoch": 0.69, + "learning_rate": 1.7957087139368096e-05, + "loss": 0.146, + "step": 2153 + }, + { + "epoch": 0.69, + "learning_rate": 1.7954982724987132e-05, + "loss": 0.1614, + "step": 2154 + }, + { + "epoch": 0.69, + "learning_rate": 1.7952877350724108e-05, + "loss": 0.1427, + "step": 2155 + }, + { + "epoch": 0.69, + "learning_rate": 1.7950771016833064e-05, + "loss": 0.1408, + "step": 2156 + }, + { + "epoch": 0.69, + "learning_rate": 1.7948663723568166e-05, + "loss": 0.1524, + "step": 2157 + }, + { + "epoch": 0.69, + "learning_rate": 1.7946555471183685e-05, + "loss": 0.1649, + "step": 2158 + }, + { + "epoch": 0.69, + "learning_rate": 1.7944446259934018e-05, + "loss": 0.1498, + "step": 2159 + }, + { + "epoch": 0.69, + "learning_rate": 1.794233609007366e-05, + "loss": 0.1995, + "step": 2160 + }, + { + "epoch": 0.7, + "learning_rate": 1.7940224961857242e-05, + "loss": 0.132, + "step": 2161 + }, + { + "epoch": 0.7, + "learning_rate": 1.79381128755395e-05, + "loss": 0.135, + "step": 2162 + }, + { + "epoch": 0.7, + "learning_rate": 1.793599983137529e-05, + "loss": 0.1405, + "step": 2163 + }, + { + "epoch": 0.7, + "learning_rate": 1.793388582961957e-05, + "loss": 0.1467, + "step": 2164 + }, + { + "epoch": 0.7, + "learning_rate": 1.7931770870527437e-05, + "loss": 0.1484, + "step": 2165 + }, + { + "epoch": 0.7, + "learning_rate": 1.7929654954354085e-05, + "loss": 0.1343, + "step": 2166 + }, + { + "epoch": 0.7, + "learning_rate": 1.7927538081354826e-05, + "loss": 0.144, + "step": 2167 + }, + { + "epoch": 0.7, + "learning_rate": 1.79254202517851e-05, + "loss": 0.1647, + "step": 2168 + }, + { + "epoch": 0.7, + "learning_rate": 1.7923301465900448e-05, + "loss": 0.1532, + "step": 2169 + }, + { + "epoch": 0.7, + "learning_rate": 1.792118172395653e-05, + "loss": 0.1523, + "step": 2170 + }, + { + "epoch": 0.7, + "learning_rate": 1.7919061026209126e-05, + "loss": 0.1296, + "step": 2171 + }, + { + "epoch": 0.7, + "learning_rate": 1.791693937291413e-05, + "loss": 0.1567, + "step": 2172 + }, + { + "epoch": 0.7, + "learning_rate": 1.7914816764327546e-05, + "loss": 0.1478, + "step": 2173 + }, + { + "epoch": 0.7, + "learning_rate": 1.79126932007055e-05, + "loss": 0.1413, + "step": 2174 + }, + { + "epoch": 0.7, + "learning_rate": 1.7910568682304226e-05, + "loss": 0.1455, + "step": 2175 + }, + { + "epoch": 0.7, + "learning_rate": 1.790844320938008e-05, + "loss": 0.1436, + "step": 2176 + }, + { + "epoch": 0.7, + "learning_rate": 1.790631678218953e-05, + "loss": 0.1697, + "step": 2177 + }, + { + "epoch": 0.7, + "learning_rate": 1.7904189400989165e-05, + "loss": 0.1532, + "step": 2178 + }, + { + "epoch": 0.7, + "learning_rate": 1.7902061066035677e-05, + "loss": 0.1335, + "step": 2179 + }, + { + "epoch": 0.7, + "learning_rate": 1.789993177758588e-05, + "loss": 0.1576, + "step": 2180 + }, + { + "epoch": 0.7, + "learning_rate": 1.7897801535896708e-05, + "loss": 0.1528, + "step": 2181 + }, + { + "epoch": 0.7, + "learning_rate": 1.78956703412252e-05, + "loss": 0.1575, + "step": 2182 + }, + { + "epoch": 0.7, + "learning_rate": 1.7893538193828523e-05, + "loss": 0.1417, + "step": 2183 + }, + { + "epoch": 0.7, + "learning_rate": 1.789140509396394e-05, + "loss": 0.1551, + "step": 2184 + }, + { + "epoch": 0.7, + "learning_rate": 1.7889271041888844e-05, + "loss": 0.1526, + "step": 2185 + }, + { + "epoch": 0.7, + "learning_rate": 1.7887136037860742e-05, + "loss": 0.1411, + "step": 2186 + }, + { + "epoch": 0.7, + "learning_rate": 1.7885000082137248e-05, + "loss": 0.1431, + "step": 2187 + }, + { + "epoch": 0.7, + "learning_rate": 1.7882863174976105e-05, + "loss": 0.1597, + "step": 2188 + }, + { + "epoch": 0.7, + "learning_rate": 1.7880725316635147e-05, + "loss": 0.1535, + "step": 2189 + }, + { + "epoch": 0.7, + "learning_rate": 1.787858650737235e-05, + "loss": 0.1374, + "step": 2190 + }, + { + "epoch": 0.7, + "learning_rate": 1.7876446747445782e-05, + "loss": 0.1682, + "step": 2191 + }, + { + "epoch": 0.71, + "learning_rate": 1.7874306037113642e-05, + "loss": 0.1386, + "step": 2192 + }, + { + "epoch": 0.71, + "learning_rate": 1.7872164376634236e-05, + "loss": 0.1605, + "step": 2193 + }, + { + "epoch": 0.71, + "learning_rate": 1.7870021766265985e-05, + "loss": 0.1497, + "step": 2194 + }, + { + "epoch": 0.71, + "learning_rate": 1.7867878206267422e-05, + "loss": 0.1714, + "step": 2195 + }, + { + "epoch": 0.71, + "learning_rate": 1.7865733696897205e-05, + "loss": 0.1505, + "step": 2196 + }, + { + "epoch": 0.71, + "learning_rate": 1.78635882384141e-05, + "loss": 0.1458, + "step": 2197 + }, + { + "epoch": 0.71, + "learning_rate": 1.7861441831076975e-05, + "loss": 0.1349, + "step": 2198 + }, + { + "epoch": 0.71, + "learning_rate": 1.7859294475144837e-05, + "loss": 0.1707, + "step": 2199 + }, + { + "epoch": 0.71, + "learning_rate": 1.785714617087679e-05, + "loss": 0.1366, + "step": 2200 + }, + { + "epoch": 0.71, + "learning_rate": 1.785499691853206e-05, + "loss": 0.1485, + "step": 2201 + }, + { + "epoch": 0.71, + "learning_rate": 1.785284671836998e-05, + "loss": 0.1424, + "step": 2202 + }, + { + "epoch": 0.71, + "learning_rate": 1.785069557065001e-05, + "loss": 0.1506, + "step": 2203 + }, + { + "epoch": 0.71, + "learning_rate": 1.784854347563171e-05, + "loss": 0.1347, + "step": 2204 + }, + { + "epoch": 0.71, + "learning_rate": 1.7846390433574768e-05, + "loss": 0.1487, + "step": 2205 + }, + { + "epoch": 0.71, + "learning_rate": 1.784423644473897e-05, + "loss": 0.1693, + "step": 2206 + }, + { + "epoch": 0.71, + "learning_rate": 1.784208150938423e-05, + "loss": 0.1468, + "step": 2207 + }, + { + "epoch": 0.71, + "learning_rate": 1.783992562777057e-05, + "loss": 0.1604, + "step": 2208 + }, + { + "epoch": 0.71, + "learning_rate": 1.7837768800158133e-05, + "loss": 0.1519, + "step": 2209 + }, + { + "epoch": 0.71, + "learning_rate": 1.783561102680717e-05, + "loss": 0.1587, + "step": 2210 + }, + { + "epoch": 0.71, + "learning_rate": 1.783345230797804e-05, + "loss": 0.1435, + "step": 2211 + }, + { + "epoch": 0.71, + "learning_rate": 1.783129264393123e-05, + "loss": 0.1575, + "step": 2212 + }, + { + "epoch": 0.71, + "learning_rate": 1.7829132034927332e-05, + "loss": 0.1554, + "step": 2213 + }, + { + "epoch": 0.71, + "learning_rate": 1.7826970481227053e-05, + "loss": 0.1615, + "step": 2214 + }, + { + "epoch": 0.71, + "learning_rate": 1.7824807983091218e-05, + "loss": 0.159, + "step": 2215 + }, + { + "epoch": 0.71, + "learning_rate": 1.7822644540780763e-05, + "loss": 0.1498, + "step": 2216 + }, + { + "epoch": 0.71, + "learning_rate": 1.7820480154556735e-05, + "loss": 0.1441, + "step": 2217 + }, + { + "epoch": 0.71, + "learning_rate": 1.78183148246803e-05, + "loss": 0.1618, + "step": 2218 + }, + { + "epoch": 0.71, + "learning_rate": 1.7816148551412738e-05, + "loss": 0.1562, + "step": 2219 + }, + { + "epoch": 0.71, + "learning_rate": 1.7813981335015436e-05, + "loss": 0.1437, + "step": 2220 + }, + { + "epoch": 0.71, + "learning_rate": 1.7811813175749904e-05, + "loss": 0.1481, + "step": 2221 + }, + { + "epoch": 0.71, + "learning_rate": 1.780964407387776e-05, + "loss": 0.1637, + "step": 2222 + }, + { + "epoch": 0.72, + "learning_rate": 1.7807474029660736e-05, + "loss": 0.1416, + "step": 2223 + }, + { + "epoch": 0.72, + "learning_rate": 1.780530304336068e-05, + "loss": 0.1512, + "step": 2224 + }, + { + "epoch": 0.72, + "learning_rate": 1.780313111523955e-05, + "loss": 0.1524, + "step": 2225 + }, + { + "epoch": 0.72, + "learning_rate": 1.7800958245559425e-05, + "loss": 0.1634, + "step": 2226 + }, + { + "epoch": 0.72, + "learning_rate": 1.7798784434582484e-05, + "loss": 0.1392, + "step": 2227 + }, + { + "epoch": 0.72, + "learning_rate": 1.779660968257104e-05, + "loss": 0.1591, + "step": 2228 + }, + { + "epoch": 0.72, + "learning_rate": 1.7794433989787503e-05, + "loss": 0.1458, + "step": 2229 + }, + { + "epoch": 0.72, + "learning_rate": 1.7792257356494397e-05, + "loss": 0.1425, + "step": 2230 + }, + { + "epoch": 0.72, + "learning_rate": 1.7790079782954366e-05, + "loss": 0.1483, + "step": 2231 + }, + { + "epoch": 0.72, + "learning_rate": 1.7787901269430168e-05, + "loss": 0.1581, + "step": 2232 + }, + { + "epoch": 0.72, + "learning_rate": 1.778572181618467e-05, + "loss": 0.1607, + "step": 2233 + }, + { + "epoch": 0.72, + "learning_rate": 1.7783541423480853e-05, + "loss": 0.1438, + "step": 2234 + }, + { + "epoch": 0.72, + "learning_rate": 1.7781360091581815e-05, + "loss": 0.1524, + "step": 2235 + }, + { + "epoch": 0.72, + "learning_rate": 1.7779177820750763e-05, + "loss": 0.138, + "step": 2236 + }, + { + "epoch": 0.72, + "learning_rate": 1.7776994611251016e-05, + "loss": 0.1445, + "step": 2237 + }, + { + "epoch": 0.72, + "learning_rate": 1.7774810463346015e-05, + "loss": 0.1683, + "step": 2238 + }, + { + "epoch": 0.72, + "learning_rate": 1.777262537729931e-05, + "loss": 0.1468, + "step": 2239 + }, + { + "epoch": 0.72, + "learning_rate": 1.7770439353374555e-05, + "loss": 0.1428, + "step": 2240 + }, + { + "epoch": 0.72, + "learning_rate": 1.7768252391835526e-05, + "loss": 0.1377, + "step": 2241 + }, + { + "epoch": 0.72, + "learning_rate": 1.7766064492946122e-05, + "loss": 0.1374, + "step": 2242 + }, + { + "epoch": 0.72, + "learning_rate": 1.7763875656970334e-05, + "loss": 0.151, + "step": 2243 + }, + { + "epoch": 0.72, + "learning_rate": 1.7761685884172274e-05, + "loss": 0.1558, + "step": 2244 + }, + { + "epoch": 0.72, + "learning_rate": 1.775949517481618e-05, + "loss": 0.1485, + "step": 2245 + }, + { + "epoch": 0.72, + "learning_rate": 1.775730352916638e-05, + "loss": 0.1651, + "step": 2246 + }, + { + "epoch": 0.72, + "learning_rate": 1.7755110947487338e-05, + "loss": 0.1575, + "step": 2247 + }, + { + "epoch": 0.72, + "learning_rate": 1.7752917430043615e-05, + "loss": 0.1483, + "step": 2248 + }, + { + "epoch": 0.72, + "learning_rate": 1.775072297709989e-05, + "loss": 0.1533, + "step": 2249 + }, + { + "epoch": 0.72, + "learning_rate": 1.7748527588920956e-05, + "loss": 0.1618, + "step": 2250 + }, + { + "epoch": 0.72, + "learning_rate": 1.774633126577172e-05, + "loss": 0.1479, + "step": 2251 + }, + { + "epoch": 0.72, + "learning_rate": 1.7744134007917195e-05, + "loss": 0.1655, + "step": 2252 + }, + { + "epoch": 0.72, + "learning_rate": 1.7741935815622512e-05, + "loss": 0.1416, + "step": 2253 + }, + { + "epoch": 0.73, + "learning_rate": 1.773973668915292e-05, + "loss": 0.1356, + "step": 2254 + }, + { + "epoch": 0.73, + "learning_rate": 1.7737536628773773e-05, + "loss": 0.1486, + "step": 2255 + }, + { + "epoch": 0.73, + "learning_rate": 1.773533563475053e-05, + "loss": 0.1543, + "step": 2256 + }, + { + "epoch": 0.73, + "learning_rate": 1.773313370734879e-05, + "loss": 0.1471, + "step": 2257 + }, + { + "epoch": 0.73, + "learning_rate": 1.773093084683423e-05, + "loss": 0.1506, + "step": 2258 + }, + { + "epoch": 0.73, + "learning_rate": 1.7728727053472664e-05, + "loss": 0.1462, + "step": 2259 + }, + { + "epoch": 0.73, + "learning_rate": 1.7726522327530013e-05, + "loss": 0.1364, + "step": 2260 + }, + { + "epoch": 0.73, + "learning_rate": 1.7724316669272305e-05, + "loss": 0.1356, + "step": 2261 + }, + { + "epoch": 0.73, + "learning_rate": 1.7722110078965685e-05, + "loss": 0.1613, + "step": 2262 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719902556876412e-05, + "loss": 0.1472, + "step": 2263 + }, + { + "epoch": 0.73, + "learning_rate": 1.771769410327085e-05, + "loss": 0.1584, + "step": 2264 + }, + { + "epoch": 0.73, + "learning_rate": 1.7715484718415486e-05, + "loss": 0.1425, + "step": 2265 + }, + { + "epoch": 0.73, + "learning_rate": 1.771327440257691e-05, + "loss": 0.1386, + "step": 2266 + }, + { + "epoch": 0.73, + "learning_rate": 1.771106315602183e-05, + "loss": 0.1551, + "step": 2267 + }, + { + "epoch": 0.73, + "learning_rate": 1.770885097901706e-05, + "loss": 0.1542, + "step": 2268 + }, + { + "epoch": 0.73, + "learning_rate": 1.770663787182954e-05, + "loss": 0.1612, + "step": 2269 + }, + { + "epoch": 0.73, + "learning_rate": 1.7704423834726303e-05, + "loss": 0.1487, + "step": 2270 + }, + { + "epoch": 0.73, + "learning_rate": 1.7702208867974512e-05, + "loss": 0.1623, + "step": 2271 + }, + { + "epoch": 0.73, + "learning_rate": 1.7699992971841427e-05, + "loss": 0.1287, + "step": 2272 + }, + { + "epoch": 0.73, + "learning_rate": 1.7697776146594433e-05, + "loss": 0.1376, + "step": 2273 + }, + { + "epoch": 0.73, + "learning_rate": 1.769555839250102e-05, + "loss": 0.14, + "step": 2274 + }, + { + "epoch": 0.73, + "learning_rate": 1.769333970982879e-05, + "loss": 0.1579, + "step": 2275 + }, + { + "epoch": 0.73, + "learning_rate": 1.769112009884546e-05, + "loss": 0.1654, + "step": 2276 + }, + { + "epoch": 0.73, + "learning_rate": 1.768889955981886e-05, + "loss": 0.1604, + "step": 2277 + }, + { + "epoch": 0.73, + "learning_rate": 1.7686678093016928e-05, + "loss": 0.1301, + "step": 2278 + }, + { + "epoch": 0.73, + "learning_rate": 1.7684455698707714e-05, + "loss": 0.1462, + "step": 2279 + }, + { + "epoch": 0.73, + "learning_rate": 1.768223237715938e-05, + "loss": 0.1597, + "step": 2280 + }, + { + "epoch": 0.73, + "learning_rate": 1.7680008128640204e-05, + "loss": 0.1628, + "step": 2281 + }, + { + "epoch": 0.73, + "learning_rate": 1.7677782953418577e-05, + "loss": 0.1369, + "step": 2282 + }, + { + "epoch": 0.73, + "learning_rate": 1.7675556851762993e-05, + "loss": 0.148, + "step": 2283 + }, + { + "epoch": 0.73, + "learning_rate": 1.7673329823942065e-05, + "loss": 0.1425, + "step": 2284 + }, + { + "epoch": 0.73, + "learning_rate": 1.7671101870224513e-05, + "loss": 0.1396, + "step": 2285 + }, + { + "epoch": 0.74, + "learning_rate": 1.7668872990879175e-05, + "loss": 0.1626, + "step": 2286 + }, + { + "epoch": 0.74, + "learning_rate": 1.7666643186174998e-05, + "loss": 0.1637, + "step": 2287 + }, + { + "epoch": 0.74, + "learning_rate": 1.7664412456381033e-05, + "loss": 0.1561, + "step": 2288 + }, + { + "epoch": 0.74, + "learning_rate": 1.7662180801766453e-05, + "loss": 0.1583, + "step": 2289 + }, + { + "epoch": 0.74, + "learning_rate": 1.765994822260054e-05, + "loss": 0.1446, + "step": 2290 + }, + { + "epoch": 0.74, + "learning_rate": 1.765771471915269e-05, + "loss": 0.1505, + "step": 2291 + }, + { + "epoch": 0.74, + "learning_rate": 1.76554802916924e-05, + "loss": 0.1446, + "step": 2292 + }, + { + "epoch": 0.74, + "learning_rate": 1.765324494048929e-05, + "loss": 0.1518, + "step": 2293 + }, + { + "epoch": 0.74, + "learning_rate": 1.7651008665813083e-05, + "loss": 0.1599, + "step": 2294 + }, + { + "epoch": 0.74, + "learning_rate": 1.7648771467933625e-05, + "loss": 0.1584, + "step": 2295 + }, + { + "epoch": 0.74, + "learning_rate": 1.7646533347120852e-05, + "loss": 0.1492, + "step": 2296 + }, + { + "epoch": 0.74, + "learning_rate": 1.764429430364484e-05, + "loss": 0.1463, + "step": 2297 + }, + { + "epoch": 0.74, + "learning_rate": 1.7642054337775756e-05, + "loss": 0.1461, + "step": 2298 + }, + { + "epoch": 0.74, + "learning_rate": 1.763981344978388e-05, + "loss": 0.1525, + "step": 2299 + }, + { + "epoch": 0.74, + "learning_rate": 1.763757163993961e-05, + "loss": 0.1489, + "step": 2300 + }, + { + "epoch": 0.74, + "learning_rate": 1.763532890851346e-05, + "loss": 0.145, + "step": 2301 + }, + { + "epoch": 0.74, + "learning_rate": 1.7633085255776033e-05, + "loss": 0.1622, + "step": 2302 + }, + { + "epoch": 0.74, + "learning_rate": 1.7630840681998068e-05, + "loss": 0.1372, + "step": 2303 + }, + { + "epoch": 0.74, + "learning_rate": 1.76285951874504e-05, + "loss": 0.132, + "step": 2304 + }, + { + "epoch": 0.74, + "learning_rate": 1.762634877240398e-05, + "loss": 0.1582, + "step": 2305 + }, + { + "epoch": 0.74, + "learning_rate": 1.7624101437129874e-05, + "loss": 0.1533, + "step": 2306 + }, + { + "epoch": 0.74, + "learning_rate": 1.7621853181899254e-05, + "loss": 0.1686, + "step": 2307 + }, + { + "epoch": 0.74, + "learning_rate": 1.76196040069834e-05, + "loss": 0.1586, + "step": 2308 + }, + { + "epoch": 0.74, + "learning_rate": 1.761735391265371e-05, + "loss": 0.1523, + "step": 2309 + }, + { + "epoch": 0.74, + "learning_rate": 1.761510289918169e-05, + "loss": 0.1624, + "step": 2310 + }, + { + "epoch": 0.74, + "learning_rate": 1.7612850966838956e-05, + "loss": 0.1632, + "step": 2311 + }, + { + "epoch": 0.74, + "learning_rate": 1.761059811589724e-05, + "loss": 0.1495, + "step": 2312 + }, + { + "epoch": 0.74, + "learning_rate": 1.760834434662837e-05, + "loss": 0.1631, + "step": 2313 + }, + { + "epoch": 0.74, + "learning_rate": 1.7606089659304307e-05, + "loss": 0.1654, + "step": 2314 + }, + { + "epoch": 0.74, + "learning_rate": 1.7603834054197104e-05, + "loss": 0.1401, + "step": 2315 + }, + { + "epoch": 0.74, + "learning_rate": 1.7601577531578936e-05, + "loss": 0.1431, + "step": 2316 + }, + { + "epoch": 0.75, + "learning_rate": 1.7599320091722085e-05, + "loss": 0.1414, + "step": 2317 + }, + { + "epoch": 0.75, + "learning_rate": 1.7597061734898936e-05, + "loss": 0.1382, + "step": 2318 + }, + { + "epoch": 0.75, + "learning_rate": 1.7594802461382003e-05, + "loss": 0.1496, + "step": 2319 + }, + { + "epoch": 0.75, + "learning_rate": 1.7592542271443888e-05, + "loss": 0.1362, + "step": 2320 + }, + { + "epoch": 0.75, + "learning_rate": 1.7590281165357323e-05, + "loss": 0.1838, + "step": 2321 + }, + { + "epoch": 0.75, + "learning_rate": 1.758801914339514e-05, + "loss": 0.1428, + "step": 2322 + }, + { + "epoch": 0.75, + "learning_rate": 1.7585756205830287e-05, + "loss": 0.1528, + "step": 2323 + }, + { + "epoch": 0.75, + "learning_rate": 1.7583492352935817e-05, + "loss": 0.1539, + "step": 2324 + }, + { + "epoch": 0.75, + "learning_rate": 1.758122758498489e-05, + "loss": 0.1566, + "step": 2325 + }, + { + "epoch": 0.75, + "learning_rate": 1.75789619022508e-05, + "loss": 0.1691, + "step": 2326 + }, + { + "epoch": 0.75, + "learning_rate": 1.7576695305006915e-05, + "loss": 0.1471, + "step": 2327 + }, + { + "epoch": 0.75, + "learning_rate": 1.7574427793526743e-05, + "loss": 0.1575, + "step": 2328 + }, + { + "epoch": 0.75, + "learning_rate": 1.757215936808389e-05, + "loss": 0.1482, + "step": 2329 + }, + { + "epoch": 0.75, + "learning_rate": 1.756989002895207e-05, + "loss": 0.1445, + "step": 2330 + }, + { + "epoch": 0.75, + "learning_rate": 1.7567619776405114e-05, + "loss": 0.1441, + "step": 2331 + }, + { + "epoch": 0.75, + "learning_rate": 1.7565348610716963e-05, + "loss": 0.1632, + "step": 2332 + }, + { + "epoch": 0.75, + "learning_rate": 1.756307653216166e-05, + "loss": 0.1555, + "step": 2333 + }, + { + "epoch": 0.75, + "learning_rate": 1.7560803541013365e-05, + "loss": 0.1427, + "step": 2334 + }, + { + "epoch": 0.75, + "learning_rate": 1.755852963754635e-05, + "loss": 0.1567, + "step": 2335 + }, + { + "epoch": 0.75, + "learning_rate": 1.755625482203499e-05, + "loss": 0.1388, + "step": 2336 + }, + { + "epoch": 0.75, + "learning_rate": 1.755397909475378e-05, + "loss": 0.1708, + "step": 2337 + }, + { + "epoch": 0.75, + "learning_rate": 1.755170245597731e-05, + "loss": 0.1547, + "step": 2338 + }, + { + "epoch": 0.75, + "learning_rate": 1.75494249059803e-05, + "loss": 0.145, + "step": 2339 + }, + { + "epoch": 0.75, + "learning_rate": 1.7547146445037554e-05, + "loss": 0.1413, + "step": 2340 + }, + { + "epoch": 0.75, + "learning_rate": 1.7544867073424016e-05, + "loss": 0.1409, + "step": 2341 + }, + { + "epoch": 0.75, + "learning_rate": 1.7542586791414716e-05, + "loss": 0.1408, + "step": 2342 + }, + { + "epoch": 0.75, + "learning_rate": 1.7540305599284805e-05, + "loss": 0.1505, + "step": 2343 + }, + { + "epoch": 0.75, + "learning_rate": 1.753802349730954e-05, + "loss": 0.1412, + "step": 2344 + }, + { + "epoch": 0.75, + "learning_rate": 1.753574048576429e-05, + "loss": 0.1556, + "step": 2345 + }, + { + "epoch": 0.75, + "learning_rate": 1.7533456564924535e-05, + "loss": 0.1525, + "step": 2346 + }, + { + "epoch": 0.75, + "learning_rate": 1.7531171735065857e-05, + "loss": 0.1562, + "step": 2347 + }, + { + "epoch": 0.76, + "learning_rate": 1.752888599646396e-05, + "loss": 0.1285, + "step": 2348 + }, + { + "epoch": 0.76, + "learning_rate": 1.7526599349394646e-05, + "loss": 0.1452, + "step": 2349 + }, + { + "epoch": 0.76, + "learning_rate": 1.7524311794133833e-05, + "loss": 0.1604, + "step": 2350 + }, + { + "epoch": 0.76, + "learning_rate": 1.752202333095755e-05, + "loss": 0.1586, + "step": 2351 + }, + { + "epoch": 0.76, + "learning_rate": 1.7519733960141927e-05, + "loss": 0.1402, + "step": 2352 + }, + { + "epoch": 0.76, + "learning_rate": 1.7517443681963214e-05, + "loss": 0.1499, + "step": 2353 + }, + { + "epoch": 0.76, + "learning_rate": 1.7515152496697765e-05, + "loss": 0.1436, + "step": 2354 + }, + { + "epoch": 0.76, + "learning_rate": 1.7512860404622042e-05, + "loss": 0.1359, + "step": 2355 + }, + { + "epoch": 0.76, + "learning_rate": 1.751056740601262e-05, + "loss": 0.1343, + "step": 2356 + }, + { + "epoch": 0.76, + "learning_rate": 1.7508273501146182e-05, + "loss": 0.1535, + "step": 2357 + }, + { + "epoch": 0.76, + "learning_rate": 1.750597869029952e-05, + "loss": 0.1681, + "step": 2358 + }, + { + "epoch": 0.76, + "learning_rate": 1.7503682973749538e-05, + "loss": 0.152, + "step": 2359 + }, + { + "epoch": 0.76, + "learning_rate": 1.750138635177324e-05, + "loss": 0.1424, + "step": 2360 + }, + { + "epoch": 0.76, + "learning_rate": 1.7499088824647757e-05, + "loss": 0.1453, + "step": 2361 + }, + { + "epoch": 0.76, + "learning_rate": 1.7496790392650306e-05, + "loss": 0.1352, + "step": 2362 + }, + { + "epoch": 0.76, + "learning_rate": 1.7494491056058235e-05, + "loss": 0.1573, + "step": 2363 + }, + { + "epoch": 0.76, + "learning_rate": 1.749219081514899e-05, + "loss": 0.1693, + "step": 2364 + }, + { + "epoch": 0.76, + "learning_rate": 1.7489889670200125e-05, + "loss": 0.1497, + "step": 2365 + }, + { + "epoch": 0.76, + "learning_rate": 1.748758762148931e-05, + "loss": 0.1586, + "step": 2366 + }, + { + "epoch": 0.76, + "learning_rate": 1.748528466929431e-05, + "loss": 0.137, + "step": 2367 + }, + { + "epoch": 0.76, + "learning_rate": 1.748298081389302e-05, + "loss": 0.1491, + "step": 2368 + }, + { + "epoch": 0.76, + "learning_rate": 1.7480676055563435e-05, + "loss": 0.1463, + "step": 2369 + }, + { + "epoch": 0.76, + "learning_rate": 1.7478370394583647e-05, + "loss": 0.1378, + "step": 2370 + }, + { + "epoch": 0.76, + "learning_rate": 1.747606383123187e-05, + "loss": 0.1564, + "step": 2371 + }, + { + "epoch": 0.76, + "learning_rate": 1.7473756365786426e-05, + "loss": 0.1499, + "step": 2372 + }, + { + "epoch": 0.76, + "learning_rate": 1.7471447998525744e-05, + "loss": 0.1561, + "step": 2373 + }, + { + "epoch": 0.76, + "learning_rate": 1.7469138729728356e-05, + "loss": 0.1455, + "step": 2374 + }, + { + "epoch": 0.76, + "learning_rate": 1.7466828559672917e-05, + "loss": 0.1448, + "step": 2375 + }, + { + "epoch": 0.76, + "learning_rate": 1.7464517488638174e-05, + "loss": 0.153, + "step": 2376 + }, + { + "epoch": 0.76, + "learning_rate": 1.7462205516902995e-05, + "loss": 0.1579, + "step": 2377 + }, + { + "epoch": 0.76, + "learning_rate": 1.745989264474635e-05, + "loss": 0.155, + "step": 2378 + }, + { + "epoch": 0.77, + "learning_rate": 1.7457578872447324e-05, + "loss": 0.14, + "step": 2379 + }, + { + "epoch": 0.77, + "learning_rate": 1.74552642002851e-05, + "loss": 0.1628, + "step": 2380 + }, + { + "epoch": 0.77, + "learning_rate": 1.745294862853898e-05, + "loss": 0.1547, + "step": 2381 + }, + { + "epoch": 0.77, + "learning_rate": 1.7450632157488378e-05, + "loss": 0.1401, + "step": 2382 + }, + { + "epoch": 0.77, + "learning_rate": 1.7448314787412794e-05, + "loss": 0.1613, + "step": 2383 + }, + { + "epoch": 0.77, + "learning_rate": 1.744599651859186e-05, + "loss": 0.14, + "step": 2384 + }, + { + "epoch": 0.77, + "learning_rate": 1.744367735130531e-05, + "loss": 0.1402, + "step": 2385 + }, + { + "epoch": 0.77, + "learning_rate": 1.7441357285832984e-05, + "loss": 0.1408, + "step": 2386 + }, + { + "epoch": 0.77, + "learning_rate": 1.7439036322454827e-05, + "loss": 0.1566, + "step": 2387 + }, + { + "epoch": 0.77, + "learning_rate": 1.74367144614509e-05, + "loss": 0.1767, + "step": 2388 + }, + { + "epoch": 0.77, + "learning_rate": 1.7434391703101364e-05, + "loss": 0.1646, + "step": 2389 + }, + { + "epoch": 0.77, + "learning_rate": 1.74320680476865e-05, + "loss": 0.1474, + "step": 2390 + }, + { + "epoch": 0.77, + "learning_rate": 1.7429743495486682e-05, + "loss": 0.1648, + "step": 2391 + }, + { + "epoch": 0.77, + "learning_rate": 1.742741804678241e-05, + "loss": 0.1571, + "step": 2392 + }, + { + "epoch": 0.77, + "learning_rate": 1.7425091701854274e-05, + "loss": 0.1526, + "step": 2393 + }, + { + "epoch": 0.77, + "learning_rate": 1.7422764460982987e-05, + "loss": 0.1565, + "step": 2394 + }, + { + "epoch": 0.77, + "learning_rate": 1.742043632444936e-05, + "loss": 0.1478, + "step": 2395 + }, + { + "epoch": 0.77, + "learning_rate": 1.7418107292534315e-05, + "loss": 0.154, + "step": 2396 + }, + { + "epoch": 0.77, + "learning_rate": 1.7415777365518885e-05, + "loss": 0.1406, + "step": 2397 + }, + { + "epoch": 0.77, + "learning_rate": 1.741344654368421e-05, + "loss": 0.1419, + "step": 2398 + }, + { + "epoch": 0.77, + "learning_rate": 1.741111482731153e-05, + "loss": 0.1433, + "step": 2399 + }, + { + "epoch": 0.77, + "learning_rate": 1.7408782216682208e-05, + "loss": 0.1512, + "step": 2400 + }, + { + "epoch": 0.77, + "learning_rate": 1.7406448712077706e-05, + "loss": 0.1313, + "step": 2401 + }, + { + "epoch": 0.77, + "learning_rate": 1.740411431377959e-05, + "loss": 0.1546, + "step": 2402 + }, + { + "epoch": 0.77, + "learning_rate": 1.7401779022069544e-05, + "loss": 0.1409, + "step": 2403 + }, + { + "epoch": 0.77, + "learning_rate": 1.7399442837229347e-05, + "loss": 0.1375, + "step": 2404 + }, + { + "epoch": 0.77, + "learning_rate": 1.73971057595409e-05, + "loss": 0.1532, + "step": 2405 + }, + { + "epoch": 0.77, + "learning_rate": 1.73947677892862e-05, + "loss": 0.1519, + "step": 2406 + }, + { + "epoch": 0.77, + "learning_rate": 1.739242892674736e-05, + "loss": 0.1591, + "step": 2407 + }, + { + "epoch": 0.77, + "learning_rate": 1.7390089172206594e-05, + "loss": 0.1629, + "step": 2408 + }, + { + "epoch": 0.77, + "learning_rate": 1.7387748525946227e-05, + "loss": 0.148, + "step": 2409 + }, + { + "epoch": 0.78, + "learning_rate": 1.7385406988248696e-05, + "loss": 0.1537, + "step": 2410 + }, + { + "epoch": 0.78, + "learning_rate": 1.7383064559396535e-05, + "loss": 0.1394, + "step": 2411 + }, + { + "epoch": 0.78, + "learning_rate": 1.7380721239672392e-05, + "loss": 0.1559, + "step": 2412 + }, + { + "epoch": 0.78, + "learning_rate": 1.7378377029359026e-05, + "loss": 0.163, + "step": 2413 + }, + { + "epoch": 0.78, + "learning_rate": 1.7376031928739296e-05, + "loss": 0.1497, + "step": 2414 + }, + { + "epoch": 0.78, + "learning_rate": 1.7373685938096174e-05, + "loss": 0.1497, + "step": 2415 + }, + { + "epoch": 0.78, + "learning_rate": 1.7371339057712736e-05, + "loss": 0.1482, + "step": 2416 + }, + { + "epoch": 0.78, + "learning_rate": 1.7368991287872165e-05, + "loss": 0.1531, + "step": 2417 + }, + { + "epoch": 0.78, + "learning_rate": 1.7366642628857757e-05, + "loss": 0.1322, + "step": 2418 + }, + { + "epoch": 0.78, + "learning_rate": 1.7364293080952904e-05, + "loss": 0.1629, + "step": 2419 + }, + { + "epoch": 0.78, + "learning_rate": 1.7361942644441124e-05, + "loss": 0.1845, + "step": 2420 + }, + { + "epoch": 0.78, + "learning_rate": 1.735959131960602e-05, + "loss": 0.1506, + "step": 2421 + }, + { + "epoch": 0.78, + "learning_rate": 1.735723910673132e-05, + "loss": 0.1579, + "step": 2422 + }, + { + "epoch": 0.78, + "learning_rate": 1.7354886006100843e-05, + "loss": 0.1404, + "step": 2423 + }, + { + "epoch": 0.78, + "learning_rate": 1.7352532017998537e-05, + "loss": 0.1426, + "step": 2424 + }, + { + "epoch": 0.78, + "learning_rate": 1.7350177142708432e-05, + "loss": 0.1678, + "step": 2425 + }, + { + "epoch": 0.78, + "learning_rate": 1.7347821380514682e-05, + "loss": 0.1422, + "step": 2426 + }, + { + "epoch": 0.78, + "learning_rate": 1.7345464731701548e-05, + "loss": 0.141, + "step": 2427 + }, + { + "epoch": 0.78, + "learning_rate": 1.7343107196553385e-05, + "loss": 0.148, + "step": 2428 + }, + { + "epoch": 0.78, + "learning_rate": 1.7340748775354667e-05, + "loss": 0.1513, + "step": 2429 + }, + { + "epoch": 0.78, + "learning_rate": 1.733838946838997e-05, + "loss": 0.1482, + "step": 2430 + }, + { + "epoch": 0.78, + "learning_rate": 1.7336029275943982e-05, + "loss": 0.1489, + "step": 2431 + }, + { + "epoch": 0.78, + "learning_rate": 1.7333668198301494e-05, + "loss": 0.1507, + "step": 2432 + }, + { + "epoch": 0.78, + "learning_rate": 1.7331306235747397e-05, + "loss": 0.1577, + "step": 2433 + }, + { + "epoch": 0.78, + "learning_rate": 1.73289433885667e-05, + "loss": 0.1562, + "step": 2434 + }, + { + "epoch": 0.78, + "learning_rate": 1.7326579657044515e-05, + "loss": 0.1406, + "step": 2435 + }, + { + "epoch": 0.78, + "learning_rate": 1.7324215041466054e-05, + "loss": 0.1449, + "step": 2436 + }, + { + "epoch": 0.78, + "learning_rate": 1.732184954211665e-05, + "loss": 0.1437, + "step": 2437 + }, + { + "epoch": 0.78, + "learning_rate": 1.731948315928173e-05, + "loss": 0.1419, + "step": 2438 + }, + { + "epoch": 0.78, + "learning_rate": 1.7317115893246833e-05, + "loss": 0.1396, + "step": 2439 + }, + { + "epoch": 0.78, + "learning_rate": 1.73147477442976e-05, + "loss": 0.1698, + "step": 2440 + }, + { + "epoch": 0.79, + "learning_rate": 1.7312378712719784e-05, + "loss": 0.1336, + "step": 2441 + }, + { + "epoch": 0.79, + "learning_rate": 1.7310008798799243e-05, + "loss": 0.1394, + "step": 2442 + }, + { + "epoch": 0.79, + "learning_rate": 1.7307638002821942e-05, + "loss": 0.1457, + "step": 2443 + }, + { + "epoch": 0.79, + "learning_rate": 1.7305266325073948e-05, + "loss": 0.1427, + "step": 2444 + }, + { + "epoch": 0.79, + "learning_rate": 1.730289376584144e-05, + "loss": 0.1369, + "step": 2445 + }, + { + "epoch": 0.79, + "learning_rate": 1.73005203254107e-05, + "loss": 0.1505, + "step": 2446 + }, + { + "epoch": 0.79, + "learning_rate": 1.7298146004068122e-05, + "loss": 0.1663, + "step": 2447 + }, + { + "epoch": 0.79, + "learning_rate": 1.7295770802100197e-05, + "loss": 0.146, + "step": 2448 + }, + { + "epoch": 0.79, + "learning_rate": 1.7293394719793524e-05, + "loss": 0.1469, + "step": 2449 + }, + { + "epoch": 0.79, + "learning_rate": 1.729101775743482e-05, + "loss": 0.1489, + "step": 2450 + }, + { + "epoch": 0.79, + "learning_rate": 1.7288639915310893e-05, + "loss": 0.1536, + "step": 2451 + }, + { + "epoch": 0.79, + "learning_rate": 1.7286261193708663e-05, + "loss": 0.1701, + "step": 2452 + }, + { + "epoch": 0.79, + "learning_rate": 1.7283881592915162e-05, + "loss": 0.1417, + "step": 2453 + }, + { + "epoch": 0.79, + "learning_rate": 1.7281501113217517e-05, + "loss": 0.1261, + "step": 2454 + }, + { + "epoch": 0.79, + "learning_rate": 1.727911975490297e-05, + "loss": 0.1533, + "step": 2455 + }, + { + "epoch": 0.79, + "learning_rate": 1.7276737518258865e-05, + "loss": 0.1642, + "step": 2456 + }, + { + "epoch": 0.79, + "learning_rate": 1.7274354403572652e-05, + "loss": 0.1583, + "step": 2457 + }, + { + "epoch": 0.79, + "learning_rate": 1.7271970411131888e-05, + "loss": 0.1694, + "step": 2458 + }, + { + "epoch": 0.79, + "learning_rate": 1.7269585541224238e-05, + "loss": 0.1513, + "step": 2459 + }, + { + "epoch": 0.79, + "learning_rate": 1.7267199794137468e-05, + "loss": 0.1372, + "step": 2460 + }, + { + "epoch": 0.79, + "learning_rate": 1.7264813170159453e-05, + "loss": 0.1435, + "step": 2461 + }, + { + "epoch": 0.79, + "learning_rate": 1.7262425669578177e-05, + "loss": 0.1516, + "step": 2462 + }, + { + "epoch": 0.79, + "learning_rate": 1.7260037292681717e-05, + "loss": 0.1372, + "step": 2463 + }, + { + "epoch": 0.79, + "learning_rate": 1.7257648039758273e-05, + "loss": 0.15, + "step": 2464 + }, + { + "epoch": 0.79, + "learning_rate": 1.7255257911096143e-05, + "loss": 0.1549, + "step": 2465 + }, + { + "epoch": 0.79, + "learning_rate": 1.725286690698372e-05, + "loss": 0.1402, + "step": 2466 + }, + { + "epoch": 0.79, + "learning_rate": 1.7250475027709523e-05, + "loss": 0.129, + "step": 2467 + }, + { + "epoch": 0.79, + "learning_rate": 1.7248082273562164e-05, + "loss": 0.1325, + "step": 2468 + }, + { + "epoch": 0.79, + "learning_rate": 1.724568864483036e-05, + "loss": 0.1414, + "step": 2469 + }, + { + "epoch": 0.79, + "learning_rate": 1.7243294141802936e-05, + "loss": 0.1542, + "step": 2470 + }, + { + "epoch": 0.79, + "learning_rate": 1.724089876476883e-05, + "loss": 0.1528, + "step": 2471 + }, + { + "epoch": 0.8, + "learning_rate": 1.723850251401707e-05, + "loss": 0.1451, + "step": 2472 + }, + { + "epoch": 0.8, + "learning_rate": 1.72361053898368e-05, + "loss": 0.1391, + "step": 2473 + }, + { + "epoch": 0.8, + "learning_rate": 1.723370739251727e-05, + "loss": 0.1604, + "step": 2474 + }, + { + "epoch": 0.8, + "learning_rate": 1.723130852234783e-05, + "loss": 0.1531, + "step": 2475 + }, + { + "epoch": 0.8, + "learning_rate": 1.722890877961794e-05, + "loss": 0.1402, + "step": 2476 + }, + { + "epoch": 0.8, + "learning_rate": 1.722650816461716e-05, + "loss": 0.1567, + "step": 2477 + }, + { + "epoch": 0.8, + "learning_rate": 1.7224106677635163e-05, + "loss": 0.1389, + "step": 2478 + }, + { + "epoch": 0.8, + "learning_rate": 1.722170431896172e-05, + "loss": 0.1515, + "step": 2479 + }, + { + "epoch": 0.8, + "learning_rate": 1.721930108888671e-05, + "loss": 0.1532, + "step": 2480 + }, + { + "epoch": 0.8, + "learning_rate": 1.7216896987700116e-05, + "loss": 0.1414, + "step": 2481 + }, + { + "epoch": 0.8, + "learning_rate": 1.7214492015692025e-05, + "loss": 0.1527, + "step": 2482 + }, + { + "epoch": 0.8, + "learning_rate": 1.721208617315264e-05, + "loss": 0.1485, + "step": 2483 + }, + { + "epoch": 0.8, + "learning_rate": 1.720967946037225e-05, + "loss": 0.181, + "step": 2484 + }, + { + "epoch": 0.8, + "learning_rate": 1.7207271877641263e-05, + "loss": 0.1586, + "step": 2485 + }, + { + "epoch": 0.8, + "learning_rate": 1.720486342525019e-05, + "loss": 0.1748, + "step": 2486 + }, + { + "epoch": 0.8, + "learning_rate": 1.7202454103489646e-05, + "loss": 0.1611, + "step": 2487 + }, + { + "epoch": 0.8, + "learning_rate": 1.7200043912650346e-05, + "loss": 0.1533, + "step": 2488 + }, + { + "epoch": 0.8, + "learning_rate": 1.7197632853023114e-05, + "loss": 0.1624, + "step": 2489 + }, + { + "epoch": 0.8, + "learning_rate": 1.7195220924898883e-05, + "loss": 0.1452, + "step": 2490 + }, + { + "epoch": 0.8, + "learning_rate": 1.7192808128568682e-05, + "loss": 0.1469, + "step": 2491 + }, + { + "epoch": 0.8, + "learning_rate": 1.719039446432365e-05, + "loss": 0.1376, + "step": 2492 + }, + { + "epoch": 0.8, + "learning_rate": 1.7187979932455032e-05, + "loss": 0.1393, + "step": 2493 + }, + { + "epoch": 0.8, + "learning_rate": 1.7185564533254178e-05, + "loss": 0.1298, + "step": 2494 + }, + { + "epoch": 0.8, + "learning_rate": 1.7183148267012533e-05, + "loss": 0.1399, + "step": 2495 + }, + { + "epoch": 0.8, + "learning_rate": 1.718073113402166e-05, + "loss": 0.1438, + "step": 2496 + }, + { + "epoch": 0.8, + "learning_rate": 1.717831313457322e-05, + "loss": 0.1517, + "step": 2497 + }, + { + "epoch": 0.8, + "learning_rate": 1.7175894268958974e-05, + "loss": 0.1399, + "step": 2498 + }, + { + "epoch": 0.8, + "learning_rate": 1.71734745374708e-05, + "loss": 0.1379, + "step": 2499 + }, + { + "epoch": 0.8, + "learning_rate": 1.7171053940400664e-05, + "loss": 0.1481, + "step": 2500 + }, + { + "epoch": 0.8, + "learning_rate": 1.716863247804066e-05, + "loss": 0.138, + "step": 2501 + }, + { + "epoch": 0.8, + "learning_rate": 1.7166210150682956e-05, + "loss": 0.1479, + "step": 2502 + }, + { + "epoch": 0.81, + "learning_rate": 1.716378695861985e-05, + "loss": 0.1333, + "step": 2503 + }, + { + "epoch": 0.81, + "learning_rate": 1.7161362902143734e-05, + "loss": 0.1442, + "step": 2504 + }, + { + "epoch": 0.81, + "learning_rate": 1.7158937981547096e-05, + "loss": 0.1663, + "step": 2505 + }, + { + "epoch": 0.81, + "learning_rate": 1.715651219712255e-05, + "loss": 0.1413, + "step": 2506 + }, + { + "epoch": 0.81, + "learning_rate": 1.7154085549162794e-05, + "loss": 0.1572, + "step": 2507 + }, + { + "epoch": 0.81, + "learning_rate": 1.7151658037960638e-05, + "loss": 0.1506, + "step": 2508 + }, + { + "epoch": 0.81, + "learning_rate": 1.7149229663809e-05, + "loss": 0.1607, + "step": 2509 + }, + { + "epoch": 0.81, + "learning_rate": 1.7146800427000893e-05, + "loss": 0.1497, + "step": 2510 + }, + { + "epoch": 0.81, + "learning_rate": 1.714437032782944e-05, + "loss": 0.1493, + "step": 2511 + }, + { + "epoch": 0.81, + "learning_rate": 1.7141939366587873e-05, + "loss": 0.1347, + "step": 2512 + }, + { + "epoch": 0.81, + "learning_rate": 1.7139507543569513e-05, + "loss": 0.1535, + "step": 2513 + }, + { + "epoch": 0.81, + "learning_rate": 1.71370748590678e-05, + "loss": 0.1538, + "step": 2514 + }, + { + "epoch": 0.81, + "learning_rate": 1.713464131337627e-05, + "loss": 0.1488, + "step": 2515 + }, + { + "epoch": 0.81, + "learning_rate": 1.7132206906788565e-05, + "loss": 0.1426, + "step": 2516 + }, + { + "epoch": 0.81, + "learning_rate": 1.712977163959843e-05, + "loss": 0.1375, + "step": 2517 + }, + { + "epoch": 0.81, + "learning_rate": 1.712733551209972e-05, + "loss": 0.1498, + "step": 2518 + }, + { + "epoch": 0.81, + "learning_rate": 1.7124898524586383e-05, + "loss": 0.1579, + "step": 2519 + }, + { + "epoch": 0.81, + "learning_rate": 1.712246067735248e-05, + "loss": 0.1459, + "step": 2520 + }, + { + "epoch": 0.81, + "learning_rate": 1.7120021970692168e-05, + "loss": 0.1462, + "step": 2521 + }, + { + "epoch": 0.81, + "learning_rate": 1.7117582404899714e-05, + "loss": 0.1362, + "step": 2522 + }, + { + "epoch": 0.81, + "learning_rate": 1.7115141980269482e-05, + "loss": 0.142, + "step": 2523 + }, + { + "epoch": 0.81, + "learning_rate": 1.7112700697095955e-05, + "loss": 0.1556, + "step": 2524 + }, + { + "epoch": 0.81, + "learning_rate": 1.7110258555673694e-05, + "loss": 0.1503, + "step": 2525 + }, + { + "epoch": 0.81, + "learning_rate": 1.7107815556297392e-05, + "loss": 0.1468, + "step": 2526 + }, + { + "epoch": 0.81, + "learning_rate": 1.710537169926182e-05, + "loss": 0.1436, + "step": 2527 + }, + { + "epoch": 0.81, + "learning_rate": 1.7102926984861877e-05, + "loss": 0.1389, + "step": 2528 + }, + { + "epoch": 0.81, + "learning_rate": 1.7100481413392537e-05, + "loss": 0.1724, + "step": 2529 + }, + { + "epoch": 0.81, + "learning_rate": 1.7098034985148907e-05, + "loss": 0.1512, + "step": 2530 + }, + { + "epoch": 0.81, + "learning_rate": 1.7095587700426178e-05, + "loss": 0.1386, + "step": 2531 + }, + { + "epoch": 0.81, + "learning_rate": 1.7093139559519645e-05, + "loss": 0.1434, + "step": 2532 + }, + { + "epoch": 0.81, + "learning_rate": 1.709069056272472e-05, + "loss": 0.1453, + "step": 2533 + }, + { + "epoch": 0.82, + "learning_rate": 1.7088240710336903e-05, + "loss": 0.1545, + "step": 2534 + }, + { + "epoch": 0.82, + "learning_rate": 1.7085790002651807e-05, + "loss": 0.1434, + "step": 2535 + }, + { + "epoch": 0.82, + "learning_rate": 1.7083338439965142e-05, + "loss": 0.1567, + "step": 2536 + }, + { + "epoch": 0.82, + "learning_rate": 1.7080886022572726e-05, + "loss": 0.1433, + "step": 2537 + }, + { + "epoch": 0.82, + "learning_rate": 1.707843275077048e-05, + "loss": 0.1293, + "step": 2538 + }, + { + "epoch": 0.82, + "learning_rate": 1.707597862485442e-05, + "loss": 0.1572, + "step": 2539 + }, + { + "epoch": 0.82, + "learning_rate": 1.7073523645120677e-05, + "loss": 0.1373, + "step": 2540 + }, + { + "epoch": 0.82, + "learning_rate": 1.7071067811865477e-05, + "loss": 0.1521, + "step": 2541 + }, + { + "epoch": 0.82, + "learning_rate": 1.706861112538515e-05, + "loss": 0.1423, + "step": 2542 + }, + { + "epoch": 0.82, + "learning_rate": 1.7066153585976137e-05, + "loss": 0.1468, + "step": 2543 + }, + { + "epoch": 0.82, + "learning_rate": 1.7063695193934963e-05, + "loss": 0.1448, + "step": 2544 + }, + { + "epoch": 0.82, + "learning_rate": 1.706123594955828e-05, + "loss": 0.1437, + "step": 2545 + }, + { + "epoch": 0.82, + "learning_rate": 1.7058775853142825e-05, + "loss": 0.1461, + "step": 2546 + }, + { + "epoch": 0.82, + "learning_rate": 1.7056314904985444e-05, + "loss": 0.1473, + "step": 2547 + }, + { + "epoch": 0.82, + "learning_rate": 1.705385310538309e-05, + "loss": 0.1331, + "step": 2548 + }, + { + "epoch": 0.82, + "learning_rate": 1.7051390454632805e-05, + "loss": 0.1407, + "step": 2549 + }, + { + "epoch": 0.82, + "learning_rate": 1.704892695303175e-05, + "loss": 0.1232, + "step": 2550 + }, + { + "epoch": 0.82, + "learning_rate": 1.704646260087718e-05, + "loss": 0.163, + "step": 2551 + }, + { + "epoch": 0.82, + "learning_rate": 1.7043997398466453e-05, + "loss": 0.1587, + "step": 2552 + }, + { + "epoch": 0.82, + "learning_rate": 1.7041531346097036e-05, + "loss": 0.1603, + "step": 2553 + }, + { + "epoch": 0.82, + "learning_rate": 1.7039064444066488e-05, + "loss": 0.1437, + "step": 2554 + }, + { + "epoch": 0.82, + "learning_rate": 1.7036596692672476e-05, + "loss": 0.1515, + "step": 2555 + }, + { + "epoch": 0.82, + "learning_rate": 1.703412809221277e-05, + "loss": 0.1447, + "step": 2556 + }, + { + "epoch": 0.82, + "learning_rate": 1.703165864298524e-05, + "loss": 0.1573, + "step": 2557 + }, + { + "epoch": 0.82, + "learning_rate": 1.7029188345287868e-05, + "loss": 0.1494, + "step": 2558 + }, + { + "epoch": 0.82, + "learning_rate": 1.7026717199418726e-05, + "loss": 0.143, + "step": 2559 + }, + { + "epoch": 0.82, + "learning_rate": 1.7024245205675986e-05, + "loss": 0.144, + "step": 2560 + }, + { + "epoch": 0.82, + "learning_rate": 1.702177236435794e-05, + "loss": 0.1533, + "step": 2561 + }, + { + "epoch": 0.82, + "learning_rate": 1.7019298675762966e-05, + "loss": 0.1423, + "step": 2562 + }, + { + "epoch": 0.82, + "learning_rate": 1.701682414018955e-05, + "loss": 0.1588, + "step": 2563 + }, + { + "epoch": 0.82, + "learning_rate": 1.701434875793628e-05, + "loss": 0.1403, + "step": 2564 + }, + { + "epoch": 0.83, + "learning_rate": 1.7011872529301848e-05, + "loss": 0.1361, + "step": 2565 + }, + { + "epoch": 0.83, + "learning_rate": 1.7009395454585046e-05, + "loss": 0.1539, + "step": 2566 + }, + { + "epoch": 0.83, + "learning_rate": 1.7006917534084768e-05, + "loss": 0.1649, + "step": 2567 + }, + { + "epoch": 0.83, + "learning_rate": 1.700443876810001e-05, + "loss": 0.1466, + "step": 2568 + }, + { + "epoch": 0.83, + "learning_rate": 1.700195915692987e-05, + "loss": 0.1517, + "step": 2569 + }, + { + "epoch": 0.83, + "learning_rate": 1.6999478700873552e-05, + "loss": 0.186, + "step": 2570 + }, + { + "epoch": 0.83, + "learning_rate": 1.6996997400230347e-05, + "loss": 0.14, + "step": 2571 + }, + { + "epoch": 0.83, + "learning_rate": 1.6994515255299676e-05, + "loss": 0.1519, + "step": 2572 + }, + { + "epoch": 0.83, + "learning_rate": 1.6992032266381034e-05, + "loss": 0.1497, + "step": 2573 + }, + { + "epoch": 0.83, + "learning_rate": 1.698954843377403e-05, + "loss": 0.1494, + "step": 2574 + }, + { + "epoch": 0.83, + "learning_rate": 1.698706375777838e-05, + "loss": 0.1389, + "step": 2575 + }, + { + "epoch": 0.83, + "learning_rate": 1.6984578238693888e-05, + "loss": 0.1544, + "step": 2576 + }, + { + "epoch": 0.83, + "learning_rate": 1.6982091876820474e-05, + "loss": 0.1672, + "step": 2577 + }, + { + "epoch": 0.83, + "learning_rate": 1.6979604672458142e-05, + "loss": 0.1612, + "step": 2578 + }, + { + "epoch": 0.83, + "learning_rate": 1.6977116625907025e-05, + "loss": 0.1515, + "step": 2579 + }, + { + "epoch": 0.83, + "learning_rate": 1.6974627737467327e-05, + "loss": 0.1431, + "step": 2580 + }, + { + "epoch": 0.83, + "learning_rate": 1.6972138007439375e-05, + "loss": 0.1456, + "step": 2581 + }, + { + "epoch": 0.83, + "learning_rate": 1.696964743612359e-05, + "loss": 0.1531, + "step": 2582 + }, + { + "epoch": 0.83, + "learning_rate": 1.696715602382049e-05, + "loss": 0.1367, + "step": 2583 + }, + { + "epoch": 0.83, + "learning_rate": 1.6964663770830705e-05, + "loss": 0.1418, + "step": 2584 + }, + { + "epoch": 0.83, + "learning_rate": 1.6962170677454962e-05, + "loss": 0.1443, + "step": 2585 + }, + { + "epoch": 0.83, + "learning_rate": 1.695967674399408e-05, + "loss": 0.1479, + "step": 2586 + }, + { + "epoch": 0.83, + "learning_rate": 1.6957181970748996e-05, + "loss": 0.1349, + "step": 2587 + }, + { + "epoch": 0.83, + "learning_rate": 1.695468635802074e-05, + "loss": 0.134, + "step": 2588 + }, + { + "epoch": 0.83, + "learning_rate": 1.6952189906110432e-05, + "loss": 0.1608, + "step": 2589 + }, + { + "epoch": 0.83, + "learning_rate": 1.694969261531932e-05, + "loss": 0.1504, + "step": 2590 + }, + { + "epoch": 0.83, + "learning_rate": 1.6947194485948724e-05, + "loss": 0.1413, + "step": 2591 + }, + { + "epoch": 0.83, + "learning_rate": 1.6944695518300087e-05, + "loss": 0.1519, + "step": 2592 + }, + { + "epoch": 0.83, + "learning_rate": 1.6942195712674946e-05, + "loss": 0.143, + "step": 2593 + }, + { + "epoch": 0.83, + "learning_rate": 1.6939695069374932e-05, + "loss": 0.1514, + "step": 2594 + }, + { + "epoch": 0.83, + "learning_rate": 1.693719358870179e-05, + "loss": 0.1448, + "step": 2595 + }, + { + "epoch": 0.84, + "learning_rate": 1.6934691270957354e-05, + "loss": 0.1384, + "step": 2596 + }, + { + "epoch": 0.84, + "learning_rate": 1.6932188116443565e-05, + "loss": 0.1732, + "step": 2597 + }, + { + "epoch": 0.84, + "learning_rate": 1.692968412546247e-05, + "loss": 0.1513, + "step": 2598 + }, + { + "epoch": 0.84, + "learning_rate": 1.6927179298316204e-05, + "loss": 0.1605, + "step": 2599 + }, + { + "epoch": 0.84, + "learning_rate": 1.6924673635307013e-05, + "loss": 0.1679, + "step": 2600 + }, + { + "epoch": 0.84, + "learning_rate": 1.692216713673724e-05, + "loss": 0.1614, + "step": 2601 + }, + { + "epoch": 0.84, + "learning_rate": 1.6919659802909332e-05, + "loss": 0.1492, + "step": 2602 + }, + { + "epoch": 0.84, + "learning_rate": 1.6917151634125833e-05, + "loss": 0.1545, + "step": 2603 + }, + { + "epoch": 0.84, + "learning_rate": 1.691464263068939e-05, + "loss": 0.1389, + "step": 2604 + }, + { + "epoch": 0.84, + "learning_rate": 1.691213279290275e-05, + "loss": 0.1438, + "step": 2605 + }, + { + "epoch": 0.84, + "learning_rate": 1.6909622121068755e-05, + "loss": 0.1364, + "step": 2606 + }, + { + "epoch": 0.84, + "learning_rate": 1.6907110615490362e-05, + "loss": 0.1667, + "step": 2607 + }, + { + "epoch": 0.84, + "learning_rate": 1.6904598276470617e-05, + "loss": 0.1426, + "step": 2608 + }, + { + "epoch": 0.84, + "learning_rate": 1.690208510431267e-05, + "loss": 0.1411, + "step": 2609 + }, + { + "epoch": 0.84, + "learning_rate": 1.689957109931977e-05, + "loss": 0.1395, + "step": 2610 + }, + { + "epoch": 0.84, + "learning_rate": 1.6897056261795266e-05, + "loss": 0.1401, + "step": 2611 + }, + { + "epoch": 0.84, + "learning_rate": 1.689454059204261e-05, + "loss": 0.1628, + "step": 2612 + }, + { + "epoch": 0.84, + "learning_rate": 1.6892024090365357e-05, + "loss": 0.1549, + "step": 2613 + }, + { + "epoch": 0.84, + "learning_rate": 1.6889506757067157e-05, + "loss": 0.1475, + "step": 2614 + }, + { + "epoch": 0.84, + "learning_rate": 1.6886988592451757e-05, + "loss": 0.1453, + "step": 2615 + }, + { + "epoch": 0.84, + "learning_rate": 1.6884469596823015e-05, + "loss": 0.1188, + "step": 2616 + }, + { + "epoch": 0.84, + "learning_rate": 1.688194977048488e-05, + "loss": 0.1396, + "step": 2617 + }, + { + "epoch": 0.84, + "learning_rate": 1.687942911374141e-05, + "loss": 0.1416, + "step": 2618 + }, + { + "epoch": 0.84, + "learning_rate": 1.6876907626896755e-05, + "loss": 0.1419, + "step": 2619 + }, + { + "epoch": 0.84, + "learning_rate": 1.687438531025517e-05, + "loss": 0.1572, + "step": 2620 + }, + { + "epoch": 0.84, + "learning_rate": 1.6871862164121002e-05, + "loss": 0.1584, + "step": 2621 + }, + { + "epoch": 0.84, + "learning_rate": 1.6869338188798713e-05, + "loss": 0.1473, + "step": 2622 + }, + { + "epoch": 0.84, + "learning_rate": 1.686681338459285e-05, + "loss": 0.1434, + "step": 2623 + }, + { + "epoch": 0.84, + "learning_rate": 1.6864287751808075e-05, + "loss": 0.1509, + "step": 2624 + }, + { + "epoch": 0.84, + "learning_rate": 1.6861761290749133e-05, + "loss": 0.1419, + "step": 2625 + }, + { + "epoch": 0.84, + "learning_rate": 1.6859234001720882e-05, + "loss": 0.1454, + "step": 2626 + }, + { + "epoch": 0.84, + "learning_rate": 1.6856705885028275e-05, + "loss": 0.1479, + "step": 2627 + }, + { + "epoch": 0.85, + "learning_rate": 1.6854176940976365e-05, + "loss": 0.1492, + "step": 2628 + }, + { + "epoch": 0.85, + "learning_rate": 1.68516471698703e-05, + "loss": 0.1509, + "step": 2629 + }, + { + "epoch": 0.85, + "learning_rate": 1.6849116572015347e-05, + "loss": 0.1525, + "step": 2630 + }, + { + "epoch": 0.85, + "learning_rate": 1.684658514771684e-05, + "loss": 0.1617, + "step": 2631 + }, + { + "epoch": 0.85, + "learning_rate": 1.684405289728025e-05, + "loss": 0.1595, + "step": 2632 + }, + { + "epoch": 0.85, + "learning_rate": 1.6841519821011112e-05, + "loss": 0.1571, + "step": 2633 + }, + { + "epoch": 0.85, + "learning_rate": 1.6838985919215094e-05, + "loss": 0.1404, + "step": 2634 + }, + { + "epoch": 0.85, + "learning_rate": 1.6836451192197932e-05, + "loss": 0.1521, + "step": 2635 + }, + { + "epoch": 0.85, + "learning_rate": 1.6833915640265485e-05, + "loss": 0.1499, + "step": 2636 + }, + { + "epoch": 0.85, + "learning_rate": 1.6831379263723707e-05, + "loss": 0.1355, + "step": 2637 + }, + { + "epoch": 0.85, + "learning_rate": 1.682884206287864e-05, + "loss": 0.1593, + "step": 2638 + }, + { + "epoch": 0.85, + "learning_rate": 1.682630403803644e-05, + "loss": 0.1431, + "step": 2639 + }, + { + "epoch": 0.85, + "learning_rate": 1.6823765189503348e-05, + "loss": 0.1606, + "step": 2640 + }, + { + "epoch": 0.85, + "learning_rate": 1.6821225517585723e-05, + "loss": 0.129, + "step": 2641 + }, + { + "epoch": 0.85, + "learning_rate": 1.6818685022590004e-05, + "loss": 0.1677, + "step": 2642 + }, + { + "epoch": 0.85, + "learning_rate": 1.681614370482274e-05, + "loss": 0.1466, + "step": 2643 + }, + { + "epoch": 0.85, + "learning_rate": 1.681360156459058e-05, + "loss": 0.1347, + "step": 2644 + }, + { + "epoch": 0.85, + "learning_rate": 1.6811058602200267e-05, + "loss": 0.1513, + "step": 2645 + }, + { + "epoch": 0.85, + "learning_rate": 1.6808514817958645e-05, + "loss": 0.1451, + "step": 2646 + }, + { + "epoch": 0.85, + "learning_rate": 1.680597021217266e-05, + "loss": 0.1477, + "step": 2647 + }, + { + "epoch": 0.85, + "learning_rate": 1.6803424785149352e-05, + "loss": 0.1495, + "step": 2648 + }, + { + "epoch": 0.85, + "learning_rate": 1.6800878537195864e-05, + "loss": 0.1469, + "step": 2649 + }, + { + "epoch": 0.85, + "learning_rate": 1.6798331468619443e-05, + "loss": 0.1522, + "step": 2650 + }, + { + "epoch": 0.85, + "learning_rate": 1.679578357972742e-05, + "loss": 0.1348, + "step": 2651 + }, + { + "epoch": 0.85, + "learning_rate": 1.679323487082724e-05, + "loss": 0.1499, + "step": 2652 + }, + { + "epoch": 0.85, + "learning_rate": 1.679068534222644e-05, + "loss": 0.1469, + "step": 2653 + }, + { + "epoch": 0.85, + "learning_rate": 1.6788134994232653e-05, + "loss": 0.143, + "step": 2654 + }, + { + "epoch": 0.85, + "learning_rate": 1.678558382715362e-05, + "loss": 0.1433, + "step": 2655 + }, + { + "epoch": 0.85, + "learning_rate": 1.6783031841297175e-05, + "loss": 0.1404, + "step": 2656 + }, + { + "epoch": 0.85, + "learning_rate": 1.6780479036971252e-05, + "loss": 0.1665, + "step": 2657 + }, + { + "epoch": 0.85, + "learning_rate": 1.6777925414483876e-05, + "loss": 0.1265, + "step": 2658 + }, + { + "epoch": 0.86, + "learning_rate": 1.6775370974143187e-05, + "loss": 0.1455, + "step": 2659 + }, + { + "epoch": 0.86, + "learning_rate": 1.6772815716257414e-05, + "loss": 0.1339, + "step": 2660 + }, + { + "epoch": 0.86, + "learning_rate": 1.677025964113488e-05, + "loss": 0.1568, + "step": 2661 + }, + { + "epoch": 0.86, + "learning_rate": 1.676770274908401e-05, + "loss": 0.1465, + "step": 2662 + }, + { + "epoch": 0.86, + "learning_rate": 1.6765145040413337e-05, + "loss": 0.1464, + "step": 2663 + }, + { + "epoch": 0.86, + "learning_rate": 1.6762586515431484e-05, + "loss": 0.1506, + "step": 2664 + }, + { + "epoch": 0.86, + "learning_rate": 1.676002717444717e-05, + "loss": 0.141, + "step": 2665 + }, + { + "epoch": 0.86, + "learning_rate": 1.6757467017769216e-05, + "loss": 0.1579, + "step": 2666 + }, + { + "epoch": 0.86, + "learning_rate": 1.6754906045706542e-05, + "loss": 0.1581, + "step": 2667 + }, + { + "epoch": 0.86, + "learning_rate": 1.6752344258568167e-05, + "loss": 0.1689, + "step": 2668 + }, + { + "epoch": 0.86, + "learning_rate": 1.674978165666321e-05, + "loss": 0.1622, + "step": 2669 + }, + { + "epoch": 0.86, + "learning_rate": 1.674721824030088e-05, + "loss": 0.1309, + "step": 2670 + }, + { + "epoch": 0.86, + "learning_rate": 1.6744654009790488e-05, + "loss": 0.1587, + "step": 2671 + }, + { + "epoch": 0.86, + "learning_rate": 1.6742088965441456e-05, + "loss": 0.1459, + "step": 2672 + }, + { + "epoch": 0.86, + "learning_rate": 1.6739523107563278e-05, + "loss": 0.1415, + "step": 2673 + }, + { + "epoch": 0.86, + "learning_rate": 1.6736956436465573e-05, + "loss": 0.1474, + "step": 2674 + }, + { + "epoch": 0.86, + "learning_rate": 1.6734388952458043e-05, + "loss": 0.1442, + "step": 2675 + }, + { + "epoch": 0.86, + "learning_rate": 1.6731820655850492e-05, + "loss": 0.1598, + "step": 2676 + }, + { + "epoch": 0.86, + "learning_rate": 1.672925154695282e-05, + "loss": 0.1513, + "step": 2677 + }, + { + "epoch": 0.86, + "learning_rate": 1.6726681626075026e-05, + "loss": 0.1563, + "step": 2678 + }, + { + "epoch": 0.86, + "learning_rate": 1.672411089352721e-05, + "loss": 0.1518, + "step": 2679 + }, + { + "epoch": 0.86, + "learning_rate": 1.6721539349619567e-05, + "loss": 0.1571, + "step": 2680 + }, + { + "epoch": 0.86, + "learning_rate": 1.6718966994662388e-05, + "loss": 0.1553, + "step": 2681 + }, + { + "epoch": 0.86, + "learning_rate": 1.671639382896607e-05, + "loss": 0.1523, + "step": 2682 + }, + { + "epoch": 0.86, + "learning_rate": 1.6713819852841094e-05, + "loss": 0.1681, + "step": 2683 + }, + { + "epoch": 0.86, + "learning_rate": 1.6711245066598054e-05, + "loss": 0.1545, + "step": 2684 + }, + { + "epoch": 0.86, + "learning_rate": 1.6708669470547632e-05, + "loss": 0.1408, + "step": 2685 + }, + { + "epoch": 0.86, + "learning_rate": 1.670609306500061e-05, + "loss": 0.1573, + "step": 2686 + }, + { + "epoch": 0.86, + "learning_rate": 1.670351585026787e-05, + "loss": 0.1609, + "step": 2687 + }, + { + "epoch": 0.86, + "learning_rate": 1.6700937826660384e-05, + "loss": 0.1536, + "step": 2688 + }, + { + "epoch": 0.86, + "learning_rate": 1.6698358994489235e-05, + "loss": 0.1345, + "step": 2689 + }, + { + "epoch": 0.87, + "learning_rate": 1.669577935406559e-05, + "loss": 0.1577, + "step": 2690 + }, + { + "epoch": 0.87, + "learning_rate": 1.6693198905700722e-05, + "loss": 0.1405, + "step": 2691 + }, + { + "epoch": 0.87, + "learning_rate": 1.6690617649706e-05, + "loss": 0.153, + "step": 2692 + }, + { + "epoch": 0.87, + "learning_rate": 1.6688035586392885e-05, + "loss": 0.1409, + "step": 2693 + }, + { + "epoch": 0.87, + "learning_rate": 1.6685452716072946e-05, + "loss": 0.1413, + "step": 2694 + }, + { + "epoch": 0.87, + "learning_rate": 1.668286903905784e-05, + "loss": 0.147, + "step": 2695 + }, + { + "epoch": 0.87, + "learning_rate": 1.668028455565932e-05, + "loss": 0.1544, + "step": 2696 + }, + { + "epoch": 0.87, + "learning_rate": 1.6677699266189247e-05, + "loss": 0.1521, + "step": 2697 + }, + { + "epoch": 0.87, + "learning_rate": 1.6675113170959568e-05, + "loss": 0.1552, + "step": 2698 + }, + { + "epoch": 0.87, + "learning_rate": 1.667252627028234e-05, + "loss": 0.1463, + "step": 2699 + }, + { + "epoch": 0.87, + "learning_rate": 1.6669938564469707e-05, + "loss": 0.1433, + "step": 2700 + }, + { + "epoch": 0.87, + "learning_rate": 1.6667350053833903e-05, + "loss": 0.1697, + "step": 2701 + }, + { + "epoch": 0.87, + "learning_rate": 1.666476073868728e-05, + "loss": 0.1546, + "step": 2702 + }, + { + "epoch": 0.87, + "learning_rate": 1.6662170619342267e-05, + "loss": 0.1278, + "step": 2703 + }, + { + "epoch": 0.87, + "learning_rate": 1.665957969611141e-05, + "loss": 0.1608, + "step": 2704 + }, + { + "epoch": 0.87, + "learning_rate": 1.665698796930733e-05, + "loss": 0.1492, + "step": 2705 + }, + { + "epoch": 0.87, + "learning_rate": 1.6654395439242762e-05, + "loss": 0.1338, + "step": 2706 + }, + { + "epoch": 0.87, + "learning_rate": 1.665180210623053e-05, + "loss": 0.1593, + "step": 2707 + }, + { + "epoch": 0.87, + "learning_rate": 1.6649207970583557e-05, + "loss": 0.1592, + "step": 2708 + }, + { + "epoch": 0.87, + "learning_rate": 1.664661303261486e-05, + "loss": 0.1692, + "step": 2709 + }, + { + "epoch": 0.87, + "learning_rate": 1.664401729263756e-05, + "loss": 0.1416, + "step": 2710 + }, + { + "epoch": 0.87, + "learning_rate": 1.6641420750964863e-05, + "loss": 0.1324, + "step": 2711 + }, + { + "epoch": 0.87, + "learning_rate": 1.6638823407910085e-05, + "loss": 0.1613, + "step": 2712 + }, + { + "epoch": 0.87, + "learning_rate": 1.6636225263786633e-05, + "loss": 0.1568, + "step": 2713 + }, + { + "epoch": 0.87, + "learning_rate": 1.6633626318908003e-05, + "loss": 0.1263, + "step": 2714 + }, + { + "epoch": 0.87, + "learning_rate": 1.6631026573587803e-05, + "loss": 0.1355, + "step": 2715 + }, + { + "epoch": 0.87, + "learning_rate": 1.662842602813972e-05, + "loss": 0.1352, + "step": 2716 + }, + { + "epoch": 0.87, + "learning_rate": 1.6625824682877557e-05, + "loss": 0.157, + "step": 2717 + }, + { + "epoch": 0.87, + "learning_rate": 1.66232225381152e-05, + "loss": 0.1574, + "step": 2718 + }, + { + "epoch": 0.87, + "learning_rate": 1.6620619594166633e-05, + "loss": 0.1771, + "step": 2719 + }, + { + "epoch": 0.87, + "learning_rate": 1.6618015851345938e-05, + "loss": 0.1659, + "step": 2720 + }, + { + "epoch": 0.88, + "learning_rate": 1.6615411309967293e-05, + "loss": 0.1382, + "step": 2721 + }, + { + "epoch": 0.88, + "learning_rate": 1.6612805970344978e-05, + "loss": 0.1484, + "step": 2722 + }, + { + "epoch": 0.88, + "learning_rate": 1.6610199832793357e-05, + "loss": 0.1414, + "step": 2723 + }, + { + "epoch": 0.88, + "learning_rate": 1.6607592897626904e-05, + "loss": 0.1386, + "step": 2724 + }, + { + "epoch": 0.88, + "learning_rate": 1.6604985165160183e-05, + "loss": 0.1477, + "step": 2725 + }, + { + "epoch": 0.88, + "learning_rate": 1.6602376635707852e-05, + "loss": 0.1494, + "step": 2726 + }, + { + "epoch": 0.88, + "learning_rate": 1.6599767309584663e-05, + "loss": 0.1362, + "step": 2727 + }, + { + "epoch": 0.88, + "learning_rate": 1.6597157187105475e-05, + "loss": 0.1491, + "step": 2728 + }, + { + "epoch": 0.88, + "learning_rate": 1.659454626858523e-05, + "loss": 0.1388, + "step": 2729 + }, + { + "epoch": 0.88, + "learning_rate": 1.659193455433898e-05, + "loss": 0.1715, + "step": 2730 + }, + { + "epoch": 0.88, + "learning_rate": 1.6589322044681863e-05, + "loss": 0.1466, + "step": 2731 + }, + { + "epoch": 0.88, + "learning_rate": 1.658670873992911e-05, + "loss": 0.1497, + "step": 2732 + }, + { + "epoch": 0.88, + "learning_rate": 1.658409464039606e-05, + "loss": 0.1411, + "step": 2733 + }, + { + "epoch": 0.88, + "learning_rate": 1.6581479746398145e-05, + "loss": 0.1542, + "step": 2734 + }, + { + "epoch": 0.88, + "learning_rate": 1.6578864058250876e-05, + "loss": 0.1319, + "step": 2735 + }, + { + "epoch": 0.88, + "learning_rate": 1.657624757626988e-05, + "loss": 0.1508, + "step": 2736 + }, + { + "epoch": 0.88, + "learning_rate": 1.657363030077088e-05, + "loss": 0.1343, + "step": 2737 + }, + { + "epoch": 0.88, + "learning_rate": 1.6571012232069678e-05, + "loss": 0.1573, + "step": 2738 + }, + { + "epoch": 0.88, + "learning_rate": 1.6568393370482182e-05, + "loss": 0.1526, + "step": 2739 + }, + { + "epoch": 0.88, + "learning_rate": 1.65657737163244e-05, + "loss": 0.1317, + "step": 2740 + }, + { + "epoch": 0.88, + "learning_rate": 1.6563153269912428e-05, + "loss": 0.1503, + "step": 2741 + }, + { + "epoch": 0.88, + "learning_rate": 1.6560532031562456e-05, + "loss": 0.1635, + "step": 2742 + }, + { + "epoch": 0.88, + "learning_rate": 1.6557910001590786e-05, + "loss": 0.171, + "step": 2743 + }, + { + "epoch": 0.88, + "learning_rate": 1.6555287180313787e-05, + "loss": 0.1376, + "step": 2744 + }, + { + "epoch": 0.88, + "learning_rate": 1.6552663568047952e-05, + "loss": 0.1367, + "step": 2745 + }, + { + "epoch": 0.88, + "learning_rate": 1.655003916510985e-05, + "loss": 0.1571, + "step": 2746 + }, + { + "epoch": 0.88, + "learning_rate": 1.6547413971816163e-05, + "loss": 0.1525, + "step": 2747 + }, + { + "epoch": 0.88, + "learning_rate": 1.6544787988483647e-05, + "loss": 0.1476, + "step": 2748 + }, + { + "epoch": 0.88, + "learning_rate": 1.6542161215429168e-05, + "loss": 0.1425, + "step": 2749 + }, + { + "epoch": 0.88, + "learning_rate": 1.6539533652969683e-05, + "loss": 0.1463, + "step": 2750 + }, + { + "epoch": 0.88, + "learning_rate": 1.6536905301422246e-05, + "loss": 0.1434, + "step": 2751 + }, + { + "epoch": 0.89, + "learning_rate": 1.6534276161104003e-05, + "loss": 0.1495, + "step": 2752 + }, + { + "epoch": 0.89, + "learning_rate": 1.65316462323322e-05, + "loss": 0.1503, + "step": 2753 + }, + { + "epoch": 0.89, + "learning_rate": 1.6529015515424178e-05, + "loss": 0.178, + "step": 2754 + }, + { + "epoch": 0.89, + "learning_rate": 1.6526384010697363e-05, + "loss": 0.1361, + "step": 2755 + }, + { + "epoch": 0.89, + "learning_rate": 1.652375171846929e-05, + "loss": 0.1413, + "step": 2756 + }, + { + "epoch": 0.89, + "learning_rate": 1.652111863905758e-05, + "loss": 0.169, + "step": 2757 + }, + { + "epoch": 0.89, + "learning_rate": 1.651848477277995e-05, + "loss": 0.1294, + "step": 2758 + }, + { + "epoch": 0.89, + "learning_rate": 1.6515850119954213e-05, + "loss": 0.1654, + "step": 2759 + }, + { + "epoch": 0.89, + "learning_rate": 1.651321468089828e-05, + "loss": 0.146, + "step": 2760 + }, + { + "epoch": 0.89, + "learning_rate": 1.651057845593016e-05, + "loss": 0.1642, + "step": 2761 + }, + { + "epoch": 0.89, + "learning_rate": 1.6507941445367935e-05, + "loss": 0.1536, + "step": 2762 + }, + { + "epoch": 0.89, + "learning_rate": 1.6505303649529815e-05, + "loss": 0.1524, + "step": 2763 + }, + { + "epoch": 0.89, + "learning_rate": 1.6502665068734082e-05, + "loss": 0.1484, + "step": 2764 + }, + { + "epoch": 0.89, + "learning_rate": 1.6500025703299113e-05, + "loss": 0.1417, + "step": 2765 + }, + { + "epoch": 0.89, + "learning_rate": 1.6497385553543388e-05, + "loss": 0.1436, + "step": 2766 + }, + { + "epoch": 0.89, + "learning_rate": 1.649474461978548e-05, + "loss": 0.1507, + "step": 2767 + }, + { + "epoch": 0.89, + "learning_rate": 1.6492102902344056e-05, + "loss": 0.1436, + "step": 2768 + }, + { + "epoch": 0.89, + "learning_rate": 1.6489460401537875e-05, + "loss": 0.1364, + "step": 2769 + }, + { + "epoch": 0.89, + "learning_rate": 1.6486817117685795e-05, + "loss": 0.1463, + "step": 2770 + }, + { + "epoch": 0.89, + "learning_rate": 1.6484173051106763e-05, + "loss": 0.1476, + "step": 2771 + }, + { + "epoch": 0.89, + "learning_rate": 1.6481528202119826e-05, + "loss": 0.1499, + "step": 2772 + }, + { + "epoch": 0.89, + "learning_rate": 1.647888257104412e-05, + "loss": 0.1476, + "step": 2773 + }, + { + "epoch": 0.89, + "learning_rate": 1.6476236158198878e-05, + "loss": 0.1311, + "step": 2774 + }, + { + "epoch": 0.89, + "learning_rate": 1.647358896390343e-05, + "loss": 0.1333, + "step": 2775 + }, + { + "epoch": 0.89, + "learning_rate": 1.6470940988477195e-05, + "loss": 0.1429, + "step": 2776 + }, + { + "epoch": 0.89, + "learning_rate": 1.646829223223969e-05, + "loss": 0.1335, + "step": 2777 + }, + { + "epoch": 0.89, + "learning_rate": 1.646564269551053e-05, + "loss": 0.1537, + "step": 2778 + }, + { + "epoch": 0.89, + "learning_rate": 1.646299237860941e-05, + "loss": 0.1452, + "step": 2779 + }, + { + "epoch": 0.89, + "learning_rate": 1.6460341281856137e-05, + "loss": 0.1312, + "step": 2780 + }, + { + "epoch": 0.89, + "learning_rate": 1.64576894055706e-05, + "loss": 0.1506, + "step": 2781 + }, + { + "epoch": 0.89, + "learning_rate": 1.645503675007278e-05, + "loss": 0.1429, + "step": 2782 + }, + { + "epoch": 0.9, + "learning_rate": 1.6452383315682768e-05, + "loss": 0.1369, + "step": 2783 + }, + { + "epoch": 0.9, + "learning_rate": 1.6449729102720735e-05, + "loss": 0.1278, + "step": 2784 + }, + { + "epoch": 0.9, + "learning_rate": 1.6447074111506944e-05, + "loss": 0.1447, + "step": 2785 + }, + { + "epoch": 0.9, + "learning_rate": 1.6444418342361766e-05, + "loss": 0.1444, + "step": 2786 + }, + { + "epoch": 0.9, + "learning_rate": 1.6441761795605656e-05, + "loss": 0.1398, + "step": 2787 + }, + { + "epoch": 0.9, + "learning_rate": 1.6439104471559157e-05, + "loss": 0.1347, + "step": 2788 + }, + { + "epoch": 0.9, + "learning_rate": 1.643644637054292e-05, + "loss": 0.1411, + "step": 2789 + }, + { + "epoch": 0.9, + "learning_rate": 1.643378749287768e-05, + "loss": 0.1471, + "step": 2790 + }, + { + "epoch": 0.9, + "learning_rate": 1.643112783888427e-05, + "loss": 0.1532, + "step": 2791 + }, + { + "epoch": 0.9, + "learning_rate": 1.6428467408883614e-05, + "loss": 0.1592, + "step": 2792 + }, + { + "epoch": 0.9, + "learning_rate": 1.6425806203196734e-05, + "loss": 0.1442, + "step": 2793 + }, + { + "epoch": 0.9, + "learning_rate": 1.6423144222144737e-05, + "loss": 0.1354, + "step": 2794 + }, + { + "epoch": 0.9, + "learning_rate": 1.642048146604883e-05, + "loss": 0.1322, + "step": 2795 + }, + { + "epoch": 0.9, + "learning_rate": 1.6417817935230318e-05, + "loss": 0.1479, + "step": 2796 + }, + { + "epoch": 0.9, + "learning_rate": 1.6415153630010587e-05, + "loss": 0.1579, + "step": 2797 + }, + { + "epoch": 0.9, + "learning_rate": 1.641248855071113e-05, + "loss": 0.1348, + "step": 2798 + }, + { + "epoch": 0.9, + "learning_rate": 1.6409822697653517e-05, + "loss": 0.1409, + "step": 2799 + }, + { + "epoch": 0.9, + "learning_rate": 1.6407156071159432e-05, + "loss": 0.1401, + "step": 2800 + }, + { + "epoch": 0.9, + "learning_rate": 1.6404488671550637e-05, + "loss": 0.132, + "step": 2801 + }, + { + "epoch": 0.9, + "learning_rate": 1.6401820499148988e-05, + "loss": 0.1482, + "step": 2802 + }, + { + "epoch": 0.9, + "learning_rate": 1.6399151554276446e-05, + "loss": 0.1494, + "step": 2803 + }, + { + "epoch": 0.9, + "learning_rate": 1.639648183725505e-05, + "loss": 0.1455, + "step": 2804 + }, + { + "epoch": 0.9, + "learning_rate": 1.639381134840694e-05, + "loss": 0.1419, + "step": 2805 + }, + { + "epoch": 0.9, + "learning_rate": 1.6391140088054353e-05, + "loss": 0.1434, + "step": 2806 + }, + { + "epoch": 0.9, + "learning_rate": 1.638846805651961e-05, + "loss": 0.1263, + "step": 2807 + }, + { + "epoch": 0.9, + "learning_rate": 1.6385795254125142e-05, + "loss": 0.1558, + "step": 2808 + }, + { + "epoch": 0.9, + "learning_rate": 1.6383121681193444e-05, + "loss": 0.1385, + "step": 2809 + }, + { + "epoch": 0.9, + "learning_rate": 1.6380447338047124e-05, + "loss": 0.1485, + "step": 2810 + }, + { + "epoch": 0.9, + "learning_rate": 1.637777222500889e-05, + "loss": 0.1435, + "step": 2811 + }, + { + "epoch": 0.9, + "learning_rate": 1.6375096342401523e-05, + "loss": 0.1445, + "step": 2812 + }, + { + "epoch": 0.9, + "learning_rate": 1.6372419690547908e-05, + "loss": 0.1473, + "step": 2813 + }, + { + "epoch": 0.91, + "learning_rate": 1.6369742269771024e-05, + "loss": 0.1358, + "step": 2814 + }, + { + "epoch": 0.91, + "learning_rate": 1.6367064080393936e-05, + "loss": 0.1359, + "step": 2815 + }, + { + "epoch": 0.91, + "learning_rate": 1.6364385122739812e-05, + "loss": 0.1659, + "step": 2816 + }, + { + "epoch": 0.91, + "learning_rate": 1.6361705397131902e-05, + "loss": 0.169, + "step": 2817 + }, + { + "epoch": 0.91, + "learning_rate": 1.6359024903893553e-05, + "loss": 0.1434, + "step": 2818 + }, + { + "epoch": 0.91, + "learning_rate": 1.6356343643348204e-05, + "loss": 0.1456, + "step": 2819 + }, + { + "epoch": 0.91, + "learning_rate": 1.6353661615819386e-05, + "loss": 0.1562, + "step": 2820 + }, + { + "epoch": 0.91, + "learning_rate": 1.635097882163073e-05, + "loss": 0.1561, + "step": 2821 + }, + { + "epoch": 0.91, + "learning_rate": 1.6348295261105946e-05, + "loss": 0.1613, + "step": 2822 + }, + { + "epoch": 0.91, + "learning_rate": 1.634561093456885e-05, + "loss": 0.1318, + "step": 2823 + }, + { + "epoch": 0.91, + "learning_rate": 1.6342925842343343e-05, + "loss": 0.1464, + "step": 2824 + }, + { + "epoch": 0.91, + "learning_rate": 1.634023998475342e-05, + "loss": 0.1502, + "step": 2825 + }, + { + "epoch": 0.91, + "learning_rate": 1.6337553362123165e-05, + "loss": 0.1486, + "step": 2826 + }, + { + "epoch": 0.91, + "learning_rate": 1.6334865974776757e-05, + "loss": 0.1425, + "step": 2827 + }, + { + "epoch": 0.91, + "learning_rate": 1.6332177823038472e-05, + "loss": 0.1569, + "step": 2828 + }, + { + "epoch": 0.91, + "learning_rate": 1.632948890723267e-05, + "loss": 0.137, + "step": 2829 + }, + { + "epoch": 0.91, + "learning_rate": 1.6326799227683806e-05, + "loss": 0.1385, + "step": 2830 + }, + { + "epoch": 0.91, + "learning_rate": 1.6324108784716432e-05, + "loss": 0.1409, + "step": 2831 + }, + { + "epoch": 0.91, + "learning_rate": 1.632141757865519e-05, + "loss": 0.1597, + "step": 2832 + }, + { + "epoch": 0.91, + "learning_rate": 1.6318725609824804e-05, + "loss": 0.1465, + "step": 2833 + }, + { + "epoch": 0.91, + "learning_rate": 1.6316032878550107e-05, + "loss": 0.1447, + "step": 2834 + }, + { + "epoch": 0.91, + "learning_rate": 1.631333938515601e-05, + "loss": 0.1482, + "step": 2835 + }, + { + "epoch": 0.91, + "learning_rate": 1.6310645129967525e-05, + "loss": 0.1286, + "step": 2836 + }, + { + "epoch": 0.91, + "learning_rate": 1.630795011330975e-05, + "loss": 0.1404, + "step": 2837 + }, + { + "epoch": 0.91, + "learning_rate": 1.630525433550788e-05, + "loss": 0.1653, + "step": 2838 + }, + { + "epoch": 0.91, + "learning_rate": 1.6302557796887192e-05, + "loss": 0.1556, + "step": 2839 + }, + { + "epoch": 0.91, + "learning_rate": 1.629986049777307e-05, + "loss": 0.1373, + "step": 2840 + }, + { + "epoch": 0.91, + "learning_rate": 1.6297162438490976e-05, + "loss": 0.1496, + "step": 2841 + }, + { + "epoch": 0.91, + "learning_rate": 1.6294463619366473e-05, + "loss": 0.1382, + "step": 2842 + }, + { + "epoch": 0.91, + "learning_rate": 1.6291764040725212e-05, + "loss": 0.1469, + "step": 2843 + }, + { + "epoch": 0.91, + "learning_rate": 1.6289063702892932e-05, + "loss": 0.1373, + "step": 2844 + }, + { + "epoch": 0.92, + "learning_rate": 1.628636260619547e-05, + "loss": 0.1501, + "step": 2845 + }, + { + "epoch": 0.92, + "learning_rate": 1.6283660750958752e-05, + "loss": 0.1362, + "step": 2846 + }, + { + "epoch": 0.92, + "learning_rate": 1.6280958137508793e-05, + "loss": 0.1468, + "step": 2847 + }, + { + "epoch": 0.92, + "learning_rate": 1.6278254766171703e-05, + "loss": 0.1395, + "step": 2848 + }, + { + "epoch": 0.92, + "learning_rate": 1.6275550637273685e-05, + "loss": 0.1382, + "step": 2849 + }, + { + "epoch": 0.92, + "learning_rate": 1.6272845751141026e-05, + "loss": 0.1493, + "step": 2850 + }, + { + "epoch": 0.92, + "learning_rate": 1.6270140108100115e-05, + "loss": 0.1381, + "step": 2851 + }, + { + "epoch": 0.92, + "learning_rate": 1.626743370847742e-05, + "loss": 0.1395, + "step": 2852 + }, + { + "epoch": 0.92, + "learning_rate": 1.626472655259951e-05, + "loss": 0.1359, + "step": 2853 + }, + { + "epoch": 0.92, + "learning_rate": 1.6262018640793042e-05, + "loss": 0.1471, + "step": 2854 + }, + { + "epoch": 0.92, + "learning_rate": 1.6259309973384764e-05, + "loss": 0.1515, + "step": 2855 + }, + { + "epoch": 0.92, + "learning_rate": 1.6256600550701513e-05, + "loss": 0.1445, + "step": 2856 + }, + { + "epoch": 0.92, + "learning_rate": 1.6253890373070223e-05, + "loss": 0.1597, + "step": 2857 + }, + { + "epoch": 0.92, + "learning_rate": 1.6251179440817915e-05, + "loss": 0.1485, + "step": 2858 + }, + { + "epoch": 0.92, + "learning_rate": 1.62484677542717e-05, + "loss": 0.1502, + "step": 2859 + }, + { + "epoch": 0.92, + "learning_rate": 1.624575531375878e-05, + "loss": 0.1365, + "step": 2860 + }, + { + "epoch": 0.92, + "learning_rate": 1.6243042119606455e-05, + "loss": 0.1568, + "step": 2861 + }, + { + "epoch": 0.92, + "learning_rate": 1.6240328172142104e-05, + "loss": 0.1413, + "step": 2862 + }, + { + "epoch": 0.92, + "learning_rate": 1.623761347169321e-05, + "loss": 0.1517, + "step": 2863 + }, + { + "epoch": 0.92, + "learning_rate": 1.6234898018587336e-05, + "loss": 0.1498, + "step": 2864 + }, + { + "epoch": 0.92, + "learning_rate": 1.6232181813152143e-05, + "loss": 0.1439, + "step": 2865 + }, + { + "epoch": 0.92, + "learning_rate": 1.6229464855715376e-05, + "loss": 0.1399, + "step": 2866 + }, + { + "epoch": 0.92, + "learning_rate": 1.6226747146604876e-05, + "loss": 0.1355, + "step": 2867 + }, + { + "epoch": 0.92, + "learning_rate": 1.622402868614858e-05, + "loss": 0.1424, + "step": 2868 + }, + { + "epoch": 0.92, + "learning_rate": 1.6221309474674497e-05, + "loss": 0.1543, + "step": 2869 + }, + { + "epoch": 0.92, + "learning_rate": 1.621858951251075e-05, + "loss": 0.1415, + "step": 2870 + }, + { + "epoch": 0.92, + "learning_rate": 1.621586879998553e-05, + "loss": 0.1501, + "step": 2871 + }, + { + "epoch": 0.92, + "learning_rate": 1.621314733742714e-05, + "loss": 0.1336, + "step": 2872 + }, + { + "epoch": 0.92, + "learning_rate": 1.6210425125163956e-05, + "loss": 0.1354, + "step": 2873 + }, + { + "epoch": 0.92, + "learning_rate": 1.620770216352446e-05, + "loss": 0.1504, + "step": 2874 + }, + { + "epoch": 0.92, + "learning_rate": 1.6204978452837208e-05, + "loss": 0.1474, + "step": 2875 + }, + { + "epoch": 0.93, + "learning_rate": 1.6202253993430853e-05, + "loss": 0.1521, + "step": 2876 + }, + { + "epoch": 0.93, + "learning_rate": 1.619952878563415e-05, + "loss": 0.1629, + "step": 2877 + }, + { + "epoch": 0.93, + "learning_rate": 1.6196802829775928e-05, + "loss": 0.1433, + "step": 2878 + }, + { + "epoch": 0.93, + "learning_rate": 1.619407612618511e-05, + "loss": 0.1399, + "step": 2879 + }, + { + "epoch": 0.93, + "learning_rate": 1.6191348675190718e-05, + "loss": 0.1537, + "step": 2880 + }, + { + "epoch": 0.93, + "learning_rate": 1.6188620477121852e-05, + "loss": 0.1524, + "step": 2881 + }, + { + "epoch": 0.93, + "learning_rate": 1.618589153230771e-05, + "loss": 0.1372, + "step": 2882 + }, + { + "epoch": 0.93, + "learning_rate": 1.6183161841077582e-05, + "loss": 0.1504, + "step": 2883 + }, + { + "epoch": 0.93, + "learning_rate": 1.6180431403760836e-05, + "loss": 0.1441, + "step": 2884 + }, + { + "epoch": 0.93, + "learning_rate": 1.6177700220686946e-05, + "loss": 0.1523, + "step": 2885 + }, + { + "epoch": 0.93, + "learning_rate": 1.6174968292185465e-05, + "loss": 0.1481, + "step": 2886 + }, + { + "epoch": 0.93, + "learning_rate": 1.6172235618586037e-05, + "loss": 0.1444, + "step": 2887 + }, + { + "epoch": 0.93, + "learning_rate": 1.6169502200218403e-05, + "loss": 0.1351, + "step": 2888 + }, + { + "epoch": 0.93, + "learning_rate": 1.6166768037412383e-05, + "loss": 0.1399, + "step": 2889 + }, + { + "epoch": 0.93, + "learning_rate": 1.6164033130497894e-05, + "loss": 0.139, + "step": 2890 + }, + { + "epoch": 0.93, + "learning_rate": 1.6161297479804946e-05, + "loss": 0.1456, + "step": 2891 + }, + { + "epoch": 0.93, + "learning_rate": 1.615856108566363e-05, + "loss": 0.1408, + "step": 2892 + }, + { + "epoch": 0.93, + "learning_rate": 1.6155823948404138e-05, + "loss": 0.1615, + "step": 2893 + }, + { + "epoch": 0.93, + "learning_rate": 1.6153086068356732e-05, + "loss": 0.1548, + "step": 2894 + }, + { + "epoch": 0.93, + "learning_rate": 1.6150347445851786e-05, + "loss": 0.1386, + "step": 2895 + }, + { + "epoch": 0.93, + "learning_rate": 1.6147608081219753e-05, + "loss": 0.1453, + "step": 2896 + }, + { + "epoch": 0.93, + "learning_rate": 1.6144867974791176e-05, + "loss": 0.1431, + "step": 2897 + }, + { + "epoch": 0.93, + "learning_rate": 1.6142127126896682e-05, + "loss": 0.1592, + "step": 2898 + }, + { + "epoch": 0.93, + "learning_rate": 1.6139385537866998e-05, + "loss": 0.1435, + "step": 2899 + }, + { + "epoch": 0.93, + "learning_rate": 1.6136643208032937e-05, + "loss": 0.1496, + "step": 2900 + }, + { + "epoch": 0.93, + "learning_rate": 1.6133900137725397e-05, + "loss": 0.1458, + "step": 2901 + }, + { + "epoch": 0.93, + "learning_rate": 1.6131156327275372e-05, + "loss": 0.1478, + "step": 2902 + }, + { + "epoch": 0.93, + "learning_rate": 1.612841177701394e-05, + "loss": 0.1574, + "step": 2903 + }, + { + "epoch": 0.93, + "learning_rate": 1.6125666487272268e-05, + "loss": 0.1413, + "step": 2904 + }, + { + "epoch": 0.93, + "learning_rate": 1.612292045838162e-05, + "loss": 0.1519, + "step": 2905 + }, + { + "epoch": 0.93, + "learning_rate": 1.612017369067334e-05, + "loss": 0.1469, + "step": 2906 + }, + { + "epoch": 0.94, + "learning_rate": 1.6117426184478863e-05, + "loss": 0.1318, + "step": 2907 + }, + { + "epoch": 0.94, + "learning_rate": 1.6114677940129716e-05, + "loss": 0.1504, + "step": 2908 + }, + { + "epoch": 0.94, + "learning_rate": 1.611192895795752e-05, + "loss": 0.1472, + "step": 2909 + }, + { + "epoch": 0.94, + "learning_rate": 1.6109179238293967e-05, + "loss": 0.1371, + "step": 2910 + }, + { + "epoch": 0.94, + "learning_rate": 1.6106428781470858e-05, + "loss": 0.1473, + "step": 2911 + }, + { + "epoch": 0.94, + "learning_rate": 1.6103677587820076e-05, + "loss": 0.1516, + "step": 2912 + }, + { + "epoch": 0.94, + "learning_rate": 1.6100925657673584e-05, + "loss": 0.1419, + "step": 2913 + }, + { + "epoch": 0.94, + "learning_rate": 1.6098172991363446e-05, + "loss": 0.1341, + "step": 2914 + }, + { + "epoch": 0.94, + "learning_rate": 1.6095419589221814e-05, + "loss": 0.1451, + "step": 2915 + }, + { + "epoch": 0.94, + "learning_rate": 1.6092665451580925e-05, + "loss": 0.1387, + "step": 2916 + }, + { + "epoch": 0.94, + "learning_rate": 1.6089910578773097e-05, + "loss": 0.1306, + "step": 2917 + }, + { + "epoch": 0.94, + "learning_rate": 1.6087154971130753e-05, + "loss": 0.1453, + "step": 2918 + }, + { + "epoch": 0.94, + "learning_rate": 1.608439862898639e-05, + "loss": 0.1394, + "step": 2919 + }, + { + "epoch": 0.94, + "learning_rate": 1.6081641552672604e-05, + "loss": 0.148, + "step": 2920 + }, + { + "epoch": 0.94, + "learning_rate": 1.6078883742522075e-05, + "loss": 0.1409, + "step": 2921 + }, + { + "epoch": 0.94, + "learning_rate": 1.6076125198867573e-05, + "loss": 0.1648, + "step": 2922 + }, + { + "epoch": 0.94, + "learning_rate": 1.6073365922041952e-05, + "loss": 0.1348, + "step": 2923 + }, + { + "epoch": 0.94, + "learning_rate": 1.607060591237816e-05, + "loss": 0.1378, + "step": 2924 + }, + { + "epoch": 0.94, + "learning_rate": 1.6067845170209233e-05, + "loss": 0.1358, + "step": 2925 + }, + { + "epoch": 0.94, + "learning_rate": 1.606508369586829e-05, + "loss": 0.1452, + "step": 2926 + }, + { + "epoch": 0.94, + "learning_rate": 1.606232148968855e-05, + "loss": 0.1516, + "step": 2927 + }, + { + "epoch": 0.94, + "learning_rate": 1.60595585520033e-05, + "loss": 0.1476, + "step": 2928 + }, + { + "epoch": 0.94, + "learning_rate": 1.605679488314594e-05, + "loss": 0.1307, + "step": 2929 + }, + { + "epoch": 0.94, + "learning_rate": 1.6054030483449944e-05, + "loss": 0.1562, + "step": 2930 + }, + { + "epoch": 0.94, + "learning_rate": 1.605126535324887e-05, + "loss": 0.1476, + "step": 2931 + }, + { + "epoch": 0.94, + "learning_rate": 1.6048499492876378e-05, + "loss": 0.1389, + "step": 2932 + }, + { + "epoch": 0.94, + "learning_rate": 1.60457329026662e-05, + "loss": 0.1455, + "step": 2933 + }, + { + "epoch": 0.94, + "learning_rate": 1.604296558295217e-05, + "loss": 0.1527, + "step": 2934 + }, + { + "epoch": 0.94, + "learning_rate": 1.6040197534068206e-05, + "loss": 0.149, + "step": 2935 + }, + { + "epoch": 0.94, + "learning_rate": 1.6037428756348306e-05, + "loss": 0.1339, + "step": 2936 + }, + { + "epoch": 0.94, + "learning_rate": 1.6034659250126568e-05, + "loss": 0.1609, + "step": 2937 + }, + { + "epoch": 0.95, + "learning_rate": 1.6031889015737176e-05, + "loss": 0.1407, + "step": 2938 + }, + { + "epoch": 0.95, + "learning_rate": 1.6029118053514388e-05, + "loss": 0.1416, + "step": 2939 + }, + { + "epoch": 0.95, + "learning_rate": 1.6026346363792565e-05, + "loss": 0.1381, + "step": 2940 + }, + { + "epoch": 0.95, + "learning_rate": 1.602357394690615e-05, + "loss": 0.1282, + "step": 2941 + }, + { + "epoch": 0.95, + "learning_rate": 1.6020800803189682e-05, + "loss": 0.1381, + "step": 2942 + }, + { + "epoch": 0.95, + "learning_rate": 1.601802693297777e-05, + "loss": 0.1465, + "step": 2943 + }, + { + "epoch": 0.95, + "learning_rate": 1.6015252336605124e-05, + "loss": 0.133, + "step": 2944 + }, + { + "epoch": 0.95, + "learning_rate": 1.6012477014406543e-05, + "loss": 0.1626, + "step": 2945 + }, + { + "epoch": 0.95, + "learning_rate": 1.6009700966716907e-05, + "loss": 0.1302, + "step": 2946 + }, + { + "epoch": 0.95, + "learning_rate": 1.600692419387118e-05, + "loss": 0.1287, + "step": 2947 + }, + { + "epoch": 0.95, + "learning_rate": 1.600414669620443e-05, + "loss": 0.1469, + "step": 2948 + }, + { + "epoch": 0.95, + "learning_rate": 1.600136847405179e-05, + "loss": 0.1411, + "step": 2949 + }, + { + "epoch": 0.95, + "learning_rate": 1.59985895277485e-05, + "loss": 0.134, + "step": 2950 + }, + { + "epoch": 0.95, + "learning_rate": 1.599580985762988e-05, + "loss": 0.1272, + "step": 2951 + }, + { + "epoch": 0.95, + "learning_rate": 1.5993029464031327e-05, + "loss": 0.1681, + "step": 2952 + }, + { + "epoch": 0.95, + "learning_rate": 1.5990248347288346e-05, + "loss": 0.14, + "step": 2953 + }, + { + "epoch": 0.95, + "learning_rate": 1.598746650773651e-05, + "loss": 0.1395, + "step": 2954 + }, + { + "epoch": 0.95, + "learning_rate": 1.5984683945711503e-05, + "loss": 0.1487, + "step": 2955 + }, + { + "epoch": 0.95, + "learning_rate": 1.5981900661549063e-05, + "loss": 0.1467, + "step": 2956 + }, + { + "epoch": 0.95, + "learning_rate": 1.5979116655585037e-05, + "loss": 0.1415, + "step": 2957 + }, + { + "epoch": 0.95, + "learning_rate": 1.5976331928155366e-05, + "loss": 0.1679, + "step": 2958 + }, + { + "epoch": 0.95, + "learning_rate": 1.5973546479596053e-05, + "loss": 0.1587, + "step": 2959 + }, + { + "epoch": 0.95, + "learning_rate": 1.597076031024321e-05, + "loss": 0.1524, + "step": 2960 + }, + { + "epoch": 0.95, + "learning_rate": 1.5967973420433023e-05, + "loss": 0.1243, + "step": 2961 + }, + { + "epoch": 0.95, + "learning_rate": 1.5965185810501775e-05, + "loss": 0.1517, + "step": 2962 + }, + { + "epoch": 0.95, + "learning_rate": 1.5962397480785828e-05, + "loss": 0.1421, + "step": 2963 + }, + { + "epoch": 0.95, + "learning_rate": 1.5959608431621634e-05, + "loss": 0.1236, + "step": 2964 + }, + { + "epoch": 0.95, + "learning_rate": 1.5956818663345733e-05, + "loss": 0.1521, + "step": 2965 + }, + { + "epoch": 0.95, + "learning_rate": 1.595402817629475e-05, + "loss": 0.1352, + "step": 2966 + }, + { + "epoch": 0.95, + "learning_rate": 1.59512369708054e-05, + "loss": 0.1422, + "step": 2967 + }, + { + "epoch": 0.95, + "learning_rate": 1.594844504721447e-05, + "loss": 0.1479, + "step": 2968 + }, + { + "epoch": 0.95, + "learning_rate": 1.5945652405858856e-05, + "loss": 0.1429, + "step": 2969 + }, + { + "epoch": 0.96, + "learning_rate": 1.594285904707553e-05, + "loss": 0.1473, + "step": 2970 + }, + { + "epoch": 0.96, + "learning_rate": 1.5940064971201548e-05, + "loss": 0.1458, + "step": 2971 + }, + { + "epoch": 0.96, + "learning_rate": 1.5937270178574056e-05, + "loss": 0.148, + "step": 2972 + }, + { + "epoch": 0.96, + "learning_rate": 1.593447466953028e-05, + "loss": 0.1482, + "step": 2973 + }, + { + "epoch": 0.96, + "learning_rate": 1.593167844440755e-05, + "loss": 0.148, + "step": 2974 + }, + { + "epoch": 0.96, + "learning_rate": 1.592888150354326e-05, + "loss": 0.1474, + "step": 2975 + }, + { + "epoch": 0.96, + "learning_rate": 1.5926083847274904e-05, + "loss": 0.1336, + "step": 2976 + }, + { + "epoch": 0.96, + "learning_rate": 1.5923285475940057e-05, + "loss": 0.1565, + "step": 2977 + }, + { + "epoch": 0.96, + "learning_rate": 1.5920486389876383e-05, + "loss": 0.1423, + "step": 2978 + }, + { + "epoch": 0.96, + "learning_rate": 1.5917686589421636e-05, + "loss": 0.1371, + "step": 2979 + }, + { + "epoch": 0.96, + "learning_rate": 1.5914886074913648e-05, + "loss": 0.1436, + "step": 2980 + }, + { + "epoch": 0.96, + "learning_rate": 1.5912084846690342e-05, + "loss": 0.1406, + "step": 2981 + }, + { + "epoch": 0.96, + "learning_rate": 1.590928290508973e-05, + "loss": 0.1489, + "step": 2982 + }, + { + "epoch": 0.96, + "learning_rate": 1.5906480250449895e-05, + "loss": 0.1525, + "step": 2983 + }, + { + "epoch": 0.96, + "learning_rate": 1.5903676883109032e-05, + "loss": 0.1669, + "step": 2984 + }, + { + "epoch": 0.96, + "learning_rate": 1.5900872803405393e-05, + "loss": 0.1561, + "step": 2985 + }, + { + "epoch": 0.96, + "learning_rate": 1.589806801167734e-05, + "loss": 0.154, + "step": 2986 + }, + { + "epoch": 0.96, + "learning_rate": 1.589526250826331e-05, + "loss": 0.1416, + "step": 2987 + }, + { + "epoch": 0.96, + "learning_rate": 1.5892456293501823e-05, + "loss": 0.1397, + "step": 2988 + }, + { + "epoch": 0.96, + "learning_rate": 1.588964936773149e-05, + "loss": 0.136, + "step": 2989 + }, + { + "epoch": 0.96, + "learning_rate": 1.5886841731291008e-05, + "loss": 0.1593, + "step": 2990 + }, + { + "epoch": 0.96, + "learning_rate": 1.5884033384519154e-05, + "loss": 0.1479, + "step": 2991 + }, + { + "epoch": 0.96, + "learning_rate": 1.5881224327754807e-05, + "loss": 0.1336, + "step": 2992 + }, + { + "epoch": 0.96, + "learning_rate": 1.5878414561336905e-05, + "loss": 0.1396, + "step": 2993 + }, + { + "epoch": 0.96, + "learning_rate": 1.5875604085604496e-05, + "loss": 0.1387, + "step": 2994 + }, + { + "epoch": 0.96, + "learning_rate": 1.5872792900896697e-05, + "loss": 0.1608, + "step": 2995 + }, + { + "epoch": 0.96, + "learning_rate": 1.5869981007552725e-05, + "loss": 0.1542, + "step": 2996 + }, + { + "epoch": 0.96, + "learning_rate": 1.586716840591187e-05, + "loss": 0.1511, + "step": 2997 + }, + { + "epoch": 0.96, + "learning_rate": 1.5864355096313515e-05, + "loss": 0.1312, + "step": 2998 + }, + { + "epoch": 0.96, + "learning_rate": 1.5861541079097125e-05, + "loss": 0.1446, + "step": 2999 + }, + { + "epoch": 0.96, + "learning_rate": 1.5858726354602248e-05, + "loss": 0.1436, + "step": 3000 + }, + { + "epoch": 0.97, + "learning_rate": 1.5855910923168527e-05, + "loss": 0.142, + "step": 3001 + }, + { + "epoch": 0.97, + "learning_rate": 1.5853094785135682e-05, + "loss": 0.1467, + "step": 3002 + }, + { + "epoch": 0.97, + "learning_rate": 1.5850277940843513e-05, + "loss": 0.1348, + "step": 3003 + }, + { + "epoch": 0.97, + "learning_rate": 1.5847460390631925e-05, + "loss": 0.1341, + "step": 3004 + }, + { + "epoch": 0.97, + "learning_rate": 1.5844642134840885e-05, + "loss": 0.1419, + "step": 3005 + }, + { + "epoch": 0.97, + "learning_rate": 1.5841823173810457e-05, + "loss": 0.1391, + "step": 3006 + }, + { + "epoch": 0.97, + "learning_rate": 1.5839003507880796e-05, + "loss": 0.1328, + "step": 3007 + }, + { + "epoch": 0.97, + "learning_rate": 1.5836183137392128e-05, + "loss": 0.161, + "step": 3008 + }, + { + "epoch": 0.97, + "learning_rate": 1.583336206268477e-05, + "loss": 0.15, + "step": 3009 + }, + { + "epoch": 0.97, + "learning_rate": 1.5830540284099133e-05, + "loss": 0.1378, + "step": 3010 + }, + { + "epoch": 0.97, + "learning_rate": 1.5827717801975698e-05, + "loss": 0.136, + "step": 3011 + }, + { + "epoch": 0.97, + "learning_rate": 1.582489461665504e-05, + "loss": 0.1599, + "step": 3012 + }, + { + "epoch": 0.97, + "learning_rate": 1.582207072847782e-05, + "loss": 0.1347, + "step": 3013 + }, + { + "epoch": 0.97, + "learning_rate": 1.581924613778477e-05, + "loss": 0.1471, + "step": 3014 + }, + { + "epoch": 0.97, + "learning_rate": 1.5816420844916728e-05, + "loss": 0.1476, + "step": 3015 + }, + { + "epoch": 0.97, + "learning_rate": 1.58135948502146e-05, + "loss": 0.1397, + "step": 3016 + }, + { + "epoch": 0.97, + "learning_rate": 1.5810768154019386e-05, + "loss": 0.1487, + "step": 3017 + }, + { + "epoch": 0.97, + "learning_rate": 1.580794075667216e-05, + "loss": 0.15, + "step": 3018 + }, + { + "epoch": 0.97, + "learning_rate": 1.5805112658514098e-05, + "loss": 0.1669, + "step": 3019 + }, + { + "epoch": 0.97, + "learning_rate": 1.580228385988644e-05, + "loss": 0.1435, + "step": 3020 + }, + { + "epoch": 0.97, + "learning_rate": 1.5799454361130533e-05, + "loss": 0.1354, + "step": 3021 + }, + { + "epoch": 0.97, + "learning_rate": 1.5796624162587787e-05, + "loss": 0.1384, + "step": 3022 + }, + { + "epoch": 0.97, + "learning_rate": 1.5793793264599705e-05, + "loss": 0.1368, + "step": 3023 + }, + { + "epoch": 0.97, + "learning_rate": 1.5790961667507884e-05, + "loss": 0.1607, + "step": 3024 + }, + { + "epoch": 0.97, + "learning_rate": 1.578812937165399e-05, + "loss": 0.1506, + "step": 3025 + }, + { + "epoch": 0.97, + "learning_rate": 1.5785296377379777e-05, + "loss": 0.1555, + "step": 3026 + }, + { + "epoch": 0.97, + "learning_rate": 1.5782462685027094e-05, + "loss": 0.1534, + "step": 3027 + }, + { + "epoch": 0.97, + "learning_rate": 1.5779628294937862e-05, + "loss": 0.1662, + "step": 3028 + }, + { + "epoch": 0.97, + "learning_rate": 1.577679320745409e-05, + "loss": 0.1469, + "step": 3029 + }, + { + "epoch": 0.97, + "learning_rate": 1.5773957422917873e-05, + "loss": 0.1461, + "step": 3030 + }, + { + "epoch": 0.97, + "learning_rate": 1.5771120941671386e-05, + "loss": 0.1409, + "step": 3031 + }, + { + "epoch": 0.98, + "learning_rate": 1.5768283764056898e-05, + "loss": 0.1359, + "step": 3032 + }, + { + "epoch": 0.98, + "learning_rate": 1.5765445890416748e-05, + "loss": 0.1448, + "step": 3033 + }, + { + "epoch": 0.98, + "learning_rate": 1.5762607321093368e-05, + "loss": 0.1405, + "step": 3034 + }, + { + "epoch": 0.98, + "learning_rate": 1.5759768056429274e-05, + "loss": 0.1593, + "step": 3035 + }, + { + "epoch": 0.98, + "learning_rate": 1.575692809676706e-05, + "loss": 0.1473, + "step": 3036 + }, + { + "epoch": 0.98, + "learning_rate": 1.575408744244941e-05, + "loss": 0.1372, + "step": 3037 + }, + { + "epoch": 0.98, + "learning_rate": 1.575124609381909e-05, + "loss": 0.1576, + "step": 3038 + }, + { + "epoch": 0.98, + "learning_rate": 1.574840405121895e-05, + "loss": 0.1555, + "step": 3039 + }, + { + "epoch": 0.98, + "learning_rate": 1.574556131499192e-05, + "loss": 0.1497, + "step": 3040 + }, + { + "epoch": 0.98, + "learning_rate": 1.5742717885481017e-05, + "loss": 0.143, + "step": 3041 + }, + { + "epoch": 0.98, + "learning_rate": 1.573987376302934e-05, + "loss": 0.1426, + "step": 3042 + }, + { + "epoch": 0.98, + "learning_rate": 1.5737028947980076e-05, + "loss": 0.1424, + "step": 3043 + }, + { + "epoch": 0.98, + "learning_rate": 1.5734183440676493e-05, + "loss": 0.1476, + "step": 3044 + }, + { + "epoch": 0.98, + "learning_rate": 1.573133724146194e-05, + "loss": 0.1397, + "step": 3045 + }, + { + "epoch": 0.98, + "learning_rate": 1.5728490350679853e-05, + "loss": 0.15, + "step": 3046 + }, + { + "epoch": 0.98, + "learning_rate": 1.572564276867375e-05, + "loss": 0.1476, + "step": 3047 + }, + { + "epoch": 0.98, + "learning_rate": 1.5722794495787227e-05, + "loss": 0.1356, + "step": 3048 + }, + { + "epoch": 0.98, + "learning_rate": 1.571994553236398e-05, + "loss": 0.1498, + "step": 3049 + }, + { + "epoch": 0.98, + "learning_rate": 1.5717095878747764e-05, + "loss": 0.135, + "step": 3050 + }, + { + "epoch": 0.98, + "learning_rate": 1.5714245535282436e-05, + "loss": 0.1579, + "step": 3051 + }, + { + "epoch": 0.98, + "learning_rate": 1.5711394502311934e-05, + "loss": 0.1413, + "step": 3052 + }, + { + "epoch": 0.98, + "learning_rate": 1.570854278018027e-05, + "loss": 0.1458, + "step": 3053 + }, + { + "epoch": 0.98, + "learning_rate": 1.5705690369231552e-05, + "loss": 0.1367, + "step": 3054 + }, + { + "epoch": 0.98, + "learning_rate": 1.5702837269809958e-05, + "loss": 0.1403, + "step": 3055 + }, + { + "epoch": 0.98, + "learning_rate": 1.5699983482259756e-05, + "loss": 0.1481, + "step": 3056 + }, + { + "epoch": 0.98, + "learning_rate": 1.5697129006925296e-05, + "loss": 0.1423, + "step": 3057 + }, + { + "epoch": 0.98, + "learning_rate": 1.5694273844151015e-05, + "loss": 0.1528, + "step": 3058 + }, + { + "epoch": 0.98, + "learning_rate": 1.5691417994281426e-05, + "loss": 0.1534, + "step": 3059 + }, + { + "epoch": 0.98, + "learning_rate": 1.5688561457661125e-05, + "loss": 0.1326, + "step": 3060 + }, + { + "epoch": 0.98, + "learning_rate": 1.5685704234634803e-05, + "loss": 0.1521, + "step": 3061 + }, + { + "epoch": 0.98, + "learning_rate": 1.568284632554721e-05, + "loss": 0.1499, + "step": 3062 + }, + { + "epoch": 0.99, + "learning_rate": 1.5679987730743208e-05, + "loss": 0.1618, + "step": 3063 + }, + { + "epoch": 0.99, + "learning_rate": 1.567712845056772e-05, + "loss": 0.1367, + "step": 3064 + }, + { + "epoch": 0.99, + "learning_rate": 1.5674268485365765e-05, + "loss": 0.1475, + "step": 3065 + }, + { + "epoch": 0.99, + "learning_rate": 1.5671407835482426e-05, + "loss": 0.1398, + "step": 3066 + }, + { + "epoch": 0.99, + "learning_rate": 1.5668546501262897e-05, + "loss": 0.1805, + "step": 3067 + }, + { + "epoch": 0.99, + "learning_rate": 1.5665684483052425e-05, + "loss": 0.1447, + "step": 3068 + }, + { + "epoch": 0.99, + "learning_rate": 1.5662821781196362e-05, + "loss": 0.1543, + "step": 3069 + }, + { + "epoch": 0.99, + "learning_rate": 1.565995839604013e-05, + "loss": 0.1342, + "step": 3070 + }, + { + "epoch": 0.99, + "learning_rate": 1.565709432792924e-05, + "loss": 0.1412, + "step": 3071 + }, + { + "epoch": 0.99, + "learning_rate": 1.565422957720928e-05, + "loss": 0.1385, + "step": 3072 + }, + { + "epoch": 0.99, + "learning_rate": 1.565136414422592e-05, + "loss": 0.1451, + "step": 3073 + }, + { + "epoch": 0.99, + "learning_rate": 1.5648498029324923e-05, + "loss": 0.1404, + "step": 3074 + }, + { + "epoch": 0.99, + "learning_rate": 1.564563123285212e-05, + "loss": 0.1574, + "step": 3075 + }, + { + "epoch": 0.99, + "learning_rate": 1.5642763755153438e-05, + "loss": 0.1366, + "step": 3076 + }, + { + "epoch": 0.99, + "learning_rate": 1.563989559657487e-05, + "loss": 0.1392, + "step": 3077 + }, + { + "epoch": 0.99, + "learning_rate": 1.563702675746251e-05, + "loss": 0.1493, + "step": 3078 + }, + { + "epoch": 0.99, + "learning_rate": 1.5634157238162516e-05, + "loss": 0.143, + "step": 3079 + }, + { + "epoch": 0.99, + "learning_rate": 1.5631287039021143e-05, + "loss": 0.1597, + "step": 3080 + }, + { + "epoch": 0.99, + "learning_rate": 1.5628416160384714e-05, + "loss": 0.1501, + "step": 3081 + }, + { + "epoch": 0.99, + "learning_rate": 1.5625544602599647e-05, + "loss": 0.1363, + "step": 3082 + }, + { + "epoch": 0.99, + "learning_rate": 1.5622672366012436e-05, + "loss": 0.1357, + "step": 3083 + }, + { + "epoch": 0.99, + "learning_rate": 1.5619799450969655e-05, + "loss": 0.1427, + "step": 3084 + }, + { + "epoch": 0.99, + "learning_rate": 1.5616925857817965e-05, + "loss": 0.1399, + "step": 3085 + }, + { + "epoch": 0.99, + "learning_rate": 1.56140515869041e-05, + "loss": 0.1297, + "step": 3086 + }, + { + "epoch": 0.99, + "learning_rate": 1.561117663857489e-05, + "loss": 0.1383, + "step": 3087 + }, + { + "epoch": 0.99, + "learning_rate": 1.5608301013177234e-05, + "loss": 0.1563, + "step": 3088 + }, + { + "epoch": 0.99, + "learning_rate": 1.560542471105812e-05, + "loss": 0.1417, + "step": 3089 + }, + { + "epoch": 0.99, + "learning_rate": 1.5602547732564607e-05, + "loss": 0.1408, + "step": 3090 + }, + { + "epoch": 0.99, + "learning_rate": 1.559967007804385e-05, + "loss": 0.1397, + "step": 3091 + }, + { + "epoch": 0.99, + "learning_rate": 1.5596791747843083e-05, + "loss": 0.146, + "step": 3092 + }, + { + "epoch": 0.99, + "learning_rate": 1.5593912742309608e-05, + "loss": 0.1525, + "step": 3093 + }, + { + "epoch": 1.0, + "learning_rate": 1.5591033061790827e-05, + "loss": 0.1327, + "step": 3094 + }, + { + "epoch": 1.0, + "learning_rate": 1.5588152706634207e-05, + "loss": 0.1369, + "step": 3095 + }, + { + "epoch": 1.0, + "learning_rate": 1.558527167718731e-05, + "loss": 0.144, + "step": 3096 + }, + { + "epoch": 1.0, + "learning_rate": 1.558238997379777e-05, + "loss": 0.1611, + "step": 3097 + }, + { + "epoch": 1.0, + "learning_rate": 1.5579507596813304e-05, + "loss": 0.1408, + "step": 3098 + }, + { + "epoch": 1.0, + "learning_rate": 1.5576624546581713e-05, + "loss": 0.1443, + "step": 3099 + }, + { + "epoch": 1.0, + "learning_rate": 1.5573740823450885e-05, + "loss": 0.1376, + "step": 3100 + }, + { + "epoch": 1.0, + "learning_rate": 1.557085642776877e-05, + "loss": 0.1472, + "step": 3101 + }, + { + "epoch": 1.0, + "learning_rate": 1.556797135988342e-05, + "loss": 0.1235, + "step": 3102 + }, + { + "epoch": 1.0, + "learning_rate": 1.5565085620142958e-05, + "loss": 0.1367, + "step": 3103 + }, + { + "epoch": 1.0, + "learning_rate": 1.5562199208895586e-05, + "loss": 0.1535, + "step": 3104 + }, + { + "epoch": 1.0, + "learning_rate": 1.5559312126489596e-05, + "loss": 0.1671, + "step": 3105 + }, + { + "epoch": 1.0, + "learning_rate": 1.5556424373273357e-05, + "loss": 0.1446, + "step": 3106 + }, + { + "epoch": 1.0, + "learning_rate": 1.555353594959531e-05, + "loss": 0.1554, + "step": 3107 + }, + { + "epoch": 1.0, + "learning_rate": 1.5550646855803986e-05, + "loss": 0.1543, + "step": 3108 + }, + { + "epoch": 1.0, + "learning_rate": 1.5547757092248e-05, + "loss": 0.1278, + "step": 3109 + }, + { + "epoch": 1.0, + "learning_rate": 1.554486665927604e-05, + "loss": 0.1226, + "step": 3110 + }, + { + "epoch": 1.0, + "learning_rate": 1.554197555723688e-05, + "loss": 0.109, + "step": 3111 + }, + { + "epoch": 1.0, + "learning_rate": 1.553908378647937e-05, + "loss": 0.1243, + "step": 3112 + }, + { + "epoch": 1.0, + "learning_rate": 1.553619134735245e-05, + "loss": 0.1217, + "step": 3113 + }, + { + "epoch": 1.0, + "learning_rate": 1.5533298240205124e-05, + "loss": 0.1171, + "step": 3114 + }, + { + "epoch": 1.0, + "learning_rate": 1.5530404465386492e-05, + "loss": 0.112, + "step": 3115 + }, + { + "epoch": 1.0, + "learning_rate": 1.552751002324573e-05, + "loss": 0.115, + "step": 3116 + }, + { + "epoch": 1.0, + "learning_rate": 1.552461491413209e-05, + "loss": 0.1146, + "step": 3117 + }, + { + "epoch": 1.0, + "learning_rate": 1.5521719138394914e-05, + "loss": 0.1155, + "step": 3118 + }, + { + "epoch": 1.0, + "learning_rate": 1.5518822696383612e-05, + "loss": 0.1013, + "step": 3119 + }, + { + "epoch": 1.0, + "learning_rate": 1.5515925588447683e-05, + "loss": 0.1067, + "step": 3120 + }, + { + "epoch": 1.0, + "learning_rate": 1.551302781493671e-05, + "loss": 0.1237, + "step": 3121 + }, + { + "epoch": 1.0, + "learning_rate": 1.551012937620034e-05, + "loss": 0.1136, + "step": 3122 + }, + { + "epoch": 1.0, + "learning_rate": 1.5507230272588326e-05, + "loss": 0.1146, + "step": 3123 + }, + { + "epoch": 1.0, + "learning_rate": 1.5504330504450474e-05, + "loss": 0.1177, + "step": 3124 + }, + { + "epoch": 1.01, + "learning_rate": 1.5501430072136682e-05, + "loss": 0.1054, + "step": 3125 + }, + { + "epoch": 1.01, + "learning_rate": 1.5498528975996933e-05, + "loss": 0.1052, + "step": 3126 + }, + { + "epoch": 1.01, + "learning_rate": 1.5495627216381287e-05, + "loss": 0.117, + "step": 3127 + }, + { + "epoch": 1.01, + "learning_rate": 1.549272479363988e-05, + "loss": 0.1091, + "step": 3128 + }, + { + "epoch": 1.01, + "learning_rate": 1.5489821708122935e-05, + "loss": 0.1141, + "step": 3129 + }, + { + "epoch": 1.01, + "learning_rate": 1.5486917960180742e-05, + "loss": 0.1077, + "step": 3130 + }, + { + "epoch": 1.01, + "learning_rate": 1.5484013550163683e-05, + "loss": 0.1244, + "step": 3131 + }, + { + "epoch": 1.01, + "learning_rate": 1.548110847842222e-05, + "loss": 0.1212, + "step": 3132 + }, + { + "epoch": 1.01, + "learning_rate": 1.547820274530689e-05, + "loss": 0.1135, + "step": 3133 + }, + { + "epoch": 1.01, + "learning_rate": 1.547529635116831e-05, + "loss": 0.1102, + "step": 3134 + }, + { + "epoch": 1.01, + "learning_rate": 1.5472389296357175e-05, + "loss": 0.1082, + "step": 3135 + }, + { + "epoch": 1.01, + "learning_rate": 1.5469481581224274e-05, + "loss": 0.103, + "step": 3136 + }, + { + "epoch": 1.01, + "learning_rate": 1.5466573206120448e-05, + "loss": 0.1092, + "step": 3137 + }, + { + "epoch": 1.01, + "learning_rate": 1.5463664171396643e-05, + "loss": 0.1037, + "step": 3138 + }, + { + "epoch": 1.01, + "learning_rate": 1.5460754477403877e-05, + "loss": 0.1156, + "step": 3139 + }, + { + "epoch": 1.01, + "learning_rate": 1.5457844124493243e-05, + "loss": 0.1155, + "step": 3140 + }, + { + "epoch": 1.01, + "learning_rate": 1.5454933113015917e-05, + "loss": 0.0955, + "step": 3141 + }, + { + "epoch": 1.01, + "learning_rate": 1.5452021443323155e-05, + "loss": 0.1046, + "step": 3142 + }, + { + "epoch": 1.01, + "learning_rate": 1.544910911576629e-05, + "loss": 0.1172, + "step": 3143 + }, + { + "epoch": 1.01, + "learning_rate": 1.544619613069673e-05, + "loss": 0.1034, + "step": 3144 + }, + { + "epoch": 1.01, + "learning_rate": 1.5443282488465983e-05, + "loss": 0.1119, + "step": 3145 + }, + { + "epoch": 1.01, + "learning_rate": 1.544036818942561e-05, + "loss": 0.1154, + "step": 3146 + }, + { + "epoch": 1.01, + "learning_rate": 1.5437453233927265e-05, + "loss": 0.1118, + "step": 3147 + }, + { + "epoch": 1.01, + "learning_rate": 1.543453762232268e-05, + "loss": 0.1122, + "step": 3148 + }, + { + "epoch": 1.01, + "learning_rate": 1.543162135496367e-05, + "loss": 0.1139, + "step": 3149 + }, + { + "epoch": 1.01, + "learning_rate": 1.542870443220211e-05, + "loss": 0.1149, + "step": 3150 + }, + { + "epoch": 1.01, + "learning_rate": 1.5425786854389983e-05, + "loss": 0.1032, + "step": 3151 + }, + { + "epoch": 1.01, + "learning_rate": 1.542286862187933e-05, + "loss": 0.1184, + "step": 3152 + }, + { + "epoch": 1.01, + "learning_rate": 1.5419949735022278e-05, + "loss": 0.1336, + "step": 3153 + }, + { + "epoch": 1.01, + "learning_rate": 1.541703019417103e-05, + "loss": 0.1103, + "step": 3154 + }, + { + "epoch": 1.01, + "learning_rate": 1.5414109999677876e-05, + "loss": 0.12, + "step": 3155 + }, + { + "epoch": 1.02, + "learning_rate": 1.5411189151895175e-05, + "loss": 0.1118, + "step": 3156 + }, + { + "epoch": 1.02, + "learning_rate": 1.5408267651175368e-05, + "loss": 0.1062, + "step": 3157 + }, + { + "epoch": 1.02, + "learning_rate": 1.5405345497870977e-05, + "loss": 0.1068, + "step": 3158 + }, + { + "epoch": 1.02, + "learning_rate": 1.54024226923346e-05, + "loss": 0.1085, + "step": 3159 + }, + { + "epoch": 1.02, + "learning_rate": 1.5399499234918924e-05, + "loss": 0.1093, + "step": 3160 + }, + { + "epoch": 1.02, + "learning_rate": 1.5396575125976695e-05, + "loss": 0.1121, + "step": 3161 + }, + { + "epoch": 1.02, + "learning_rate": 1.5393650365860748e-05, + "loss": 0.1057, + "step": 3162 + }, + { + "epoch": 1.02, + "learning_rate": 1.5390724954924007e-05, + "loss": 0.1099, + "step": 3163 + }, + { + "epoch": 1.02, + "learning_rate": 1.5387798893519454e-05, + "loss": 0.1093, + "step": 3164 + }, + { + "epoch": 1.02, + "learning_rate": 1.538487218200017e-05, + "loss": 0.1127, + "step": 3165 + }, + { + "epoch": 1.02, + "learning_rate": 1.5381944820719296e-05, + "loss": 0.117, + "step": 3166 + }, + { + "epoch": 1.02, + "learning_rate": 1.5379016810030062e-05, + "loss": 0.1122, + "step": 3167 + }, + { + "epoch": 1.02, + "learning_rate": 1.5376088150285777e-05, + "loss": 0.1086, + "step": 3168 + }, + { + "epoch": 1.02, + "learning_rate": 1.537315884183982e-05, + "loss": 0.1138, + "step": 3169 + }, + { + "epoch": 1.02, + "learning_rate": 1.5370228885045662e-05, + "loss": 0.114, + "step": 3170 + }, + { + "epoch": 1.02, + "learning_rate": 1.5367298280256835e-05, + "loss": 0.1038, + "step": 3171 + }, + { + "epoch": 1.02, + "learning_rate": 1.5364367027826968e-05, + "loss": 0.1057, + "step": 3172 + }, + { + "epoch": 1.02, + "learning_rate": 1.5361435128109753e-05, + "loss": 0.1171, + "step": 3173 + }, + { + "epoch": 1.02, + "learning_rate": 1.5358502581458964e-05, + "loss": 0.1176, + "step": 3174 + }, + { + "epoch": 1.02, + "learning_rate": 1.5355569388228455e-05, + "loss": 0.1033, + "step": 3175 + }, + { + "epoch": 1.02, + "learning_rate": 1.535263554877216e-05, + "loss": 0.1079, + "step": 3176 + }, + { + "epoch": 1.02, + "learning_rate": 1.5349701063444088e-05, + "loss": 0.125, + "step": 3177 + }, + { + "epoch": 1.02, + "learning_rate": 1.5346765932598326e-05, + "loss": 0.1027, + "step": 3178 + }, + { + "epoch": 1.02, + "learning_rate": 1.534383015658904e-05, + "loss": 0.1088, + "step": 3179 + }, + { + "epoch": 1.02, + "learning_rate": 1.534089373577047e-05, + "loss": 0.1066, + "step": 3180 + }, + { + "epoch": 1.02, + "learning_rate": 1.5337956670496943e-05, + "loss": 0.1065, + "step": 3181 + }, + { + "epoch": 1.02, + "learning_rate": 1.533501896112285e-05, + "loss": 0.1091, + "step": 3182 + }, + { + "epoch": 1.02, + "learning_rate": 1.5332080608002672e-05, + "loss": 0.1225, + "step": 3183 + }, + { + "epoch": 1.02, + "learning_rate": 1.5329141611490968e-05, + "loss": 0.1128, + "step": 3184 + }, + { + "epoch": 1.02, + "learning_rate": 1.5326201971942363e-05, + "loss": 0.1051, + "step": 3185 + }, + { + "epoch": 1.02, + "learning_rate": 1.5323261689711566e-05, + "loss": 0.1003, + "step": 3186 + }, + { + "epoch": 1.03, + "learning_rate": 1.5320320765153367e-05, + "loss": 0.1207, + "step": 3187 + }, + { + "epoch": 1.03, + "learning_rate": 1.5317379198622632e-05, + "loss": 0.1215, + "step": 3188 + }, + { + "epoch": 1.03, + "learning_rate": 1.5314436990474296e-05, + "loss": 0.1131, + "step": 3189 + }, + { + "epoch": 1.03, + "learning_rate": 1.5311494141063387e-05, + "loss": 0.1208, + "step": 3190 + }, + { + "epoch": 1.03, + "learning_rate": 1.5308550650745e-05, + "loss": 0.1176, + "step": 3191 + }, + { + "epoch": 1.03, + "learning_rate": 1.53056065198743e-05, + "loss": 0.1075, + "step": 3192 + }, + { + "epoch": 1.03, + "learning_rate": 1.530266174880655e-05, + "loss": 0.1063, + "step": 3193 + }, + { + "epoch": 1.03, + "learning_rate": 1.529971633789707e-05, + "loss": 0.1083, + "step": 3194 + }, + { + "epoch": 1.03, + "learning_rate": 1.529677028750127e-05, + "loss": 0.1162, + "step": 3195 + }, + { + "epoch": 1.03, + "learning_rate": 1.5293823597974638e-05, + "loss": 0.1148, + "step": 3196 + }, + { + "epoch": 1.03, + "learning_rate": 1.5290876269672726e-05, + "loss": 0.1043, + "step": 3197 + }, + { + "epoch": 1.03, + "learning_rate": 1.528792830295117e-05, + "loss": 0.1156, + "step": 3198 + }, + { + "epoch": 1.03, + "learning_rate": 1.5284979698165693e-05, + "loss": 0.1107, + "step": 3199 + }, + { + "epoch": 1.03, + "learning_rate": 1.5282030455672076e-05, + "loss": 0.126, + "step": 3200 + }, + { + "epoch": 1.03, + "learning_rate": 1.5279080575826197e-05, + "loss": 0.1166, + "step": 3201 + }, + { + "epoch": 1.03, + "learning_rate": 1.5276130058983993e-05, + "loss": 0.113, + "step": 3202 + }, + { + "epoch": 1.03, + "learning_rate": 1.5273178905501495e-05, + "loss": 0.1179, + "step": 3203 + }, + { + "epoch": 1.03, + "learning_rate": 1.527022711573479e-05, + "loss": 0.1176, + "step": 3204 + }, + { + "epoch": 1.03, + "learning_rate": 1.526727469004006e-05, + "loss": 0.1184, + "step": 3205 + }, + { + "epoch": 1.03, + "learning_rate": 1.526432162877356e-05, + "loss": 0.1123, + "step": 3206 + }, + { + "epoch": 1.03, + "learning_rate": 1.5261367932291613e-05, + "loss": 0.1002, + "step": 3207 + }, + { + "epoch": 1.03, + "learning_rate": 1.525841360095063e-05, + "loss": 0.1234, + "step": 3208 + }, + { + "epoch": 1.03, + "learning_rate": 1.525545863510709e-05, + "loss": 0.1161, + "step": 3209 + }, + { + "epoch": 1.03, + "learning_rate": 1.5252503035117552e-05, + "loss": 0.1201, + "step": 3210 + }, + { + "epoch": 1.03, + "learning_rate": 1.5249546801338648e-05, + "loss": 0.1171, + "step": 3211 + }, + { + "epoch": 1.03, + "learning_rate": 1.5246589934127096e-05, + "loss": 0.1123, + "step": 3212 + }, + { + "epoch": 1.03, + "learning_rate": 1.5243632433839681e-05, + "loss": 0.1079, + "step": 3213 + }, + { + "epoch": 1.03, + "learning_rate": 1.5240674300833269e-05, + "loss": 0.1271, + "step": 3214 + }, + { + "epoch": 1.03, + "learning_rate": 1.52377155354648e-05, + "loss": 0.1143, + "step": 3215 + }, + { + "epoch": 1.03, + "learning_rate": 1.5234756138091288e-05, + "loss": 0.1144, + "step": 3216 + }, + { + "epoch": 1.03, + "learning_rate": 1.5231796109069837e-05, + "loss": 0.115, + "step": 3217 + }, + { + "epoch": 1.04, + "learning_rate": 1.5228835448757605e-05, + "loss": 0.1113, + "step": 3218 + }, + { + "epoch": 1.04, + "learning_rate": 1.522587415751184e-05, + "loss": 0.1097, + "step": 3219 + }, + { + "epoch": 1.04, + "learning_rate": 1.5222912235689868e-05, + "loss": 0.1126, + "step": 3220 + }, + { + "epoch": 1.04, + "learning_rate": 1.5219949683649086e-05, + "loss": 0.1007, + "step": 3221 + }, + { + "epoch": 1.04, + "learning_rate": 1.5216986501746966e-05, + "loss": 0.1249, + "step": 3222 + }, + { + "epoch": 1.04, + "learning_rate": 1.521402269034106e-05, + "loss": 0.1095, + "step": 3223 + }, + { + "epoch": 1.04, + "learning_rate": 1.5211058249788996e-05, + "loss": 0.1095, + "step": 3224 + }, + { + "epoch": 1.04, + "learning_rate": 1.5208093180448469e-05, + "loss": 0.1152, + "step": 3225 + }, + { + "epoch": 1.04, + "learning_rate": 1.5205127482677265e-05, + "loss": 0.1192, + "step": 3226 + }, + { + "epoch": 1.04, + "learning_rate": 1.5202161156833236e-05, + "loss": 0.1053, + "step": 3227 + }, + { + "epoch": 1.04, + "learning_rate": 1.5199194203274306e-05, + "loss": 0.1092, + "step": 3228 + }, + { + "epoch": 1.04, + "learning_rate": 1.5196226622358483e-05, + "loss": 0.0945, + "step": 3229 + }, + { + "epoch": 1.04, + "learning_rate": 1.5193258414443852e-05, + "loss": 0.1117, + "step": 3230 + }, + { + "epoch": 1.04, + "learning_rate": 1.5190289579888567e-05, + "loss": 0.1248, + "step": 3231 + }, + { + "epoch": 1.04, + "learning_rate": 1.5187320119050854e-05, + "loss": 0.1081, + "step": 3232 + }, + { + "epoch": 1.04, + "learning_rate": 1.5184350032289033e-05, + "loss": 0.1233, + "step": 3233 + }, + { + "epoch": 1.04, + "learning_rate": 1.518137931996148e-05, + "loss": 0.1055, + "step": 3234 + }, + { + "epoch": 1.04, + "learning_rate": 1.517840798242665e-05, + "loss": 0.1028, + "step": 3235 + }, + { + "epoch": 1.04, + "learning_rate": 1.5175436020043085e-05, + "loss": 0.119, + "step": 3236 + }, + { + "epoch": 1.04, + "learning_rate": 1.5172463433169396e-05, + "loss": 0.1107, + "step": 3237 + }, + { + "epoch": 1.04, + "learning_rate": 1.5169490222164255e-05, + "loss": 0.1208, + "step": 3238 + }, + { + "epoch": 1.04, + "learning_rate": 1.5166516387386436e-05, + "loss": 0.107, + "step": 3239 + }, + { + "epoch": 1.04, + "learning_rate": 1.5163541929194769e-05, + "loss": 0.1061, + "step": 3240 + }, + { + "epoch": 1.04, + "learning_rate": 1.5160566847948163e-05, + "loss": 0.1197, + "step": 3241 + }, + { + "epoch": 1.04, + "learning_rate": 1.5157591144005609e-05, + "loss": 0.1175, + "step": 3242 + }, + { + "epoch": 1.04, + "learning_rate": 1.5154614817726163e-05, + "loss": 0.1179, + "step": 3243 + }, + { + "epoch": 1.04, + "learning_rate": 1.515163786946896e-05, + "loss": 0.1084, + "step": 3244 + }, + { + "epoch": 1.04, + "learning_rate": 1.514866029959322e-05, + "loss": 0.1021, + "step": 3245 + }, + { + "epoch": 1.04, + "learning_rate": 1.5145682108458224e-05, + "loss": 0.1021, + "step": 3246 + }, + { + "epoch": 1.04, + "learning_rate": 1.5142703296423328e-05, + "loss": 0.1236, + "step": 3247 + }, + { + "epoch": 1.04, + "learning_rate": 1.5139723863847977e-05, + "loss": 0.1092, + "step": 3248 + }, + { + "epoch": 1.05, + "learning_rate": 1.5136743811091679e-05, + "loss": 0.1055, + "step": 3249 + }, + { + "epoch": 1.05, + "learning_rate": 1.5133763138514015e-05, + "loss": 0.1082, + "step": 3250 + }, + { + "epoch": 1.05, + "learning_rate": 1.513078184647465e-05, + "loss": 0.117, + "step": 3251 + }, + { + "epoch": 1.05, + "learning_rate": 1.5127799935333323e-05, + "loss": 0.1172, + "step": 3252 + }, + { + "epoch": 1.05, + "learning_rate": 1.5124817405449836e-05, + "loss": 0.1114, + "step": 3253 + }, + { + "epoch": 1.05, + "learning_rate": 1.512183425718408e-05, + "loss": 0.1121, + "step": 3254 + }, + { + "epoch": 1.05, + "learning_rate": 1.5118850490896012e-05, + "loss": 0.1048, + "step": 3255 + }, + { + "epoch": 1.05, + "learning_rate": 1.5115866106945664e-05, + "loss": 0.1122, + "step": 3256 + }, + { + "epoch": 1.05, + "learning_rate": 1.511288110569315e-05, + "loss": 0.1146, + "step": 3257 + }, + { + "epoch": 1.05, + "learning_rate": 1.5109895487498648e-05, + "loss": 0.1121, + "step": 3258 + }, + { + "epoch": 1.05, + "learning_rate": 1.510690925272242e-05, + "loss": 0.1039, + "step": 3259 + }, + { + "epoch": 1.05, + "learning_rate": 1.5103922401724789e-05, + "loss": 0.1108, + "step": 3260 + }, + { + "epoch": 1.05, + "learning_rate": 1.5100934934866169e-05, + "loss": 0.1181, + "step": 3261 + }, + { + "epoch": 1.05, + "learning_rate": 1.5097946852507042e-05, + "loss": 0.1076, + "step": 3262 + }, + { + "epoch": 1.05, + "learning_rate": 1.5094958155007953e-05, + "loss": 0.1177, + "step": 3263 + }, + { + "epoch": 1.05, + "learning_rate": 1.509196884272954e-05, + "loss": 0.12, + "step": 3264 + }, + { + "epoch": 1.05, + "learning_rate": 1.5088978916032506e-05, + "loss": 0.1035, + "step": 3265 + }, + { + "epoch": 1.05, + "learning_rate": 1.508598837527762e-05, + "loss": 0.102, + "step": 3266 + }, + { + "epoch": 1.05, + "learning_rate": 1.5082997220825744e-05, + "loss": 0.1211, + "step": 3267 + }, + { + "epoch": 1.05, + "learning_rate": 1.5080005453037791e-05, + "loss": 0.1218, + "step": 3268 + }, + { + "epoch": 1.05, + "learning_rate": 1.5077013072274774e-05, + "loss": 0.0995, + "step": 3269 + }, + { + "epoch": 1.05, + "learning_rate": 1.507402007889776e-05, + "loss": 0.1149, + "step": 3270 + }, + { + "epoch": 1.05, + "learning_rate": 1.5071026473267894e-05, + "loss": 0.121, + "step": 3271 + }, + { + "epoch": 1.05, + "learning_rate": 1.50680322557464e-05, + "loss": 0.1053, + "step": 3272 + }, + { + "epoch": 1.05, + "learning_rate": 1.5065037426694575e-05, + "loss": 0.1165, + "step": 3273 + }, + { + "epoch": 1.05, + "learning_rate": 1.506204198647378e-05, + "loss": 0.1154, + "step": 3274 + }, + { + "epoch": 1.05, + "learning_rate": 1.505904593544547e-05, + "loss": 0.1186, + "step": 3275 + }, + { + "epoch": 1.05, + "learning_rate": 1.505604927397115e-05, + "loss": 0.1021, + "step": 3276 + }, + { + "epoch": 1.05, + "learning_rate": 1.5053052002412414e-05, + "loss": 0.1178, + "step": 3277 + }, + { + "epoch": 1.05, + "learning_rate": 1.5050054121130925e-05, + "loss": 0.1143, + "step": 3278 + }, + { + "epoch": 1.05, + "learning_rate": 1.5047055630488423e-05, + "loss": 0.11, + "step": 3279 + }, + { + "epoch": 1.06, + "learning_rate": 1.5044056530846715e-05, + "loss": 0.1243, + "step": 3280 + }, + { + "epoch": 1.06, + "learning_rate": 1.5041056822567685e-05, + "loss": 0.1122, + "step": 3281 + }, + { + "epoch": 1.06, + "learning_rate": 1.5038056506013297e-05, + "loss": 0.1095, + "step": 3282 + }, + { + "epoch": 1.06, + "learning_rate": 1.5035055581545574e-05, + "loss": 0.1134, + "step": 3283 + }, + { + "epoch": 1.06, + "learning_rate": 1.5032054049526623e-05, + "loss": 0.1061, + "step": 3284 + }, + { + "epoch": 1.06, + "learning_rate": 1.5029051910318622e-05, + "loss": 0.1263, + "step": 3285 + }, + { + "epoch": 1.06, + "learning_rate": 1.5026049164283824e-05, + "loss": 0.1264, + "step": 3286 + }, + { + "epoch": 1.06, + "learning_rate": 1.5023045811784548e-05, + "loss": 0.1131, + "step": 3287 + }, + { + "epoch": 1.06, + "learning_rate": 1.5020041853183198e-05, + "loss": 0.1236, + "step": 3288 + }, + { + "epoch": 1.06, + "learning_rate": 1.5017037288842238e-05, + "loss": 0.1042, + "step": 3289 + }, + { + "epoch": 1.06, + "learning_rate": 1.5014032119124215e-05, + "loss": 0.1074, + "step": 3290 + }, + { + "epoch": 1.06, + "learning_rate": 1.5011026344391747e-05, + "loss": 0.1168, + "step": 3291 + }, + { + "epoch": 1.06, + "learning_rate": 1.5008019965007518e-05, + "loss": 0.1023, + "step": 3292 + }, + { + "epoch": 1.06, + "learning_rate": 1.5005012981334294e-05, + "loss": 0.1022, + "step": 3293 + }, + { + "epoch": 1.06, + "learning_rate": 1.5002005393734914e-05, + "loss": 0.1256, + "step": 3294 + }, + { + "epoch": 1.06, + "learning_rate": 1.4998997202572278e-05, + "loss": 0.1088, + "step": 3295 + }, + { + "epoch": 1.06, + "learning_rate": 1.4995988408209374e-05, + "loss": 0.1099, + "step": 3296 + }, + { + "epoch": 1.06, + "learning_rate": 1.4992979011009254e-05, + "loss": 0.1262, + "step": 3297 + }, + { + "epoch": 1.06, + "learning_rate": 1.4989969011335043e-05, + "loss": 0.105, + "step": 3298 + }, + { + "epoch": 1.06, + "learning_rate": 1.4986958409549943e-05, + "loss": 0.1027, + "step": 3299 + }, + { + "epoch": 1.06, + "learning_rate": 1.4983947206017225e-05, + "loss": 0.1133, + "step": 3300 + }, + { + "epoch": 1.06, + "learning_rate": 1.4980935401100234e-05, + "loss": 0.1175, + "step": 3301 + }, + { + "epoch": 1.06, + "learning_rate": 1.4977922995162384e-05, + "loss": 0.1044, + "step": 3302 + }, + { + "epoch": 1.06, + "learning_rate": 1.4974909988567168e-05, + "loss": 0.1153, + "step": 3303 + }, + { + "epoch": 1.06, + "learning_rate": 1.4971896381678152e-05, + "loss": 0.1146, + "step": 3304 + }, + { + "epoch": 1.06, + "learning_rate": 1.4968882174858962e-05, + "loss": 0.1146, + "step": 3305 + }, + { + "epoch": 1.06, + "learning_rate": 1.4965867368473308e-05, + "loss": 0.1073, + "step": 3306 + }, + { + "epoch": 1.06, + "learning_rate": 1.4962851962884972e-05, + "loss": 0.1154, + "step": 3307 + }, + { + "epoch": 1.06, + "learning_rate": 1.4959835958457807e-05, + "loss": 0.1135, + "step": 3308 + }, + { + "epoch": 1.06, + "learning_rate": 1.495681935555573e-05, + "loss": 0.1161, + "step": 3309 + }, + { + "epoch": 1.06, + "learning_rate": 1.4953802154542743e-05, + "loss": 0.1146, + "step": 3310 + }, + { + "epoch": 1.06, + "learning_rate": 1.4950784355782912e-05, + "loss": 0.1157, + "step": 3311 + }, + { + "epoch": 1.07, + "learning_rate": 1.4947765959640379e-05, + "loss": 0.1133, + "step": 3312 + }, + { + "epoch": 1.07, + "learning_rate": 1.4944746966479355e-05, + "loss": 0.1128, + "step": 3313 + }, + { + "epoch": 1.07, + "learning_rate": 1.4941727376664125e-05, + "loss": 0.1104, + "step": 3314 + }, + { + "epoch": 1.07, + "learning_rate": 1.4938707190559046e-05, + "loss": 0.1113, + "step": 3315 + }, + { + "epoch": 1.07, + "learning_rate": 1.4935686408528548e-05, + "loss": 0.1133, + "step": 3316 + }, + { + "epoch": 1.07, + "learning_rate": 1.4932665030937126e-05, + "loss": 0.1009, + "step": 3317 + }, + { + "epoch": 1.07, + "learning_rate": 1.4929643058149357e-05, + "loss": 0.1222, + "step": 3318 + }, + { + "epoch": 1.07, + "learning_rate": 1.4926620490529883e-05, + "loss": 0.1206, + "step": 3319 + }, + { + "epoch": 1.07, + "learning_rate": 1.4923597328443423e-05, + "loss": 0.1, + "step": 3320 + }, + { + "epoch": 1.07, + "learning_rate": 1.4920573572254761e-05, + "loss": 0.1031, + "step": 3321 + }, + { + "epoch": 1.07, + "learning_rate": 1.491754922232876e-05, + "loss": 0.115, + "step": 3322 + }, + { + "epoch": 1.07, + "learning_rate": 1.4914524279030344e-05, + "loss": 0.1082, + "step": 3323 + }, + { + "epoch": 1.07, + "learning_rate": 1.4911498742724523e-05, + "loss": 0.1094, + "step": 3324 + }, + { + "epoch": 1.07, + "learning_rate": 1.490847261377637e-05, + "loss": 0.1264, + "step": 3325 + }, + { + "epoch": 1.07, + "learning_rate": 1.4905445892551027e-05, + "loss": 0.1224, + "step": 3326 + }, + { + "epoch": 1.07, + "learning_rate": 1.4902418579413713e-05, + "loss": 0.1057, + "step": 3327 + }, + { + "epoch": 1.07, + "learning_rate": 1.4899390674729716e-05, + "loss": 0.1093, + "step": 3328 + }, + { + "epoch": 1.07, + "learning_rate": 1.4896362178864396e-05, + "loss": 0.1333, + "step": 3329 + }, + { + "epoch": 1.07, + "learning_rate": 1.4893333092183184e-05, + "loss": 0.1115, + "step": 3330 + }, + { + "epoch": 1.07, + "learning_rate": 1.4890303415051584e-05, + "loss": 0.1064, + "step": 3331 + }, + { + "epoch": 1.07, + "learning_rate": 1.488727314783517e-05, + "loss": 0.1128, + "step": 3332 + }, + { + "epoch": 1.07, + "learning_rate": 1.4884242290899585e-05, + "loss": 0.1048, + "step": 3333 + }, + { + "epoch": 1.07, + "learning_rate": 1.4881210844610543e-05, + "loss": 0.104, + "step": 3334 + }, + { + "epoch": 1.07, + "learning_rate": 1.4878178809333834e-05, + "loss": 0.1133, + "step": 3335 + }, + { + "epoch": 1.07, + "learning_rate": 1.487514618543532e-05, + "loss": 0.1077, + "step": 3336 + }, + { + "epoch": 1.07, + "learning_rate": 1.4872112973280923e-05, + "loss": 0.115, + "step": 3337 + }, + { + "epoch": 1.07, + "learning_rate": 1.4869079173236648e-05, + "loss": 0.1237, + "step": 3338 + }, + { + "epoch": 1.07, + "learning_rate": 1.4866044785668563e-05, + "loss": 0.1028, + "step": 3339 + }, + { + "epoch": 1.07, + "learning_rate": 1.4863009810942814e-05, + "loss": 0.1024, + "step": 3340 + }, + { + "epoch": 1.07, + "learning_rate": 1.4859974249425615e-05, + "loss": 0.1037, + "step": 3341 + }, + { + "epoch": 1.07, + "learning_rate": 1.485693810148324e-05, + "loss": 0.1087, + "step": 3342 + }, + { + "epoch": 1.08, + "learning_rate": 1.4853901367482056e-05, + "loss": 0.1167, + "step": 3343 + }, + { + "epoch": 1.08, + "learning_rate": 1.4850864047788481e-05, + "loss": 0.1058, + "step": 3344 + }, + { + "epoch": 1.08, + "learning_rate": 1.484782614276901e-05, + "loss": 0.1168, + "step": 3345 + }, + { + "epoch": 1.08, + "learning_rate": 1.4844787652790215e-05, + "loss": 0.1051, + "step": 3346 + }, + { + "epoch": 1.08, + "learning_rate": 1.484174857821873e-05, + "loss": 0.1033, + "step": 3347 + }, + { + "epoch": 1.08, + "learning_rate": 1.4838708919421261e-05, + "loss": 0.12, + "step": 3348 + }, + { + "epoch": 1.08, + "learning_rate": 1.4835668676764588e-05, + "loss": 0.1071, + "step": 3349 + }, + { + "epoch": 1.08, + "learning_rate": 1.4832627850615563e-05, + "loss": 0.1112, + "step": 3350 + }, + { + "epoch": 1.08, + "learning_rate": 1.4829586441341097e-05, + "loss": 0.1172, + "step": 3351 + }, + { + "epoch": 1.08, + "learning_rate": 1.4826544449308186e-05, + "loss": 0.121, + "step": 3352 + }, + { + "epoch": 1.08, + "learning_rate": 1.482350187488389e-05, + "loss": 0.0994, + "step": 3353 + }, + { + "epoch": 1.08, + "learning_rate": 1.4820458718435333e-05, + "loss": 0.1174, + "step": 3354 + }, + { + "epoch": 1.08, + "learning_rate": 1.481741498032972e-05, + "loss": 0.1449, + "step": 3355 + }, + { + "epoch": 1.08, + "learning_rate": 1.4814370660934323e-05, + "loss": 0.1152, + "step": 3356 + }, + { + "epoch": 1.08, + "learning_rate": 1.4811325760616478e-05, + "loss": 0.1129, + "step": 3357 + }, + { + "epoch": 1.08, + "learning_rate": 1.4808280279743594e-05, + "loss": 0.1033, + "step": 3358 + }, + { + "epoch": 1.08, + "learning_rate": 1.4805234218683163e-05, + "loss": 0.1122, + "step": 3359 + }, + { + "epoch": 1.08, + "learning_rate": 1.480218757780272e-05, + "loss": 0.11, + "step": 3360 + }, + { + "epoch": 1.08, + "learning_rate": 1.4799140357469894e-05, + "loss": 0.1036, + "step": 3361 + }, + { + "epoch": 1.08, + "learning_rate": 1.4796092558052379e-05, + "loss": 0.1097, + "step": 3362 + }, + { + "epoch": 1.08, + "learning_rate": 1.479304417991793e-05, + "loss": 0.1263, + "step": 3363 + }, + { + "epoch": 1.08, + "learning_rate": 1.4789995223434376e-05, + "loss": 0.1087, + "step": 3364 + }, + { + "epoch": 1.08, + "learning_rate": 1.4786945688969625e-05, + "loss": 0.1127, + "step": 3365 + }, + { + "epoch": 1.08, + "learning_rate": 1.4783895576891639e-05, + "loss": 0.1118, + "step": 3366 + }, + { + "epoch": 1.08, + "learning_rate": 1.4780844887568457e-05, + "loss": 0.1077, + "step": 3367 + }, + { + "epoch": 1.08, + "learning_rate": 1.4777793621368197e-05, + "loss": 0.1143, + "step": 3368 + }, + { + "epoch": 1.08, + "learning_rate": 1.477474177865903e-05, + "loss": 0.1183, + "step": 3369 + }, + { + "epoch": 1.08, + "learning_rate": 1.4771689359809202e-05, + "loss": 0.1121, + "step": 3370 + }, + { + "epoch": 1.08, + "learning_rate": 1.4768636365187038e-05, + "loss": 0.0956, + "step": 3371 + }, + { + "epoch": 1.08, + "learning_rate": 1.4765582795160926e-05, + "loss": 0.1152, + "step": 3372 + }, + { + "epoch": 1.08, + "learning_rate": 1.4762528650099315e-05, + "loss": 0.1137, + "step": 3373 + }, + { + "epoch": 1.09, + "learning_rate": 1.4759473930370738e-05, + "loss": 0.129, + "step": 3374 + }, + { + "epoch": 1.09, + "learning_rate": 1.4756418636343785e-05, + "loss": 0.1193, + "step": 3375 + }, + { + "epoch": 1.09, + "learning_rate": 1.4753362768387125e-05, + "loss": 0.1152, + "step": 3376 + }, + { + "epoch": 1.09, + "learning_rate": 1.4750306326869492e-05, + "loss": 0.1133, + "step": 3377 + }, + { + "epoch": 1.09, + "learning_rate": 1.4747249312159685e-05, + "loss": 0.1033, + "step": 3378 + }, + { + "epoch": 1.09, + "learning_rate": 1.4744191724626582e-05, + "loss": 0.1129, + "step": 3379 + }, + { + "epoch": 1.09, + "learning_rate": 1.474113356463912e-05, + "loss": 0.1138, + "step": 3380 + }, + { + "epoch": 1.09, + "learning_rate": 1.4738074832566313e-05, + "loss": 0.1046, + "step": 3381 + }, + { + "epoch": 1.09, + "learning_rate": 1.4735015528777239e-05, + "loss": 0.1103, + "step": 3382 + }, + { + "epoch": 1.09, + "learning_rate": 1.4731955653641045e-05, + "loss": 0.1259, + "step": 3383 + }, + { + "epoch": 1.09, + "learning_rate": 1.4728895207526952e-05, + "loss": 0.1128, + "step": 3384 + }, + { + "epoch": 1.09, + "learning_rate": 1.4725834190804244e-05, + "loss": 0.1121, + "step": 3385 + }, + { + "epoch": 1.09, + "learning_rate": 1.4722772603842277e-05, + "loss": 0.1177, + "step": 3386 + }, + { + "epoch": 1.09, + "learning_rate": 1.4719710447010475e-05, + "loss": 0.1051, + "step": 3387 + }, + { + "epoch": 1.09, + "learning_rate": 1.471664772067833e-05, + "loss": 0.1045, + "step": 3388 + }, + { + "epoch": 1.09, + "learning_rate": 1.4713584425215405e-05, + "loss": 0.1241, + "step": 3389 + }, + { + "epoch": 1.09, + "learning_rate": 1.4710520560991333e-05, + "loss": 0.1126, + "step": 3390 + }, + { + "epoch": 1.09, + "learning_rate": 1.4707456128375803e-05, + "loss": 0.1113, + "step": 3391 + }, + { + "epoch": 1.09, + "learning_rate": 1.4704391127738596e-05, + "loss": 0.117, + "step": 3392 + }, + { + "epoch": 1.09, + "learning_rate": 1.4701325559449537e-05, + "loss": 0.1143, + "step": 3393 + }, + { + "epoch": 1.09, + "learning_rate": 1.4698259423878534e-05, + "loss": 0.1249, + "step": 3394 + }, + { + "epoch": 1.09, + "learning_rate": 1.4695192721395563e-05, + "loss": 0.1226, + "step": 3395 + }, + { + "epoch": 1.09, + "learning_rate": 1.4692125452370664e-05, + "loss": 0.1127, + "step": 3396 + }, + { + "epoch": 1.09, + "learning_rate": 1.4689057617173944e-05, + "loss": 0.1136, + "step": 3397 + }, + { + "epoch": 1.09, + "learning_rate": 1.4685989216175583e-05, + "loss": 0.1281, + "step": 3398 + }, + { + "epoch": 1.09, + "learning_rate": 1.468292024974583e-05, + "loss": 0.1062, + "step": 3399 + }, + { + "epoch": 1.09, + "learning_rate": 1.467985071825499e-05, + "loss": 0.1279, + "step": 3400 + }, + { + "epoch": 1.09, + "learning_rate": 1.4676780622073455e-05, + "loss": 0.1116, + "step": 3401 + }, + { + "epoch": 1.09, + "learning_rate": 1.4673709961571678e-05, + "loss": 0.107, + "step": 3402 + }, + { + "epoch": 1.09, + "learning_rate": 1.4670638737120167e-05, + "loss": 0.1119, + "step": 3403 + }, + { + "epoch": 1.09, + "learning_rate": 1.4667566949089516e-05, + "loss": 0.1024, + "step": 3404 + }, + { + "epoch": 1.1, + "learning_rate": 1.4664494597850384e-05, + "loss": 0.1152, + "step": 3405 + }, + { + "epoch": 1.1, + "learning_rate": 1.4661421683773485e-05, + "loss": 0.1144, + "step": 3406 + }, + { + "epoch": 1.1, + "learning_rate": 1.4658348207229614e-05, + "loss": 0.1157, + "step": 3407 + }, + { + "epoch": 1.1, + "learning_rate": 1.4655274168589635e-05, + "loss": 0.1047, + "step": 3408 + }, + { + "epoch": 1.1, + "learning_rate": 1.4652199568224464e-05, + "loss": 0.1163, + "step": 3409 + }, + { + "epoch": 1.1, + "learning_rate": 1.46491244065051e-05, + "loss": 0.1138, + "step": 3410 + }, + { + "epoch": 1.1, + "learning_rate": 1.464604868380261e-05, + "loss": 0.117, + "step": 3411 + }, + { + "epoch": 1.1, + "learning_rate": 1.4642972400488118e-05, + "loss": 0.1107, + "step": 3412 + }, + { + "epoch": 1.1, + "learning_rate": 1.4639895556932821e-05, + "loss": 0.1194, + "step": 3413 + }, + { + "epoch": 1.1, + "learning_rate": 1.463681815350799e-05, + "loss": 0.1127, + "step": 3414 + }, + { + "epoch": 1.1, + "learning_rate": 1.4633740190584954e-05, + "loss": 0.102, + "step": 3415 + }, + { + "epoch": 1.1, + "learning_rate": 1.4630661668535106e-05, + "loss": 0.1129, + "step": 3416 + }, + { + "epoch": 1.1, + "learning_rate": 1.4627582587729927e-05, + "loss": 0.1094, + "step": 3417 + }, + { + "epoch": 1.1, + "learning_rate": 1.4624502948540944e-05, + "loss": 0.111, + "step": 3418 + }, + { + "epoch": 1.1, + "learning_rate": 1.462142275133976e-05, + "loss": 0.1151, + "step": 3419 + }, + { + "epoch": 1.1, + "learning_rate": 1.4618341996498044e-05, + "loss": 0.1163, + "step": 3420 + }, + { + "epoch": 1.1, + "learning_rate": 1.4615260684387538e-05, + "loss": 0.1222, + "step": 3421 + }, + { + "epoch": 1.1, + "learning_rate": 1.4612178815380041e-05, + "loss": 0.11, + "step": 3422 + }, + { + "epoch": 1.1, + "learning_rate": 1.4609096389847425e-05, + "loss": 0.1114, + "step": 3423 + }, + { + "epoch": 1.1, + "learning_rate": 1.460601340816163e-05, + "loss": 0.1191, + "step": 3424 + }, + { + "epoch": 1.1, + "learning_rate": 1.4602929870694662e-05, + "loss": 0.1268, + "step": 3425 + }, + { + "epoch": 1.1, + "learning_rate": 1.4599845777818594e-05, + "loss": 0.1136, + "step": 3426 + }, + { + "epoch": 1.1, + "learning_rate": 1.4596761129905563e-05, + "loss": 0.1041, + "step": 3427 + }, + { + "epoch": 1.1, + "learning_rate": 1.459367592732778e-05, + "loss": 0.1108, + "step": 3428 + }, + { + "epoch": 1.1, + "learning_rate": 1.459059017045751e-05, + "loss": 0.1214, + "step": 3429 + }, + { + "epoch": 1.1, + "learning_rate": 1.4587503859667106e-05, + "loss": 0.116, + "step": 3430 + }, + { + "epoch": 1.1, + "learning_rate": 1.458441699532897e-05, + "loss": 0.1141, + "step": 3431 + }, + { + "epoch": 1.1, + "learning_rate": 1.4581329577815571e-05, + "loss": 0.0966, + "step": 3432 + }, + { + "epoch": 1.1, + "learning_rate": 1.4578241607499458e-05, + "loss": 0.1182, + "step": 3433 + }, + { + "epoch": 1.1, + "learning_rate": 1.4575153084753233e-05, + "loss": 0.1155, + "step": 3434 + }, + { + "epoch": 1.1, + "learning_rate": 1.4572064009949571e-05, + "loss": 0.1053, + "step": 3435 + }, + { + "epoch": 1.11, + "learning_rate": 1.4568974383461217e-05, + "loss": 0.1264, + "step": 3436 + }, + { + "epoch": 1.11, + "learning_rate": 1.4565884205660975e-05, + "loss": 0.1168, + "step": 3437 + }, + { + "epoch": 1.11, + "learning_rate": 1.456279347692172e-05, + "loss": 0.1141, + "step": 3438 + }, + { + "epoch": 1.11, + "learning_rate": 1.455970219761639e-05, + "loss": 0.1151, + "step": 3439 + }, + { + "epoch": 1.11, + "learning_rate": 1.4556610368117996e-05, + "loss": 0.1174, + "step": 3440 + }, + { + "epoch": 1.11, + "learning_rate": 1.4553517988799609e-05, + "loss": 0.1127, + "step": 3441 + }, + { + "epoch": 1.11, + "learning_rate": 1.4550425060034367e-05, + "loss": 0.1157, + "step": 3442 + }, + { + "epoch": 1.11, + "learning_rate": 1.4547331582195479e-05, + "loss": 0.1169, + "step": 3443 + }, + { + "epoch": 1.11, + "learning_rate": 1.4544237555656217e-05, + "loss": 0.1192, + "step": 3444 + }, + { + "epoch": 1.11, + "learning_rate": 1.454114298078992e-05, + "loss": 0.1086, + "step": 3445 + }, + { + "epoch": 1.11, + "learning_rate": 1.4538047857969988e-05, + "loss": 0.1156, + "step": 3446 + }, + { + "epoch": 1.11, + "learning_rate": 1.4534952187569897e-05, + "loss": 0.115, + "step": 3447 + }, + { + "epoch": 1.11, + "learning_rate": 1.453185596996318e-05, + "loss": 0.108, + "step": 3448 + }, + { + "epoch": 1.11, + "learning_rate": 1.4528759205523442e-05, + "loss": 0.1081, + "step": 3449 + }, + { + "epoch": 1.11, + "learning_rate": 1.4525661894624353e-05, + "loss": 0.1085, + "step": 3450 + }, + { + "epoch": 1.11, + "learning_rate": 1.4522564037639644e-05, + "loss": 0.1079, + "step": 3451 + }, + { + "epoch": 1.11, + "learning_rate": 1.4519465634943115e-05, + "loss": 0.1098, + "step": 3452 + }, + { + "epoch": 1.11, + "learning_rate": 1.4516366686908639e-05, + "loss": 0.1183, + "step": 3453 + }, + { + "epoch": 1.11, + "learning_rate": 1.4513267193910141e-05, + "loss": 0.1142, + "step": 3454 + }, + { + "epoch": 1.11, + "learning_rate": 1.4510167156321621e-05, + "loss": 0.1215, + "step": 3455 + }, + { + "epoch": 1.11, + "learning_rate": 1.4507066574517148e-05, + "loss": 0.1087, + "step": 3456 + }, + { + "epoch": 1.11, + "learning_rate": 1.4503965448870848e-05, + "loss": 0.1208, + "step": 3457 + }, + { + "epoch": 1.11, + "learning_rate": 1.450086377975691e-05, + "loss": 0.1236, + "step": 3458 + }, + { + "epoch": 1.11, + "learning_rate": 1.4497761567549602e-05, + "loss": 0.109, + "step": 3459 + }, + { + "epoch": 1.11, + "learning_rate": 1.449465881262325e-05, + "loss": 0.113, + "step": 3460 + }, + { + "epoch": 1.11, + "learning_rate": 1.4491555515352242e-05, + "loss": 0.1208, + "step": 3461 + }, + { + "epoch": 1.11, + "learning_rate": 1.4488451676111037e-05, + "loss": 0.1107, + "step": 3462 + }, + { + "epoch": 1.11, + "learning_rate": 1.4485347295274158e-05, + "loss": 0.1179, + "step": 3463 + }, + { + "epoch": 1.11, + "learning_rate": 1.4482242373216191e-05, + "loss": 0.1264, + "step": 3464 + }, + { + "epoch": 1.11, + "learning_rate": 1.4479136910311789e-05, + "loss": 0.1044, + "step": 3465 + }, + { + "epoch": 1.11, + "learning_rate": 1.4476030906935672e-05, + "loss": 0.1261, + "step": 3466 + }, + { + "epoch": 1.12, + "learning_rate": 1.4472924363462624e-05, + "loss": 0.1177, + "step": 3467 + }, + { + "epoch": 1.12, + "learning_rate": 1.4469817280267491e-05, + "loss": 0.1196, + "step": 3468 + }, + { + "epoch": 1.12, + "learning_rate": 1.4466709657725191e-05, + "loss": 0.113, + "step": 3469 + }, + { + "epoch": 1.12, + "learning_rate": 1.4463601496210699e-05, + "loss": 0.1391, + "step": 3470 + }, + { + "epoch": 1.12, + "learning_rate": 1.446049279609906e-05, + "loss": 0.1121, + "step": 3471 + }, + { + "epoch": 1.12, + "learning_rate": 1.4457383557765385e-05, + "loss": 0.1124, + "step": 3472 + }, + { + "epoch": 1.12, + "learning_rate": 1.4454273781584847e-05, + "loss": 0.1068, + "step": 3473 + }, + { + "epoch": 1.12, + "learning_rate": 1.4451163467932681e-05, + "loss": 0.1162, + "step": 3474 + }, + { + "epoch": 1.12, + "learning_rate": 1.4448052617184196e-05, + "loss": 0.1037, + "step": 3475 + }, + { + "epoch": 1.12, + "learning_rate": 1.444494122971476e-05, + "loss": 0.1174, + "step": 3476 + }, + { + "epoch": 1.12, + "learning_rate": 1.44418293058998e-05, + "loss": 0.1118, + "step": 3477 + }, + { + "epoch": 1.12, + "learning_rate": 1.443871684611482e-05, + "loss": 0.1062, + "step": 3478 + }, + { + "epoch": 1.12, + "learning_rate": 1.4435603850735387e-05, + "loss": 0.1228, + "step": 3479 + }, + { + "epoch": 1.12, + "learning_rate": 1.4432490320137116e-05, + "loss": 0.1256, + "step": 3480 + }, + { + "epoch": 1.12, + "learning_rate": 1.4429376254695706e-05, + "loss": 0.1096, + "step": 3481 + }, + { + "epoch": 1.12, + "learning_rate": 1.4426261654786915e-05, + "loss": 0.103, + "step": 3482 + }, + { + "epoch": 1.12, + "learning_rate": 1.4423146520786561e-05, + "loss": 0.1016, + "step": 3483 + }, + { + "epoch": 1.12, + "learning_rate": 1.442003085307053e-05, + "loss": 0.1148, + "step": 3484 + }, + { + "epoch": 1.12, + "learning_rate": 1.441691465201477e-05, + "loss": 0.1145, + "step": 3485 + }, + { + "epoch": 1.12, + "learning_rate": 1.44137979179953e-05, + "loss": 0.1262, + "step": 3486 + }, + { + "epoch": 1.12, + "learning_rate": 1.4410680651388194e-05, + "loss": 0.112, + "step": 3487 + }, + { + "epoch": 1.12, + "learning_rate": 1.4407562852569594e-05, + "loss": 0.1155, + "step": 3488 + }, + { + "epoch": 1.12, + "learning_rate": 1.4404444521915713e-05, + "loss": 0.1158, + "step": 3489 + }, + { + "epoch": 1.12, + "learning_rate": 1.4401325659802812e-05, + "loss": 0.1132, + "step": 3490 + }, + { + "epoch": 1.12, + "learning_rate": 1.4398206266607236e-05, + "loss": 0.1064, + "step": 3491 + }, + { + "epoch": 1.12, + "learning_rate": 1.439508634270538e-05, + "loss": 0.1187, + "step": 3492 + }, + { + "epoch": 1.12, + "learning_rate": 1.4391965888473705e-05, + "loss": 0.1062, + "step": 3493 + }, + { + "epoch": 1.12, + "learning_rate": 1.4388844904288743e-05, + "loss": 0.1037, + "step": 3494 + }, + { + "epoch": 1.12, + "learning_rate": 1.4385723390527083e-05, + "loss": 0.1168, + "step": 3495 + }, + { + "epoch": 1.12, + "learning_rate": 1.4382601347565377e-05, + "loss": 0.1071, + "step": 3496 + }, + { + "epoch": 1.12, + "learning_rate": 1.4379478775780351e-05, + "loss": 0.1064, + "step": 3497 + }, + { + "epoch": 1.13, + "learning_rate": 1.437635567554878e-05, + "loss": 0.1126, + "step": 3498 + }, + { + "epoch": 1.13, + "learning_rate": 1.4373232047247515e-05, + "loss": 0.1114, + "step": 3499 + }, + { + "epoch": 1.13, + "learning_rate": 1.4370107891253468e-05, + "loss": 0.1105, + "step": 3500 + }, + { + "epoch": 1.13, + "learning_rate": 1.436698320794361e-05, + "loss": 0.1087, + "step": 3501 + }, + { + "epoch": 1.13, + "learning_rate": 1.436385799769498e-05, + "loss": 0.1107, + "step": 3502 + }, + { + "epoch": 1.13, + "learning_rate": 1.4360732260884678e-05, + "loss": 0.1157, + "step": 3503 + }, + { + "epoch": 1.13, + "learning_rate": 1.4357605997889867e-05, + "loss": 0.1139, + "step": 3504 + }, + { + "epoch": 1.13, + "learning_rate": 1.435447920908778e-05, + "loss": 0.1183, + "step": 3505 + }, + { + "epoch": 1.13, + "learning_rate": 1.4351351894855705e-05, + "loss": 0.133, + "step": 3506 + }, + { + "epoch": 1.13, + "learning_rate": 1.4348224055570996e-05, + "loss": 0.1086, + "step": 3507 + }, + { + "epoch": 1.13, + "learning_rate": 1.4345095691611076e-05, + "loss": 0.1103, + "step": 3508 + }, + { + "epoch": 1.13, + "learning_rate": 1.4341966803353423e-05, + "loss": 0.1191, + "step": 3509 + }, + { + "epoch": 1.13, + "learning_rate": 1.4338837391175582e-05, + "loss": 0.115, + "step": 3510 + }, + { + "epoch": 1.13, + "learning_rate": 1.4335707455455164e-05, + "loss": 0.1077, + "step": 3511 + }, + { + "epoch": 1.13, + "learning_rate": 1.4332576996569838e-05, + "loss": 0.1232, + "step": 3512 + }, + { + "epoch": 1.13, + "learning_rate": 1.4329446014897339e-05, + "loss": 0.1132, + "step": 3513 + }, + { + "epoch": 1.13, + "learning_rate": 1.4326314510815464e-05, + "loss": 0.1142, + "step": 3514 + }, + { + "epoch": 1.13, + "learning_rate": 1.4323182484702076e-05, + "loss": 0.1182, + "step": 3515 + }, + { + "epoch": 1.13, + "learning_rate": 1.4320049936935097e-05, + "loss": 0.1141, + "step": 3516 + }, + { + "epoch": 1.13, + "learning_rate": 1.431691686789251e-05, + "loss": 0.1173, + "step": 3517 + }, + { + "epoch": 1.13, + "learning_rate": 1.4313783277952371e-05, + "loss": 0.1153, + "step": 3518 + }, + { + "epoch": 1.13, + "learning_rate": 1.431064916749279e-05, + "loss": 0.1053, + "step": 3519 + }, + { + "epoch": 1.13, + "learning_rate": 1.4307514536891938e-05, + "loss": 0.1135, + "step": 3520 + }, + { + "epoch": 1.13, + "learning_rate": 1.4304379386528058e-05, + "loss": 0.1159, + "step": 3521 + }, + { + "epoch": 1.13, + "learning_rate": 1.4301243716779447e-05, + "loss": 0.1063, + "step": 3522 + }, + { + "epoch": 1.13, + "learning_rate": 1.429810752802447e-05, + "loss": 0.1092, + "step": 3523 + }, + { + "epoch": 1.13, + "learning_rate": 1.4294970820641556e-05, + "loss": 0.1169, + "step": 3524 + }, + { + "epoch": 1.13, + "learning_rate": 1.4291833595009188e-05, + "loss": 0.1132, + "step": 3525 + }, + { + "epoch": 1.13, + "learning_rate": 1.4288695851505919e-05, + "loss": 0.1128, + "step": 3526 + }, + { + "epoch": 1.13, + "learning_rate": 1.428555759051036e-05, + "loss": 0.1201, + "step": 3527 + }, + { + "epoch": 1.13, + "learning_rate": 1.4282418812401197e-05, + "loss": 0.1158, + "step": 3528 + }, + { + "epoch": 1.14, + "learning_rate": 1.4279279517557156e-05, + "loss": 0.1019, + "step": 3529 + }, + { + "epoch": 1.14, + "learning_rate": 1.4276139706357042e-05, + "loss": 0.0994, + "step": 3530 + }, + { + "epoch": 1.14, + "learning_rate": 1.4272999379179724e-05, + "loss": 0.106, + "step": 3531 + }, + { + "epoch": 1.14, + "learning_rate": 1.4269858536404116e-05, + "loss": 0.1132, + "step": 3532 + }, + { + "epoch": 1.14, + "learning_rate": 1.4266717178409211e-05, + "loss": 0.1116, + "step": 3533 + }, + { + "epoch": 1.14, + "learning_rate": 1.4263575305574065e-05, + "loss": 0.1108, + "step": 3534 + }, + { + "epoch": 1.14, + "learning_rate": 1.4260432918277779e-05, + "loss": 0.108, + "step": 3535 + }, + { + "epoch": 1.14, + "learning_rate": 1.425729001689953e-05, + "loss": 0.1118, + "step": 3536 + }, + { + "epoch": 1.14, + "learning_rate": 1.425414660181856e-05, + "loss": 0.1138, + "step": 3537 + }, + { + "epoch": 1.14, + "learning_rate": 1.4251002673414158e-05, + "loss": 0.1126, + "step": 3538 + }, + { + "epoch": 1.14, + "learning_rate": 1.424785823206569e-05, + "loss": 0.1123, + "step": 3539 + }, + { + "epoch": 1.14, + "learning_rate": 1.4244713278152575e-05, + "loss": 0.1155, + "step": 3540 + }, + { + "epoch": 1.14, + "learning_rate": 1.4241567812054297e-05, + "loss": 0.1088, + "step": 3541 + }, + { + "epoch": 1.14, + "learning_rate": 1.4238421834150403e-05, + "loss": 0.1183, + "step": 3542 + }, + { + "epoch": 1.14, + "learning_rate": 1.4235275344820498e-05, + "loss": 0.1145, + "step": 3543 + }, + { + "epoch": 1.14, + "learning_rate": 1.4232128344444251e-05, + "loss": 0.1324, + "step": 3544 + }, + { + "epoch": 1.14, + "learning_rate": 1.4228980833401391e-05, + "loss": 0.1162, + "step": 3545 + }, + { + "epoch": 1.14, + "learning_rate": 1.4225832812071716e-05, + "loss": 0.0998, + "step": 3546 + }, + { + "epoch": 1.14, + "learning_rate": 1.4222684280835073e-05, + "loss": 0.1207, + "step": 3547 + }, + { + "epoch": 1.14, + "learning_rate": 1.4219535240071378e-05, + "loss": 0.1161, + "step": 3548 + }, + { + "epoch": 1.14, + "learning_rate": 1.4216385690160613e-05, + "loss": 0.1084, + "step": 3549 + }, + { + "epoch": 1.14, + "learning_rate": 1.4213235631482809e-05, + "loss": 0.121, + "step": 3550 + }, + { + "epoch": 1.14, + "learning_rate": 1.421008506441807e-05, + "loss": 0.1044, + "step": 3551 + }, + { + "epoch": 1.14, + "learning_rate": 1.4206933989346559e-05, + "loss": 0.1121, + "step": 3552 + }, + { + "epoch": 1.14, + "learning_rate": 1.420378240664849e-05, + "loss": 0.1235, + "step": 3553 + }, + { + "epoch": 1.14, + "learning_rate": 1.4200630316704153e-05, + "loss": 0.1355, + "step": 3554 + }, + { + "epoch": 1.14, + "learning_rate": 1.4197477719893893e-05, + "loss": 0.1195, + "step": 3555 + }, + { + "epoch": 1.14, + "learning_rate": 1.419432461659811e-05, + "loss": 0.1054, + "step": 3556 + }, + { + "epoch": 1.14, + "learning_rate": 1.4191171007197278e-05, + "loss": 0.109, + "step": 3557 + }, + { + "epoch": 1.14, + "learning_rate": 1.418801689207192e-05, + "loss": 0.1115, + "step": 3558 + }, + { + "epoch": 1.14, + "learning_rate": 1.4184862271602624e-05, + "loss": 0.1218, + "step": 3559 + }, + { + "epoch": 1.15, + "learning_rate": 1.4181707146170046e-05, + "loss": 0.1157, + "step": 3560 + }, + { + "epoch": 1.15, + "learning_rate": 1.4178551516154891e-05, + "loss": 0.1085, + "step": 3561 + }, + { + "epoch": 1.15, + "learning_rate": 1.4175395381937933e-05, + "loss": 0.1083, + "step": 3562 + }, + { + "epoch": 1.15, + "learning_rate": 1.4172238743900005e-05, + "loss": 0.1167, + "step": 3563 + }, + { + "epoch": 1.15, + "learning_rate": 1.4169081602421996e-05, + "loss": 0.1117, + "step": 3564 + }, + { + "epoch": 1.15, + "learning_rate": 1.4165923957884868e-05, + "loss": 0.1267, + "step": 3565 + }, + { + "epoch": 1.15, + "learning_rate": 1.416276581066963e-05, + "loss": 0.1194, + "step": 3566 + }, + { + "epoch": 1.15, + "learning_rate": 1.4159607161157363e-05, + "loss": 0.1114, + "step": 3567 + }, + { + "epoch": 1.15, + "learning_rate": 1.4156448009729195e-05, + "loss": 0.1097, + "step": 3568 + }, + { + "epoch": 1.15, + "learning_rate": 1.4153288356766328e-05, + "loss": 0.1098, + "step": 3569 + }, + { + "epoch": 1.15, + "learning_rate": 1.415012820265002e-05, + "loss": 0.1142, + "step": 3570 + }, + { + "epoch": 1.15, + "learning_rate": 1.4146967547761585e-05, + "loss": 0.1087, + "step": 3571 + }, + { + "epoch": 1.15, + "learning_rate": 1.4143806392482403e-05, + "loss": 0.1048, + "step": 3572 + }, + { + "epoch": 1.15, + "learning_rate": 1.4140644737193911e-05, + "loss": 0.116, + "step": 3573 + }, + { + "epoch": 1.15, + "learning_rate": 1.4137482582277611e-05, + "loss": 0.1256, + "step": 3574 + }, + { + "epoch": 1.15, + "learning_rate": 1.4134319928115059e-05, + "loss": 0.1111, + "step": 3575 + }, + { + "epoch": 1.15, + "learning_rate": 1.4131156775087877e-05, + "loss": 0.1214, + "step": 3576 + }, + { + "epoch": 1.15, + "learning_rate": 1.4127993123577742e-05, + "loss": 0.116, + "step": 3577 + }, + { + "epoch": 1.15, + "learning_rate": 1.4124828973966393e-05, + "loss": 0.1079, + "step": 3578 + }, + { + "epoch": 1.15, + "learning_rate": 1.4121664326635634e-05, + "loss": 0.1198, + "step": 3579 + }, + { + "epoch": 1.15, + "learning_rate": 1.4118499181967322e-05, + "loss": 0.1107, + "step": 3580 + }, + { + "epoch": 1.15, + "learning_rate": 1.4115333540343375e-05, + "loss": 0.1077, + "step": 3581 + }, + { + "epoch": 1.15, + "learning_rate": 1.4112167402145773e-05, + "loss": 0.1111, + "step": 3582 + }, + { + "epoch": 1.15, + "learning_rate": 1.4109000767756564e-05, + "loss": 0.1106, + "step": 3583 + }, + { + "epoch": 1.15, + "learning_rate": 1.4105833637557835e-05, + "loss": 0.1152, + "step": 3584 + }, + { + "epoch": 1.15, + "learning_rate": 1.4102666011931753e-05, + "loss": 0.1179, + "step": 3585 + }, + { + "epoch": 1.15, + "learning_rate": 1.4099497891260538e-05, + "loss": 0.1063, + "step": 3586 + }, + { + "epoch": 1.15, + "learning_rate": 1.4096329275926463e-05, + "loss": 0.1192, + "step": 3587 + }, + { + "epoch": 1.15, + "learning_rate": 1.4093160166311873e-05, + "loss": 0.1067, + "step": 3588 + }, + { + "epoch": 1.15, + "learning_rate": 1.4089990562799163e-05, + "loss": 0.113, + "step": 3589 + }, + { + "epoch": 1.15, + "learning_rate": 1.408682046577079e-05, + "loss": 0.1203, + "step": 3590 + }, + { + "epoch": 1.16, + "learning_rate": 1.4083649875609272e-05, + "loss": 0.114, + "step": 3591 + }, + { + "epoch": 1.16, + "learning_rate": 1.4080478792697188e-05, + "loss": 0.1075, + "step": 3592 + }, + { + "epoch": 1.16, + "learning_rate": 1.4077307217417173e-05, + "loss": 0.1105, + "step": 3593 + }, + { + "epoch": 1.16, + "learning_rate": 1.4074135150151922e-05, + "loss": 0.1213, + "step": 3594 + }, + { + "epoch": 1.16, + "learning_rate": 1.4070962591284192e-05, + "loss": 0.1048, + "step": 3595 + }, + { + "epoch": 1.16, + "learning_rate": 1.4067789541196795e-05, + "loss": 0.1138, + "step": 3596 + }, + { + "epoch": 1.16, + "learning_rate": 1.4064616000272607e-05, + "loss": 0.1124, + "step": 3597 + }, + { + "epoch": 1.16, + "learning_rate": 1.4061441968894559e-05, + "loss": 0.1175, + "step": 3598 + }, + { + "epoch": 1.16, + "learning_rate": 1.4058267447445644e-05, + "loss": 0.1108, + "step": 3599 + }, + { + "epoch": 1.16, + "learning_rate": 1.405509243630891e-05, + "loss": 0.1222, + "step": 3600 + }, + { + "epoch": 1.16, + "learning_rate": 1.4051916935867476e-05, + "loss": 0.1075, + "step": 3601 + }, + { + "epoch": 1.16, + "learning_rate": 1.4048740946504506e-05, + "loss": 0.1085, + "step": 3602 + }, + { + "epoch": 1.16, + "learning_rate": 1.4045564468603226e-05, + "loss": 0.1139, + "step": 3603 + }, + { + "epoch": 1.16, + "learning_rate": 1.4042387502546925e-05, + "loss": 0.1139, + "step": 3604 + }, + { + "epoch": 1.16, + "learning_rate": 1.403921004871895e-05, + "loss": 0.1094, + "step": 3605 + }, + { + "epoch": 1.16, + "learning_rate": 1.4036032107502708e-05, + "loss": 0.1194, + "step": 3606 + }, + { + "epoch": 1.16, + "learning_rate": 1.4032853679281659e-05, + "loss": 0.1106, + "step": 3607 + }, + { + "epoch": 1.16, + "learning_rate": 1.4029674764439331e-05, + "loss": 0.1056, + "step": 3608 + }, + { + "epoch": 1.16, + "learning_rate": 1.4026495363359301e-05, + "loss": 0.1174, + "step": 3609 + }, + { + "epoch": 1.16, + "learning_rate": 1.4023315476425207e-05, + "loss": 0.0977, + "step": 3610 + }, + { + "epoch": 1.16, + "learning_rate": 1.4020135104020756e-05, + "loss": 0.1268, + "step": 3611 + }, + { + "epoch": 1.16, + "learning_rate": 1.4016954246529697e-05, + "loss": 0.1199, + "step": 3612 + }, + { + "epoch": 1.16, + "learning_rate": 1.4013772904335847e-05, + "loss": 0.1077, + "step": 3613 + }, + { + "epoch": 1.16, + "learning_rate": 1.4010591077823086e-05, + "loss": 0.1131, + "step": 3614 + }, + { + "epoch": 1.16, + "learning_rate": 1.400740876737534e-05, + "loss": 0.1142, + "step": 3615 + }, + { + "epoch": 1.16, + "learning_rate": 1.4004225973376604e-05, + "loss": 0.113, + "step": 3616 + }, + { + "epoch": 1.16, + "learning_rate": 1.4001042696210928e-05, + "loss": 0.1073, + "step": 3617 + }, + { + "epoch": 1.16, + "learning_rate": 1.3997858936262415e-05, + "loss": 0.1119, + "step": 3618 + }, + { + "epoch": 1.16, + "learning_rate": 1.3994674693915237e-05, + "loss": 0.1117, + "step": 3619 + }, + { + "epoch": 1.16, + "learning_rate": 1.3991489969553615e-05, + "loss": 0.1099, + "step": 3620 + }, + { + "epoch": 1.16, + "learning_rate": 1.398830476356183e-05, + "loss": 0.1144, + "step": 3621 + }, + { + "epoch": 1.17, + "learning_rate": 1.3985119076324228e-05, + "loss": 0.1172, + "step": 3622 + }, + { + "epoch": 1.17, + "learning_rate": 1.3981932908225203e-05, + "loss": 0.1011, + "step": 3623 + }, + { + "epoch": 1.17, + "learning_rate": 1.397874625964921e-05, + "loss": 0.1105, + "step": 3624 + }, + { + "epoch": 1.17, + "learning_rate": 1.3975559130980769e-05, + "loss": 0.1115, + "step": 3625 + }, + { + "epoch": 1.17, + "learning_rate": 1.3972371522604449e-05, + "loss": 0.1154, + "step": 3626 + }, + { + "epoch": 1.17, + "learning_rate": 1.3969183434904878e-05, + "loss": 0.1175, + "step": 3627 + }, + { + "epoch": 1.17, + "learning_rate": 1.396599486826675e-05, + "loss": 0.1197, + "step": 3628 + }, + { + "epoch": 1.17, + "learning_rate": 1.396280582307481e-05, + "loss": 0.1082, + "step": 3629 + }, + { + "epoch": 1.17, + "learning_rate": 1.3959616299713857e-05, + "loss": 0.1292, + "step": 3630 + }, + { + "epoch": 1.17, + "learning_rate": 1.3956426298568753e-05, + "loss": 0.1086, + "step": 3631 + }, + { + "epoch": 1.17, + "learning_rate": 1.3953235820024427e-05, + "loss": 0.1106, + "step": 3632 + }, + { + "epoch": 1.17, + "learning_rate": 1.3950044864465841e-05, + "loss": 0.1065, + "step": 3633 + }, + { + "epoch": 1.17, + "learning_rate": 1.394685343227804e-05, + "loss": 0.1193, + "step": 3634 + }, + { + "epoch": 1.17, + "learning_rate": 1.3943661523846115e-05, + "loss": 0.1072, + "step": 3635 + }, + { + "epoch": 1.17, + "learning_rate": 1.3940469139555209e-05, + "loss": 0.1145, + "step": 3636 + }, + { + "epoch": 1.17, + "learning_rate": 1.3937276279790531e-05, + "loss": 0.1165, + "step": 3637 + }, + { + "epoch": 1.17, + "learning_rate": 1.393408294493735e-05, + "loss": 0.1104, + "step": 3638 + }, + { + "epoch": 1.17, + "learning_rate": 1.3930889135380984e-05, + "loss": 0.1227, + "step": 3639 + }, + { + "epoch": 1.17, + "learning_rate": 1.3927694851506805e-05, + "loss": 0.1094, + "step": 3640 + }, + { + "epoch": 1.17, + "learning_rate": 1.3924500093700262e-05, + "loss": 0.1126, + "step": 3641 + }, + { + "epoch": 1.17, + "learning_rate": 1.3921304862346838e-05, + "loss": 0.1199, + "step": 3642 + }, + { + "epoch": 1.17, + "learning_rate": 1.3918109157832087e-05, + "loss": 0.1057, + "step": 3643 + }, + { + "epoch": 1.17, + "learning_rate": 1.3914912980541617e-05, + "loss": 0.1113, + "step": 3644 + }, + { + "epoch": 1.17, + "learning_rate": 1.3911716330861091e-05, + "loss": 0.1191, + "step": 3645 + }, + { + "epoch": 1.17, + "learning_rate": 1.3908519209176227e-05, + "loss": 0.1334, + "step": 3646 + }, + { + "epoch": 1.17, + "learning_rate": 1.3905321615872814e-05, + "loss": 0.1126, + "step": 3647 + }, + { + "epoch": 1.17, + "learning_rate": 1.3902123551336674e-05, + "loss": 0.1185, + "step": 3648 + }, + { + "epoch": 1.17, + "learning_rate": 1.3898925015953708e-05, + "loss": 0.1135, + "step": 3649 + }, + { + "epoch": 1.17, + "learning_rate": 1.3895726010109861e-05, + "loss": 0.1199, + "step": 3650 + }, + { + "epoch": 1.17, + "learning_rate": 1.389252653419114e-05, + "loss": 0.1313, + "step": 3651 + }, + { + "epoch": 1.17, + "learning_rate": 1.3889326588583605e-05, + "loss": 0.1078, + "step": 3652 + }, + { + "epoch": 1.17, + "learning_rate": 1.388612617367338e-05, + "loss": 0.1104, + "step": 3653 + }, + { + "epoch": 1.18, + "learning_rate": 1.3882925289846637e-05, + "loss": 0.1061, + "step": 3654 + }, + { + "epoch": 1.18, + "learning_rate": 1.3879723937489606e-05, + "loss": 0.1103, + "step": 3655 + }, + { + "epoch": 1.18, + "learning_rate": 1.3876522116988583e-05, + "loss": 0.1153, + "step": 3656 + }, + { + "epoch": 1.18, + "learning_rate": 1.3873319828729907e-05, + "loss": 0.1142, + "step": 3657 + }, + { + "epoch": 1.18, + "learning_rate": 1.3870117073099983e-05, + "loss": 0.1101, + "step": 3658 + }, + { + "epoch": 1.18, + "learning_rate": 1.3866913850485264e-05, + "loss": 0.0991, + "step": 3659 + }, + { + "epoch": 1.18, + "learning_rate": 1.3863710161272273e-05, + "loss": 0.1109, + "step": 3660 + }, + { + "epoch": 1.18, + "learning_rate": 1.3860506005847575e-05, + "loss": 0.1076, + "step": 3661 + }, + { + "epoch": 1.18, + "learning_rate": 1.3857301384597796e-05, + "loss": 0.1161, + "step": 3662 + }, + { + "epoch": 1.18, + "learning_rate": 1.3854096297909625e-05, + "loss": 0.1187, + "step": 3663 + }, + { + "epoch": 1.18, + "learning_rate": 1.3850890746169796e-05, + "loss": 0.1123, + "step": 3664 + }, + { + "epoch": 1.18, + "learning_rate": 1.3847684729765107e-05, + "loss": 0.1041, + "step": 3665 + }, + { + "epoch": 1.18, + "learning_rate": 1.3844478249082407e-05, + "loss": 0.1187, + "step": 3666 + }, + { + "epoch": 1.18, + "learning_rate": 1.3841271304508609e-05, + "loss": 0.1161, + "step": 3667 + }, + { + "epoch": 1.18, + "learning_rate": 1.383806389643067e-05, + "loss": 0.1177, + "step": 3668 + }, + { + "epoch": 1.18, + "learning_rate": 1.3834856025235615e-05, + "loss": 0.1022, + "step": 3669 + }, + { + "epoch": 1.18, + "learning_rate": 1.3831647691310514e-05, + "loss": 0.1146, + "step": 3670 + }, + { + "epoch": 1.18, + "learning_rate": 1.3828438895042502e-05, + "loss": 0.1119, + "step": 3671 + }, + { + "epoch": 1.18, + "learning_rate": 1.3825229636818767e-05, + "loss": 0.1199, + "step": 3672 + }, + { + "epoch": 1.18, + "learning_rate": 1.3822019917026549e-05, + "loss": 0.1035, + "step": 3673 + }, + { + "epoch": 1.18, + "learning_rate": 1.3818809736053147e-05, + "loss": 0.1174, + "step": 3674 + }, + { + "epoch": 1.18, + "learning_rate": 1.3815599094285917e-05, + "loss": 0.1157, + "step": 3675 + }, + { + "epoch": 1.18, + "learning_rate": 1.3812387992112264e-05, + "loss": 0.106, + "step": 3676 + }, + { + "epoch": 1.18, + "learning_rate": 1.3809176429919659e-05, + "loss": 0.1071, + "step": 3677 + }, + { + "epoch": 1.18, + "learning_rate": 1.380596440809562e-05, + "loss": 0.1087, + "step": 3678 + }, + { + "epoch": 1.18, + "learning_rate": 1.3802751927027722e-05, + "loss": 0.1184, + "step": 3679 + }, + { + "epoch": 1.18, + "learning_rate": 1.37995389871036e-05, + "loss": 0.1014, + "step": 3680 + }, + { + "epoch": 1.18, + "learning_rate": 1.379632558871094e-05, + "loss": 0.1141, + "step": 3681 + }, + { + "epoch": 1.18, + "learning_rate": 1.379311173223748e-05, + "loss": 0.116, + "step": 3682 + }, + { + "epoch": 1.18, + "learning_rate": 1.3789897418071022e-05, + "loss": 0.1114, + "step": 3683 + }, + { + "epoch": 1.18, + "learning_rate": 1.3786682646599421e-05, + "loss": 0.113, + "step": 3684 + }, + { + "epoch": 1.19, + "learning_rate": 1.3783467418210579e-05, + "loss": 0.1158, + "step": 3685 + }, + { + "epoch": 1.19, + "learning_rate": 1.378025173329246e-05, + "loss": 0.1115, + "step": 3686 + }, + { + "epoch": 1.19, + "learning_rate": 1.377703559223309e-05, + "loss": 0.1013, + "step": 3687 + }, + { + "epoch": 1.19, + "learning_rate": 1.3773818995420532e-05, + "loss": 0.1133, + "step": 3688 + }, + { + "epoch": 1.19, + "learning_rate": 1.3770601943242916e-05, + "loss": 0.1089, + "step": 3689 + }, + { + "epoch": 1.19, + "learning_rate": 1.3767384436088434e-05, + "loss": 0.1208, + "step": 3690 + }, + { + "epoch": 1.19, + "learning_rate": 1.3764166474345315e-05, + "loss": 0.1119, + "step": 3691 + }, + { + "epoch": 1.19, + "learning_rate": 1.3760948058401855e-05, + "loss": 0.1176, + "step": 3692 + }, + { + "epoch": 1.19, + "learning_rate": 1.37577291886464e-05, + "loss": 0.1173, + "step": 3693 + }, + { + "epoch": 1.19, + "learning_rate": 1.3754509865467359e-05, + "loss": 0.1216, + "step": 3694 + }, + { + "epoch": 1.19, + "learning_rate": 1.375129008925318e-05, + "loss": 0.1223, + "step": 3695 + }, + { + "epoch": 1.19, + "learning_rate": 1.3748069860392381e-05, + "loss": 0.1121, + "step": 3696 + }, + { + "epoch": 1.19, + "learning_rate": 1.3744849179273527e-05, + "loss": 0.1133, + "step": 3697 + }, + { + "epoch": 1.19, + "learning_rate": 1.3741628046285239e-05, + "loss": 0.1007, + "step": 3698 + }, + { + "epoch": 1.19, + "learning_rate": 1.3738406461816193e-05, + "loss": 0.1091, + "step": 3699 + }, + { + "epoch": 1.19, + "learning_rate": 1.3735184426255117e-05, + "loss": 0.1173, + "step": 3700 + }, + { + "epoch": 1.19, + "learning_rate": 1.3731961939990798e-05, + "loss": 0.1123, + "step": 3701 + }, + { + "epoch": 1.19, + "learning_rate": 1.3728739003412075e-05, + "loss": 0.1145, + "step": 3702 + }, + { + "epoch": 1.19, + "learning_rate": 1.3725515616907841e-05, + "loss": 0.1095, + "step": 3703 + }, + { + "epoch": 1.19, + "learning_rate": 1.372229178086704e-05, + "loss": 0.1225, + "step": 3704 + }, + { + "epoch": 1.19, + "learning_rate": 1.3719067495678675e-05, + "loss": 0.1226, + "step": 3705 + }, + { + "epoch": 1.19, + "learning_rate": 1.371584276173181e-05, + "loss": 0.1164, + "step": 3706 + }, + { + "epoch": 1.19, + "learning_rate": 1.3712617579415543e-05, + "loss": 0.1078, + "step": 3707 + }, + { + "epoch": 1.19, + "learning_rate": 1.3709391949119047e-05, + "loss": 0.1128, + "step": 3708 + }, + { + "epoch": 1.19, + "learning_rate": 1.3706165871231539e-05, + "loss": 0.1202, + "step": 3709 + }, + { + "epoch": 1.19, + "learning_rate": 1.3702939346142286e-05, + "loss": 0.1065, + "step": 3710 + }, + { + "epoch": 1.19, + "learning_rate": 1.3699712374240615e-05, + "loss": 0.1165, + "step": 3711 + }, + { + "epoch": 1.19, + "learning_rate": 1.3696484955915917e-05, + "loss": 0.1069, + "step": 3712 + }, + { + "epoch": 1.19, + "learning_rate": 1.369325709155761e-05, + "loss": 0.1132, + "step": 3713 + }, + { + "epoch": 1.19, + "learning_rate": 1.369002878155519e-05, + "loss": 0.1141, + "step": 3714 + }, + { + "epoch": 1.19, + "learning_rate": 1.3686800026298204e-05, + "loss": 0.1084, + "step": 3715 + }, + { + "epoch": 1.2, + "learning_rate": 1.368357082617624e-05, + "loss": 0.1198, + "step": 3716 + }, + { + "epoch": 1.2, + "learning_rate": 1.3680341181578946e-05, + "loss": 0.1185, + "step": 3717 + }, + { + "epoch": 1.2, + "learning_rate": 1.367711109289603e-05, + "loss": 0.1081, + "step": 3718 + }, + { + "epoch": 1.2, + "learning_rate": 1.3673880560517246e-05, + "loss": 0.1196, + "step": 3719 + }, + { + "epoch": 1.2, + "learning_rate": 1.36706495848324e-05, + "loss": 0.1137, + "step": 3720 + }, + { + "epoch": 1.2, + "learning_rate": 1.3667418166231361e-05, + "loss": 0.1066, + "step": 3721 + }, + { + "epoch": 1.2, + "learning_rate": 1.3664186305104046e-05, + "loss": 0.1249, + "step": 3722 + }, + { + "epoch": 1.2, + "learning_rate": 1.3660954001840416e-05, + "loss": 0.1112, + "step": 3723 + }, + { + "epoch": 1.2, + "learning_rate": 1.3657721256830507e-05, + "loss": 0.1199, + "step": 3724 + }, + { + "epoch": 1.2, + "learning_rate": 1.365448807046439e-05, + "loss": 0.1132, + "step": 3725 + }, + { + "epoch": 1.2, + "learning_rate": 1.365125444313219e-05, + "loss": 0.112, + "step": 3726 + }, + { + "epoch": 1.2, + "learning_rate": 1.3648020375224098e-05, + "loss": 0.1098, + "step": 3727 + }, + { + "epoch": 1.2, + "learning_rate": 1.3644785867130343e-05, + "loss": 0.1104, + "step": 3728 + }, + { + "epoch": 1.2, + "learning_rate": 1.3641550919241225e-05, + "loss": 0.1213, + "step": 3729 + }, + { + "epoch": 1.2, + "learning_rate": 1.363831553194708e-05, + "loss": 0.112, + "step": 3730 + }, + { + "epoch": 1.2, + "learning_rate": 1.3635079705638298e-05, + "loss": 0.1001, + "step": 3731 + }, + { + "epoch": 1.2, + "learning_rate": 1.3631843440705338e-05, + "loss": 0.1195, + "step": 3732 + }, + { + "epoch": 1.2, + "learning_rate": 1.3628606737538699e-05, + "loss": 0.1102, + "step": 3733 + }, + { + "epoch": 1.2, + "learning_rate": 1.362536959652893e-05, + "loss": 0.1094, + "step": 3734 + }, + { + "epoch": 1.2, + "learning_rate": 1.3622132018066646e-05, + "loss": 0.1044, + "step": 3735 + }, + { + "epoch": 1.2, + "learning_rate": 1.36188940025425e-05, + "loss": 0.1125, + "step": 3736 + }, + { + "epoch": 1.2, + "learning_rate": 1.3615655550347204e-05, + "loss": 0.1149, + "step": 3737 + }, + { + "epoch": 1.2, + "learning_rate": 1.3612416661871532e-05, + "loss": 0.1064, + "step": 3738 + }, + { + "epoch": 1.2, + "learning_rate": 1.3609177337506294e-05, + "loss": 0.1127, + "step": 3739 + }, + { + "epoch": 1.2, + "learning_rate": 1.3605937577642366e-05, + "loss": 0.1067, + "step": 3740 + }, + { + "epoch": 1.2, + "learning_rate": 1.3602697382670664e-05, + "loss": 0.126, + "step": 3741 + }, + { + "epoch": 1.2, + "learning_rate": 1.3599456752982172e-05, + "loss": 0.115, + "step": 3742 + }, + { + "epoch": 1.2, + "learning_rate": 1.3596215688967917e-05, + "loss": 0.1228, + "step": 3743 + }, + { + "epoch": 1.2, + "learning_rate": 1.3592974191018973e-05, + "loss": 0.1124, + "step": 3744 + }, + { + "epoch": 1.2, + "learning_rate": 1.3589732259526481e-05, + "loss": 0.1074, + "step": 3745 + }, + { + "epoch": 1.2, + "learning_rate": 1.3586489894881618e-05, + "loss": 0.1175, + "step": 3746 + }, + { + "epoch": 1.21, + "learning_rate": 1.3583247097475628e-05, + "loss": 0.1075, + "step": 3747 + }, + { + "epoch": 1.21, + "learning_rate": 1.3580003867699801e-05, + "loss": 0.1121, + "step": 3748 + }, + { + "epoch": 1.21, + "learning_rate": 1.3576760205945474e-05, + "loss": 0.1117, + "step": 3749 + }, + { + "epoch": 1.21, + "learning_rate": 1.3573516112604042e-05, + "loss": 0.1163, + "step": 3750 + }, + { + "epoch": 1.21, + "learning_rate": 1.3570271588066957e-05, + "loss": 0.1192, + "step": 3751 + }, + { + "epoch": 1.21, + "learning_rate": 1.356702663272571e-05, + "loss": 0.1128, + "step": 3752 + }, + { + "epoch": 1.21, + "learning_rate": 1.3563781246971855e-05, + "loss": 0.119, + "step": 3753 + }, + { + "epoch": 1.21, + "learning_rate": 1.3560535431196991e-05, + "loss": 0.1174, + "step": 3754 + }, + { + "epoch": 1.21, + "learning_rate": 1.3557289185792777e-05, + "loss": 0.1189, + "step": 3755 + }, + { + "epoch": 1.21, + "learning_rate": 1.3554042511150913e-05, + "loss": 0.1028, + "step": 3756 + }, + { + "epoch": 1.21, + "learning_rate": 1.3550795407663158e-05, + "loss": 0.1075, + "step": 3757 + }, + { + "epoch": 1.21, + "learning_rate": 1.3547547875721327e-05, + "loss": 0.1144, + "step": 3758 + }, + { + "epoch": 1.21, + "learning_rate": 1.3544299915717276e-05, + "loss": 0.1149, + "step": 3759 + }, + { + "epoch": 1.21, + "learning_rate": 1.3541051528042915e-05, + "loss": 0.1113, + "step": 3760 + }, + { + "epoch": 1.21, + "learning_rate": 1.3537802713090216e-05, + "loss": 0.1224, + "step": 3761 + }, + { + "epoch": 1.21, + "learning_rate": 1.3534553471251185e-05, + "loss": 0.1173, + "step": 3762 + }, + { + "epoch": 1.21, + "learning_rate": 1.35313038029179e-05, + "loss": 0.1147, + "step": 3763 + }, + { + "epoch": 1.21, + "learning_rate": 1.3528053708482474e-05, + "loss": 0.1037, + "step": 3764 + }, + { + "epoch": 1.21, + "learning_rate": 1.3524803188337074e-05, + "loss": 0.1083, + "step": 3765 + }, + { + "epoch": 1.21, + "learning_rate": 1.3521552242873926e-05, + "loss": 0.1085, + "step": 3766 + }, + { + "epoch": 1.21, + "learning_rate": 1.3518300872485304e-05, + "loss": 0.1094, + "step": 3767 + }, + { + "epoch": 1.21, + "learning_rate": 1.3515049077563531e-05, + "loss": 0.1153, + "step": 3768 + }, + { + "epoch": 1.21, + "learning_rate": 1.3511796858500978e-05, + "loss": 0.1146, + "step": 3769 + }, + { + "epoch": 1.21, + "learning_rate": 1.350854421569008e-05, + "loss": 0.11, + "step": 3770 + }, + { + "epoch": 1.21, + "learning_rate": 1.3505291149523309e-05, + "loss": 0.1167, + "step": 3771 + }, + { + "epoch": 1.21, + "learning_rate": 1.3502037660393195e-05, + "loss": 0.1107, + "step": 3772 + }, + { + "epoch": 1.21, + "learning_rate": 1.3498783748692317e-05, + "loss": 0.1167, + "step": 3773 + }, + { + "epoch": 1.21, + "learning_rate": 1.349552941481331e-05, + "loss": 0.1211, + "step": 3774 + }, + { + "epoch": 1.21, + "learning_rate": 1.3492274659148847e-05, + "loss": 0.1054, + "step": 3775 + }, + { + "epoch": 1.21, + "learning_rate": 1.348901948209167e-05, + "loss": 0.1252, + "step": 3776 + }, + { + "epoch": 1.21, + "learning_rate": 1.3485763884034558e-05, + "loss": 0.1122, + "step": 3777 + }, + { + "epoch": 1.22, + "learning_rate": 1.3482507865370342e-05, + "loss": 0.1358, + "step": 3778 + }, + { + "epoch": 1.22, + "learning_rate": 1.3479251426491917e-05, + "loss": 0.1098, + "step": 3779 + }, + { + "epoch": 1.22, + "learning_rate": 1.3475994567792208e-05, + "loss": 0.1118, + "step": 3780 + }, + { + "epoch": 1.22, + "learning_rate": 1.347273728966421e-05, + "loss": 0.119, + "step": 3781 + }, + { + "epoch": 1.22, + "learning_rate": 1.3469479592500954e-05, + "loss": 0.1141, + "step": 3782 + }, + { + "epoch": 1.22, + "learning_rate": 1.3466221476695529e-05, + "loss": 0.1252, + "step": 3783 + }, + { + "epoch": 1.22, + "learning_rate": 1.3462962942641075e-05, + "loss": 0.1172, + "step": 3784 + }, + { + "epoch": 1.22, + "learning_rate": 1.3459703990730781e-05, + "loss": 0.111, + "step": 3785 + }, + { + "epoch": 1.22, + "learning_rate": 1.345644462135788e-05, + "loss": 0.1215, + "step": 3786 + }, + { + "epoch": 1.22, + "learning_rate": 1.3453184834915671e-05, + "loss": 0.1251, + "step": 3787 + }, + { + "epoch": 1.22, + "learning_rate": 1.3449924631797485e-05, + "loss": 0.1071, + "step": 3788 + }, + { + "epoch": 1.22, + "learning_rate": 1.3446664012396718e-05, + "loss": 0.1095, + "step": 3789 + }, + { + "epoch": 1.22, + "learning_rate": 1.3443402977106807e-05, + "loss": 0.1181, + "step": 3790 + }, + { + "epoch": 1.22, + "learning_rate": 1.3440141526321243e-05, + "loss": 0.1266, + "step": 3791 + }, + { + "epoch": 1.22, + "learning_rate": 1.3436879660433567e-05, + "loss": 0.1135, + "step": 3792 + }, + { + "epoch": 1.22, + "learning_rate": 1.343361737983737e-05, + "loss": 0.1131, + "step": 3793 + }, + { + "epoch": 1.22, + "learning_rate": 1.343035468492629e-05, + "loss": 0.1218, + "step": 3794 + }, + { + "epoch": 1.22, + "learning_rate": 1.3427091576094022e-05, + "loss": 0.1043, + "step": 3795 + }, + { + "epoch": 1.22, + "learning_rate": 1.34238280537343e-05, + "loss": 0.1215, + "step": 3796 + }, + { + "epoch": 1.22, + "learning_rate": 1.3420564118240922e-05, + "loss": 0.1074, + "step": 3797 + }, + { + "epoch": 1.22, + "learning_rate": 1.3417299770007728e-05, + "loss": 0.123, + "step": 3798 + }, + { + "epoch": 1.22, + "learning_rate": 1.3414035009428598e-05, + "loss": 0.1103, + "step": 3799 + }, + { + "epoch": 1.22, + "learning_rate": 1.3410769836897485e-05, + "loss": 0.1224, + "step": 3800 + }, + { + "epoch": 1.22, + "learning_rate": 1.3407504252808372e-05, + "loss": 0.1094, + "step": 3801 + }, + { + "epoch": 1.22, + "learning_rate": 1.3404238257555296e-05, + "loss": 0.1208, + "step": 3802 + }, + { + "epoch": 1.22, + "learning_rate": 1.3400971851532352e-05, + "loss": 0.103, + "step": 3803 + }, + { + "epoch": 1.22, + "learning_rate": 1.3397705035133672e-05, + "loss": 0.1179, + "step": 3804 + }, + { + "epoch": 1.22, + "learning_rate": 1.339443780875345e-05, + "loss": 0.1159, + "step": 3805 + }, + { + "epoch": 1.22, + "learning_rate": 1.3391170172785918e-05, + "loss": 0.1176, + "step": 3806 + }, + { + "epoch": 1.22, + "learning_rate": 1.338790212762537e-05, + "loss": 0.1152, + "step": 3807 + }, + { + "epoch": 1.22, + "learning_rate": 1.3384633673666132e-05, + "loss": 0.1085, + "step": 3808 + }, + { + "epoch": 1.23, + "learning_rate": 1.3381364811302597e-05, + "loss": 0.1081, + "step": 3809 + }, + { + "epoch": 1.23, + "learning_rate": 1.3378095540929204e-05, + "loss": 0.1018, + "step": 3810 + }, + { + "epoch": 1.23, + "learning_rate": 1.3374825862940425e-05, + "loss": 0.1115, + "step": 3811 + }, + { + "epoch": 1.23, + "learning_rate": 1.33715557777308e-05, + "loss": 0.1153, + "step": 3812 + }, + { + "epoch": 1.23, + "learning_rate": 1.3368285285694915e-05, + "loss": 0.1086, + "step": 3813 + }, + { + "epoch": 1.23, + "learning_rate": 1.3365014387227393e-05, + "loss": 0.1038, + "step": 3814 + }, + { + "epoch": 1.23, + "learning_rate": 1.336174308272292e-05, + "loss": 0.1241, + "step": 3815 + }, + { + "epoch": 1.23, + "learning_rate": 1.3358471372576229e-05, + "loss": 0.1176, + "step": 3816 + }, + { + "epoch": 1.23, + "learning_rate": 1.3355199257182087e-05, + "loss": 0.1174, + "step": 3817 + }, + { + "epoch": 1.23, + "learning_rate": 1.3351926736935331e-05, + "loss": 0.1131, + "step": 3818 + }, + { + "epoch": 1.23, + "learning_rate": 1.3348653812230836e-05, + "loss": 0.1146, + "step": 3819 + }, + { + "epoch": 1.23, + "learning_rate": 1.3345380483463524e-05, + "loss": 0.1049, + "step": 3820 + }, + { + "epoch": 1.23, + "learning_rate": 1.334210675102837e-05, + "loss": 0.1205, + "step": 3821 + }, + { + "epoch": 1.23, + "learning_rate": 1.3338832615320399e-05, + "loss": 0.1158, + "step": 3822 + }, + { + "epoch": 1.23, + "learning_rate": 1.3335558076734678e-05, + "loss": 0.1249, + "step": 3823 + }, + { + "epoch": 1.23, + "learning_rate": 1.333228313566633e-05, + "loss": 0.1133, + "step": 3824 + }, + { + "epoch": 1.23, + "learning_rate": 1.3329007792510517e-05, + "loss": 0.1085, + "step": 3825 + }, + { + "epoch": 1.23, + "learning_rate": 1.3325732047662467e-05, + "loss": 0.1279, + "step": 3826 + }, + { + "epoch": 1.23, + "learning_rate": 1.3322455901517435e-05, + "loss": 0.1168, + "step": 3827 + }, + { + "epoch": 1.23, + "learning_rate": 1.331917935447074e-05, + "loss": 0.1041, + "step": 3828 + }, + { + "epoch": 1.23, + "learning_rate": 1.3315902406917746e-05, + "loss": 0.1102, + "step": 3829 + }, + { + "epoch": 1.23, + "learning_rate": 1.3312625059253858e-05, + "loss": 0.1015, + "step": 3830 + }, + { + "epoch": 1.23, + "learning_rate": 1.3309347311874536e-05, + "loss": 0.1094, + "step": 3831 + }, + { + "epoch": 1.23, + "learning_rate": 1.330606916517529e-05, + "loss": 0.1125, + "step": 3832 + }, + { + "epoch": 1.23, + "learning_rate": 1.3302790619551673e-05, + "loss": 0.1063, + "step": 3833 + }, + { + "epoch": 1.23, + "learning_rate": 1.3299511675399288e-05, + "loss": 0.1014, + "step": 3834 + }, + { + "epoch": 1.23, + "learning_rate": 1.3296232333113786e-05, + "loss": 0.1179, + "step": 3835 + }, + { + "epoch": 1.23, + "learning_rate": 1.3292952593090869e-05, + "loss": 0.1209, + "step": 3836 + }, + { + "epoch": 1.23, + "learning_rate": 1.328967245572628e-05, + "loss": 0.1098, + "step": 3837 + }, + { + "epoch": 1.23, + "learning_rate": 1.3286391921415817e-05, + "loss": 0.1141, + "step": 3838 + }, + { + "epoch": 1.23, + "learning_rate": 1.3283110990555325e-05, + "loss": 0.1066, + "step": 3839 + }, + { + "epoch": 1.24, + "learning_rate": 1.3279829663540693e-05, + "loss": 0.1098, + "step": 3840 + }, + { + "epoch": 1.24, + "learning_rate": 1.327654794076786e-05, + "loss": 0.1054, + "step": 3841 + }, + { + "epoch": 1.24, + "learning_rate": 1.3273265822632814e-05, + "loss": 0.1148, + "step": 3842 + }, + { + "epoch": 1.24, + "learning_rate": 1.3269983309531584e-05, + "loss": 0.1156, + "step": 3843 + }, + { + "epoch": 1.24, + "learning_rate": 1.3266700401860262e-05, + "loss": 0.0946, + "step": 3844 + }, + { + "epoch": 1.24, + "learning_rate": 1.3263417100014968e-05, + "loss": 0.1152, + "step": 3845 + }, + { + "epoch": 1.24, + "learning_rate": 1.3260133404391881e-05, + "loss": 0.116, + "step": 3846 + }, + { + "epoch": 1.24, + "learning_rate": 1.325684931538723e-05, + "loss": 0.115, + "step": 3847 + }, + { + "epoch": 1.24, + "learning_rate": 1.3253564833397283e-05, + "loss": 0.1045, + "step": 3848 + }, + { + "epoch": 1.24, + "learning_rate": 1.3250279958818364e-05, + "loss": 0.1154, + "step": 3849 + }, + { + "epoch": 1.24, + "learning_rate": 1.3246994692046837e-05, + "loss": 0.099, + "step": 3850 + }, + { + "epoch": 1.24, + "learning_rate": 1.3243709033479112e-05, + "loss": 0.1198, + "step": 3851 + }, + { + "epoch": 1.24, + "learning_rate": 1.324042298351166e-05, + "loss": 0.113, + "step": 3852 + }, + { + "epoch": 1.24, + "learning_rate": 1.3237136542540985e-05, + "loss": 0.1159, + "step": 3853 + }, + { + "epoch": 1.24, + "learning_rate": 1.3233849710963638e-05, + "loss": 0.1196, + "step": 3854 + }, + { + "epoch": 1.24, + "learning_rate": 1.3230562489176231e-05, + "loss": 0.1145, + "step": 3855 + }, + { + "epoch": 1.24, + "learning_rate": 1.3227274877575409e-05, + "loss": 0.1203, + "step": 3856 + }, + { + "epoch": 1.24, + "learning_rate": 1.3223986876557869e-05, + "loss": 0.111, + "step": 3857 + }, + { + "epoch": 1.24, + "learning_rate": 1.3220698486520358e-05, + "loss": 0.1119, + "step": 3858 + }, + { + "epoch": 1.24, + "learning_rate": 1.3217409707859665e-05, + "loss": 0.1062, + "step": 3859 + }, + { + "epoch": 1.24, + "learning_rate": 1.3214120540972626e-05, + "loss": 0.0989, + "step": 3860 + }, + { + "epoch": 1.24, + "learning_rate": 1.3210830986256132e-05, + "loss": 0.1102, + "step": 3861 + }, + { + "epoch": 1.24, + "learning_rate": 1.3207541044107109e-05, + "loss": 0.127, + "step": 3862 + }, + { + "epoch": 1.24, + "learning_rate": 1.3204250714922538e-05, + "loss": 0.1147, + "step": 3863 + }, + { + "epoch": 1.24, + "learning_rate": 1.3200959999099441e-05, + "loss": 0.1096, + "step": 3864 + }, + { + "epoch": 1.24, + "learning_rate": 1.3197668897034896e-05, + "loss": 0.1124, + "step": 3865 + }, + { + "epoch": 1.24, + "learning_rate": 1.3194377409126016e-05, + "loss": 0.1099, + "step": 3866 + }, + { + "epoch": 1.24, + "learning_rate": 1.3191085535769968e-05, + "loss": 0.1146, + "step": 3867 + }, + { + "epoch": 1.24, + "learning_rate": 1.3187793277363963e-05, + "loss": 0.119, + "step": 3868 + }, + { + "epoch": 1.24, + "learning_rate": 1.318450063430526e-05, + "loss": 0.1076, + "step": 3869 + }, + { + "epoch": 1.24, + "learning_rate": 1.318120760699116e-05, + "loss": 0.1166, + "step": 3870 + }, + { + "epoch": 1.25, + "learning_rate": 1.3177914195819018e-05, + "loss": 0.1072, + "step": 3871 + }, + { + "epoch": 1.25, + "learning_rate": 1.317462040118623e-05, + "loss": 0.1111, + "step": 3872 + }, + { + "epoch": 1.25, + "learning_rate": 1.3171326223490234e-05, + "loss": 0.1128, + "step": 3873 + }, + { + "epoch": 1.25, + "learning_rate": 1.3168031663128526e-05, + "loss": 0.1137, + "step": 3874 + }, + { + "epoch": 1.25, + "learning_rate": 1.3164736720498644e-05, + "loss": 0.1087, + "step": 3875 + }, + { + "epoch": 1.25, + "learning_rate": 1.3161441395998158e-05, + "loss": 0.1267, + "step": 3876 + }, + { + "epoch": 1.25, + "learning_rate": 1.3158145690024706e-05, + "loss": 0.1163, + "step": 3877 + }, + { + "epoch": 1.25, + "learning_rate": 1.315484960297596e-05, + "loss": 0.1069, + "step": 3878 + }, + { + "epoch": 1.25, + "learning_rate": 1.315155313524964e-05, + "loss": 0.11, + "step": 3879 + }, + { + "epoch": 1.25, + "learning_rate": 1.3148256287243508e-05, + "loss": 0.1047, + "step": 3880 + }, + { + "epoch": 1.25, + "learning_rate": 1.3144959059355383e-05, + "loss": 0.1228, + "step": 3881 + }, + { + "epoch": 1.25, + "learning_rate": 1.314166145198311e-05, + "loss": 0.1047, + "step": 3882 + }, + { + "epoch": 1.25, + "learning_rate": 1.3138363465524607e-05, + "loss": 0.1135, + "step": 3883 + }, + { + "epoch": 1.25, + "learning_rate": 1.3135065100377816e-05, + "loss": 0.1085, + "step": 3884 + }, + { + "epoch": 1.25, + "learning_rate": 1.3131766356940728e-05, + "loss": 0.1084, + "step": 3885 + }, + { + "epoch": 1.25, + "learning_rate": 1.3128467235611389e-05, + "loss": 0.1134, + "step": 3886 + }, + { + "epoch": 1.25, + "learning_rate": 1.3125167736787886e-05, + "loss": 0.1262, + "step": 3887 + }, + { + "epoch": 1.25, + "learning_rate": 1.3121867860868346e-05, + "loss": 0.1148, + "step": 3888 + }, + { + "epoch": 1.25, + "learning_rate": 1.3118567608250947e-05, + "loss": 0.1179, + "step": 3889 + }, + { + "epoch": 1.25, + "learning_rate": 1.3115266979333917e-05, + "loss": 0.1145, + "step": 3890 + }, + { + "epoch": 1.25, + "learning_rate": 1.3111965974515517e-05, + "loss": 0.1192, + "step": 3891 + }, + { + "epoch": 1.25, + "learning_rate": 1.3108664594194063e-05, + "loss": 0.1103, + "step": 3892 + }, + { + "epoch": 1.25, + "learning_rate": 1.3105362838767914e-05, + "loss": 0.1309, + "step": 3893 + }, + { + "epoch": 1.25, + "learning_rate": 1.3102060708635475e-05, + "loss": 0.1163, + "step": 3894 + }, + { + "epoch": 1.25, + "learning_rate": 1.309875820419519e-05, + "loss": 0.1099, + "step": 3895 + }, + { + "epoch": 1.25, + "learning_rate": 1.3095455325845559e-05, + "loss": 0.1129, + "step": 3896 + }, + { + "epoch": 1.25, + "learning_rate": 1.3092152073985121e-05, + "loss": 0.1133, + "step": 3897 + }, + { + "epoch": 1.25, + "learning_rate": 1.3088848449012454e-05, + "loss": 0.1204, + "step": 3898 + }, + { + "epoch": 1.25, + "learning_rate": 1.3085544451326196e-05, + "loss": 0.1217, + "step": 3899 + }, + { + "epoch": 1.25, + "learning_rate": 1.3082240081325015e-05, + "loss": 0.1007, + "step": 3900 + }, + { + "epoch": 1.25, + "learning_rate": 1.3078935339407632e-05, + "loss": 0.1268, + "step": 3901 + }, + { + "epoch": 1.26, + "learning_rate": 1.3075630225972813e-05, + "loss": 0.1264, + "step": 3902 + }, + { + "epoch": 1.26, + "learning_rate": 1.3072324741419364e-05, + "loss": 0.1064, + "step": 3903 + }, + { + "epoch": 1.26, + "learning_rate": 1.3069018886146146e-05, + "loss": 0.1091, + "step": 3904 + }, + { + "epoch": 1.26, + "learning_rate": 1.306571266055205e-05, + "loss": 0.1171, + "step": 3905 + }, + { + "epoch": 1.26, + "learning_rate": 1.306240606503602e-05, + "loss": 0.1343, + "step": 3906 + }, + { + "epoch": 1.26, + "learning_rate": 1.3059099099997046e-05, + "loss": 0.11, + "step": 3907 + }, + { + "epoch": 1.26, + "learning_rate": 1.305579176583416e-05, + "loss": 0.1099, + "step": 3908 + }, + { + "epoch": 1.26, + "learning_rate": 1.305248406294644e-05, + "loss": 0.1048, + "step": 3909 + }, + { + "epoch": 1.26, + "learning_rate": 1.3049175991733006e-05, + "loss": 0.1189, + "step": 3910 + }, + { + "epoch": 1.26, + "learning_rate": 1.3045867552593024e-05, + "loss": 0.1165, + "step": 3911 + }, + { + "epoch": 1.26, + "learning_rate": 1.3042558745925704e-05, + "loss": 0.1178, + "step": 3912 + }, + { + "epoch": 1.26, + "learning_rate": 1.3039249572130304e-05, + "loss": 0.1089, + "step": 3913 + }, + { + "epoch": 1.26, + "learning_rate": 1.3035940031606117e-05, + "loss": 0.1107, + "step": 3914 + }, + { + "epoch": 1.26, + "learning_rate": 1.3032630124752493e-05, + "loss": 0.1082, + "step": 3915 + }, + { + "epoch": 1.26, + "learning_rate": 1.3029319851968811e-05, + "loss": 0.1154, + "step": 3916 + }, + { + "epoch": 1.26, + "learning_rate": 1.3026009213654513e-05, + "loss": 0.1123, + "step": 3917 + }, + { + "epoch": 1.26, + "learning_rate": 1.3022698210209069e-05, + "loss": 0.1142, + "step": 3918 + }, + { + "epoch": 1.26, + "learning_rate": 1.3019386842031998e-05, + "loss": 0.1111, + "step": 3919 + }, + { + "epoch": 1.26, + "learning_rate": 1.3016075109522865e-05, + "loss": 0.1137, + "step": 3920 + }, + { + "epoch": 1.26, + "learning_rate": 1.3012763013081278e-05, + "loss": 0.1229, + "step": 3921 + }, + { + "epoch": 1.26, + "learning_rate": 1.3009450553106886e-05, + "loss": 0.1207, + "step": 3922 + }, + { + "epoch": 1.26, + "learning_rate": 1.3006137729999391e-05, + "loss": 0.121, + "step": 3923 + }, + { + "epoch": 1.26, + "learning_rate": 1.3002824544158525e-05, + "loss": 0.1038, + "step": 3924 + }, + { + "epoch": 1.26, + "learning_rate": 1.2999510995984074e-05, + "loss": 0.1125, + "step": 3925 + }, + { + "epoch": 1.26, + "learning_rate": 1.2996197085875867e-05, + "loss": 0.1114, + "step": 3926 + }, + { + "epoch": 1.26, + "learning_rate": 1.2992882814233773e-05, + "loss": 0.1213, + "step": 3927 + }, + { + "epoch": 1.26, + "learning_rate": 1.2989568181457704e-05, + "loss": 0.1125, + "step": 3928 + }, + { + "epoch": 1.26, + "learning_rate": 1.2986253187947622e-05, + "loss": 0.1183, + "step": 3929 + }, + { + "epoch": 1.26, + "learning_rate": 1.2982937834103523e-05, + "loss": 0.0982, + "step": 3930 + }, + { + "epoch": 1.26, + "learning_rate": 1.2979622120325455e-05, + "loss": 0.115, + "step": 3931 + }, + { + "epoch": 1.26, + "learning_rate": 1.2976306047013508e-05, + "loss": 0.1116, + "step": 3932 + }, + { + "epoch": 1.27, + "learning_rate": 1.2972989614567808e-05, + "loss": 0.1212, + "step": 3933 + }, + { + "epoch": 1.27, + "learning_rate": 1.2969672823388533e-05, + "loss": 0.1045, + "step": 3934 + }, + { + "epoch": 1.27, + "learning_rate": 1.29663556738759e-05, + "loss": 0.1144, + "step": 3935 + }, + { + "epoch": 1.27, + "learning_rate": 1.2963038166430173e-05, + "loss": 0.1082, + "step": 3936 + }, + { + "epoch": 1.27, + "learning_rate": 1.2959720301451655e-05, + "loss": 0.107, + "step": 3937 + }, + { + "epoch": 1.27, + "learning_rate": 1.295640207934069e-05, + "loss": 0.119, + "step": 3938 + }, + { + "epoch": 1.27, + "learning_rate": 1.2953083500497679e-05, + "loss": 0.1102, + "step": 3939 + }, + { + "epoch": 1.27, + "learning_rate": 1.2949764565323046e-05, + "loss": 0.1104, + "step": 3940 + }, + { + "epoch": 1.27, + "learning_rate": 1.2946445274217267e-05, + "loss": 0.1141, + "step": 3941 + }, + { + "epoch": 1.27, + "learning_rate": 1.2943125627580875e-05, + "loss": 0.1078, + "step": 3942 + }, + { + "epoch": 1.27, + "learning_rate": 1.2939805625814416e-05, + "loss": 0.1265, + "step": 3943 + }, + { + "epoch": 1.27, + "learning_rate": 1.2936485269318504e-05, + "loss": 0.1216, + "step": 3944 + }, + { + "epoch": 1.27, + "learning_rate": 1.293316455849379e-05, + "loss": 0.1235, + "step": 3945 + }, + { + "epoch": 1.27, + "learning_rate": 1.2929843493740959e-05, + "loss": 0.1095, + "step": 3946 + }, + { + "epoch": 1.27, + "learning_rate": 1.2926522075460746e-05, + "loss": 0.1148, + "step": 3947 + }, + { + "epoch": 1.27, + "learning_rate": 1.2923200304053932e-05, + "loss": 0.1145, + "step": 3948 + }, + { + "epoch": 1.27, + "learning_rate": 1.291987817992133e-05, + "loss": 0.1143, + "step": 3949 + }, + { + "epoch": 1.27, + "learning_rate": 1.2916555703463805e-05, + "loss": 0.1176, + "step": 3950 + }, + { + "epoch": 1.27, + "learning_rate": 1.2913232875082262e-05, + "loss": 0.1139, + "step": 3951 + }, + { + "epoch": 1.27, + "learning_rate": 1.2909909695177647e-05, + "loss": 0.1171, + "step": 3952 + }, + { + "epoch": 1.27, + "learning_rate": 1.2906586164150944e-05, + "loss": 0.119, + "step": 3953 + }, + { + "epoch": 1.27, + "learning_rate": 1.2903262282403195e-05, + "loss": 0.121, + "step": 3954 + }, + { + "epoch": 1.27, + "learning_rate": 1.2899938050335464e-05, + "loss": 0.1161, + "step": 3955 + }, + { + "epoch": 1.27, + "learning_rate": 1.2896613468348869e-05, + "loss": 0.111, + "step": 3956 + }, + { + "epoch": 1.27, + "learning_rate": 1.289328853684457e-05, + "loss": 0.1207, + "step": 3957 + }, + { + "epoch": 1.27, + "learning_rate": 1.2889963256223767e-05, + "loss": 0.1053, + "step": 3958 + }, + { + "epoch": 1.27, + "learning_rate": 1.2886637626887704e-05, + "loss": 0.105, + "step": 3959 + }, + { + "epoch": 1.27, + "learning_rate": 1.2883311649237665e-05, + "loss": 0.1178, + "step": 3960 + }, + { + "epoch": 1.27, + "learning_rate": 1.2879985323674973e-05, + "loss": 0.1165, + "step": 3961 + }, + { + "epoch": 1.27, + "learning_rate": 1.2876658650600999e-05, + "loss": 0.1154, + "step": 3962 + }, + { + "epoch": 1.27, + "learning_rate": 1.2873331630417157e-05, + "loss": 0.1083, + "step": 3963 + }, + { + "epoch": 1.28, + "learning_rate": 1.287000426352489e-05, + "loss": 0.1123, + "step": 3964 + }, + { + "epoch": 1.28, + "learning_rate": 1.2866676550325704e-05, + "loss": 0.1182, + "step": 3965 + }, + { + "epoch": 1.28, + "learning_rate": 1.2863348491221129e-05, + "loss": 0.1143, + "step": 3966 + }, + { + "epoch": 1.28, + "learning_rate": 1.2860020086612741e-05, + "loss": 0.1162, + "step": 3967 + }, + { + "epoch": 1.28, + "learning_rate": 1.2856691336902163e-05, + "loss": 0.1037, + "step": 3968 + }, + { + "epoch": 1.28, + "learning_rate": 1.2853362242491054e-05, + "loss": 0.1223, + "step": 3969 + }, + { + "epoch": 1.28, + "learning_rate": 1.2850032803781119e-05, + "loss": 0.1055, + "step": 3970 + }, + { + "epoch": 1.28, + "learning_rate": 1.28467030211741e-05, + "loss": 0.1187, + "step": 3971 + }, + { + "epoch": 1.28, + "learning_rate": 1.2843372895071782e-05, + "loss": 0.1115, + "step": 3972 + }, + { + "epoch": 1.28, + "learning_rate": 1.2840042425875996e-05, + "loss": 0.1102, + "step": 3973 + }, + { + "epoch": 1.28, + "learning_rate": 1.2836711613988607e-05, + "loss": 0.1156, + "step": 3974 + }, + { + "epoch": 1.28, + "learning_rate": 1.2833380459811524e-05, + "loss": 0.1174, + "step": 3975 + }, + { + "epoch": 1.28, + "learning_rate": 1.2830048963746703e-05, + "loss": 0.1242, + "step": 3976 + }, + { + "epoch": 1.28, + "learning_rate": 1.2826717126196134e-05, + "loss": 0.1204, + "step": 3977 + }, + { + "epoch": 1.28, + "learning_rate": 1.282338494756185e-05, + "loss": 0.1043, + "step": 3978 + }, + { + "epoch": 1.28, + "learning_rate": 1.2820052428245928e-05, + "loss": 0.1054, + "step": 3979 + }, + { + "epoch": 1.28, + "learning_rate": 1.281671956865048e-05, + "loss": 0.1226, + "step": 3980 + }, + { + "epoch": 1.28, + "learning_rate": 1.2813386369177667e-05, + "loss": 0.1068, + "step": 3981 + }, + { + "epoch": 1.28, + "learning_rate": 1.2810052830229684e-05, + "loss": 0.1164, + "step": 3982 + }, + { + "epoch": 1.28, + "learning_rate": 1.2806718952208772e-05, + "loss": 0.1107, + "step": 3983 + }, + { + "epoch": 1.28, + "learning_rate": 1.2803384735517209e-05, + "loss": 0.1093, + "step": 3984 + }, + { + "epoch": 1.28, + "learning_rate": 1.2800050180557322e-05, + "loss": 0.1156, + "step": 3985 + }, + { + "epoch": 1.28, + "learning_rate": 1.2796715287731463e-05, + "loss": 0.1044, + "step": 3986 + }, + { + "epoch": 1.28, + "learning_rate": 1.2793380057442038e-05, + "loss": 0.1137, + "step": 3987 + }, + { + "epoch": 1.28, + "learning_rate": 1.2790044490091496e-05, + "loss": 0.114, + "step": 3988 + }, + { + "epoch": 1.28, + "learning_rate": 1.2786708586082312e-05, + "loss": 0.1052, + "step": 3989 + }, + { + "epoch": 1.28, + "learning_rate": 1.2783372345817013e-05, + "loss": 0.1123, + "step": 3990 + }, + { + "epoch": 1.28, + "learning_rate": 1.2780035769698172e-05, + "loss": 0.1106, + "step": 3991 + }, + { + "epoch": 1.28, + "learning_rate": 1.2776698858128383e-05, + "loss": 0.1114, + "step": 3992 + }, + { + "epoch": 1.28, + "learning_rate": 1.2773361611510293e-05, + "loss": 0.1126, + "step": 3993 + }, + { + "epoch": 1.28, + "learning_rate": 1.27700240302466e-05, + "loss": 0.1195, + "step": 3994 + }, + { + "epoch": 1.29, + "learning_rate": 1.2766686114740017e-05, + "loss": 0.11, + "step": 3995 + }, + { + "epoch": 1.29, + "learning_rate": 1.2763347865393317e-05, + "loss": 0.1163, + "step": 3996 + }, + { + "epoch": 1.29, + "learning_rate": 1.276000928260931e-05, + "loss": 0.1122, + "step": 3997 + }, + { + "epoch": 1.29, + "learning_rate": 1.2756670366790837e-05, + "loss": 0.1123, + "step": 3998 + }, + { + "epoch": 1.29, + "learning_rate": 1.2753331118340793e-05, + "loss": 0.1151, + "step": 3999 + }, + { + "epoch": 1.29, + "learning_rate": 1.27499915376621e-05, + "loss": 0.1016, + "step": 4000 + }, + { + "epoch": 1.29, + "learning_rate": 1.274665162515773e-05, + "loss": 0.1126, + "step": 4001 + }, + { + "epoch": 1.29, + "learning_rate": 1.274331138123069e-05, + "loss": 0.1097, + "step": 4002 + }, + { + "epoch": 1.29, + "learning_rate": 1.2739970806284028e-05, + "loss": 0.1083, + "step": 4003 + }, + { + "epoch": 1.29, + "learning_rate": 1.2736629900720832e-05, + "loss": 0.1074, + "step": 4004 + }, + { + "epoch": 1.29, + "learning_rate": 1.2733288664944226e-05, + "loss": 0.1072, + "step": 4005 + }, + { + "epoch": 1.29, + "learning_rate": 1.2729947099357385e-05, + "loss": 0.1165, + "step": 4006 + }, + { + "epoch": 1.29, + "learning_rate": 1.2726605204363512e-05, + "loss": 0.1083, + "step": 4007 + }, + { + "epoch": 1.29, + "learning_rate": 1.2723262980365852e-05, + "loss": 0.1291, + "step": 4008 + }, + { + "epoch": 1.29, + "learning_rate": 1.2719920427767698e-05, + "loss": 0.1241, + "step": 4009 + }, + { + "epoch": 1.29, + "learning_rate": 1.2716577546972371e-05, + "loss": 0.1355, + "step": 4010 + }, + { + "epoch": 1.29, + "learning_rate": 1.271323433838324e-05, + "loss": 0.1251, + "step": 4011 + }, + { + "epoch": 1.29, + "learning_rate": 1.2709890802403712e-05, + "loss": 0.1301, + "step": 4012 + }, + { + "epoch": 1.29, + "learning_rate": 1.270654693943723e-05, + "loss": 0.13, + "step": 4013 + }, + { + "epoch": 1.29, + "learning_rate": 1.2703202749887282e-05, + "loss": 0.1471, + "step": 4014 + }, + { + "epoch": 1.29, + "learning_rate": 1.2699858234157385e-05, + "loss": 0.1216, + "step": 4015 + }, + { + "epoch": 1.29, + "learning_rate": 1.2696513392651111e-05, + "loss": 0.1291, + "step": 4016 + }, + { + "epoch": 1.29, + "learning_rate": 1.269316822577206e-05, + "loss": 0.1632, + "step": 4017 + }, + { + "epoch": 1.29, + "learning_rate": 1.268982273392387e-05, + "loss": 0.1318, + "step": 4018 + }, + { + "epoch": 1.29, + "learning_rate": 1.2686476917510226e-05, + "loss": 0.1293, + "step": 4019 + }, + { + "epoch": 1.29, + "learning_rate": 1.268313077693485e-05, + "loss": 0.1324, + "step": 4020 + }, + { + "epoch": 1.29, + "learning_rate": 1.2679784312601495e-05, + "loss": 0.1459, + "step": 4021 + }, + { + "epoch": 1.29, + "learning_rate": 1.2676437524913967e-05, + "loss": 0.1303, + "step": 4022 + }, + { + "epoch": 1.29, + "learning_rate": 1.2673090414276101e-05, + "loss": 0.1417, + "step": 4023 + }, + { + "epoch": 1.29, + "learning_rate": 1.2669742981091771e-05, + "loss": 0.1433, + "step": 4024 + }, + { + "epoch": 1.29, + "learning_rate": 1.2666395225764898e-05, + "loss": 0.1514, + "step": 4025 + }, + { + "epoch": 1.3, + "learning_rate": 1.2663047148699428e-05, + "loss": 0.1475, + "step": 4026 + }, + { + "epoch": 1.3, + "learning_rate": 1.2659698750299365e-05, + "loss": 0.1531, + "step": 4027 + }, + { + "epoch": 1.3, + "learning_rate": 1.2656350030968734e-05, + "loss": 0.1382, + "step": 4028 + }, + { + "epoch": 1.3, + "learning_rate": 1.2653000991111604e-05, + "loss": 0.1486, + "step": 4029 + }, + { + "epoch": 1.3, + "learning_rate": 1.2649651631132089e-05, + "loss": 0.1407, + "step": 4030 + }, + { + "epoch": 1.3, + "learning_rate": 1.264630195143434e-05, + "loss": 0.1208, + "step": 4031 + }, + { + "epoch": 1.3, + "learning_rate": 1.2642951952422533e-05, + "loss": 0.1606, + "step": 4032 + }, + { + "epoch": 1.3, + "learning_rate": 1.26396016345009e-05, + "loss": 0.1362, + "step": 4033 + }, + { + "epoch": 1.3, + "learning_rate": 1.2636250998073707e-05, + "loss": 0.1484, + "step": 4034 + }, + { + "epoch": 1.3, + "learning_rate": 1.2632900043545248e-05, + "loss": 0.138, + "step": 4035 + }, + { + "epoch": 1.3, + "learning_rate": 1.2629548771319873e-05, + "loss": 0.1383, + "step": 4036 + }, + { + "epoch": 1.3, + "learning_rate": 1.2626197181801956e-05, + "loss": 0.1397, + "step": 4037 + }, + { + "epoch": 1.3, + "learning_rate": 1.2622845275395912e-05, + "loss": 0.1441, + "step": 4038 + }, + { + "epoch": 1.3, + "learning_rate": 1.2619493052506198e-05, + "loss": 0.1432, + "step": 4039 + }, + { + "epoch": 1.3, + "learning_rate": 1.2616140513537311e-05, + "loss": 0.1242, + "step": 4040 + }, + { + "epoch": 1.3, + "learning_rate": 1.2612787658893777e-05, + "loss": 0.1417, + "step": 4041 + }, + { + "epoch": 1.3, + "learning_rate": 1.2609434488980168e-05, + "loss": 0.1288, + "step": 4042 + }, + { + "epoch": 1.3, + "learning_rate": 1.2606081004201096e-05, + "loss": 0.1468, + "step": 4043 + }, + { + "epoch": 1.3, + "learning_rate": 1.2602727204961198e-05, + "loss": 0.1468, + "step": 4044 + }, + { + "epoch": 1.3, + "learning_rate": 1.2599373091665161e-05, + "loss": 0.1412, + "step": 4045 + }, + { + "epoch": 1.3, + "learning_rate": 1.2596018664717713e-05, + "loss": 0.1296, + "step": 4046 + }, + { + "epoch": 1.3, + "learning_rate": 1.2592663924523607e-05, + "loss": 0.1394, + "step": 4047 + }, + { + "epoch": 1.3, + "learning_rate": 1.258930887148764e-05, + "loss": 0.142, + "step": 4048 + }, + { + "epoch": 1.3, + "learning_rate": 1.258595350601465e-05, + "loss": 0.1428, + "step": 4049 + }, + { + "epoch": 1.3, + "learning_rate": 1.2582597828509508e-05, + "loss": 0.1298, + "step": 4050 + }, + { + "epoch": 1.3, + "learning_rate": 1.2579241839377123e-05, + "loss": 0.1438, + "step": 4051 + }, + { + "epoch": 1.3, + "learning_rate": 1.2575885539022448e-05, + "loss": 0.1472, + "step": 4052 + }, + { + "epoch": 1.3, + "learning_rate": 1.2572528927850463e-05, + "loss": 0.147, + "step": 4053 + }, + { + "epoch": 1.3, + "learning_rate": 1.2569172006266192e-05, + "loss": 0.1347, + "step": 4054 + }, + { + "epoch": 1.3, + "learning_rate": 1.2565814774674698e-05, + "loss": 0.1418, + "step": 4055 + }, + { + "epoch": 1.3, + "learning_rate": 1.2562457233481077e-05, + "loss": 0.128, + "step": 4056 + }, + { + "epoch": 1.31, + "learning_rate": 1.2559099383090465e-05, + "loss": 0.1481, + "step": 4057 + }, + { + "epoch": 1.31, + "learning_rate": 1.2555741223908033e-05, + "loss": 0.1335, + "step": 4058 + }, + { + "epoch": 1.31, + "learning_rate": 1.2552382756338993e-05, + "loss": 0.1454, + "step": 4059 + }, + { + "epoch": 1.31, + "learning_rate": 1.2549023980788588e-05, + "loss": 0.1612, + "step": 4060 + }, + { + "epoch": 1.31, + "learning_rate": 1.2545664897662108e-05, + "loss": 0.1384, + "step": 4061 + }, + { + "epoch": 1.31, + "learning_rate": 1.2542305507364872e-05, + "loss": 0.1485, + "step": 4062 + }, + { + "epoch": 1.31, + "learning_rate": 1.2538945810302234e-05, + "loss": 0.1443, + "step": 4063 + }, + { + "epoch": 1.31, + "learning_rate": 1.253558580687959e-05, + "loss": 0.1452, + "step": 4064 + }, + { + "epoch": 1.31, + "learning_rate": 1.2532225497502381e-05, + "loss": 0.1315, + "step": 4065 + }, + { + "epoch": 1.31, + "learning_rate": 1.252886488257607e-05, + "loss": 0.1467, + "step": 4066 + }, + { + "epoch": 1.31, + "learning_rate": 1.2525503962506163e-05, + "loss": 0.1364, + "step": 4067 + }, + { + "epoch": 1.31, + "learning_rate": 1.2522142737698202e-05, + "loss": 0.1489, + "step": 4068 + }, + { + "epoch": 1.31, + "learning_rate": 1.251878120855777e-05, + "loss": 0.138, + "step": 4069 + }, + { + "epoch": 1.31, + "learning_rate": 1.2515419375490479e-05, + "loss": 0.1495, + "step": 4070 + }, + { + "epoch": 1.31, + "learning_rate": 1.2512057238901986e-05, + "loss": 0.1457, + "step": 4071 + }, + { + "epoch": 1.31, + "learning_rate": 1.2508694799197981e-05, + "loss": 0.1356, + "step": 4072 + }, + { + "epoch": 1.31, + "learning_rate": 1.2505332056784184e-05, + "loss": 0.1388, + "step": 4073 + }, + { + "epoch": 1.31, + "learning_rate": 1.2501969012066366e-05, + "loss": 0.1435, + "step": 4074 + }, + { + "epoch": 1.31, + "learning_rate": 1.2498605665450321e-05, + "loss": 0.1401, + "step": 4075 + }, + { + "epoch": 1.31, + "learning_rate": 1.2495242017341887e-05, + "loss": 0.166, + "step": 4076 + }, + { + "epoch": 1.31, + "learning_rate": 1.2491878068146934e-05, + "loss": 0.1349, + "step": 4077 + }, + { + "epoch": 1.31, + "learning_rate": 1.2488513818271372e-05, + "loss": 0.1535, + "step": 4078 + }, + { + "epoch": 1.31, + "learning_rate": 1.2485149268121148e-05, + "loss": 0.1393, + "step": 4079 + }, + { + "epoch": 1.31, + "learning_rate": 1.248178441810224e-05, + "loss": 0.1388, + "step": 4080 + }, + { + "epoch": 1.31, + "learning_rate": 1.2478419268620662e-05, + "loss": 0.1421, + "step": 4081 + }, + { + "epoch": 1.31, + "learning_rate": 1.2475053820082476e-05, + "loss": 0.1548, + "step": 4082 + }, + { + "epoch": 1.31, + "learning_rate": 1.2471688072893763e-05, + "loss": 0.1399, + "step": 4083 + }, + { + "epoch": 1.31, + "learning_rate": 1.2468322027460653e-05, + "loss": 0.131, + "step": 4084 + }, + { + "epoch": 1.31, + "learning_rate": 1.2464955684189307e-05, + "loss": 0.15, + "step": 4085 + }, + { + "epoch": 1.31, + "learning_rate": 1.246158904348592e-05, + "loss": 0.1319, + "step": 4086 + }, + { + "epoch": 1.31, + "learning_rate": 1.2458222105756723e-05, + "loss": 0.1366, + "step": 4087 + }, + { + "epoch": 1.32, + "learning_rate": 1.2454854871407993e-05, + "loss": 0.1403, + "step": 4088 + }, + { + "epoch": 1.32, + "learning_rate": 1.245148734084603e-05, + "loss": 0.1443, + "step": 4089 + }, + { + "epoch": 1.32, + "learning_rate": 1.2448119514477172e-05, + "loss": 0.1321, + "step": 4090 + }, + { + "epoch": 1.32, + "learning_rate": 1.24447513927078e-05, + "loss": 0.1338, + "step": 4091 + }, + { + "epoch": 1.32, + "learning_rate": 1.2441382975944325e-05, + "loss": 0.1291, + "step": 4092 + }, + { + "epoch": 1.32, + "learning_rate": 1.243801426459319e-05, + "loss": 0.1328, + "step": 4093 + }, + { + "epoch": 1.32, + "learning_rate": 1.2434645259060884e-05, + "loss": 0.1233, + "step": 4094 + }, + { + "epoch": 1.32, + "learning_rate": 1.2431275959753924e-05, + "loss": 0.1259, + "step": 4095 + }, + { + "epoch": 1.32, + "learning_rate": 1.2427906367078861e-05, + "loss": 0.1407, + "step": 4096 + }, + { + "epoch": 1.32, + "learning_rate": 1.2424536481442287e-05, + "loss": 0.1354, + "step": 4097 + }, + { + "epoch": 1.32, + "learning_rate": 1.2421166303250827e-05, + "loss": 0.1469, + "step": 4098 + }, + { + "epoch": 1.32, + "learning_rate": 1.241779583291114e-05, + "loss": 0.1377, + "step": 4099 + }, + { + "epoch": 1.32, + "learning_rate": 1.2414425070829921e-05, + "loss": 0.1251, + "step": 4100 + }, + { + "epoch": 1.32, + "learning_rate": 1.2411054017413901e-05, + "loss": 0.1277, + "step": 4101 + }, + { + "epoch": 1.32, + "learning_rate": 1.2407682673069848e-05, + "loss": 0.145, + "step": 4102 + }, + { + "epoch": 1.32, + "learning_rate": 1.2404311038204558e-05, + "loss": 0.1427, + "step": 4103 + }, + { + "epoch": 1.32, + "learning_rate": 1.240093911322487e-05, + "loss": 0.1464, + "step": 4104 + }, + { + "epoch": 1.32, + "learning_rate": 1.2397566898537655e-05, + "loss": 0.135, + "step": 4105 + }, + { + "epoch": 1.32, + "learning_rate": 1.2394194394549815e-05, + "loss": 0.141, + "step": 4106 + }, + { + "epoch": 1.32, + "learning_rate": 1.2390821601668295e-05, + "loss": 0.1341, + "step": 4107 + }, + { + "epoch": 1.32, + "learning_rate": 1.238744852030007e-05, + "loss": 0.133, + "step": 4108 + }, + { + "epoch": 1.32, + "learning_rate": 1.2384075150852147e-05, + "loss": 0.1293, + "step": 4109 + }, + { + "epoch": 1.32, + "learning_rate": 1.2380701493731576e-05, + "loss": 0.1216, + "step": 4110 + }, + { + "epoch": 1.32, + "learning_rate": 1.2377327549345435e-05, + "loss": 0.1392, + "step": 4111 + }, + { + "epoch": 1.32, + "learning_rate": 1.2373953318100833e-05, + "loss": 0.1346, + "step": 4112 + }, + { + "epoch": 1.32, + "learning_rate": 1.2370578800404927e-05, + "loss": 0.1282, + "step": 4113 + }, + { + "epoch": 1.32, + "learning_rate": 1.2367203996664901e-05, + "loss": 0.1308, + "step": 4114 + }, + { + "epoch": 1.32, + "learning_rate": 1.2363828907287964e-05, + "loss": 0.1375, + "step": 4115 + }, + { + "epoch": 1.32, + "learning_rate": 1.2360453532681376e-05, + "loss": 0.1343, + "step": 4116 + }, + { + "epoch": 1.32, + "learning_rate": 1.2357077873252425e-05, + "loss": 0.1431, + "step": 4117 + }, + { + "epoch": 1.32, + "learning_rate": 1.2353701929408425e-05, + "loss": 0.137, + "step": 4118 + }, + { + "epoch": 1.33, + "learning_rate": 1.235032570155674e-05, + "loss": 0.1507, + "step": 4119 + }, + { + "epoch": 1.33, + "learning_rate": 1.2346949190104757e-05, + "loss": 0.1431, + "step": 4120 + }, + { + "epoch": 1.33, + "learning_rate": 1.23435723954599e-05, + "loss": 0.1235, + "step": 4121 + }, + { + "epoch": 1.33, + "learning_rate": 1.2340195318029623e-05, + "loss": 0.1441, + "step": 4122 + }, + { + "epoch": 1.33, + "learning_rate": 1.2336817958221427e-05, + "loss": 0.1347, + "step": 4123 + }, + { + "epoch": 1.33, + "learning_rate": 1.2333440316442834e-05, + "loss": 0.1373, + "step": 4124 + }, + { + "epoch": 1.33, + "learning_rate": 1.2330062393101404e-05, + "loss": 0.1351, + "step": 4125 + }, + { + "epoch": 1.33, + "learning_rate": 1.2326684188604733e-05, + "loss": 0.1415, + "step": 4126 + }, + { + "epoch": 1.33, + "learning_rate": 1.2323305703360453e-05, + "loss": 0.1446, + "step": 4127 + }, + { + "epoch": 1.33, + "learning_rate": 1.2319926937776217e-05, + "loss": 0.126, + "step": 4128 + }, + { + "epoch": 1.33, + "learning_rate": 1.231654789225973e-05, + "loss": 0.1519, + "step": 4129 + }, + { + "epoch": 1.33, + "learning_rate": 1.2313168567218718e-05, + "loss": 0.1281, + "step": 4130 + }, + { + "epoch": 1.33, + "learning_rate": 1.2309788963060942e-05, + "loss": 0.1379, + "step": 4131 + }, + { + "epoch": 1.33, + "learning_rate": 1.2306409080194209e-05, + "loss": 0.1309, + "step": 4132 + }, + { + "epoch": 1.33, + "learning_rate": 1.2303028919026339e-05, + "loss": 0.1394, + "step": 4133 + }, + { + "epoch": 1.33, + "learning_rate": 1.2299648479965205e-05, + "loss": 0.1219, + "step": 4134 + }, + { + "epoch": 1.33, + "learning_rate": 1.22962677634187e-05, + "loss": 0.1383, + "step": 4135 + }, + { + "epoch": 1.33, + "learning_rate": 1.2292886769794756e-05, + "loss": 0.1352, + "step": 4136 + }, + { + "epoch": 1.33, + "learning_rate": 1.2289505499501341e-05, + "loss": 0.1383, + "step": 4137 + }, + { + "epoch": 1.33, + "learning_rate": 1.2286123952946454e-05, + "loss": 0.1319, + "step": 4138 + }, + { + "epoch": 1.33, + "learning_rate": 1.2282742130538121e-05, + "loss": 0.1222, + "step": 4139 + }, + { + "epoch": 1.33, + "learning_rate": 1.2279360032684415e-05, + "loss": 0.1302, + "step": 4140 + }, + { + "epoch": 1.33, + "learning_rate": 1.2275977659793427e-05, + "loss": 0.148, + "step": 4141 + }, + { + "epoch": 1.33, + "learning_rate": 1.2272595012273292e-05, + "loss": 0.1389, + "step": 4142 + }, + { + "epoch": 1.33, + "learning_rate": 1.2269212090532176e-05, + "loss": 0.1309, + "step": 4143 + }, + { + "epoch": 1.33, + "learning_rate": 1.2265828894978273e-05, + "loss": 0.1267, + "step": 4144 + }, + { + "epoch": 1.33, + "learning_rate": 1.2262445426019818e-05, + "loss": 0.1396, + "step": 4145 + }, + { + "epoch": 1.33, + "learning_rate": 1.225906168406507e-05, + "loss": 0.1262, + "step": 4146 + }, + { + "epoch": 1.33, + "learning_rate": 1.225567766952233e-05, + "loss": 0.1252, + "step": 4147 + }, + { + "epoch": 1.33, + "learning_rate": 1.2252293382799927e-05, + "loss": 0.1394, + "step": 4148 + }, + { + "epoch": 1.33, + "learning_rate": 1.2248908824306219e-05, + "loss": 0.1407, + "step": 4149 + }, + { + "epoch": 1.34, + "learning_rate": 1.2245523994449607e-05, + "loss": 0.1346, + "step": 4150 + }, + { + "epoch": 1.34, + "learning_rate": 1.2242138893638516e-05, + "loss": 0.1335, + "step": 4151 + }, + { + "epoch": 1.34, + "learning_rate": 1.2238753522281407e-05, + "loss": 0.1279, + "step": 4152 + }, + { + "epoch": 1.34, + "learning_rate": 1.2235367880786773e-05, + "loss": 0.1314, + "step": 4153 + }, + { + "epoch": 1.34, + "learning_rate": 1.223198196956314e-05, + "loss": 0.1226, + "step": 4154 + }, + { + "epoch": 1.34, + "learning_rate": 1.2228595789019066e-05, + "loss": 0.1394, + "step": 4155 + }, + { + "epoch": 1.34, + "learning_rate": 1.2225209339563144e-05, + "loss": 0.1343, + "step": 4156 + }, + { + "epoch": 1.34, + "learning_rate": 1.2221822621603997e-05, + "loss": 0.1124, + "step": 4157 + }, + { + "epoch": 1.34, + "learning_rate": 1.2218435635550278e-05, + "loss": 0.1269, + "step": 4158 + }, + { + "epoch": 1.34, + "learning_rate": 1.221504838181068e-05, + "loss": 0.1405, + "step": 4159 + }, + { + "epoch": 1.34, + "learning_rate": 1.2211660860793922e-05, + "loss": 0.133, + "step": 4160 + }, + { + "epoch": 1.34, + "learning_rate": 1.220827307290875e-05, + "loss": 0.1427, + "step": 4161 + }, + { + "epoch": 1.34, + "learning_rate": 1.2204885018563955e-05, + "loss": 0.1286, + "step": 4162 + }, + { + "epoch": 1.34, + "learning_rate": 1.2201496698168356e-05, + "loss": 0.1285, + "step": 4163 + }, + { + "epoch": 1.34, + "learning_rate": 1.2198108112130797e-05, + "loss": 0.1312, + "step": 4164 + }, + { + "epoch": 1.34, + "learning_rate": 1.2194719260860161e-05, + "loss": 0.1197, + "step": 4165 + }, + { + "epoch": 1.34, + "learning_rate": 1.2191330144765368e-05, + "loss": 0.1325, + "step": 4166 + }, + { + "epoch": 1.34, + "learning_rate": 1.2187940764255348e-05, + "loss": 0.1285, + "step": 4167 + }, + { + "epoch": 1.34, + "learning_rate": 1.218455111973909e-05, + "loss": 0.1296, + "step": 4168 + }, + { + "epoch": 1.34, + "learning_rate": 1.2181161211625605e-05, + "loss": 0.1283, + "step": 4169 + }, + { + "epoch": 1.34, + "learning_rate": 1.2177771040323923e-05, + "loss": 0.1101, + "step": 4170 + }, + { + "epoch": 1.34, + "learning_rate": 1.2174380606243124e-05, + "loss": 0.1356, + "step": 4171 + }, + { + "epoch": 1.34, + "learning_rate": 1.2170989909792313e-05, + "loss": 0.1273, + "step": 4172 + }, + { + "epoch": 1.34, + "learning_rate": 1.216759895138062e-05, + "loss": 0.1229, + "step": 4173 + }, + { + "epoch": 1.34, + "learning_rate": 1.2164207731417213e-05, + "loss": 0.1175, + "step": 4174 + }, + { + "epoch": 1.34, + "learning_rate": 1.2160816250311298e-05, + "loss": 0.1481, + "step": 4175 + }, + { + "epoch": 1.34, + "learning_rate": 1.21574245084721e-05, + "loss": 0.1458, + "step": 4176 + }, + { + "epoch": 1.34, + "learning_rate": 1.215403250630888e-05, + "loss": 0.1337, + "step": 4177 + }, + { + "epoch": 1.34, + "learning_rate": 1.2150640244230939e-05, + "loss": 0.1432, + "step": 4178 + }, + { + "epoch": 1.34, + "learning_rate": 1.2147247722647595e-05, + "loss": 0.1327, + "step": 4179 + }, + { + "epoch": 1.34, + "learning_rate": 1.2143854941968203e-05, + "loss": 0.133, + "step": 4180 + }, + { + "epoch": 1.35, + "learning_rate": 1.2140461902602154e-05, + "loss": 0.1229, + "step": 4181 + }, + { + "epoch": 1.35, + "learning_rate": 1.2137068604958866e-05, + "loss": 0.1252, + "step": 4182 + }, + { + "epoch": 1.35, + "learning_rate": 1.2133675049447786e-05, + "loss": 0.1367, + "step": 4183 + }, + { + "epoch": 1.35, + "learning_rate": 1.21302812364784e-05, + "loss": 0.1203, + "step": 4184 + }, + { + "epoch": 1.35, + "learning_rate": 1.2126887166460218e-05, + "loss": 0.144, + "step": 4185 + }, + { + "epoch": 1.35, + "learning_rate": 1.2123492839802777e-05, + "loss": 0.1269, + "step": 4186 + }, + { + "epoch": 1.35, + "learning_rate": 1.2120098256915658e-05, + "loss": 0.1228, + "step": 4187 + }, + { + "epoch": 1.35, + "learning_rate": 1.2116703418208462e-05, + "loss": 0.1358, + "step": 4188 + }, + { + "epoch": 1.35, + "learning_rate": 1.211330832409083e-05, + "loss": 0.1317, + "step": 4189 + }, + { + "epoch": 1.35, + "learning_rate": 1.2109912974972424e-05, + "loss": 0.1383, + "step": 4190 + }, + { + "epoch": 1.35, + "learning_rate": 1.2106517371262938e-05, + "loss": 0.1423, + "step": 4191 + }, + { + "epoch": 1.35, + "learning_rate": 1.2103121513372109e-05, + "loss": 0.1465, + "step": 4192 + }, + { + "epoch": 1.35, + "learning_rate": 1.2099725401709685e-05, + "loss": 0.1242, + "step": 4193 + }, + { + "epoch": 1.35, + "learning_rate": 1.2096329036685469e-05, + "loss": 0.1381, + "step": 4194 + }, + { + "epoch": 1.35, + "learning_rate": 1.2092932418709267e-05, + "loss": 0.12, + "step": 4195 + }, + { + "epoch": 1.35, + "learning_rate": 1.2089535548190935e-05, + "loss": 0.1219, + "step": 4196 + }, + { + "epoch": 1.35, + "learning_rate": 1.2086138425540357e-05, + "loss": 0.1324, + "step": 4197 + }, + { + "epoch": 1.35, + "learning_rate": 1.2082741051167438e-05, + "loss": 0.125, + "step": 4198 + }, + { + "epoch": 1.35, + "learning_rate": 1.2079343425482126e-05, + "loss": 0.1318, + "step": 4199 + }, + { + "epoch": 1.35, + "learning_rate": 1.2075945548894388e-05, + "loss": 0.1205, + "step": 4200 + }, + { + "epoch": 1.35, + "learning_rate": 1.2072547421814229e-05, + "loss": 0.1298, + "step": 4201 + }, + { + "epoch": 1.35, + "learning_rate": 1.206914904465168e-05, + "loss": 0.1239, + "step": 4202 + }, + { + "epoch": 1.35, + "learning_rate": 1.2065750417816806e-05, + "loss": 0.124, + "step": 4203 + }, + { + "epoch": 1.35, + "learning_rate": 1.2062351541719697e-05, + "loss": 0.1198, + "step": 4204 + }, + { + "epoch": 1.35, + "learning_rate": 1.2058952416770478e-05, + "loss": 0.1249, + "step": 4205 + }, + { + "epoch": 1.35, + "learning_rate": 1.20555530433793e-05, + "loss": 0.1301, + "step": 4206 + }, + { + "epoch": 1.35, + "learning_rate": 1.2052153421956343e-05, + "loss": 0.1406, + "step": 4207 + }, + { + "epoch": 1.35, + "learning_rate": 1.2048753552911827e-05, + "loss": 0.1319, + "step": 4208 + }, + { + "epoch": 1.35, + "learning_rate": 1.2045353436655994e-05, + "loss": 0.1201, + "step": 4209 + }, + { + "epoch": 1.35, + "learning_rate": 1.2041953073599109e-05, + "loss": 0.1292, + "step": 4210 + }, + { + "epoch": 1.35, + "learning_rate": 1.2038552464151479e-05, + "loss": 0.1287, + "step": 4211 + }, + { + "epoch": 1.36, + "learning_rate": 1.2035151608723438e-05, + "loss": 0.1156, + "step": 4212 + }, + { + "epoch": 1.36, + "learning_rate": 1.2031750507725344e-05, + "loss": 0.132, + "step": 4213 + }, + { + "epoch": 1.36, + "learning_rate": 1.2028349161567588e-05, + "loss": 0.1399, + "step": 4214 + }, + { + "epoch": 1.36, + "learning_rate": 1.2024947570660599e-05, + "loss": 0.1392, + "step": 4215 + }, + { + "epoch": 1.36, + "learning_rate": 1.2021545735414816e-05, + "loss": 0.1334, + "step": 4216 + }, + { + "epoch": 1.36, + "learning_rate": 1.2018143656240724e-05, + "loss": 0.1211, + "step": 4217 + }, + { + "epoch": 1.36, + "learning_rate": 1.2014741333548836e-05, + "loss": 0.129, + "step": 4218 + }, + { + "epoch": 1.36, + "learning_rate": 1.2011338767749685e-05, + "loss": 0.1315, + "step": 4219 + }, + { + "epoch": 1.36, + "learning_rate": 1.200793595925384e-05, + "loss": 0.1252, + "step": 4220 + }, + { + "epoch": 1.36, + "learning_rate": 1.2004532908471904e-05, + "loss": 0.1274, + "step": 4221 + }, + { + "epoch": 1.36, + "learning_rate": 1.2001129615814495e-05, + "loss": 0.1273, + "step": 4222 + }, + { + "epoch": 1.36, + "learning_rate": 1.1997726081692272e-05, + "loss": 0.1283, + "step": 4223 + }, + { + "epoch": 1.36, + "learning_rate": 1.1994322306515926e-05, + "loss": 0.1274, + "step": 4224 + }, + { + "epoch": 1.36, + "learning_rate": 1.1990918290696161e-05, + "loss": 0.1153, + "step": 4225 + }, + { + "epoch": 1.36, + "learning_rate": 1.1987514034643725e-05, + "loss": 0.1286, + "step": 4226 + }, + { + "epoch": 1.36, + "learning_rate": 1.1984109538769393e-05, + "loss": 0.1154, + "step": 4227 + }, + { + "epoch": 1.36, + "learning_rate": 1.1980704803483963e-05, + "loss": 0.1274, + "step": 4228 + }, + { + "epoch": 1.36, + "learning_rate": 1.197729982919826e-05, + "loss": 0.1299, + "step": 4229 + }, + { + "epoch": 1.36, + "learning_rate": 1.1973894616323152e-05, + "loss": 0.151, + "step": 4230 + }, + { + "epoch": 1.36, + "learning_rate": 1.1970489165269521e-05, + "loss": 0.1351, + "step": 4231 + }, + { + "epoch": 1.36, + "learning_rate": 1.1967083476448282e-05, + "loss": 0.1306, + "step": 4232 + }, + { + "epoch": 1.36, + "learning_rate": 1.1963677550270384e-05, + "loss": 0.1223, + "step": 4233 + }, + { + "epoch": 1.36, + "learning_rate": 1.1960271387146799e-05, + "loss": 0.1286, + "step": 4234 + }, + { + "epoch": 1.36, + "learning_rate": 1.1956864987488525e-05, + "loss": 0.1238, + "step": 4235 + }, + { + "epoch": 1.36, + "learning_rate": 1.1953458351706601e-05, + "loss": 0.1291, + "step": 4236 + }, + { + "epoch": 1.36, + "learning_rate": 1.1950051480212079e-05, + "loss": 0.116, + "step": 4237 + }, + { + "epoch": 1.36, + "learning_rate": 1.1946644373416049e-05, + "loss": 0.1255, + "step": 4238 + }, + { + "epoch": 1.36, + "learning_rate": 1.1943237031729628e-05, + "loss": 0.1297, + "step": 4239 + }, + { + "epoch": 1.36, + "learning_rate": 1.193982945556396e-05, + "loss": 0.119, + "step": 4240 + }, + { + "epoch": 1.36, + "learning_rate": 1.1936421645330217e-05, + "loss": 0.1297, + "step": 4241 + }, + { + "epoch": 1.36, + "learning_rate": 1.1933013601439596e-05, + "loss": 0.1184, + "step": 4242 + }, + { + "epoch": 1.37, + "learning_rate": 1.1929605324303336e-05, + "loss": 0.1218, + "step": 4243 + }, + { + "epoch": 1.37, + "learning_rate": 1.1926196814332685e-05, + "loss": 0.1297, + "step": 4244 + }, + { + "epoch": 1.37, + "learning_rate": 1.192278807193893e-05, + "loss": 0.1284, + "step": 4245 + }, + { + "epoch": 1.37, + "learning_rate": 1.1919379097533388e-05, + "loss": 0.1246, + "step": 4246 + }, + { + "epoch": 1.37, + "learning_rate": 1.19159698915274e-05, + "loss": 0.1255, + "step": 4247 + }, + { + "epoch": 1.37, + "learning_rate": 1.1912560454332329e-05, + "loss": 0.119, + "step": 4248 + }, + { + "epoch": 1.37, + "learning_rate": 1.190915078635958e-05, + "loss": 0.1297, + "step": 4249 + }, + { + "epoch": 1.37, + "learning_rate": 1.1905740888020576e-05, + "loss": 0.1256, + "step": 4250 + }, + { + "epoch": 1.37, + "learning_rate": 1.1902330759726766e-05, + "loss": 0.1358, + "step": 4251 + }, + { + "epoch": 1.37, + "learning_rate": 1.1898920401889636e-05, + "loss": 0.1234, + "step": 4252 + }, + { + "epoch": 1.37, + "learning_rate": 1.189550981492069e-05, + "loss": 0.1262, + "step": 4253 + }, + { + "epoch": 1.37, + "learning_rate": 1.1892098999231463e-05, + "loss": 0.1218, + "step": 4254 + }, + { + "epoch": 1.37, + "learning_rate": 1.1888687955233525e-05, + "loss": 0.1167, + "step": 4255 + }, + { + "epoch": 1.37, + "learning_rate": 1.188527668333846e-05, + "loss": 0.1495, + "step": 4256 + }, + { + "epoch": 1.37, + "learning_rate": 1.188186518395789e-05, + "loss": 0.1336, + "step": 4257 + }, + { + "epoch": 1.37, + "learning_rate": 1.1878453457503465e-05, + "loss": 0.1221, + "step": 4258 + }, + { + "epoch": 1.37, + "learning_rate": 1.1875041504386849e-05, + "loss": 0.1343, + "step": 4259 + }, + { + "epoch": 1.37, + "learning_rate": 1.187162932501975e-05, + "loss": 0.13, + "step": 4260 + }, + { + "epoch": 1.37, + "learning_rate": 1.1868216919813896e-05, + "loss": 0.1301, + "step": 4261 + }, + { + "epoch": 1.37, + "learning_rate": 1.1864804289181036e-05, + "loss": 0.1268, + "step": 4262 + }, + { + "epoch": 1.37, + "learning_rate": 1.1861391433532958e-05, + "loss": 0.124, + "step": 4263 + }, + { + "epoch": 1.37, + "learning_rate": 1.1857978353281474e-05, + "loss": 0.1339, + "step": 4264 + }, + { + "epoch": 1.37, + "learning_rate": 1.1854565048838413e-05, + "loss": 0.1191, + "step": 4265 + }, + { + "epoch": 1.37, + "learning_rate": 1.1851151520615645e-05, + "loss": 0.1355, + "step": 4266 + }, + { + "epoch": 1.37, + "learning_rate": 1.1847737769025061e-05, + "loss": 0.1312, + "step": 4267 + }, + { + "epoch": 1.37, + "learning_rate": 1.1844323794478572e-05, + "loss": 0.1366, + "step": 4268 + }, + { + "epoch": 1.37, + "learning_rate": 1.1840909597388128e-05, + "loss": 0.129, + "step": 4269 + }, + { + "epoch": 1.37, + "learning_rate": 1.1837495178165706e-05, + "loss": 0.1385, + "step": 4270 + }, + { + "epoch": 1.37, + "learning_rate": 1.1834080537223294e-05, + "loss": 0.1186, + "step": 4271 + }, + { + "epoch": 1.37, + "learning_rate": 1.1830665674972921e-05, + "loss": 0.1284, + "step": 4272 + }, + { + "epoch": 1.37, + "learning_rate": 1.182725059182664e-05, + "loss": 0.1228, + "step": 4273 + }, + { + "epoch": 1.38, + "learning_rate": 1.182383528819653e-05, + "loss": 0.1292, + "step": 4274 + }, + { + "epoch": 1.38, + "learning_rate": 1.1820419764494692e-05, + "loss": 0.1243, + "step": 4275 + }, + { + "epoch": 1.38, + "learning_rate": 1.1817004021133263e-05, + "loss": 0.1213, + "step": 4276 + }, + { + "epoch": 1.38, + "learning_rate": 1.1813588058524398e-05, + "loss": 0.1303, + "step": 4277 + }, + { + "epoch": 1.38, + "learning_rate": 1.181017187708028e-05, + "loss": 0.1138, + "step": 4278 + }, + { + "epoch": 1.38, + "learning_rate": 1.1806755477213125e-05, + "loss": 0.1268, + "step": 4279 + }, + { + "epoch": 1.38, + "learning_rate": 1.1803338859335169e-05, + "loss": 0.1302, + "step": 4280 + }, + { + "epoch": 1.38, + "learning_rate": 1.179992202385867e-05, + "loss": 0.132, + "step": 4281 + }, + { + "epoch": 1.38, + "learning_rate": 1.1796504971195923e-05, + "loss": 0.112, + "step": 4282 + }, + { + "epoch": 1.38, + "learning_rate": 1.1793087701759245e-05, + "loss": 0.1378, + "step": 4283 + }, + { + "epoch": 1.38, + "learning_rate": 1.1789670215960975e-05, + "loss": 0.1278, + "step": 4284 + }, + { + "epoch": 1.38, + "learning_rate": 1.1786252514213483e-05, + "loss": 0.1202, + "step": 4285 + }, + { + "epoch": 1.38, + "learning_rate": 1.1782834596929164e-05, + "loss": 0.1104, + "step": 4286 + }, + { + "epoch": 1.38, + "learning_rate": 1.1779416464520434e-05, + "loss": 0.1273, + "step": 4287 + }, + { + "epoch": 1.38, + "learning_rate": 1.1775998117399747e-05, + "loss": 0.1309, + "step": 4288 + }, + { + "epoch": 1.38, + "learning_rate": 1.1772579555979573e-05, + "loss": 0.1264, + "step": 4289 + }, + { + "epoch": 1.38, + "learning_rate": 1.1769160780672405e-05, + "loss": 0.1249, + "step": 4290 + }, + { + "epoch": 1.38, + "learning_rate": 1.1765741791890767e-05, + "loss": 0.1293, + "step": 4291 + }, + { + "epoch": 1.38, + "learning_rate": 1.176232259004722e-05, + "loss": 0.1163, + "step": 4292 + }, + { + "epoch": 1.38, + "learning_rate": 1.1758903175554326e-05, + "loss": 0.1178, + "step": 4293 + }, + { + "epoch": 1.38, + "learning_rate": 1.1755483548824693e-05, + "loss": 0.1199, + "step": 4294 + }, + { + "epoch": 1.38, + "learning_rate": 1.175206371027095e-05, + "loss": 0.144, + "step": 4295 + }, + { + "epoch": 1.38, + "learning_rate": 1.1748643660305743e-05, + "loss": 0.1445, + "step": 4296 + }, + { + "epoch": 1.38, + "learning_rate": 1.174522339934175e-05, + "loss": 0.1316, + "step": 4297 + }, + { + "epoch": 1.38, + "learning_rate": 1.1741802927791681e-05, + "loss": 0.1304, + "step": 4298 + }, + { + "epoch": 1.38, + "learning_rate": 1.1738382246068258e-05, + "loss": 0.1092, + "step": 4299 + }, + { + "epoch": 1.38, + "learning_rate": 1.1734961354584238e-05, + "loss": 0.1211, + "step": 4300 + }, + { + "epoch": 1.38, + "learning_rate": 1.17315402537524e-05, + "loss": 0.13, + "step": 4301 + }, + { + "epoch": 1.38, + "learning_rate": 1.172811894398555e-05, + "loss": 0.1351, + "step": 4302 + }, + { + "epoch": 1.38, + "learning_rate": 1.1724697425696511e-05, + "loss": 0.1214, + "step": 4303 + }, + { + "epoch": 1.38, + "learning_rate": 1.1721275699298147e-05, + "loss": 0.1303, + "step": 4304 + }, + { + "epoch": 1.39, + "learning_rate": 1.1717853765203335e-05, + "loss": 0.1198, + "step": 4305 + }, + { + "epoch": 1.39, + "learning_rate": 1.1714431623824975e-05, + "loss": 0.1318, + "step": 4306 + }, + { + "epoch": 1.39, + "learning_rate": 1.1711009275576006e-05, + "loss": 0.1233, + "step": 4307 + }, + { + "epoch": 1.39, + "learning_rate": 1.1707586720869375e-05, + "loss": 0.117, + "step": 4308 + }, + { + "epoch": 1.39, + "learning_rate": 1.1704163960118069e-05, + "loss": 0.1341, + "step": 4309 + }, + { + "epoch": 1.39, + "learning_rate": 1.1700740993735087e-05, + "loss": 0.1102, + "step": 4310 + }, + { + "epoch": 1.39, + "learning_rate": 1.169731782213346e-05, + "loss": 0.1296, + "step": 4311 + }, + { + "epoch": 1.39, + "learning_rate": 1.1693894445726245e-05, + "loss": 0.1236, + "step": 4312 + }, + { + "epoch": 1.39, + "learning_rate": 1.1690470864926522e-05, + "loss": 0.1253, + "step": 4313 + }, + { + "epoch": 1.39, + "learning_rate": 1.168704708014739e-05, + "loss": 0.1342, + "step": 4314 + }, + { + "epoch": 1.39, + "learning_rate": 1.168362309180198e-05, + "loss": 0.122, + "step": 4315 + }, + { + "epoch": 1.39, + "learning_rate": 1.1680198900303446e-05, + "loss": 0.1296, + "step": 4316 + }, + { + "epoch": 1.39, + "learning_rate": 1.1676774506064963e-05, + "loss": 0.123, + "step": 4317 + }, + { + "epoch": 1.39, + "learning_rate": 1.1673349909499736e-05, + "loss": 0.1191, + "step": 4318 + }, + { + "epoch": 1.39, + "learning_rate": 1.1669925111020988e-05, + "loss": 0.1274, + "step": 4319 + }, + { + "epoch": 1.39, + "learning_rate": 1.1666500111041971e-05, + "loss": 0.1452, + "step": 4320 + }, + { + "epoch": 1.39, + "learning_rate": 1.1663074909975963e-05, + "loss": 0.1202, + "step": 4321 + }, + { + "epoch": 1.39, + "learning_rate": 1.165964950823626e-05, + "loss": 0.1196, + "step": 4322 + }, + { + "epoch": 1.39, + "learning_rate": 1.1656223906236186e-05, + "loss": 0.1205, + "step": 4323 + }, + { + "epoch": 1.39, + "learning_rate": 1.165279810438909e-05, + "loss": 0.1289, + "step": 4324 + }, + { + "epoch": 1.39, + "learning_rate": 1.1649372103108342e-05, + "loss": 0.1419, + "step": 4325 + }, + { + "epoch": 1.39, + "learning_rate": 1.164594590280734e-05, + "loss": 0.1284, + "step": 4326 + }, + { + "epoch": 1.39, + "learning_rate": 1.1642519503899502e-05, + "loss": 0.1253, + "step": 4327 + }, + { + "epoch": 1.39, + "learning_rate": 1.1639092906798276e-05, + "loss": 0.1259, + "step": 4328 + }, + { + "epoch": 1.39, + "learning_rate": 1.1635666111917123e-05, + "loss": 0.1258, + "step": 4329 + }, + { + "epoch": 1.39, + "learning_rate": 1.1632239119669537e-05, + "loss": 0.1199, + "step": 4330 + }, + { + "epoch": 1.39, + "learning_rate": 1.1628811930469038e-05, + "loss": 0.1308, + "step": 4331 + }, + { + "epoch": 1.39, + "learning_rate": 1.1625384544729162e-05, + "loss": 0.1316, + "step": 4332 + }, + { + "epoch": 1.39, + "learning_rate": 1.1621956962863467e-05, + "loss": 0.1279, + "step": 4333 + }, + { + "epoch": 1.39, + "learning_rate": 1.161852918528555e-05, + "loss": 0.1351, + "step": 4334 + }, + { + "epoch": 1.39, + "learning_rate": 1.1615101212409012e-05, + "loss": 0.133, + "step": 4335 + }, + { + "epoch": 1.39, + "learning_rate": 1.1611673044647492e-05, + "loss": 0.1353, + "step": 4336 + }, + { + "epoch": 1.4, + "learning_rate": 1.1608244682414647e-05, + "loss": 0.1202, + "step": 4337 + }, + { + "epoch": 1.4, + "learning_rate": 1.1604816126124154e-05, + "loss": 0.1259, + "step": 4338 + }, + { + "epoch": 1.4, + "learning_rate": 1.160138737618972e-05, + "loss": 0.1212, + "step": 4339 + }, + { + "epoch": 1.4, + "learning_rate": 1.1597958433025072e-05, + "loss": 0.1329, + "step": 4340 + }, + { + "epoch": 1.4, + "learning_rate": 1.1594529297043967e-05, + "loss": 0.111, + "step": 4341 + }, + { + "epoch": 1.4, + "learning_rate": 1.1591099968660164e-05, + "loss": 0.1299, + "step": 4342 + }, + { + "epoch": 1.4, + "learning_rate": 1.1587670448287473e-05, + "loss": 0.1248, + "step": 4343 + }, + { + "epoch": 1.4, + "learning_rate": 1.1584240736339716e-05, + "loss": 0.1236, + "step": 4344 + }, + { + "epoch": 1.4, + "learning_rate": 1.1580810833230727e-05, + "loss": 0.123, + "step": 4345 + }, + { + "epoch": 1.4, + "learning_rate": 1.1577380739374376e-05, + "loss": 0.1384, + "step": 4346 + }, + { + "epoch": 1.4, + "learning_rate": 1.157395045518456e-05, + "loss": 0.1169, + "step": 4347 + }, + { + "epoch": 1.4, + "learning_rate": 1.1570519981075178e-05, + "loss": 0.1128, + "step": 4348 + }, + { + "epoch": 1.4, + "learning_rate": 1.1567089317460178e-05, + "loss": 0.1224, + "step": 4349 + }, + { + "epoch": 1.4, + "learning_rate": 1.1563658464753516e-05, + "loss": 0.1299, + "step": 4350 + }, + { + "epoch": 1.4, + "learning_rate": 1.1560227423369168e-05, + "loss": 0.1166, + "step": 4351 + }, + { + "epoch": 1.4, + "learning_rate": 1.1556796193721141e-05, + "loss": 0.1283, + "step": 4352 + }, + { + "epoch": 1.4, + "learning_rate": 1.1553364776223463e-05, + "loss": 0.1324, + "step": 4353 + }, + { + "epoch": 1.4, + "learning_rate": 1.1549933171290184e-05, + "loss": 0.1312, + "step": 4354 + }, + { + "epoch": 1.4, + "learning_rate": 1.1546501379335374e-05, + "loss": 0.119, + "step": 4355 + }, + { + "epoch": 1.4, + "learning_rate": 1.1543069400773126e-05, + "loss": 0.1236, + "step": 4356 + }, + { + "epoch": 1.4, + "learning_rate": 1.1539637236017562e-05, + "loss": 0.1378, + "step": 4357 + }, + { + "epoch": 1.4, + "learning_rate": 1.1536204885482816e-05, + "loss": 0.1253, + "step": 4358 + }, + { + "epoch": 1.4, + "learning_rate": 1.1532772349583055e-05, + "loss": 0.1178, + "step": 4359 + }, + { + "epoch": 1.4, + "learning_rate": 1.1529339628732462e-05, + "loss": 0.1175, + "step": 4360 + }, + { + "epoch": 1.4, + "learning_rate": 1.152590672334524e-05, + "loss": 0.1359, + "step": 4361 + }, + { + "epoch": 1.4, + "learning_rate": 1.1522473633835623e-05, + "loss": 0.1197, + "step": 4362 + }, + { + "epoch": 1.4, + "learning_rate": 1.1519040360617859e-05, + "loss": 0.111, + "step": 4363 + }, + { + "epoch": 1.4, + "learning_rate": 1.1515606904106222e-05, + "loss": 0.1171, + "step": 4364 + }, + { + "epoch": 1.4, + "learning_rate": 1.1512173264715012e-05, + "loss": 0.1209, + "step": 4365 + }, + { + "epoch": 1.4, + "learning_rate": 1.150873944285854e-05, + "loss": 0.1153, + "step": 4366 + }, + { + "epoch": 1.4, + "learning_rate": 1.1505305438951147e-05, + "loss": 0.1235, + "step": 4367 + }, + { + "epoch": 1.41, + "learning_rate": 1.1501871253407196e-05, + "loss": 0.1207, + "step": 4368 + }, + { + "epoch": 1.41, + "learning_rate": 1.1498436886641068e-05, + "loss": 0.1252, + "step": 4369 + }, + { + "epoch": 1.41, + "learning_rate": 1.1495002339067173e-05, + "loss": 0.1127, + "step": 4370 + }, + { + "epoch": 1.41, + "learning_rate": 1.1491567611099934e-05, + "loss": 0.1197, + "step": 4371 + }, + { + "epoch": 1.41, + "learning_rate": 1.1488132703153805e-05, + "loss": 0.113, + "step": 4372 + }, + { + "epoch": 1.41, + "learning_rate": 1.148469761564325e-05, + "loss": 0.1195, + "step": 4373 + }, + { + "epoch": 1.41, + "learning_rate": 1.1481262348982762e-05, + "loss": 0.1258, + "step": 4374 + }, + { + "epoch": 1.41, + "learning_rate": 1.1477826903586862e-05, + "loss": 0.1277, + "step": 4375 + }, + { + "epoch": 1.41, + "learning_rate": 1.1474391279870076e-05, + "loss": 0.1231, + "step": 4376 + }, + { + "epoch": 1.41, + "learning_rate": 1.1470955478246968e-05, + "loss": 0.1165, + "step": 4377 + }, + { + "epoch": 1.41, + "learning_rate": 1.1467519499132113e-05, + "loss": 0.1201, + "step": 4378 + }, + { + "epoch": 1.41, + "learning_rate": 1.1464083342940113e-05, + "loss": 0.1226, + "step": 4379 + }, + { + "epoch": 1.41, + "learning_rate": 1.1460647010085588e-05, + "loss": 0.1308, + "step": 4380 + }, + { + "epoch": 1.41, + "learning_rate": 1.145721050098318e-05, + "loss": 0.1266, + "step": 4381 + }, + { + "epoch": 1.41, + "learning_rate": 1.1453773816047554e-05, + "loss": 0.1196, + "step": 4382 + }, + { + "epoch": 1.41, + "learning_rate": 1.1450336955693393e-05, + "loss": 0.1291, + "step": 4383 + }, + { + "epoch": 1.41, + "learning_rate": 1.1446899920335407e-05, + "loss": 0.1196, + "step": 4384 + }, + { + "epoch": 1.41, + "learning_rate": 1.1443462710388316e-05, + "loss": 0.1306, + "step": 4385 + }, + { + "epoch": 1.41, + "learning_rate": 1.1440025326266877e-05, + "loss": 0.1117, + "step": 4386 + }, + { + "epoch": 1.41, + "learning_rate": 1.1436587768385858e-05, + "loss": 0.1174, + "step": 4387 + }, + { + "epoch": 1.41, + "learning_rate": 1.1433150037160043e-05, + "loss": 0.1271, + "step": 4388 + }, + { + "epoch": 1.41, + "learning_rate": 1.1429712133004249e-05, + "loss": 0.1262, + "step": 4389 + }, + { + "epoch": 1.41, + "learning_rate": 1.1426274056333307e-05, + "loss": 0.1176, + "step": 4390 + }, + { + "epoch": 1.41, + "learning_rate": 1.1422835807562067e-05, + "loss": 0.1261, + "step": 4391 + }, + { + "epoch": 1.41, + "learning_rate": 1.1419397387105405e-05, + "loss": 0.123, + "step": 4392 + }, + { + "epoch": 1.41, + "learning_rate": 1.141595879537822e-05, + "loss": 0.1218, + "step": 4393 + }, + { + "epoch": 1.41, + "learning_rate": 1.141252003279542e-05, + "loss": 0.1271, + "step": 4394 + }, + { + "epoch": 1.41, + "learning_rate": 1.1409081099771942e-05, + "loss": 0.1213, + "step": 4395 + }, + { + "epoch": 1.41, + "learning_rate": 1.1405641996722748e-05, + "loss": 0.1246, + "step": 4396 + }, + { + "epoch": 1.41, + "learning_rate": 1.1402202724062806e-05, + "loss": 0.1252, + "step": 4397 + }, + { + "epoch": 1.41, + "learning_rate": 1.1398763282207118e-05, + "loss": 0.1196, + "step": 4398 + }, + { + "epoch": 1.42, + "learning_rate": 1.1395323671570706e-05, + "loss": 0.1295, + "step": 4399 + }, + { + "epoch": 1.42, + "learning_rate": 1.1391883892568599e-05, + "loss": 0.1201, + "step": 4400 + }, + { + "epoch": 1.42, + "learning_rate": 1.138844394561586e-05, + "loss": 0.1174, + "step": 4401 + }, + { + "epoch": 1.42, + "learning_rate": 1.1385003831127573e-05, + "loss": 0.132, + "step": 4402 + }, + { + "epoch": 1.42, + "learning_rate": 1.1381563549518823e-05, + "loss": 0.1201, + "step": 4403 + }, + { + "epoch": 1.42, + "learning_rate": 1.1378123101204742e-05, + "loss": 0.12, + "step": 4404 + }, + { + "epoch": 1.42, + "learning_rate": 1.1374682486600464e-05, + "loss": 0.1277, + "step": 4405 + }, + { + "epoch": 1.42, + "learning_rate": 1.137124170612115e-05, + "loss": 0.122, + "step": 4406 + }, + { + "epoch": 1.42, + "learning_rate": 1.1367800760181975e-05, + "loss": 0.1249, + "step": 4407 + }, + { + "epoch": 1.42, + "learning_rate": 1.1364359649198145e-05, + "loss": 0.1221, + "step": 4408 + }, + { + "epoch": 1.42, + "learning_rate": 1.1360918373584873e-05, + "loss": 0.1186, + "step": 4409 + }, + { + "epoch": 1.42, + "learning_rate": 1.1357476933757398e-05, + "loss": 0.1222, + "step": 4410 + }, + { + "epoch": 1.42, + "learning_rate": 1.1354035330130984e-05, + "loss": 0.1218, + "step": 4411 + }, + { + "epoch": 1.42, + "learning_rate": 1.1350593563120907e-05, + "loss": 0.1162, + "step": 4412 + }, + { + "epoch": 1.42, + "learning_rate": 1.1347151633142462e-05, + "loss": 0.1258, + "step": 4413 + }, + { + "epoch": 1.42, + "learning_rate": 1.1343709540610971e-05, + "loss": 0.1226, + "step": 4414 + }, + { + "epoch": 1.42, + "learning_rate": 1.134026728594177e-05, + "loss": 0.1256, + "step": 4415 + }, + { + "epoch": 1.42, + "learning_rate": 1.1336824869550216e-05, + "loss": 0.1219, + "step": 4416 + }, + { + "epoch": 1.42, + "learning_rate": 1.1333382291851687e-05, + "loss": 0.1259, + "step": 4417 + }, + { + "epoch": 1.42, + "learning_rate": 1.1329939553261575e-05, + "loss": 0.1338, + "step": 4418 + }, + { + "epoch": 1.42, + "learning_rate": 1.1326496654195302e-05, + "loss": 0.1152, + "step": 4419 + }, + { + "epoch": 1.42, + "learning_rate": 1.1323053595068296e-05, + "loss": 0.1267, + "step": 4420 + }, + { + "epoch": 1.42, + "learning_rate": 1.1319610376296018e-05, + "loss": 0.1128, + "step": 4421 + }, + { + "epoch": 1.42, + "learning_rate": 1.1316166998293937e-05, + "loss": 0.1265, + "step": 4422 + }, + { + "epoch": 1.42, + "learning_rate": 1.1312723461477543e-05, + "loss": 0.1203, + "step": 4423 + }, + { + "epoch": 1.42, + "learning_rate": 1.1309279766262355e-05, + "loss": 0.131, + "step": 4424 + }, + { + "epoch": 1.42, + "learning_rate": 1.13058359130639e-05, + "loss": 0.1343, + "step": 4425 + }, + { + "epoch": 1.42, + "learning_rate": 1.1302391902297726e-05, + "loss": 0.125, + "step": 4426 + }, + { + "epoch": 1.42, + "learning_rate": 1.1298947734379406e-05, + "loss": 0.1386, + "step": 4427 + }, + { + "epoch": 1.42, + "learning_rate": 1.1295503409724526e-05, + "loss": 0.121, + "step": 4428 + }, + { + "epoch": 1.42, + "learning_rate": 1.1292058928748692e-05, + "loss": 0.1169, + "step": 4429 + }, + { + "epoch": 1.43, + "learning_rate": 1.1288614291867532e-05, + "loss": 0.1154, + "step": 4430 + }, + { + "epoch": 1.43, + "learning_rate": 1.1285169499496686e-05, + "loss": 0.1207, + "step": 4431 + }, + { + "epoch": 1.43, + "learning_rate": 1.1281724552051824e-05, + "loss": 0.1286, + "step": 4432 + }, + { + "epoch": 1.43, + "learning_rate": 1.1278279449948626e-05, + "loss": 0.1325, + "step": 4433 + }, + { + "epoch": 1.43, + "learning_rate": 1.1274834193602786e-05, + "loss": 0.1175, + "step": 4434 + }, + { + "epoch": 1.43, + "learning_rate": 1.1271388783430033e-05, + "loss": 0.12, + "step": 4435 + }, + { + "epoch": 1.43, + "learning_rate": 1.12679432198461e-05, + "loss": 0.127, + "step": 4436 + }, + { + "epoch": 1.43, + "learning_rate": 1.126449750326674e-05, + "loss": 0.1139, + "step": 4437 + }, + { + "epoch": 1.43, + "learning_rate": 1.1261051634107735e-05, + "loss": 0.1226, + "step": 4438 + }, + { + "epoch": 1.43, + "learning_rate": 1.1257605612784873e-05, + "loss": 0.1227, + "step": 4439 + }, + { + "epoch": 1.43, + "learning_rate": 1.125415943971397e-05, + "loss": 0.124, + "step": 4440 + }, + { + "epoch": 1.43, + "learning_rate": 1.1250713115310852e-05, + "loss": 0.1096, + "step": 4441 + }, + { + "epoch": 1.43, + "learning_rate": 1.124726663999137e-05, + "loss": 0.1145, + "step": 4442 + }, + { + "epoch": 1.43, + "learning_rate": 1.1243820014171385e-05, + "loss": 0.1215, + "step": 4443 + }, + { + "epoch": 1.43, + "learning_rate": 1.1240373238266786e-05, + "loss": 0.1224, + "step": 4444 + }, + { + "epoch": 1.43, + "learning_rate": 1.123692631269348e-05, + "loss": 0.1221, + "step": 4445 + }, + { + "epoch": 1.43, + "learning_rate": 1.1233479237867378e-05, + "loss": 0.1209, + "step": 4446 + }, + { + "epoch": 1.43, + "learning_rate": 1.1230032014204422e-05, + "loss": 0.1223, + "step": 4447 + }, + { + "epoch": 1.43, + "learning_rate": 1.1226584642120574e-05, + "loss": 0.1198, + "step": 4448 + }, + { + "epoch": 1.43, + "learning_rate": 1.1223137122031803e-05, + "loss": 0.1183, + "step": 4449 + }, + { + "epoch": 1.43, + "learning_rate": 1.1219689454354104e-05, + "loss": 0.1322, + "step": 4450 + }, + { + "epoch": 1.43, + "learning_rate": 1.1216241639503487e-05, + "loss": 0.1161, + "step": 4451 + }, + { + "epoch": 1.43, + "learning_rate": 1.1212793677895976e-05, + "loss": 0.1318, + "step": 4452 + }, + { + "epoch": 1.43, + "learning_rate": 1.1209345569947622e-05, + "loss": 0.1248, + "step": 4453 + }, + { + "epoch": 1.43, + "learning_rate": 1.1205897316074487e-05, + "loss": 0.1238, + "step": 4454 + }, + { + "epoch": 1.43, + "learning_rate": 1.1202448916692655e-05, + "loss": 0.1211, + "step": 4455 + }, + { + "epoch": 1.43, + "learning_rate": 1.1199000372218214e-05, + "loss": 0.1276, + "step": 4456 + }, + { + "epoch": 1.43, + "learning_rate": 1.1195551683067292e-05, + "loss": 0.1342, + "step": 4457 + }, + { + "epoch": 1.43, + "learning_rate": 1.1192102849656016e-05, + "loss": 0.1356, + "step": 4458 + }, + { + "epoch": 1.43, + "learning_rate": 1.1188653872400539e-05, + "loss": 0.1127, + "step": 4459 + }, + { + "epoch": 1.43, + "learning_rate": 1.118520475171703e-05, + "loss": 0.1129, + "step": 4460 + }, + { + "epoch": 1.44, + "learning_rate": 1.118175548802167e-05, + "loss": 0.1355, + "step": 4461 + }, + { + "epoch": 1.44, + "learning_rate": 1.1178306081730666e-05, + "loss": 0.133, + "step": 4462 + }, + { + "epoch": 1.44, + "learning_rate": 1.1174856533260239e-05, + "loss": 0.1226, + "step": 4463 + }, + { + "epoch": 1.44, + "learning_rate": 1.1171406843026625e-05, + "loss": 0.1234, + "step": 4464 + }, + { + "epoch": 1.44, + "learning_rate": 1.1167957011446073e-05, + "loss": 0.1303, + "step": 4465 + }, + { + "epoch": 1.44, + "learning_rate": 1.1164507038934864e-05, + "loss": 0.1155, + "step": 4466 + }, + { + "epoch": 1.44, + "learning_rate": 1.1161056925909279e-05, + "loss": 0.1073, + "step": 4467 + }, + { + "epoch": 1.44, + "learning_rate": 1.1157606672785627e-05, + "loss": 0.1316, + "step": 4468 + }, + { + "epoch": 1.44, + "learning_rate": 1.1154156279980226e-05, + "loss": 0.12, + "step": 4469 + }, + { + "epoch": 1.44, + "learning_rate": 1.1150705747909423e-05, + "loss": 0.1241, + "step": 4470 + }, + { + "epoch": 1.44, + "learning_rate": 1.1147255076989565e-05, + "loss": 0.1254, + "step": 4471 + }, + { + "epoch": 1.44, + "learning_rate": 1.1143804267637028e-05, + "loss": 0.1393, + "step": 4472 + }, + { + "epoch": 1.44, + "learning_rate": 1.1140353320268205e-05, + "loss": 0.121, + "step": 4473 + }, + { + "epoch": 1.44, + "learning_rate": 1.1136902235299495e-05, + "loss": 0.1142, + "step": 4474 + }, + { + "epoch": 1.44, + "learning_rate": 1.1133451013147327e-05, + "loss": 0.1136, + "step": 4475 + }, + { + "epoch": 1.44, + "learning_rate": 1.1129999654228139e-05, + "loss": 0.1202, + "step": 4476 + }, + { + "epoch": 1.44, + "learning_rate": 1.1126548158958383e-05, + "loss": 0.1206, + "step": 4477 + }, + { + "epoch": 1.44, + "learning_rate": 1.1123096527754533e-05, + "loss": 0.116, + "step": 4478 + }, + { + "epoch": 1.44, + "learning_rate": 1.1119644761033079e-05, + "loss": 0.1254, + "step": 4479 + }, + { + "epoch": 1.44, + "learning_rate": 1.1116192859210527e-05, + "loss": 0.1239, + "step": 4480 + }, + { + "epoch": 1.44, + "learning_rate": 1.1112740822703392e-05, + "loss": 0.1246, + "step": 4481 + }, + { + "epoch": 1.44, + "learning_rate": 1.1109288651928216e-05, + "loss": 0.1294, + "step": 4482 + }, + { + "epoch": 1.44, + "learning_rate": 1.1105836347301555e-05, + "loss": 0.1199, + "step": 4483 + }, + { + "epoch": 1.44, + "learning_rate": 1.1102383909239971e-05, + "loss": 0.1176, + "step": 4484 + }, + { + "epoch": 1.44, + "learning_rate": 1.1098931338160057e-05, + "loss": 0.133, + "step": 4485 + }, + { + "epoch": 1.44, + "learning_rate": 1.1095478634478412e-05, + "loss": 0.1187, + "step": 4486 + }, + { + "epoch": 1.44, + "learning_rate": 1.1092025798611655e-05, + "loss": 0.1288, + "step": 4487 + }, + { + "epoch": 1.44, + "learning_rate": 1.108857283097642e-05, + "loss": 0.1189, + "step": 4488 + }, + { + "epoch": 1.44, + "learning_rate": 1.1085119731989353e-05, + "loss": 0.1181, + "step": 4489 + }, + { + "epoch": 1.44, + "learning_rate": 1.1081666502067126e-05, + "loss": 0.1143, + "step": 4490 + }, + { + "epoch": 1.44, + "learning_rate": 1.1078213141626415e-05, + "loss": 0.1202, + "step": 4491 + }, + { + "epoch": 1.45, + "learning_rate": 1.107475965108392e-05, + "loss": 0.1331, + "step": 4492 + }, + { + "epoch": 1.45, + "learning_rate": 1.1071306030856353e-05, + "loss": 0.1256, + "step": 4493 + }, + { + "epoch": 1.45, + "learning_rate": 1.1067852281360443e-05, + "loss": 0.1187, + "step": 4494 + }, + { + "epoch": 1.45, + "learning_rate": 1.106439840301293e-05, + "loss": 0.1326, + "step": 4495 + }, + { + "epoch": 1.45, + "learning_rate": 1.1060944396230583e-05, + "loss": 0.131, + "step": 4496 + }, + { + "epoch": 1.45, + "learning_rate": 1.1057490261430168e-05, + "loss": 0.1156, + "step": 4497 + }, + { + "epoch": 1.45, + "learning_rate": 1.1054035999028478e-05, + "loss": 0.1139, + "step": 4498 + }, + { + "epoch": 1.45, + "learning_rate": 1.1050581609442322e-05, + "loss": 0.1077, + "step": 4499 + }, + { + "epoch": 1.45, + "learning_rate": 1.1047127093088521e-05, + "loss": 0.1217, + "step": 4500 + }, + { + "epoch": 1.45, + "learning_rate": 1.1043672450383907e-05, + "loss": 0.113, + "step": 4501 + }, + { + "epoch": 1.45, + "learning_rate": 1.1040217681745335e-05, + "loss": 0.1242, + "step": 4502 + }, + { + "epoch": 1.45, + "learning_rate": 1.1036762787589674e-05, + "loss": 0.13, + "step": 4503 + }, + { + "epoch": 1.45, + "learning_rate": 1.1033307768333806e-05, + "loss": 0.1151, + "step": 4504 + }, + { + "epoch": 1.45, + "learning_rate": 1.1029852624394621e-05, + "loss": 0.1178, + "step": 4505 + }, + { + "epoch": 1.45, + "learning_rate": 1.102639735618904e-05, + "loss": 0.1195, + "step": 4506 + }, + { + "epoch": 1.45, + "learning_rate": 1.1022941964133988e-05, + "loss": 0.1312, + "step": 4507 + }, + { + "epoch": 1.45, + "learning_rate": 1.1019486448646407e-05, + "loss": 0.1046, + "step": 4508 + }, + { + "epoch": 1.45, + "learning_rate": 1.1016030810143253e-05, + "loss": 0.1296, + "step": 4509 + }, + { + "epoch": 1.45, + "learning_rate": 1.1012575049041499e-05, + "loss": 0.1172, + "step": 4510 + }, + { + "epoch": 1.45, + "learning_rate": 1.1009119165758129e-05, + "loss": 0.1242, + "step": 4511 + }, + { + "epoch": 1.45, + "learning_rate": 1.1005663160710152e-05, + "loss": 0.1105, + "step": 4512 + }, + { + "epoch": 1.45, + "learning_rate": 1.1002207034314576e-05, + "loss": 0.1124, + "step": 4513 + }, + { + "epoch": 1.45, + "learning_rate": 1.0998750786988435e-05, + "loss": 0.114, + "step": 4514 + }, + { + "epoch": 1.45, + "learning_rate": 1.0995294419148779e-05, + "loss": 0.1322, + "step": 4515 + }, + { + "epoch": 1.45, + "learning_rate": 1.099183793121266e-05, + "loss": 0.1082, + "step": 4516 + }, + { + "epoch": 1.45, + "learning_rate": 1.0988381323597156e-05, + "loss": 0.1148, + "step": 4517 + }, + { + "epoch": 1.45, + "learning_rate": 1.098492459671936e-05, + "loss": 0.1237, + "step": 4518 + }, + { + "epoch": 1.45, + "learning_rate": 1.0981467750996373e-05, + "loss": 0.1226, + "step": 4519 + }, + { + "epoch": 1.45, + "learning_rate": 1.0978010786845306e-05, + "loss": 0.1106, + "step": 4520 + }, + { + "epoch": 1.45, + "learning_rate": 1.09745537046833e-05, + "loss": 0.1281, + "step": 4521 + }, + { + "epoch": 1.45, + "learning_rate": 1.0971096504927498e-05, + "loss": 0.144, + "step": 4522 + }, + { + "epoch": 1.46, + "learning_rate": 1.0967639187995061e-05, + "loss": 0.1332, + "step": 4523 + }, + { + "epoch": 1.46, + "learning_rate": 1.0964181754303159e-05, + "loss": 0.1325, + "step": 4524 + }, + { + "epoch": 1.46, + "learning_rate": 1.0960724204268989e-05, + "loss": 0.1232, + "step": 4525 + }, + { + "epoch": 1.46, + "learning_rate": 1.0957266538309748e-05, + "loss": 0.1227, + "step": 4526 + }, + { + "epoch": 1.46, + "learning_rate": 1.095380875684265e-05, + "loss": 0.1177, + "step": 4527 + }, + { + "epoch": 1.46, + "learning_rate": 1.0950350860284935e-05, + "loss": 0.1172, + "step": 4528 + }, + { + "epoch": 1.46, + "learning_rate": 1.094689284905384e-05, + "loss": 0.1318, + "step": 4529 + }, + { + "epoch": 1.46, + "learning_rate": 1.0943434723566624e-05, + "loss": 0.1125, + "step": 4530 + }, + { + "epoch": 1.46, + "learning_rate": 1.0939976484240562e-05, + "loss": 0.1254, + "step": 4531 + }, + { + "epoch": 1.46, + "learning_rate": 1.093651813149294e-05, + "loss": 0.1227, + "step": 4532 + }, + { + "epoch": 1.46, + "learning_rate": 1.0933059665741052e-05, + "loss": 0.1224, + "step": 4533 + }, + { + "epoch": 1.46, + "learning_rate": 1.0929601087402217e-05, + "loss": 0.1251, + "step": 4534 + }, + { + "epoch": 1.46, + "learning_rate": 1.0926142396893762e-05, + "loss": 0.1324, + "step": 4535 + }, + { + "epoch": 1.46, + "learning_rate": 1.092268359463302e-05, + "loss": 0.1271, + "step": 4536 + }, + { + "epoch": 1.46, + "learning_rate": 1.0919224681037355e-05, + "loss": 0.1251, + "step": 4537 + }, + { + "epoch": 1.46, + "learning_rate": 1.0915765656524127e-05, + "loss": 0.1192, + "step": 4538 + }, + { + "epoch": 1.46, + "learning_rate": 1.0912306521510718e-05, + "loss": 0.1201, + "step": 4539 + }, + { + "epoch": 1.46, + "learning_rate": 1.0908847276414527e-05, + "loss": 0.1182, + "step": 4540 + }, + { + "epoch": 1.46, + "learning_rate": 1.0905387921652952e-05, + "loss": 0.1123, + "step": 4541 + }, + { + "epoch": 1.46, + "learning_rate": 1.090192845764342e-05, + "loss": 0.1271, + "step": 4542 + }, + { + "epoch": 1.46, + "learning_rate": 1.0898468884803366e-05, + "loss": 0.114, + "step": 4543 + }, + { + "epoch": 1.46, + "learning_rate": 1.0895009203550231e-05, + "loss": 0.1226, + "step": 4544 + }, + { + "epoch": 1.46, + "learning_rate": 1.0891549414301478e-05, + "loss": 0.1222, + "step": 4545 + }, + { + "epoch": 1.46, + "learning_rate": 1.088808951747458e-05, + "loss": 0.1101, + "step": 4546 + }, + { + "epoch": 1.46, + "learning_rate": 1.0884629513487023e-05, + "loss": 0.1365, + "step": 4547 + }, + { + "epoch": 1.46, + "learning_rate": 1.0881169402756305e-05, + "loss": 0.1247, + "step": 4548 + }, + { + "epoch": 1.46, + "learning_rate": 1.0877709185699938e-05, + "loss": 0.1214, + "step": 4549 + }, + { + "epoch": 1.46, + "learning_rate": 1.0874248862735445e-05, + "loss": 0.1188, + "step": 4550 + }, + { + "epoch": 1.46, + "learning_rate": 1.0870788434280368e-05, + "loss": 0.1304, + "step": 4551 + }, + { + "epoch": 1.46, + "learning_rate": 1.0867327900752252e-05, + "loss": 0.1312, + "step": 4552 + }, + { + "epoch": 1.46, + "learning_rate": 1.0863867262568661e-05, + "loss": 0.1137, + "step": 4553 + }, + { + "epoch": 1.47, + "learning_rate": 1.0860406520147171e-05, + "loss": 0.13, + "step": 4554 + }, + { + "epoch": 1.47, + "learning_rate": 1.085694567390537e-05, + "loss": 0.1232, + "step": 4555 + }, + { + "epoch": 1.47, + "learning_rate": 1.0853484724260861e-05, + "loss": 0.1347, + "step": 4556 + }, + { + "epoch": 1.47, + "learning_rate": 1.0850023671631249e-05, + "loss": 0.1406, + "step": 4557 + }, + { + "epoch": 1.47, + "learning_rate": 1.0846562516434167e-05, + "loss": 0.132, + "step": 4558 + }, + { + "epoch": 1.47, + "learning_rate": 1.0843101259087252e-05, + "loss": 0.1239, + "step": 4559 + }, + { + "epoch": 1.47, + "learning_rate": 1.083963990000815e-05, + "loss": 0.1128, + "step": 4560 + }, + { + "epoch": 1.47, + "learning_rate": 1.0836178439614526e-05, + "loss": 0.1278, + "step": 4561 + }, + { + "epoch": 1.47, + "learning_rate": 1.0832716878324055e-05, + "loss": 0.1172, + "step": 4562 + }, + { + "epoch": 1.47, + "learning_rate": 1.082925521655442e-05, + "loss": 0.1302, + "step": 4563 + }, + { + "epoch": 1.47, + "learning_rate": 1.0825793454723325e-05, + "loss": 0.1179, + "step": 4564 + }, + { + "epoch": 1.47, + "learning_rate": 1.0822331593248479e-05, + "loss": 0.131, + "step": 4565 + }, + { + "epoch": 1.47, + "learning_rate": 1.0818869632547602e-05, + "loss": 0.1281, + "step": 4566 + }, + { + "epoch": 1.47, + "learning_rate": 1.081540757303843e-05, + "loss": 0.1207, + "step": 4567 + }, + { + "epoch": 1.47, + "learning_rate": 1.0811945415138717e-05, + "loss": 0.1189, + "step": 4568 + }, + { + "epoch": 1.47, + "learning_rate": 1.0808483159266213e-05, + "loss": 0.1137, + "step": 4569 + }, + { + "epoch": 1.47, + "learning_rate": 1.0805020805838688e-05, + "loss": 0.1255, + "step": 4570 + }, + { + "epoch": 1.47, + "learning_rate": 1.0801558355273933e-05, + "loss": 0.1137, + "step": 4571 + }, + { + "epoch": 1.47, + "learning_rate": 1.0798095807989735e-05, + "loss": 0.1179, + "step": 4572 + }, + { + "epoch": 1.47, + "learning_rate": 1.0794633164403898e-05, + "loss": 0.1226, + "step": 4573 + }, + { + "epoch": 1.47, + "learning_rate": 1.0791170424934248e-05, + "loss": 0.1243, + "step": 4574 + }, + { + "epoch": 1.47, + "learning_rate": 1.0787707589998603e-05, + "loss": 0.1209, + "step": 4575 + }, + { + "epoch": 1.47, + "learning_rate": 1.0784244660014813e-05, + "loss": 0.1172, + "step": 4576 + }, + { + "epoch": 1.47, + "learning_rate": 1.0780781635400728e-05, + "loss": 0.132, + "step": 4577 + }, + { + "epoch": 1.47, + "learning_rate": 1.0777318516574205e-05, + "loss": 0.1231, + "step": 4578 + }, + { + "epoch": 1.47, + "learning_rate": 1.0773855303953122e-05, + "loss": 0.1296, + "step": 4579 + }, + { + "epoch": 1.47, + "learning_rate": 1.0770391997955367e-05, + "loss": 0.1129, + "step": 4580 + }, + { + "epoch": 1.47, + "learning_rate": 1.0766928598998837e-05, + "loss": 0.1204, + "step": 4581 + }, + { + "epoch": 1.47, + "learning_rate": 1.0763465107501438e-05, + "loss": 0.1253, + "step": 4582 + }, + { + "epoch": 1.47, + "learning_rate": 1.0760001523881094e-05, + "loss": 0.1282, + "step": 4583 + }, + { + "epoch": 1.47, + "learning_rate": 1.075653784855573e-05, + "loss": 0.1153, + "step": 4584 + }, + { + "epoch": 1.48, + "learning_rate": 1.0753074081943291e-05, + "loss": 0.1156, + "step": 4585 + }, + { + "epoch": 1.48, + "learning_rate": 1.074961022446173e-05, + "loss": 0.1217, + "step": 4586 + }, + { + "epoch": 1.48, + "learning_rate": 1.0746146276529011e-05, + "loss": 0.1229, + "step": 4587 + }, + { + "epoch": 1.48, + "learning_rate": 1.0742682238563105e-05, + "loss": 0.1119, + "step": 4588 + }, + { + "epoch": 1.48, + "learning_rate": 1.0739218110982002e-05, + "loss": 0.1206, + "step": 4589 + }, + { + "epoch": 1.48, + "learning_rate": 1.0735753894203698e-05, + "loss": 0.1223, + "step": 4590 + }, + { + "epoch": 1.48, + "learning_rate": 1.0732289588646196e-05, + "loss": 0.1076, + "step": 4591 + }, + { + "epoch": 1.48, + "learning_rate": 1.072882519472752e-05, + "loss": 0.1246, + "step": 4592 + }, + { + "epoch": 1.48, + "learning_rate": 1.0725360712865693e-05, + "loss": 0.1259, + "step": 4593 + }, + { + "epoch": 1.48, + "learning_rate": 1.0721896143478759e-05, + "loss": 0.1287, + "step": 4594 + }, + { + "epoch": 1.48, + "learning_rate": 1.0718431486984765e-05, + "loss": 0.1287, + "step": 4595 + }, + { + "epoch": 1.48, + "learning_rate": 1.0714966743801768e-05, + "loss": 0.1282, + "step": 4596 + }, + { + "epoch": 1.48, + "learning_rate": 1.0711501914347842e-05, + "loss": 0.1187, + "step": 4597 + }, + { + "epoch": 1.48, + "learning_rate": 1.0708036999041072e-05, + "loss": 0.1176, + "step": 4598 + }, + { + "epoch": 1.48, + "learning_rate": 1.070457199829954e-05, + "loss": 0.1273, + "step": 4599 + }, + { + "epoch": 1.48, + "learning_rate": 1.0701106912541357e-05, + "loss": 0.1133, + "step": 4600 + }, + { + "epoch": 1.48, + "learning_rate": 1.069764174218463e-05, + "loss": 0.1302, + "step": 4601 + }, + { + "epoch": 1.48, + "learning_rate": 1.0694176487647482e-05, + "loss": 0.1278, + "step": 4602 + }, + { + "epoch": 1.48, + "learning_rate": 1.0690711149348047e-05, + "loss": 0.1228, + "step": 4603 + }, + { + "epoch": 1.48, + "learning_rate": 1.0687245727704465e-05, + "loss": 0.1271, + "step": 4604 + }, + { + "epoch": 1.48, + "learning_rate": 1.0683780223134892e-05, + "loss": 0.1263, + "step": 4605 + }, + { + "epoch": 1.48, + "learning_rate": 1.0680314636057486e-05, + "loss": 0.1174, + "step": 4606 + }, + { + "epoch": 1.48, + "learning_rate": 1.067684896689042e-05, + "loss": 0.1279, + "step": 4607 + }, + { + "epoch": 1.48, + "learning_rate": 1.0673383216051883e-05, + "loss": 0.1215, + "step": 4608 + }, + { + "epoch": 1.48, + "learning_rate": 1.0669917383960058e-05, + "loss": 0.1189, + "step": 4609 + }, + { + "epoch": 1.48, + "learning_rate": 1.0666451471033153e-05, + "loss": 0.113, + "step": 4610 + }, + { + "epoch": 1.48, + "learning_rate": 1.0662985477689379e-05, + "loss": 0.1082, + "step": 4611 + }, + { + "epoch": 1.48, + "learning_rate": 1.0659519404346955e-05, + "loss": 0.1184, + "step": 4612 + }, + { + "epoch": 1.48, + "learning_rate": 1.0656053251424115e-05, + "loss": 0.1171, + "step": 4613 + }, + { + "epoch": 1.48, + "learning_rate": 1.0652587019339099e-05, + "loss": 0.113, + "step": 4614 + }, + { + "epoch": 1.48, + "learning_rate": 1.0649120708510154e-05, + "loss": 0.1286, + "step": 4615 + }, + { + "epoch": 1.49, + "learning_rate": 1.0645654319355546e-05, + "loss": 0.1196, + "step": 4616 + }, + { + "epoch": 1.49, + "learning_rate": 1.064218785229354e-05, + "loss": 0.1238, + "step": 4617 + }, + { + "epoch": 1.49, + "learning_rate": 1.0638721307742414e-05, + "loss": 0.1274, + "step": 4618 + }, + { + "epoch": 1.49, + "learning_rate": 1.063525468612046e-05, + "loss": 0.1274, + "step": 4619 + }, + { + "epoch": 1.49, + "learning_rate": 1.0631787987845971e-05, + "loss": 0.1283, + "step": 4620 + }, + { + "epoch": 1.49, + "learning_rate": 1.0628321213337256e-05, + "loss": 0.1195, + "step": 4621 + }, + { + "epoch": 1.49, + "learning_rate": 1.0624854363012629e-05, + "loss": 0.1146, + "step": 4622 + }, + { + "epoch": 1.49, + "learning_rate": 1.0621387437290421e-05, + "loss": 0.1049, + "step": 4623 + }, + { + "epoch": 1.49, + "learning_rate": 1.0617920436588955e-05, + "loss": 0.1217, + "step": 4624 + }, + { + "epoch": 1.49, + "learning_rate": 1.0614453361326582e-05, + "loss": 0.1117, + "step": 4625 + }, + { + "epoch": 1.49, + "learning_rate": 1.0610986211921657e-05, + "loss": 0.1227, + "step": 4626 + }, + { + "epoch": 1.49, + "learning_rate": 1.0607518988792531e-05, + "loss": 0.1083, + "step": 4627 + }, + { + "epoch": 1.49, + "learning_rate": 1.060405169235758e-05, + "loss": 0.1212, + "step": 4628 + }, + { + "epoch": 1.49, + "learning_rate": 1.0600584323035186e-05, + "loss": 0.118, + "step": 4629 + }, + { + "epoch": 1.49, + "learning_rate": 1.0597116881243728e-05, + "loss": 0.1061, + "step": 4630 + }, + { + "epoch": 1.49, + "learning_rate": 1.0593649367401607e-05, + "loss": 0.1295, + "step": 4631 + }, + { + "epoch": 1.49, + "learning_rate": 1.0590181781927229e-05, + "loss": 0.1214, + "step": 4632 + }, + { + "epoch": 1.49, + "learning_rate": 1.0586714125239008e-05, + "loss": 0.1208, + "step": 4633 + }, + { + "epoch": 1.49, + "learning_rate": 1.058324639775536e-05, + "loss": 0.1238, + "step": 4634 + }, + { + "epoch": 1.49, + "learning_rate": 1.0579778599894723e-05, + "loss": 0.1173, + "step": 4635 + }, + { + "epoch": 1.49, + "learning_rate": 1.0576310732075534e-05, + "loss": 0.1228, + "step": 4636 + }, + { + "epoch": 1.49, + "learning_rate": 1.057284279471624e-05, + "loss": 0.1165, + "step": 4637 + }, + { + "epoch": 1.49, + "learning_rate": 1.0569374788235298e-05, + "loss": 0.1252, + "step": 4638 + }, + { + "epoch": 1.49, + "learning_rate": 1.0565906713051171e-05, + "loss": 0.0994, + "step": 4639 + }, + { + "epoch": 1.49, + "learning_rate": 1.056243856958233e-05, + "loss": 0.1166, + "step": 4640 + }, + { + "epoch": 1.49, + "learning_rate": 1.0558970358247262e-05, + "loss": 0.1284, + "step": 4641 + }, + { + "epoch": 1.49, + "learning_rate": 1.055550207946445e-05, + "loss": 0.1206, + "step": 4642 + }, + { + "epoch": 1.49, + "learning_rate": 1.0552033733652393e-05, + "loss": 0.1208, + "step": 4643 + }, + { + "epoch": 1.49, + "learning_rate": 1.05485653212296e-05, + "loss": 0.1195, + "step": 4644 + }, + { + "epoch": 1.49, + "learning_rate": 1.0545096842614582e-05, + "loss": 0.1256, + "step": 4645 + }, + { + "epoch": 1.49, + "learning_rate": 1.0541628298225856e-05, + "loss": 0.1248, + "step": 4646 + }, + { + "epoch": 1.5, + "learning_rate": 1.0538159688481959e-05, + "loss": 0.1206, + "step": 4647 + }, + { + "epoch": 1.5, + "learning_rate": 1.053469101380142e-05, + "loss": 0.1123, + "step": 4648 + }, + { + "epoch": 1.5, + "learning_rate": 1.0531222274602795e-05, + "loss": 0.1353, + "step": 4649 + }, + { + "epoch": 1.5, + "learning_rate": 1.0527753471304625e-05, + "loss": 0.117, + "step": 4650 + }, + { + "epoch": 1.5, + "learning_rate": 1.0524284604325476e-05, + "loss": 0.1076, + "step": 4651 + }, + { + "epoch": 1.5, + "learning_rate": 1.0520815674083919e-05, + "loss": 0.1198, + "step": 4652 + }, + { + "epoch": 1.5, + "learning_rate": 1.0517346680998526e-05, + "loss": 0.114, + "step": 4653 + }, + { + "epoch": 1.5, + "learning_rate": 1.051387762548788e-05, + "loss": 0.1209, + "step": 4654 + }, + { + "epoch": 1.5, + "learning_rate": 1.0510408507970577e-05, + "loss": 0.1178, + "step": 4655 + }, + { + "epoch": 1.5, + "learning_rate": 1.0506939328865207e-05, + "loss": 0.1201, + "step": 4656 + }, + { + "epoch": 1.5, + "learning_rate": 1.0503470088590385e-05, + "loss": 0.1104, + "step": 4657 + }, + { + "epoch": 1.5, + "learning_rate": 1.0500000787564718e-05, + "loss": 0.1188, + "step": 4658 + }, + { + "epoch": 1.5, + "learning_rate": 1.0496531426206828e-05, + "loss": 0.1217, + "step": 4659 + }, + { + "epoch": 1.5, + "learning_rate": 1.0493062004935346e-05, + "loss": 0.1089, + "step": 4660 + }, + { + "epoch": 1.5, + "learning_rate": 1.0489592524168904e-05, + "loss": 0.1253, + "step": 4661 + }, + { + "epoch": 1.5, + "learning_rate": 1.0486122984326146e-05, + "loss": 0.1102, + "step": 4662 + }, + { + "epoch": 1.5, + "learning_rate": 1.0482653385825718e-05, + "loss": 0.123, + "step": 4663 + }, + { + "epoch": 1.5, + "learning_rate": 1.0479183729086279e-05, + "loss": 0.1162, + "step": 4664 + }, + { + "epoch": 1.5, + "learning_rate": 1.0475714014526497e-05, + "loss": 0.1215, + "step": 4665 + }, + { + "epoch": 1.5, + "learning_rate": 1.0472244242565035e-05, + "loss": 0.121, + "step": 4666 + }, + { + "epoch": 1.5, + "learning_rate": 1.0468774413620573e-05, + "loss": 0.121, + "step": 4667 + }, + { + "epoch": 1.5, + "learning_rate": 1.0465304528111797e-05, + "loss": 0.1081, + "step": 4668 + }, + { + "epoch": 1.5, + "learning_rate": 1.0461834586457398e-05, + "loss": 0.1245, + "step": 4669 + }, + { + "epoch": 1.5, + "learning_rate": 1.0458364589076071e-05, + "loss": 0.1173, + "step": 4670 + }, + { + "epoch": 1.5, + "learning_rate": 1.0454894536386523e-05, + "loss": 0.1138, + "step": 4671 + }, + { + "epoch": 1.5, + "learning_rate": 1.0451424428807467e-05, + "loss": 0.1236, + "step": 4672 + }, + { + "epoch": 1.5, + "learning_rate": 1.0447954266757616e-05, + "loss": 0.1045, + "step": 4673 + }, + { + "epoch": 1.5, + "learning_rate": 1.0444484050655697e-05, + "loss": 0.113, + "step": 4674 + }, + { + "epoch": 1.5, + "learning_rate": 1.0441013780920446e-05, + "loss": 0.1067, + "step": 4675 + }, + { + "epoch": 1.5, + "learning_rate": 1.0437543457970591e-05, + "loss": 0.1155, + "step": 4676 + }, + { + "epoch": 1.5, + "learning_rate": 1.0434073082224883e-05, + "loss": 0.1129, + "step": 4677 + }, + { + "epoch": 1.5, + "learning_rate": 1.0430602654102071e-05, + "loss": 0.1296, + "step": 4678 + }, + { + "epoch": 1.51, + "learning_rate": 1.0427132174020908e-05, + "loss": 0.1212, + "step": 4679 + }, + { + "epoch": 1.51, + "learning_rate": 1.0423661642400163e-05, + "loss": 0.1021, + "step": 4680 + }, + { + "epoch": 1.51, + "learning_rate": 1.04201910596586e-05, + "loss": 0.1176, + "step": 4681 + }, + { + "epoch": 1.51, + "learning_rate": 1.0416720426214996e-05, + "loss": 0.1197, + "step": 4682 + }, + { + "epoch": 1.51, + "learning_rate": 1.0413249742488132e-05, + "loss": 0.1205, + "step": 4683 + }, + { + "epoch": 1.51, + "learning_rate": 1.04097790088968e-05, + "loss": 0.123, + "step": 4684 + }, + { + "epoch": 1.51, + "learning_rate": 1.0406308225859784e-05, + "loss": 0.1231, + "step": 4685 + }, + { + "epoch": 1.51, + "learning_rate": 1.0402837393795891e-05, + "loss": 0.1182, + "step": 4686 + }, + { + "epoch": 1.51, + "learning_rate": 1.0399366513123926e-05, + "loss": 0.119, + "step": 4687 + }, + { + "epoch": 1.51, + "learning_rate": 1.0395895584262696e-05, + "loss": 0.1233, + "step": 4688 + }, + { + "epoch": 1.51, + "learning_rate": 1.0392424607631019e-05, + "loss": 0.1297, + "step": 4689 + }, + { + "epoch": 1.51, + "learning_rate": 1.0388953583647725e-05, + "loss": 0.1159, + "step": 4690 + }, + { + "epoch": 1.51, + "learning_rate": 1.0385482512731633e-05, + "loss": 0.1119, + "step": 4691 + }, + { + "epoch": 1.51, + "learning_rate": 1.038201139530158e-05, + "loss": 0.1213, + "step": 4692 + }, + { + "epoch": 1.51, + "learning_rate": 1.037854023177641e-05, + "loss": 0.1258, + "step": 4693 + }, + { + "epoch": 1.51, + "learning_rate": 1.0375069022574965e-05, + "loss": 0.113, + "step": 4694 + }, + { + "epoch": 1.51, + "learning_rate": 1.0371597768116093e-05, + "loss": 0.1252, + "step": 4695 + }, + { + "epoch": 1.51, + "learning_rate": 1.0368126468818656e-05, + "loss": 0.1255, + "step": 4696 + }, + { + "epoch": 1.51, + "learning_rate": 1.036465512510151e-05, + "loss": 0.1212, + "step": 4697 + }, + { + "epoch": 1.51, + "learning_rate": 1.0361183737383526e-05, + "loss": 0.1108, + "step": 4698 + }, + { + "epoch": 1.51, + "learning_rate": 1.0357712306083575e-05, + "loss": 0.1202, + "step": 4699 + }, + { + "epoch": 1.51, + "learning_rate": 1.0354240831620542e-05, + "loss": 0.1164, + "step": 4700 + }, + { + "epoch": 1.51, + "learning_rate": 1.0350769314413297e-05, + "loss": 0.1264, + "step": 4701 + }, + { + "epoch": 1.51, + "learning_rate": 1.0347297754880735e-05, + "loss": 0.1042, + "step": 4702 + }, + { + "epoch": 1.51, + "learning_rate": 1.034382615344175e-05, + "loss": 0.1396, + "step": 4703 + }, + { + "epoch": 1.51, + "learning_rate": 1.0340354510515235e-05, + "loss": 0.1406, + "step": 4704 + }, + { + "epoch": 1.51, + "learning_rate": 1.03368828265201e-05, + "loss": 0.1072, + "step": 4705 + }, + { + "epoch": 1.51, + "learning_rate": 1.033341110187525e-05, + "loss": 0.1129, + "step": 4706 + }, + { + "epoch": 1.51, + "learning_rate": 1.0329939336999598e-05, + "loss": 0.1185, + "step": 4707 + }, + { + "epoch": 1.51, + "learning_rate": 1.0326467532312058e-05, + "loss": 0.1243, + "step": 4708 + }, + { + "epoch": 1.51, + "learning_rate": 1.0322995688231562e-05, + "loss": 0.1099, + "step": 4709 + }, + { + "epoch": 1.52, + "learning_rate": 1.0319523805177029e-05, + "loss": 0.121, + "step": 4710 + }, + { + "epoch": 1.52, + "learning_rate": 1.0316051883567395e-05, + "loss": 0.1064, + "step": 4711 + }, + { + "epoch": 1.52, + "learning_rate": 1.0312579923821598e-05, + "loss": 0.1132, + "step": 4712 + }, + { + "epoch": 1.52, + "learning_rate": 1.0309107926358574e-05, + "loss": 0.117, + "step": 4713 + }, + { + "epoch": 1.52, + "learning_rate": 1.0305635891597274e-05, + "loss": 0.1167, + "step": 4714 + }, + { + "epoch": 1.52, + "learning_rate": 1.0302163819956648e-05, + "loss": 0.1133, + "step": 4715 + }, + { + "epoch": 1.52, + "learning_rate": 1.029869171185565e-05, + "loss": 0.115, + "step": 4716 + }, + { + "epoch": 1.52, + "learning_rate": 1.0295219567713238e-05, + "loss": 0.1153, + "step": 4717 + }, + { + "epoch": 1.52, + "learning_rate": 1.0291747387948376e-05, + "loss": 0.1177, + "step": 4718 + }, + { + "epoch": 1.52, + "learning_rate": 1.0288275172980033e-05, + "loss": 0.1047, + "step": 4719 + }, + { + "epoch": 1.52, + "learning_rate": 1.0284802923227183e-05, + "loss": 0.1164, + "step": 4720 + }, + { + "epoch": 1.52, + "learning_rate": 1.0281330639108801e-05, + "loss": 0.1358, + "step": 4721 + }, + { + "epoch": 1.52, + "learning_rate": 1.0277858321043868e-05, + "loss": 0.1098, + "step": 4722 + }, + { + "epoch": 1.52, + "learning_rate": 1.0274385969451367e-05, + "loss": 0.1226, + "step": 4723 + }, + { + "epoch": 1.52, + "learning_rate": 1.0270913584750287e-05, + "loss": 0.1148, + "step": 4724 + }, + { + "epoch": 1.52, + "learning_rate": 1.0267441167359619e-05, + "loss": 0.1038, + "step": 4725 + }, + { + "epoch": 1.52, + "learning_rate": 1.0263968717698365e-05, + "loss": 0.1283, + "step": 4726 + }, + { + "epoch": 1.52, + "learning_rate": 1.0260496236185522e-05, + "loss": 0.1068, + "step": 4727 + }, + { + "epoch": 1.52, + "learning_rate": 1.0257023723240093e-05, + "loss": 0.119, + "step": 4728 + }, + { + "epoch": 1.52, + "learning_rate": 1.025355117928109e-05, + "loss": 0.1166, + "step": 4729 + }, + { + "epoch": 1.52, + "learning_rate": 1.0250078604727523e-05, + "loss": 0.1298, + "step": 4730 + }, + { + "epoch": 1.52, + "learning_rate": 1.0246605999998407e-05, + "loss": 0.1194, + "step": 4731 + }, + { + "epoch": 1.52, + "learning_rate": 1.024313336551276e-05, + "loss": 0.1183, + "step": 4732 + }, + { + "epoch": 1.52, + "learning_rate": 1.023966070168961e-05, + "loss": 0.1121, + "step": 4733 + }, + { + "epoch": 1.52, + "learning_rate": 1.023618800894798e-05, + "loss": 0.1203, + "step": 4734 + }, + { + "epoch": 1.52, + "learning_rate": 1.0232715287706898e-05, + "loss": 0.1207, + "step": 4735 + }, + { + "epoch": 1.52, + "learning_rate": 1.02292425383854e-05, + "loss": 0.1202, + "step": 4736 + }, + { + "epoch": 1.52, + "learning_rate": 1.0225769761402524e-05, + "loss": 0.1231, + "step": 4737 + }, + { + "epoch": 1.52, + "learning_rate": 1.0222296957177305e-05, + "loss": 0.1353, + "step": 4738 + }, + { + "epoch": 1.52, + "learning_rate": 1.0218824126128793e-05, + "loss": 0.1025, + "step": 4739 + }, + { + "epoch": 1.52, + "learning_rate": 1.0215351268676028e-05, + "loss": 0.1189, + "step": 4740 + }, + { + "epoch": 1.53, + "learning_rate": 1.0211878385238063e-05, + "loss": 0.1139, + "step": 4741 + }, + { + "epoch": 1.53, + "learning_rate": 1.0208405476233952e-05, + "loss": 0.1201, + "step": 4742 + }, + { + "epoch": 1.53, + "learning_rate": 1.0204932542082748e-05, + "loss": 0.1172, + "step": 4743 + }, + { + "epoch": 1.53, + "learning_rate": 1.0201459583203512e-05, + "loss": 0.1258, + "step": 4744 + }, + { + "epoch": 1.53, + "learning_rate": 1.0197986600015307e-05, + "loss": 0.1184, + "step": 4745 + }, + { + "epoch": 1.53, + "learning_rate": 1.0194513592937196e-05, + "loss": 0.1228, + "step": 4746 + }, + { + "epoch": 1.53, + "learning_rate": 1.0191040562388244e-05, + "loss": 0.124, + "step": 4747 + }, + { + "epoch": 1.53, + "learning_rate": 1.0187567508787526e-05, + "loss": 0.1218, + "step": 4748 + }, + { + "epoch": 1.53, + "learning_rate": 1.0184094432554119e-05, + "loss": 0.1109, + "step": 4749 + }, + { + "epoch": 1.53, + "learning_rate": 1.018062133410709e-05, + "loss": 0.11, + "step": 4750 + }, + { + "epoch": 1.53, + "learning_rate": 1.0177148213865523e-05, + "loss": 0.1137, + "step": 4751 + }, + { + "epoch": 1.53, + "learning_rate": 1.0173675072248504e-05, + "loss": 0.1173, + "step": 4752 + }, + { + "epoch": 1.53, + "learning_rate": 1.0170201909675105e-05, + "loss": 0.1318, + "step": 4753 + }, + { + "epoch": 1.53, + "learning_rate": 1.0166728726564424e-05, + "loss": 0.1258, + "step": 4754 + }, + { + "epoch": 1.53, + "learning_rate": 1.0163255523335542e-05, + "loss": 0.112, + "step": 4755 + }, + { + "epoch": 1.53, + "learning_rate": 1.0159782300407556e-05, + "loss": 0.1209, + "step": 4756 + }, + { + "epoch": 1.53, + "learning_rate": 1.0156309058199559e-05, + "loss": 0.108, + "step": 4757 + }, + { + "epoch": 1.53, + "learning_rate": 1.0152835797130644e-05, + "loss": 0.1207, + "step": 4758 + }, + { + "epoch": 1.53, + "learning_rate": 1.0149362517619914e-05, + "loss": 0.1252, + "step": 4759 + }, + { + "epoch": 1.53, + "learning_rate": 1.0145889220086465e-05, + "loss": 0.1148, + "step": 4760 + }, + { + "epoch": 1.53, + "learning_rate": 1.0142415904949402e-05, + "loss": 0.1246, + "step": 4761 + }, + { + "epoch": 1.53, + "learning_rate": 1.013894257262783e-05, + "loss": 0.1164, + "step": 4762 + }, + { + "epoch": 1.53, + "learning_rate": 1.0135469223540856e-05, + "loss": 0.1253, + "step": 4763 + }, + { + "epoch": 1.53, + "learning_rate": 1.013199585810759e-05, + "loss": 0.126, + "step": 4764 + }, + { + "epoch": 1.53, + "learning_rate": 1.0128522476747143e-05, + "loss": 0.1199, + "step": 4765 + }, + { + "epoch": 1.53, + "learning_rate": 1.0125049079878623e-05, + "loss": 0.1104, + "step": 4766 + }, + { + "epoch": 1.53, + "learning_rate": 1.0121575667921156e-05, + "loss": 0.0974, + "step": 4767 + }, + { + "epoch": 1.53, + "learning_rate": 1.0118102241293848e-05, + "loss": 0.1132, + "step": 4768 + }, + { + "epoch": 1.53, + "learning_rate": 1.0114628800415818e-05, + "loss": 0.119, + "step": 4769 + }, + { + "epoch": 1.53, + "learning_rate": 1.0111155345706193e-05, + "loss": 0.1387, + "step": 4770 + }, + { + "epoch": 1.53, + "learning_rate": 1.010768187758409e-05, + "loss": 0.1218, + "step": 4771 + }, + { + "epoch": 1.54, + "learning_rate": 1.0104208396468634e-05, + "loss": 0.1182, + "step": 4772 + }, + { + "epoch": 1.54, + "learning_rate": 1.010073490277895e-05, + "loss": 0.1147, + "step": 4773 + }, + { + "epoch": 1.54, + "learning_rate": 1.0097261396934161e-05, + "loss": 0.1138, + "step": 4774 + }, + { + "epoch": 1.54, + "learning_rate": 1.0093787879353403e-05, + "loss": 0.1025, + "step": 4775 + }, + { + "epoch": 1.54, + "learning_rate": 1.0090314350455796e-05, + "loss": 0.1076, + "step": 4776 + }, + { + "epoch": 1.54, + "learning_rate": 1.0086840810660476e-05, + "loss": 0.1134, + "step": 4777 + }, + { + "epoch": 1.54, + "learning_rate": 1.0083367260386578e-05, + "loss": 0.1543, + "step": 4778 + }, + { + "epoch": 1.54, + "learning_rate": 1.0079893700053228e-05, + "loss": 0.1219, + "step": 4779 + }, + { + "epoch": 1.54, + "learning_rate": 1.0076420130079565e-05, + "loss": 0.1268, + "step": 4780 + }, + { + "epoch": 1.54, + "learning_rate": 1.0072946550884725e-05, + "loss": 0.1148, + "step": 4781 + }, + { + "epoch": 1.54, + "learning_rate": 1.0069472962887843e-05, + "loss": 0.1276, + "step": 4782 + }, + { + "epoch": 1.54, + "learning_rate": 1.0065999366508057e-05, + "loss": 0.1214, + "step": 4783 + }, + { + "epoch": 1.54, + "learning_rate": 1.0062525762164507e-05, + "loss": 0.1196, + "step": 4784 + }, + { + "epoch": 1.54, + "learning_rate": 1.0059052150276334e-05, + "loss": 0.1271, + "step": 4785 + }, + { + "epoch": 1.54, + "learning_rate": 1.0055578531262677e-05, + "loss": 0.1176, + "step": 4786 + }, + { + "epoch": 1.54, + "learning_rate": 1.0052104905542677e-05, + "loss": 0.1119, + "step": 4787 + }, + { + "epoch": 1.54, + "learning_rate": 1.0048631273535476e-05, + "loss": 0.1251, + "step": 4788 + }, + { + "epoch": 1.54, + "learning_rate": 1.0045157635660223e-05, + "loss": 0.1303, + "step": 4789 + }, + { + "epoch": 1.54, + "learning_rate": 1.0041683992336053e-05, + "loss": 0.1103, + "step": 4790 + }, + { + "epoch": 1.54, + "learning_rate": 1.0038210343982118e-05, + "loss": 0.1162, + "step": 4791 + }, + { + "epoch": 1.54, + "learning_rate": 1.003473669101756e-05, + "loss": 0.1155, + "step": 4792 + }, + { + "epoch": 1.54, + "learning_rate": 1.0031263033861523e-05, + "loss": 0.1122, + "step": 4793 + }, + { + "epoch": 1.54, + "learning_rate": 1.002778937293316e-05, + "loss": 0.1276, + "step": 4794 + }, + { + "epoch": 1.54, + "learning_rate": 1.0024315708651609e-05, + "loss": 0.1127, + "step": 4795 + }, + { + "epoch": 1.54, + "learning_rate": 1.002084204143602e-05, + "loss": 0.1079, + "step": 4796 + }, + { + "epoch": 1.54, + "learning_rate": 1.0017368371705543e-05, + "loss": 0.1286, + "step": 4797 + }, + { + "epoch": 1.54, + "learning_rate": 1.0013894699879327e-05, + "loss": 0.1137, + "step": 4798 + }, + { + "epoch": 1.54, + "learning_rate": 1.0010421026376515e-05, + "loss": 0.1214, + "step": 4799 + }, + { + "epoch": 1.54, + "learning_rate": 1.0006947351616256e-05, + "loss": 0.1164, + "step": 4800 + }, + { + "epoch": 1.54, + "learning_rate": 1.0003473676017706e-05, + "loss": 0.1178, + "step": 4801 + }, + { + "epoch": 1.54, + "learning_rate": 1e-05, + "loss": 0.1187, + "step": 4802 + }, + { + "epoch": 1.55, + "learning_rate": 9.996526323982297e-06, + "loss": 0.1176, + "step": 4803 + }, + { + "epoch": 1.55, + "learning_rate": 9.993052648383744e-06, + "loss": 0.1182, + "step": 4804 + }, + { + "epoch": 1.55, + "learning_rate": 9.989578973623486e-06, + "loss": 0.108, + "step": 4805 + }, + { + "epoch": 1.55, + "learning_rate": 9.986105300120678e-06, + "loss": 0.1112, + "step": 4806 + }, + { + "epoch": 1.55, + "learning_rate": 9.98263162829446e-06, + "loss": 0.1178, + "step": 4807 + }, + { + "epoch": 1.55, + "learning_rate": 9.979157958563983e-06, + "loss": 0.1183, + "step": 4808 + }, + { + "epoch": 1.55, + "learning_rate": 9.975684291348395e-06, + "loss": 0.123, + "step": 4809 + }, + { + "epoch": 1.55, + "learning_rate": 9.972210627066846e-06, + "loss": 0.1039, + "step": 4810 + }, + { + "epoch": 1.55, + "learning_rate": 9.968736966138477e-06, + "loss": 0.1075, + "step": 4811 + }, + { + "epoch": 1.55, + "learning_rate": 9.965263308982445e-06, + "loss": 0.1159, + "step": 4812 + }, + { + "epoch": 1.55, + "learning_rate": 9.961789656017885e-06, + "loss": 0.1176, + "step": 4813 + }, + { + "epoch": 1.55, + "learning_rate": 9.95831600766395e-06, + "loss": 0.1208, + "step": 4814 + }, + { + "epoch": 1.55, + "learning_rate": 9.95484236433978e-06, + "loss": 0.1092, + "step": 4815 + }, + { + "epoch": 1.55, + "learning_rate": 9.951368726464526e-06, + "loss": 0.115, + "step": 4816 + }, + { + "epoch": 1.55, + "learning_rate": 9.947895094457324e-06, + "loss": 0.1264, + "step": 4817 + }, + { + "epoch": 1.55, + "learning_rate": 9.944421468737325e-06, + "loss": 0.12, + "step": 4818 + }, + { + "epoch": 1.55, + "learning_rate": 9.94094784972367e-06, + "loss": 0.1113, + "step": 4819 + }, + { + "epoch": 1.55, + "learning_rate": 9.937474237835496e-06, + "loss": 0.1066, + "step": 4820 + }, + { + "epoch": 1.55, + "learning_rate": 9.934000633491945e-06, + "loss": 0.1509, + "step": 4821 + }, + { + "epoch": 1.55, + "learning_rate": 9.93052703711216e-06, + "loss": 0.1138, + "step": 4822 + }, + { + "epoch": 1.55, + "learning_rate": 9.927053449115278e-06, + "loss": 0.1239, + "step": 4823 + }, + { + "epoch": 1.55, + "learning_rate": 9.923579869920435e-06, + "loss": 0.1181, + "step": 4824 + }, + { + "epoch": 1.55, + "learning_rate": 9.920106299946776e-06, + "loss": 0.1224, + "step": 4825 + }, + { + "epoch": 1.55, + "learning_rate": 9.916632739613428e-06, + "loss": 0.1088, + "step": 4826 + }, + { + "epoch": 1.55, + "learning_rate": 9.913159189339526e-06, + "loss": 0.1152, + "step": 4827 + }, + { + "epoch": 1.55, + "learning_rate": 9.909685649544205e-06, + "loss": 0.1171, + "step": 4828 + }, + { + "epoch": 1.55, + "learning_rate": 9.9062121206466e-06, + "loss": 0.1125, + "step": 4829 + }, + { + "epoch": 1.55, + "learning_rate": 9.902738603065839e-06, + "loss": 0.1346, + "step": 4830 + }, + { + "epoch": 1.55, + "learning_rate": 9.899265097221054e-06, + "loss": 0.1168, + "step": 4831 + }, + { + "epoch": 1.55, + "learning_rate": 9.895791603531371e-06, + "loss": 0.1166, + "step": 4832 + }, + { + "epoch": 1.55, + "learning_rate": 9.892318122415913e-06, + "loss": 0.1109, + "step": 4833 + }, + { + "epoch": 1.56, + "learning_rate": 9.88884465429381e-06, + "loss": 0.1114, + "step": 4834 + }, + { + "epoch": 1.56, + "learning_rate": 9.885371199584184e-06, + "loss": 0.102, + "step": 4835 + }, + { + "epoch": 1.56, + "learning_rate": 9.881897758706155e-06, + "loss": 0.1073, + "step": 4836 + }, + { + "epoch": 1.56, + "learning_rate": 9.87842433207885e-06, + "loss": 0.1088, + "step": 4837 + }, + { + "epoch": 1.56, + "learning_rate": 9.87495092012138e-06, + "loss": 0.1177, + "step": 4838 + }, + { + "epoch": 1.56, + "learning_rate": 9.87147752325286e-06, + "loss": 0.1058, + "step": 4839 + }, + { + "epoch": 1.56, + "learning_rate": 9.868004141892412e-06, + "loss": 0.1219, + "step": 4840 + }, + { + "epoch": 1.56, + "learning_rate": 9.864530776459147e-06, + "loss": 0.1108, + "step": 4841 + }, + { + "epoch": 1.56, + "learning_rate": 9.861057427372171e-06, + "loss": 0.1177, + "step": 4842 + }, + { + "epoch": 1.56, + "learning_rate": 9.8575840950506e-06, + "loss": 0.1217, + "step": 4843 + }, + { + "epoch": 1.56, + "learning_rate": 9.854110779913537e-06, + "loss": 0.1301, + "step": 4844 + }, + { + "epoch": 1.56, + "learning_rate": 9.850637482380093e-06, + "loss": 0.1212, + "step": 4845 + }, + { + "epoch": 1.56, + "learning_rate": 9.847164202869359e-06, + "loss": 0.1084, + "step": 4846 + }, + { + "epoch": 1.56, + "learning_rate": 9.843690941800445e-06, + "loss": 0.1304, + "step": 4847 + }, + { + "epoch": 1.56, + "learning_rate": 9.840217699592446e-06, + "loss": 0.1056, + "step": 4848 + }, + { + "epoch": 1.56, + "learning_rate": 9.836744476664458e-06, + "loss": 0.1201, + "step": 4849 + }, + { + "epoch": 1.56, + "learning_rate": 9.833271273435578e-06, + "loss": 0.1109, + "step": 4850 + }, + { + "epoch": 1.56, + "learning_rate": 9.8297980903249e-06, + "loss": 0.1146, + "step": 4851 + }, + { + "epoch": 1.56, + "learning_rate": 9.826324927751501e-06, + "loss": 0.1048, + "step": 4852 + }, + { + "epoch": 1.56, + "learning_rate": 9.822851786134478e-06, + "loss": 0.1107, + "step": 4853 + }, + { + "epoch": 1.56, + "learning_rate": 9.819378665892911e-06, + "loss": 0.1122, + "step": 4854 + }, + { + "epoch": 1.56, + "learning_rate": 9.815905567445883e-06, + "loss": 0.1181, + "step": 4855 + }, + { + "epoch": 1.56, + "learning_rate": 9.812432491212474e-06, + "loss": 0.1228, + "step": 4856 + }, + { + "epoch": 1.56, + "learning_rate": 9.808959437611756e-06, + "loss": 0.1222, + "step": 4857 + }, + { + "epoch": 1.56, + "learning_rate": 9.805486407062809e-06, + "loss": 0.1105, + "step": 4858 + }, + { + "epoch": 1.56, + "learning_rate": 9.802013399984696e-06, + "loss": 0.1239, + "step": 4859 + }, + { + "epoch": 1.56, + "learning_rate": 9.798540416796491e-06, + "loss": 0.1278, + "step": 4860 + }, + { + "epoch": 1.56, + "learning_rate": 9.795067457917255e-06, + "loss": 0.1143, + "step": 4861 + }, + { + "epoch": 1.56, + "learning_rate": 9.791594523766052e-06, + "loss": 0.1079, + "step": 4862 + }, + { + "epoch": 1.56, + "learning_rate": 9.788121614761939e-06, + "loss": 0.1214, + "step": 4863 + }, + { + "epoch": 1.56, + "learning_rate": 9.784648731323977e-06, + "loss": 0.1162, + "step": 4864 + }, + { + "epoch": 1.57, + "learning_rate": 9.781175873871212e-06, + "loss": 0.1215, + "step": 4865 + }, + { + "epoch": 1.57, + "learning_rate": 9.777703042822699e-06, + "loss": 0.1105, + "step": 4866 + }, + { + "epoch": 1.57, + "learning_rate": 9.774230238597478e-06, + "loss": 0.1094, + "step": 4867 + }, + { + "epoch": 1.57, + "learning_rate": 9.770757461614601e-06, + "loss": 0.1238, + "step": 4868 + }, + { + "epoch": 1.57, + "learning_rate": 9.767284712293102e-06, + "loss": 0.1196, + "step": 4869 + }, + { + "epoch": 1.57, + "learning_rate": 9.763811991052021e-06, + "loss": 0.115, + "step": 4870 + }, + { + "epoch": 1.57, + "learning_rate": 9.760339298310391e-06, + "loss": 0.1288, + "step": 4871 + }, + { + "epoch": 1.57, + "learning_rate": 9.756866634487243e-06, + "loss": 0.124, + "step": 4872 + }, + { + "epoch": 1.57, + "learning_rate": 9.753394000001597e-06, + "loss": 0.1081, + "step": 4873 + }, + { + "epoch": 1.57, + "learning_rate": 9.74992139527248e-06, + "loss": 0.1018, + "step": 4874 + }, + { + "epoch": 1.57, + "learning_rate": 9.746448820718912e-06, + "loss": 0.1257, + "step": 4875 + }, + { + "epoch": 1.57, + "learning_rate": 9.742976276759907e-06, + "loss": 0.1093, + "step": 4876 + }, + { + "epoch": 1.57, + "learning_rate": 9.739503763814481e-06, + "loss": 0.1095, + "step": 4877 + }, + { + "epoch": 1.57, + "learning_rate": 9.73603128230164e-06, + "loss": 0.1218, + "step": 4878 + }, + { + "epoch": 1.57, + "learning_rate": 9.732558832640383e-06, + "loss": 0.1195, + "step": 4879 + }, + { + "epoch": 1.57, + "learning_rate": 9.729086415249716e-06, + "loss": 0.1094, + "step": 4880 + }, + { + "epoch": 1.57, + "learning_rate": 9.725614030548637e-06, + "loss": 0.1127, + "step": 4881 + }, + { + "epoch": 1.57, + "learning_rate": 9.722141678956134e-06, + "loss": 0.1109, + "step": 4882 + }, + { + "epoch": 1.57, + "learning_rate": 9.718669360891202e-06, + "loss": 0.1199, + "step": 4883 + }, + { + "epoch": 1.57, + "learning_rate": 9.71519707677282e-06, + "loss": 0.1172, + "step": 4884 + }, + { + "epoch": 1.57, + "learning_rate": 9.711724827019969e-06, + "loss": 0.113, + "step": 4885 + }, + { + "epoch": 1.57, + "learning_rate": 9.708252612051625e-06, + "loss": 0.1198, + "step": 4886 + }, + { + "epoch": 1.57, + "learning_rate": 9.704780432286766e-06, + "loss": 0.1308, + "step": 4887 + }, + { + "epoch": 1.57, + "learning_rate": 9.701308288144352e-06, + "loss": 0.1171, + "step": 4888 + }, + { + "epoch": 1.57, + "learning_rate": 9.697836180043355e-06, + "loss": 0.1013, + "step": 4889 + }, + { + "epoch": 1.57, + "learning_rate": 9.69436410840273e-06, + "loss": 0.1224, + "step": 4890 + }, + { + "epoch": 1.57, + "learning_rate": 9.69089207364143e-06, + "loss": 0.119, + "step": 4891 + }, + { + "epoch": 1.57, + "learning_rate": 9.687420076178407e-06, + "loss": 0.1088, + "step": 4892 + }, + { + "epoch": 1.57, + "learning_rate": 9.683948116432609e-06, + "loss": 0.1154, + "step": 4893 + }, + { + "epoch": 1.57, + "learning_rate": 9.680476194822973e-06, + "loss": 0.1148, + "step": 4894 + }, + { + "epoch": 1.57, + "learning_rate": 9.677004311768438e-06, + "loss": 0.1055, + "step": 4895 + }, + { + "epoch": 1.58, + "learning_rate": 9.673532467687943e-06, + "loss": 0.1082, + "step": 4896 + }, + { + "epoch": 1.58, + "learning_rate": 9.670060663000408e-06, + "loss": 0.1029, + "step": 4897 + }, + { + "epoch": 1.58, + "learning_rate": 9.666588898124754e-06, + "loss": 0.1164, + "step": 4898 + }, + { + "epoch": 1.58, + "learning_rate": 9.663117173479904e-06, + "loss": 0.1111, + "step": 4899 + }, + { + "epoch": 1.58, + "learning_rate": 9.659645489484766e-06, + "loss": 0.1049, + "step": 4900 + }, + { + "epoch": 1.58, + "learning_rate": 9.656173846558252e-06, + "loss": 0.1241, + "step": 4901 + }, + { + "epoch": 1.58, + "learning_rate": 9.652702245119267e-06, + "loss": 0.1082, + "step": 4902 + }, + { + "epoch": 1.58, + "learning_rate": 9.649230685586708e-06, + "loss": 0.1143, + "step": 4903 + }, + { + "epoch": 1.58, + "learning_rate": 9.645759168379463e-06, + "loss": 0.1276, + "step": 4904 + }, + { + "epoch": 1.58, + "learning_rate": 9.642287693916426e-06, + "loss": 0.1084, + "step": 4905 + }, + { + "epoch": 1.58, + "learning_rate": 9.638816262616476e-06, + "loss": 0.1073, + "step": 4906 + }, + { + "epoch": 1.58, + "learning_rate": 9.635344874898491e-06, + "loss": 0.1143, + "step": 4907 + }, + { + "epoch": 1.58, + "learning_rate": 9.631873531181349e-06, + "loss": 0.1174, + "step": 4908 + }, + { + "epoch": 1.58, + "learning_rate": 9.628402231883914e-06, + "loss": 0.1089, + "step": 4909 + }, + { + "epoch": 1.58, + "learning_rate": 9.62493097742504e-06, + "loss": 0.1091, + "step": 4910 + }, + { + "epoch": 1.58, + "learning_rate": 9.621459768223593e-06, + "loss": 0.1233, + "step": 4911 + }, + { + "epoch": 1.58, + "learning_rate": 9.617988604698423e-06, + "loss": 0.1183, + "step": 4912 + }, + { + "epoch": 1.58, + "learning_rate": 9.61451748726837e-06, + "loss": 0.1172, + "step": 4913 + }, + { + "epoch": 1.58, + "learning_rate": 9.611046416352278e-06, + "loss": 0.1059, + "step": 4914 + }, + { + "epoch": 1.58, + "learning_rate": 9.60757539236898e-06, + "loss": 0.1073, + "step": 4915 + }, + { + "epoch": 1.58, + "learning_rate": 9.604104415737309e-06, + "loss": 0.1077, + "step": 4916 + }, + { + "epoch": 1.58, + "learning_rate": 9.600633486876077e-06, + "loss": 0.1267, + "step": 4917 + }, + { + "epoch": 1.58, + "learning_rate": 9.597162606204112e-06, + "loss": 0.1121, + "step": 4918 + }, + { + "epoch": 1.58, + "learning_rate": 9.593691774140219e-06, + "loss": 0.1181, + "step": 4919 + }, + { + "epoch": 1.58, + "learning_rate": 9.590220991103206e-06, + "loss": 0.1215, + "step": 4920 + }, + { + "epoch": 1.58, + "learning_rate": 9.586750257511868e-06, + "loss": 0.1217, + "step": 4921 + }, + { + "epoch": 1.58, + "learning_rate": 9.583279573785009e-06, + "loss": 0.115, + "step": 4922 + }, + { + "epoch": 1.58, + "learning_rate": 9.579808940341403e-06, + "loss": 0.123, + "step": 4923 + }, + { + "epoch": 1.58, + "learning_rate": 9.576338357599842e-06, + "loss": 0.1108, + "step": 4924 + }, + { + "epoch": 1.58, + "learning_rate": 9.572867825979094e-06, + "loss": 0.1146, + "step": 4925 + }, + { + "epoch": 1.58, + "learning_rate": 9.56939734589793e-06, + "loss": 0.1103, + "step": 4926 + }, + { + "epoch": 1.59, + "learning_rate": 9.565926917775118e-06, + "loss": 0.1137, + "step": 4927 + }, + { + "epoch": 1.59, + "learning_rate": 9.562456542029409e-06, + "loss": 0.1212, + "step": 4928 + }, + { + "epoch": 1.59, + "learning_rate": 9.558986219079559e-06, + "loss": 0.127, + "step": 4929 + }, + { + "epoch": 1.59, + "learning_rate": 9.555515949344307e-06, + "loss": 0.1133, + "step": 4930 + }, + { + "epoch": 1.59, + "learning_rate": 9.552045733242386e-06, + "loss": 0.117, + "step": 4931 + }, + { + "epoch": 1.59, + "learning_rate": 9.548575571192535e-06, + "loss": 0.1103, + "step": 4932 + }, + { + "epoch": 1.59, + "learning_rate": 9.545105463613478e-06, + "loss": 0.1156, + "step": 4933 + }, + { + "epoch": 1.59, + "learning_rate": 9.541635410923929e-06, + "loss": 0.116, + "step": 4934 + }, + { + "epoch": 1.59, + "learning_rate": 9.538165413542607e-06, + "loss": 0.1137, + "step": 4935 + }, + { + "epoch": 1.59, + "learning_rate": 9.534695471888208e-06, + "loss": 0.1223, + "step": 4936 + }, + { + "epoch": 1.59, + "learning_rate": 9.53122558637943e-06, + "loss": 0.1143, + "step": 4937 + }, + { + "epoch": 1.59, + "learning_rate": 9.527755757434968e-06, + "loss": 0.1082, + "step": 4938 + }, + { + "epoch": 1.59, + "learning_rate": 9.524285985473507e-06, + "loss": 0.1225, + "step": 4939 + }, + { + "epoch": 1.59, + "learning_rate": 9.52081627091372e-06, + "loss": 0.1137, + "step": 4940 + }, + { + "epoch": 1.59, + "learning_rate": 9.517346614174283e-06, + "loss": 0.1171, + "step": 4941 + }, + { + "epoch": 1.59, + "learning_rate": 9.513877015673858e-06, + "loss": 0.1234, + "step": 4942 + }, + { + "epoch": 1.59, + "learning_rate": 9.510407475831099e-06, + "loss": 0.12, + "step": 4943 + }, + { + "epoch": 1.59, + "learning_rate": 9.506937995064655e-06, + "loss": 0.1268, + "step": 4944 + }, + { + "epoch": 1.59, + "learning_rate": 9.503468573793173e-06, + "loss": 0.1172, + "step": 4945 + }, + { + "epoch": 1.59, + "learning_rate": 9.499999212435283e-06, + "loss": 0.1156, + "step": 4946 + }, + { + "epoch": 1.59, + "learning_rate": 9.496529911409615e-06, + "loss": 0.1196, + "step": 4947 + }, + { + "epoch": 1.59, + "learning_rate": 9.493060671134795e-06, + "loss": 0.1081, + "step": 4948 + }, + { + "epoch": 1.59, + "learning_rate": 9.489591492029428e-06, + "loss": 0.111, + "step": 4949 + }, + { + "epoch": 1.59, + "learning_rate": 9.486122374512122e-06, + "loss": 0.1199, + "step": 4950 + }, + { + "epoch": 1.59, + "learning_rate": 9.482653319001478e-06, + "loss": 0.1094, + "step": 4951 + }, + { + "epoch": 1.59, + "learning_rate": 9.479184325916083e-06, + "loss": 0.1169, + "step": 4952 + }, + { + "epoch": 1.59, + "learning_rate": 9.475715395674523e-06, + "loss": 0.1166, + "step": 4953 + }, + { + "epoch": 1.59, + "learning_rate": 9.472246528695377e-06, + "loss": 0.1119, + "step": 4954 + }, + { + "epoch": 1.59, + "learning_rate": 9.46877772539721e-06, + "loss": 0.1115, + "step": 4955 + }, + { + "epoch": 1.59, + "learning_rate": 9.465308986198581e-06, + "loss": 0.1234, + "step": 4956 + }, + { + "epoch": 1.59, + "learning_rate": 9.461840311518043e-06, + "loss": 0.1174, + "step": 4957 + }, + { + "epoch": 1.6, + "learning_rate": 9.458371701774145e-06, + "loss": 0.1121, + "step": 4958 + }, + { + "epoch": 1.6, + "learning_rate": 9.454903157385421e-06, + "loss": 0.1132, + "step": 4959 + }, + { + "epoch": 1.6, + "learning_rate": 9.451434678770402e-06, + "loss": 0.1087, + "step": 4960 + }, + { + "epoch": 1.6, + "learning_rate": 9.44796626634761e-06, + "loss": 0.1153, + "step": 4961 + }, + { + "epoch": 1.6, + "learning_rate": 9.444497920535554e-06, + "loss": 0.1181, + "step": 4962 + }, + { + "epoch": 1.6, + "learning_rate": 9.441029641752741e-06, + "loss": 0.1197, + "step": 4963 + }, + { + "epoch": 1.6, + "learning_rate": 9.437561430417672e-06, + "loss": 0.1125, + "step": 4964 + }, + { + "epoch": 1.6, + "learning_rate": 9.434093286948832e-06, + "loss": 0.1096, + "step": 4965 + }, + { + "epoch": 1.6, + "learning_rate": 9.430625211764706e-06, + "loss": 0.1177, + "step": 4966 + }, + { + "epoch": 1.6, + "learning_rate": 9.427157205283762e-06, + "loss": 0.1104, + "step": 4967 + }, + { + "epoch": 1.6, + "learning_rate": 9.423689267924469e-06, + "loss": 0.1164, + "step": 4968 + }, + { + "epoch": 1.6, + "learning_rate": 9.420221400105278e-06, + "loss": 0.1209, + "step": 4969 + }, + { + "epoch": 1.6, + "learning_rate": 9.416753602244643e-06, + "loss": 0.116, + "step": 4970 + }, + { + "epoch": 1.6, + "learning_rate": 9.413285874760995e-06, + "loss": 0.1158, + "step": 4971 + }, + { + "epoch": 1.6, + "learning_rate": 9.409818218072774e-06, + "loss": 0.1212, + "step": 4972 + }, + { + "epoch": 1.6, + "learning_rate": 9.406350632598393e-06, + "loss": 0.1166, + "step": 4973 + }, + { + "epoch": 1.6, + "learning_rate": 9.402883118756277e-06, + "loss": 0.1196, + "step": 4974 + }, + { + "epoch": 1.6, + "learning_rate": 9.39941567696482e-06, + "loss": 0.1127, + "step": 4975 + }, + { + "epoch": 1.6, + "learning_rate": 9.395948307642423e-06, + "loss": 0.115, + "step": 4976 + }, + { + "epoch": 1.6, + "learning_rate": 9.39248101120747e-06, + "loss": 0.103, + "step": 4977 + }, + { + "epoch": 1.6, + "learning_rate": 9.389013788078344e-06, + "loss": 0.1182, + "step": 4978 + }, + { + "epoch": 1.6, + "learning_rate": 9.385546638673418e-06, + "loss": 0.1086, + "step": 4979 + }, + { + "epoch": 1.6, + "learning_rate": 9.382079563411045e-06, + "loss": 0.111, + "step": 4980 + }, + { + "epoch": 1.6, + "learning_rate": 9.378612562709584e-06, + "loss": 0.1224, + "step": 4981 + }, + { + "epoch": 1.6, + "learning_rate": 9.375145636987375e-06, + "loss": 0.1117, + "step": 4982 + }, + { + "epoch": 1.6, + "learning_rate": 9.371678786662746e-06, + "loss": 0.1151, + "step": 4983 + }, + { + "epoch": 1.6, + "learning_rate": 9.36821201215403e-06, + "loss": 0.1174, + "step": 4984 + }, + { + "epoch": 1.6, + "learning_rate": 9.364745313879543e-06, + "loss": 0.1196, + "step": 4985 + }, + { + "epoch": 1.6, + "learning_rate": 9.361278692257588e-06, + "loss": 0.1161, + "step": 4986 + }, + { + "epoch": 1.6, + "learning_rate": 9.357812147706466e-06, + "loss": 0.1215, + "step": 4987 + }, + { + "epoch": 1.6, + "learning_rate": 9.354345680644459e-06, + "loss": 0.1045, + "step": 4988 + }, + { + "epoch": 1.61, + "learning_rate": 9.350879291489848e-06, + "loss": 0.1126, + "step": 4989 + }, + { + "epoch": 1.61, + "learning_rate": 9.347412980660903e-06, + "loss": 0.1095, + "step": 4990 + }, + { + "epoch": 1.61, + "learning_rate": 9.343946748575889e-06, + "loss": 0.1077, + "step": 4991 + }, + { + "epoch": 1.61, + "learning_rate": 9.340480595653047e-06, + "loss": 0.1217, + "step": 4992 + }, + { + "epoch": 1.61, + "learning_rate": 9.337014522310621e-06, + "loss": 0.1094, + "step": 4993 + }, + { + "epoch": 1.61, + "learning_rate": 9.333548528966849e-06, + "loss": 0.12, + "step": 4994 + }, + { + "epoch": 1.61, + "learning_rate": 9.330082616039946e-06, + "loss": 0.1198, + "step": 4995 + }, + { + "epoch": 1.61, + "learning_rate": 9.32661678394812e-06, + "loss": 0.1176, + "step": 4996 + }, + { + "epoch": 1.61, + "learning_rate": 9.323151033109581e-06, + "loss": 0.111, + "step": 4997 + }, + { + "epoch": 1.61, + "learning_rate": 9.319685363942516e-06, + "loss": 0.1135, + "step": 4998 + }, + { + "epoch": 1.61, + "learning_rate": 9.316219776865108e-06, + "loss": 0.1282, + "step": 4999 + }, + { + "epoch": 1.61, + "learning_rate": 9.312754272295538e-06, + "loss": 0.1216, + "step": 5000 + }, + { + "epoch": 1.61, + "learning_rate": 9.309288850651956e-06, + "loss": 0.1174, + "step": 5001 + }, + { + "epoch": 1.61, + "learning_rate": 9.30582351235252e-06, + "loss": 0.1082, + "step": 5002 + }, + { + "epoch": 1.61, + "learning_rate": 9.302358257815373e-06, + "loss": 0.1036, + "step": 5003 + }, + { + "epoch": 1.61, + "learning_rate": 9.298893087458645e-06, + "loss": 0.1163, + "step": 5004 + }, + { + "epoch": 1.61, + "learning_rate": 9.29542800170046e-06, + "loss": 0.1141, + "step": 5005 + }, + { + "epoch": 1.61, + "learning_rate": 9.291963000958932e-06, + "loss": 0.1247, + "step": 5006 + }, + { + "epoch": 1.61, + "learning_rate": 9.288498085652162e-06, + "loss": 0.1131, + "step": 5007 + }, + { + "epoch": 1.61, + "learning_rate": 9.285033256198237e-06, + "loss": 0.1059, + "step": 5008 + }, + { + "epoch": 1.61, + "learning_rate": 9.28156851301524e-06, + "loss": 0.116, + "step": 5009 + }, + { + "epoch": 1.61, + "learning_rate": 9.278103856521246e-06, + "loss": 0.1077, + "step": 5010 + }, + { + "epoch": 1.61, + "learning_rate": 9.274639287134309e-06, + "loss": 0.0996, + "step": 5011 + }, + { + "epoch": 1.61, + "learning_rate": 9.271174805272482e-06, + "loss": 0.1224, + "step": 5012 + }, + { + "epoch": 1.61, + "learning_rate": 9.267710411353809e-06, + "loss": 0.1118, + "step": 5013 + }, + { + "epoch": 1.61, + "learning_rate": 9.264246105796307e-06, + "loss": 0.1481, + "step": 5014 + }, + { + "epoch": 1.61, + "learning_rate": 9.260781889018e-06, + "loss": 0.1089, + "step": 5015 + }, + { + "epoch": 1.61, + "learning_rate": 9.257317761436899e-06, + "loss": 0.1181, + "step": 5016 + }, + { + "epoch": 1.61, + "learning_rate": 9.253853723470992e-06, + "loss": 0.1283, + "step": 5017 + }, + { + "epoch": 1.61, + "learning_rate": 9.250389775538273e-06, + "loss": 0.1068, + "step": 5018 + }, + { + "epoch": 1.61, + "learning_rate": 9.24692591805671e-06, + "loss": 0.107, + "step": 5019 + }, + { + "epoch": 1.62, + "learning_rate": 9.243462151444275e-06, + "loss": 0.1253, + "step": 5020 + }, + { + "epoch": 1.62, + "learning_rate": 9.23999847611891e-06, + "loss": 0.1193, + "step": 5021 + }, + { + "epoch": 1.62, + "learning_rate": 9.236534892498564e-06, + "loss": 0.1062, + "step": 5022 + }, + { + "epoch": 1.62, + "learning_rate": 9.233071401001166e-06, + "loss": 0.1162, + "step": 5023 + }, + { + "epoch": 1.62, + "learning_rate": 9.229608002044633e-06, + "loss": 0.1204, + "step": 5024 + }, + { + "epoch": 1.62, + "learning_rate": 9.22614469604688e-06, + "loss": 0.1176, + "step": 5025 + }, + { + "epoch": 1.62, + "learning_rate": 9.222681483425801e-06, + "loss": 0.1181, + "step": 5026 + }, + { + "epoch": 1.62, + "learning_rate": 9.219218364599277e-06, + "loss": 0.1184, + "step": 5027 + }, + { + "epoch": 1.62, + "learning_rate": 9.21575533998519e-06, + "loss": 0.1129, + "step": 5028 + }, + { + "epoch": 1.62, + "learning_rate": 9.212292410001399e-06, + "loss": 0.1273, + "step": 5029 + }, + { + "epoch": 1.62, + "learning_rate": 9.208829575065754e-06, + "loss": 0.111, + "step": 5030 + }, + { + "epoch": 1.62, + "learning_rate": 9.205366835596102e-06, + "loss": 0.102, + "step": 5031 + }, + { + "epoch": 1.62, + "learning_rate": 9.201904192010272e-06, + "loss": 0.1108, + "step": 5032 + }, + { + "epoch": 1.62, + "learning_rate": 9.198441644726072e-06, + "loss": 0.1269, + "step": 5033 + }, + { + "epoch": 1.62, + "learning_rate": 9.194979194161315e-06, + "loss": 0.1237, + "step": 5034 + }, + { + "epoch": 1.62, + "learning_rate": 9.191516840733792e-06, + "loss": 0.1081, + "step": 5035 + }, + { + "epoch": 1.62, + "learning_rate": 9.188054584861285e-06, + "loss": 0.117, + "step": 5036 + }, + { + "epoch": 1.62, + "learning_rate": 9.18459242696157e-06, + "loss": 0.1215, + "step": 5037 + }, + { + "epoch": 1.62, + "learning_rate": 9.1811303674524e-06, + "loss": 0.1119, + "step": 5038 + }, + { + "epoch": 1.62, + "learning_rate": 9.177668406751526e-06, + "loss": 0.1166, + "step": 5039 + }, + { + "epoch": 1.62, + "learning_rate": 9.174206545276678e-06, + "loss": 0.1169, + "step": 5040 + }, + { + "epoch": 1.62, + "learning_rate": 9.170744783445583e-06, + "loss": 0.1102, + "step": 5041 + }, + { + "epoch": 1.62, + "learning_rate": 9.167283121675949e-06, + "loss": 0.1122, + "step": 5042 + }, + { + "epoch": 1.62, + "learning_rate": 9.163821560385478e-06, + "loss": 0.1149, + "step": 5043 + }, + { + "epoch": 1.62, + "learning_rate": 9.160360099991852e-06, + "loss": 0.1151, + "step": 5044 + }, + { + "epoch": 1.62, + "learning_rate": 9.156898740912753e-06, + "loss": 0.1143, + "step": 5045 + }, + { + "epoch": 1.62, + "learning_rate": 9.153437483565835e-06, + "loss": 0.1195, + "step": 5046 + }, + { + "epoch": 1.62, + "learning_rate": 9.149976328368754e-06, + "loss": 0.1166, + "step": 5047 + }, + { + "epoch": 1.62, + "learning_rate": 9.146515275739142e-06, + "loss": 0.1218, + "step": 5048 + }, + { + "epoch": 1.62, + "learning_rate": 9.143054326094632e-06, + "loss": 0.1129, + "step": 5049 + }, + { + "epoch": 1.62, + "learning_rate": 9.13959347985283e-06, + "loss": 0.1009, + "step": 5050 + }, + { + "epoch": 1.62, + "learning_rate": 9.136132737431339e-06, + "loss": 0.1263, + "step": 5051 + }, + { + "epoch": 1.63, + "learning_rate": 9.132672099247753e-06, + "loss": 0.116, + "step": 5052 + }, + { + "epoch": 1.63, + "learning_rate": 9.129211565719637e-06, + "loss": 0.1187, + "step": 5053 + }, + { + "epoch": 1.63, + "learning_rate": 9.125751137264556e-06, + "loss": 0.113, + "step": 5054 + }, + { + "epoch": 1.63, + "learning_rate": 9.122290814300064e-06, + "loss": 0.1145, + "step": 5055 + }, + { + "epoch": 1.63, + "learning_rate": 9.118830597243697e-06, + "loss": 0.1187, + "step": 5056 + }, + { + "epoch": 1.63, + "learning_rate": 9.115370486512979e-06, + "loss": 0.1165, + "step": 5057 + }, + { + "epoch": 1.63, + "learning_rate": 9.111910482525423e-06, + "loss": 0.1094, + "step": 5058 + }, + { + "epoch": 1.63, + "learning_rate": 9.108450585698527e-06, + "loss": 0.0989, + "step": 5059 + }, + { + "epoch": 1.63, + "learning_rate": 9.104990796449774e-06, + "loss": 0.1158, + "step": 5060 + }, + { + "epoch": 1.63, + "learning_rate": 9.101531115196637e-06, + "loss": 0.1066, + "step": 5061 + }, + { + "epoch": 1.63, + "learning_rate": 9.098071542356581e-06, + "loss": 0.1117, + "step": 5062 + }, + { + "epoch": 1.63, + "learning_rate": 9.09461207834705e-06, + "loss": 0.1072, + "step": 5063 + }, + { + "epoch": 1.63, + "learning_rate": 9.091152723585477e-06, + "loss": 0.1171, + "step": 5064 + }, + { + "epoch": 1.63, + "learning_rate": 9.087693478489285e-06, + "loss": 0.1026, + "step": 5065 + }, + { + "epoch": 1.63, + "learning_rate": 9.084234343475877e-06, + "loss": 0.1036, + "step": 5066 + }, + { + "epoch": 1.63, + "learning_rate": 9.080775318962648e-06, + "loss": 0.1204, + "step": 5067 + }, + { + "epoch": 1.63, + "learning_rate": 9.07731640536698e-06, + "loss": 0.1035, + "step": 5068 + }, + { + "epoch": 1.63, + "learning_rate": 9.073857603106243e-06, + "loss": 0.1092, + "step": 5069 + }, + { + "epoch": 1.63, + "learning_rate": 9.070398912597785e-06, + "loss": 0.1118, + "step": 5070 + }, + { + "epoch": 1.63, + "learning_rate": 9.066940334258951e-06, + "loss": 0.1037, + "step": 5071 + }, + { + "epoch": 1.63, + "learning_rate": 9.063481868507066e-06, + "loss": 0.1217, + "step": 5072 + }, + { + "epoch": 1.63, + "learning_rate": 9.06002351575944e-06, + "loss": 0.1194, + "step": 5073 + }, + { + "epoch": 1.63, + "learning_rate": 9.056565276433378e-06, + "loss": 0.1128, + "step": 5074 + }, + { + "epoch": 1.63, + "learning_rate": 9.053107150946163e-06, + "loss": 0.1138, + "step": 5075 + }, + { + "epoch": 1.63, + "learning_rate": 9.049649139715067e-06, + "loss": 0.1067, + "step": 5076 + }, + { + "epoch": 1.63, + "learning_rate": 9.04619124315735e-06, + "loss": 0.1108, + "step": 5077 + }, + { + "epoch": 1.63, + "learning_rate": 9.042733461690259e-06, + "loss": 0.1108, + "step": 5078 + }, + { + "epoch": 1.63, + "learning_rate": 9.039275795731014e-06, + "loss": 0.1126, + "step": 5079 + }, + { + "epoch": 1.63, + "learning_rate": 9.035818245696843e-06, + "loss": 0.1087, + "step": 5080 + }, + { + "epoch": 1.63, + "learning_rate": 9.032360812004944e-06, + "loss": 0.1161, + "step": 5081 + }, + { + "epoch": 1.63, + "learning_rate": 9.028903495072503e-06, + "loss": 0.1172, + "step": 5082 + }, + { + "epoch": 1.64, + "learning_rate": 9.025446295316701e-06, + "loss": 0.1175, + "step": 5083 + }, + { + "epoch": 1.64, + "learning_rate": 9.021989213154699e-06, + "loss": 0.1006, + "step": 5084 + }, + { + "epoch": 1.64, + "learning_rate": 9.018532249003633e-06, + "loss": 0.1202, + "step": 5085 + }, + { + "epoch": 1.64, + "learning_rate": 9.015075403280644e-06, + "loss": 0.1165, + "step": 5086 + }, + { + "epoch": 1.64, + "learning_rate": 9.011618676402845e-06, + "loss": 0.1078, + "step": 5087 + }, + { + "epoch": 1.64, + "learning_rate": 9.008162068787341e-06, + "loss": 0.1213, + "step": 5088 + }, + { + "epoch": 1.64, + "learning_rate": 9.004705580851225e-06, + "loss": 0.1168, + "step": 5089 + }, + { + "epoch": 1.64, + "learning_rate": 9.001249213011565e-06, + "loss": 0.1095, + "step": 5090 + }, + { + "epoch": 1.64, + "learning_rate": 8.99779296568543e-06, + "loss": 0.1065, + "step": 5091 + }, + { + "epoch": 1.64, + "learning_rate": 8.994336839289853e-06, + "loss": 0.1051, + "step": 5092 + }, + { + "epoch": 1.64, + "learning_rate": 8.990880834241873e-06, + "loss": 0.1115, + "step": 5093 + }, + { + "epoch": 1.64, + "learning_rate": 8.987424950958505e-06, + "loss": 0.1083, + "step": 5094 + }, + { + "epoch": 1.64, + "learning_rate": 8.98396918985675e-06, + "loss": 0.1124, + "step": 5095 + }, + { + "epoch": 1.64, + "learning_rate": 8.980513551353595e-06, + "loss": 0.1124, + "step": 5096 + }, + { + "epoch": 1.64, + "learning_rate": 8.977058035866015e-06, + "loss": 0.1189, + "step": 5097 + }, + { + "epoch": 1.64, + "learning_rate": 8.973602643810962e-06, + "loss": 0.104, + "step": 5098 + }, + { + "epoch": 1.64, + "learning_rate": 8.97014737560538e-06, + "loss": 0.1185, + "step": 5099 + }, + { + "epoch": 1.64, + "learning_rate": 8.9666922316662e-06, + "loss": 0.115, + "step": 5100 + }, + { + "epoch": 1.64, + "learning_rate": 8.963237212410329e-06, + "loss": 0.1081, + "step": 5101 + }, + { + "epoch": 1.64, + "learning_rate": 8.959782318254665e-06, + "loss": 0.1074, + "step": 5102 + }, + { + "epoch": 1.64, + "learning_rate": 8.956327549616093e-06, + "loss": 0.1251, + "step": 5103 + }, + { + "epoch": 1.64, + "learning_rate": 8.952872906911484e-06, + "loss": 0.1227, + "step": 5104 + }, + { + "epoch": 1.64, + "learning_rate": 8.949418390557682e-06, + "loss": 0.1174, + "step": 5105 + }, + { + "epoch": 1.64, + "learning_rate": 8.945964000971525e-06, + "loss": 0.1167, + "step": 5106 + }, + { + "epoch": 1.64, + "learning_rate": 8.942509738569834e-06, + "loss": 0.1151, + "step": 5107 + }, + { + "epoch": 1.64, + "learning_rate": 8.93905560376942e-06, + "loss": 0.1112, + "step": 5108 + }, + { + "epoch": 1.64, + "learning_rate": 8.935601596987069e-06, + "loss": 0.1266, + "step": 5109 + }, + { + "epoch": 1.64, + "learning_rate": 8.932147718639562e-06, + "loss": 0.1128, + "step": 5110 + }, + { + "epoch": 1.64, + "learning_rate": 8.928693969143652e-06, + "loss": 0.1333, + "step": 5111 + }, + { + "epoch": 1.64, + "learning_rate": 8.925240348916085e-06, + "loss": 0.1279, + "step": 5112 + }, + { + "epoch": 1.64, + "learning_rate": 8.921786858373587e-06, + "loss": 0.1168, + "step": 5113 + }, + { + "epoch": 1.65, + "learning_rate": 8.918333497932878e-06, + "loss": 0.1123, + "step": 5114 + }, + { + "epoch": 1.65, + "learning_rate": 8.914880268010647e-06, + "loss": 0.1193, + "step": 5115 + }, + { + "epoch": 1.65, + "learning_rate": 8.911427169023583e-06, + "loss": 0.1183, + "step": 5116 + }, + { + "epoch": 1.65, + "learning_rate": 8.90797420138835e-06, + "loss": 0.1112, + "step": 5117 + }, + { + "epoch": 1.65, + "learning_rate": 8.904521365521591e-06, + "loss": 0.1067, + "step": 5118 + }, + { + "epoch": 1.65, + "learning_rate": 8.901068661839945e-06, + "loss": 0.1184, + "step": 5119 + }, + { + "epoch": 1.65, + "learning_rate": 8.897616090760032e-06, + "loss": 0.1088, + "step": 5120 + }, + { + "epoch": 1.65, + "learning_rate": 8.894163652698448e-06, + "loss": 0.111, + "step": 5121 + }, + { + "epoch": 1.65, + "learning_rate": 8.890711348071784e-06, + "loss": 0.1165, + "step": 5122 + }, + { + "epoch": 1.65, + "learning_rate": 8.887259177296612e-06, + "loss": 0.1177, + "step": 5123 + }, + { + "epoch": 1.65, + "learning_rate": 8.883807140789478e-06, + "loss": 0.0923, + "step": 5124 + }, + { + "epoch": 1.65, + "learning_rate": 8.880355238966923e-06, + "loss": 0.1111, + "step": 5125 + }, + { + "epoch": 1.65, + "learning_rate": 8.876903472245469e-06, + "loss": 0.1171, + "step": 5126 + }, + { + "epoch": 1.65, + "learning_rate": 8.873451841041619e-06, + "loss": 0.1178, + "step": 5127 + }, + { + "epoch": 1.65, + "learning_rate": 8.870000345771863e-06, + "loss": 0.1116, + "step": 5128 + }, + { + "epoch": 1.65, + "learning_rate": 8.866548986852673e-06, + "loss": 0.0997, + "step": 5129 + }, + { + "epoch": 1.65, + "learning_rate": 8.863097764700508e-06, + "loss": 0.1179, + "step": 5130 + }, + { + "epoch": 1.65, + "learning_rate": 8.859646679731799e-06, + "loss": 0.1181, + "step": 5131 + }, + { + "epoch": 1.65, + "learning_rate": 8.856195732362975e-06, + "loss": 0.114, + "step": 5132 + }, + { + "epoch": 1.65, + "learning_rate": 8.852744923010438e-06, + "loss": 0.1071, + "step": 5133 + }, + { + "epoch": 1.65, + "learning_rate": 8.84929425209058e-06, + "loss": 0.1147, + "step": 5134 + }, + { + "epoch": 1.65, + "learning_rate": 8.845843720019776e-06, + "loss": 0.1159, + "step": 5135 + }, + { + "epoch": 1.65, + "learning_rate": 8.842393327214378e-06, + "loss": 0.1198, + "step": 5136 + }, + { + "epoch": 1.65, + "learning_rate": 8.838943074090726e-06, + "loss": 0.1224, + "step": 5137 + }, + { + "epoch": 1.65, + "learning_rate": 8.835492961065139e-06, + "loss": 0.1255, + "step": 5138 + }, + { + "epoch": 1.65, + "learning_rate": 8.832042988553929e-06, + "loss": 0.108, + "step": 5139 + }, + { + "epoch": 1.65, + "learning_rate": 8.828593156973379e-06, + "loss": 0.1087, + "step": 5140 + }, + { + "epoch": 1.65, + "learning_rate": 8.825143466739764e-06, + "loss": 0.1084, + "step": 5141 + }, + { + "epoch": 1.65, + "learning_rate": 8.821693918269334e-06, + "loss": 0.1124, + "step": 5142 + }, + { + "epoch": 1.65, + "learning_rate": 8.818244511978335e-06, + "loss": 0.1181, + "step": 5143 + }, + { + "epoch": 1.65, + "learning_rate": 8.814795248282974e-06, + "loss": 0.1123, + "step": 5144 + }, + { + "epoch": 1.66, + "learning_rate": 8.811346127599465e-06, + "loss": 0.123, + "step": 5145 + }, + { + "epoch": 1.66, + "learning_rate": 8.807897150343985e-06, + "loss": 0.1026, + "step": 5146 + }, + { + "epoch": 1.66, + "learning_rate": 8.804448316932711e-06, + "loss": 0.0981, + "step": 5147 + }, + { + "epoch": 1.66, + "learning_rate": 8.800999627781786e-06, + "loss": 0.1082, + "step": 5148 + }, + { + "epoch": 1.66, + "learning_rate": 8.797551083307352e-06, + "loss": 0.1168, + "step": 5149 + }, + { + "epoch": 1.66, + "learning_rate": 8.794102683925515e-06, + "loss": 0.101, + "step": 5150 + }, + { + "epoch": 1.66, + "learning_rate": 8.79065443005238e-06, + "loss": 0.1046, + "step": 5151 + }, + { + "epoch": 1.66, + "learning_rate": 8.787206322104025e-06, + "loss": 0.1066, + "step": 5152 + }, + { + "epoch": 1.66, + "learning_rate": 8.783758360496515e-06, + "loss": 0.1262, + "step": 5153 + }, + { + "epoch": 1.66, + "learning_rate": 8.780310545645897e-06, + "loss": 0.0963, + "step": 5154 + }, + { + "epoch": 1.66, + "learning_rate": 8.776862877968198e-06, + "loss": 0.106, + "step": 5155 + }, + { + "epoch": 1.66, + "learning_rate": 8.773415357879429e-06, + "loss": 0.1318, + "step": 5156 + }, + { + "epoch": 1.66, + "learning_rate": 8.769967985795581e-06, + "loss": 0.1113, + "step": 5157 + }, + { + "epoch": 1.66, + "learning_rate": 8.766520762132627e-06, + "loss": 0.1094, + "step": 5158 + }, + { + "epoch": 1.66, + "learning_rate": 8.763073687306523e-06, + "loss": 0.1065, + "step": 5159 + }, + { + "epoch": 1.66, + "learning_rate": 8.759626761733215e-06, + "loss": 0.1222, + "step": 5160 + }, + { + "epoch": 1.66, + "learning_rate": 8.756179985828617e-06, + "loss": 0.1133, + "step": 5161 + }, + { + "epoch": 1.66, + "learning_rate": 8.752733360008635e-06, + "loss": 0.1104, + "step": 5162 + }, + { + "epoch": 1.66, + "learning_rate": 8.749286884689153e-06, + "loss": 0.108, + "step": 5163 + }, + { + "epoch": 1.66, + "learning_rate": 8.745840560286034e-06, + "loss": 0.1271, + "step": 5164 + }, + { + "epoch": 1.66, + "learning_rate": 8.742394387215128e-06, + "loss": 0.1142, + "step": 5165 + }, + { + "epoch": 1.66, + "learning_rate": 8.738948365892267e-06, + "loss": 0.1094, + "step": 5166 + }, + { + "epoch": 1.66, + "learning_rate": 8.73550249673326e-06, + "loss": 0.1085, + "step": 5167 + }, + { + "epoch": 1.66, + "learning_rate": 8.732056780153904e-06, + "loss": 0.1006, + "step": 5168 + }, + { + "epoch": 1.66, + "learning_rate": 8.72861121656997e-06, + "loss": 0.1093, + "step": 5169 + }, + { + "epoch": 1.66, + "learning_rate": 8.725165806397216e-06, + "loss": 0.1133, + "step": 5170 + }, + { + "epoch": 1.66, + "learning_rate": 8.721720550051378e-06, + "loss": 0.1194, + "step": 5171 + }, + { + "epoch": 1.66, + "learning_rate": 8.718275447948178e-06, + "loss": 0.1142, + "step": 5172 + }, + { + "epoch": 1.66, + "learning_rate": 8.714830500503314e-06, + "loss": 0.1159, + "step": 5173 + }, + { + "epoch": 1.66, + "learning_rate": 8.71138570813247e-06, + "loss": 0.1118, + "step": 5174 + }, + { + "epoch": 1.66, + "learning_rate": 8.707941071251311e-06, + "loss": 0.1162, + "step": 5175 + }, + { + "epoch": 1.67, + "learning_rate": 8.704496590275479e-06, + "loss": 0.1237, + "step": 5176 + }, + { + "epoch": 1.67, + "learning_rate": 8.701052265620597e-06, + "loss": 0.1165, + "step": 5177 + }, + { + "epoch": 1.67, + "learning_rate": 8.697608097702277e-06, + "loss": 0.1224, + "step": 5178 + }, + { + "epoch": 1.67, + "learning_rate": 8.694164086936103e-06, + "loss": 0.115, + "step": 5179 + }, + { + "epoch": 1.67, + "learning_rate": 8.690720233737645e-06, + "loss": 0.114, + "step": 5180 + }, + { + "epoch": 1.67, + "learning_rate": 8.687276538522458e-06, + "loss": 0.1124, + "step": 5181 + }, + { + "epoch": 1.67, + "learning_rate": 8.683833001706068e-06, + "loss": 0.1215, + "step": 5182 + }, + { + "epoch": 1.67, + "learning_rate": 8.680389623703985e-06, + "loss": 0.1055, + "step": 5183 + }, + { + "epoch": 1.67, + "learning_rate": 8.676946404931706e-06, + "loss": 0.1033, + "step": 5184 + }, + { + "epoch": 1.67, + "learning_rate": 8.6735033458047e-06, + "loss": 0.1159, + "step": 5185 + }, + { + "epoch": 1.67, + "learning_rate": 8.670060446738425e-06, + "loss": 0.1136, + "step": 5186 + }, + { + "epoch": 1.67, + "learning_rate": 8.666617708148315e-06, + "loss": 0.1222, + "step": 5187 + }, + { + "epoch": 1.67, + "learning_rate": 8.66317513044979e-06, + "loss": 0.1055, + "step": 5188 + }, + { + "epoch": 1.67, + "learning_rate": 8.659732714058233e-06, + "loss": 0.1104, + "step": 5189 + }, + { + "epoch": 1.67, + "learning_rate": 8.656290459389032e-06, + "loss": 0.1226, + "step": 5190 + }, + { + "epoch": 1.67, + "learning_rate": 8.652848366857541e-06, + "loss": 0.1076, + "step": 5191 + }, + { + "epoch": 1.67, + "learning_rate": 8.649406436879095e-06, + "loss": 0.1122, + "step": 5192 + }, + { + "epoch": 1.67, + "learning_rate": 8.645964669869018e-06, + "loss": 0.1175, + "step": 5193 + }, + { + "epoch": 1.67, + "learning_rate": 8.642523066242607e-06, + "loss": 0.0996, + "step": 5194 + }, + { + "epoch": 1.67, + "learning_rate": 8.639081626415132e-06, + "loss": 0.119, + "step": 5195 + }, + { + "epoch": 1.67, + "learning_rate": 8.63564035080186e-06, + "loss": 0.1177, + "step": 5196 + }, + { + "epoch": 1.67, + "learning_rate": 8.632199239818028e-06, + "loss": 0.1036, + "step": 5197 + }, + { + "epoch": 1.67, + "learning_rate": 8.628758293878853e-06, + "loss": 0.0977, + "step": 5198 + }, + { + "epoch": 1.67, + "learning_rate": 8.625317513399538e-06, + "loss": 0.1176, + "step": 5199 + }, + { + "epoch": 1.67, + "learning_rate": 8.62187689879526e-06, + "loss": 0.1166, + "step": 5200 + }, + { + "epoch": 1.67, + "learning_rate": 8.618436450481182e-06, + "loss": 0.1193, + "step": 5201 + }, + { + "epoch": 1.67, + "learning_rate": 8.614996168872434e-06, + "loss": 0.1191, + "step": 5202 + }, + { + "epoch": 1.67, + "learning_rate": 8.611556054384144e-06, + "loss": 0.1168, + "step": 5203 + }, + { + "epoch": 1.67, + "learning_rate": 8.608116107431405e-06, + "loss": 0.1223, + "step": 5204 + }, + { + "epoch": 1.67, + "learning_rate": 8.604676328429297e-06, + "loss": 0.1119, + "step": 5205 + }, + { + "epoch": 1.67, + "learning_rate": 8.601236717792884e-06, + "loss": 0.1031, + "step": 5206 + }, + { + "epoch": 1.68, + "learning_rate": 8.5977972759372e-06, + "loss": 0.1161, + "step": 5207 + }, + { + "epoch": 1.68, + "learning_rate": 8.594358003277257e-06, + "loss": 0.1273, + "step": 5208 + }, + { + "epoch": 1.68, + "learning_rate": 8.590918900228062e-06, + "loss": 0.1144, + "step": 5209 + }, + { + "epoch": 1.68, + "learning_rate": 8.587479967204584e-06, + "loss": 0.1135, + "step": 5210 + }, + { + "epoch": 1.68, + "learning_rate": 8.584041204621783e-06, + "loss": 0.1193, + "step": 5211 + }, + { + "epoch": 1.68, + "learning_rate": 8.580602612894595e-06, + "loss": 0.1323, + "step": 5212 + }, + { + "epoch": 1.68, + "learning_rate": 8.577164192437933e-06, + "loss": 0.1178, + "step": 5213 + }, + { + "epoch": 1.68, + "learning_rate": 8.573725943666698e-06, + "loss": 0.1163, + "step": 5214 + }, + { + "epoch": 1.68, + "learning_rate": 8.570287866995756e-06, + "loss": 0.1046, + "step": 5215 + }, + { + "epoch": 1.68, + "learning_rate": 8.56684996283996e-06, + "loss": 0.1251, + "step": 5216 + }, + { + "epoch": 1.68, + "learning_rate": 8.563412231614146e-06, + "loss": 0.1136, + "step": 5217 + }, + { + "epoch": 1.68, + "learning_rate": 8.559974673733125e-06, + "loss": 0.1304, + "step": 5218 + }, + { + "epoch": 1.68, + "learning_rate": 8.556537289611684e-06, + "loss": 0.1071, + "step": 5219 + }, + { + "epoch": 1.68, + "learning_rate": 8.553100079664598e-06, + "loss": 0.1232, + "step": 5220 + }, + { + "epoch": 1.68, + "learning_rate": 8.54966304430661e-06, + "loss": 0.1173, + "step": 5221 + }, + { + "epoch": 1.68, + "learning_rate": 8.546226183952452e-06, + "loss": 0.1093, + "step": 5222 + }, + { + "epoch": 1.68, + "learning_rate": 8.542789499016822e-06, + "loss": 0.1152, + "step": 5223 + }, + { + "epoch": 1.68, + "learning_rate": 8.539352989914416e-06, + "loss": 0.121, + "step": 5224 + }, + { + "epoch": 1.68, + "learning_rate": 8.535916657059889e-06, + "loss": 0.1213, + "step": 5225 + }, + { + "epoch": 1.68, + "learning_rate": 8.532480500867887e-06, + "loss": 0.105, + "step": 5226 + }, + { + "epoch": 1.68, + "learning_rate": 8.529044521753035e-06, + "loss": 0.1021, + "step": 5227 + }, + { + "epoch": 1.68, + "learning_rate": 8.525608720129928e-06, + "loss": 0.1128, + "step": 5228 + }, + { + "epoch": 1.68, + "learning_rate": 8.522173096413143e-06, + "loss": 0.121, + "step": 5229 + }, + { + "epoch": 1.68, + "learning_rate": 8.518737651017241e-06, + "loss": 0.1231, + "step": 5230 + }, + { + "epoch": 1.68, + "learning_rate": 8.515302384356753e-06, + "loss": 0.109, + "step": 5231 + }, + { + "epoch": 1.68, + "learning_rate": 8.511867296846197e-06, + "loss": 0.1203, + "step": 5232 + }, + { + "epoch": 1.68, + "learning_rate": 8.508432388900069e-06, + "loss": 0.1163, + "step": 5233 + }, + { + "epoch": 1.68, + "learning_rate": 8.504997660932832e-06, + "loss": 0.1249, + "step": 5234 + }, + { + "epoch": 1.68, + "learning_rate": 8.501563113358933e-06, + "loss": 0.1126, + "step": 5235 + }, + { + "epoch": 1.68, + "learning_rate": 8.498128746592806e-06, + "loss": 0.0991, + "step": 5236 + }, + { + "epoch": 1.68, + "learning_rate": 8.494694561048858e-06, + "loss": 0.1067, + "step": 5237 + }, + { + "epoch": 1.69, + "learning_rate": 8.491260557141462e-06, + "loss": 0.1122, + "step": 5238 + }, + { + "epoch": 1.69, + "learning_rate": 8.487826735284991e-06, + "loss": 0.1062, + "step": 5239 + }, + { + "epoch": 1.69, + "learning_rate": 8.484393095893781e-06, + "loss": 0.0943, + "step": 5240 + }, + { + "epoch": 1.69, + "learning_rate": 8.480959639382144e-06, + "loss": 0.1239, + "step": 5241 + }, + { + "epoch": 1.69, + "learning_rate": 8.477526366164378e-06, + "loss": 0.0964, + "step": 5242 + }, + { + "epoch": 1.69, + "learning_rate": 8.474093276654764e-06, + "loss": 0.0983, + "step": 5243 + }, + { + "epoch": 1.69, + "learning_rate": 8.47066037126754e-06, + "loss": 0.1111, + "step": 5244 + }, + { + "epoch": 1.69, + "learning_rate": 8.467227650416947e-06, + "loss": 0.1019, + "step": 5245 + }, + { + "epoch": 1.69, + "learning_rate": 8.463795114517189e-06, + "loss": 0.1264, + "step": 5246 + }, + { + "epoch": 1.69, + "learning_rate": 8.460362763982443e-06, + "loss": 0.109, + "step": 5247 + }, + { + "epoch": 1.69, + "learning_rate": 8.456930599226876e-06, + "loss": 0.1103, + "step": 5248 + }, + { + "epoch": 1.69, + "learning_rate": 8.453498620664631e-06, + "loss": 0.1165, + "step": 5249 + }, + { + "epoch": 1.69, + "learning_rate": 8.450066828709818e-06, + "loss": 0.1001, + "step": 5250 + }, + { + "epoch": 1.69, + "learning_rate": 8.446635223776535e-06, + "loss": 0.1145, + "step": 5251 + }, + { + "epoch": 1.69, + "learning_rate": 8.443203806278859e-06, + "loss": 0.1162, + "step": 5252 + }, + { + "epoch": 1.69, + "learning_rate": 8.439772576630837e-06, + "loss": 0.112, + "step": 5253 + }, + { + "epoch": 1.69, + "learning_rate": 8.436341535246489e-06, + "loss": 0.1079, + "step": 5254 + }, + { + "epoch": 1.69, + "learning_rate": 8.432910682539824e-06, + "loss": 0.1139, + "step": 5255 + }, + { + "epoch": 1.69, + "learning_rate": 8.429480018924823e-06, + "loss": 0.1205, + "step": 5256 + }, + { + "epoch": 1.69, + "learning_rate": 8.426049544815445e-06, + "loss": 0.1188, + "step": 5257 + }, + { + "epoch": 1.69, + "learning_rate": 8.422619260625626e-06, + "loss": 0.1126, + "step": 5258 + }, + { + "epoch": 1.69, + "learning_rate": 8.41918916676928e-06, + "loss": 0.1046, + "step": 5259 + }, + { + "epoch": 1.69, + "learning_rate": 8.41575926366029e-06, + "loss": 0.1121, + "step": 5260 + }, + { + "epoch": 1.69, + "learning_rate": 8.41232955171253e-06, + "loss": 0.1205, + "step": 5261 + }, + { + "epoch": 1.69, + "learning_rate": 8.408900031339838e-06, + "loss": 0.1033, + "step": 5262 + }, + { + "epoch": 1.69, + "learning_rate": 8.405470702956039e-06, + "loss": 0.0933, + "step": 5263 + }, + { + "epoch": 1.69, + "learning_rate": 8.40204156697493e-06, + "loss": 0.1122, + "step": 5264 + }, + { + "epoch": 1.69, + "learning_rate": 8.398612623810281e-06, + "loss": 0.1031, + "step": 5265 + }, + { + "epoch": 1.69, + "learning_rate": 8.395183873875849e-06, + "loss": 0.1249, + "step": 5266 + }, + { + "epoch": 1.69, + "learning_rate": 8.391755317585358e-06, + "loss": 0.1022, + "step": 5267 + }, + { + "epoch": 1.69, + "learning_rate": 8.38832695535251e-06, + "loss": 0.1089, + "step": 5268 + }, + { + "epoch": 1.7, + "learning_rate": 8.38489878759099e-06, + "loss": 0.1107, + "step": 5269 + }, + { + "epoch": 1.7, + "learning_rate": 8.381470814714455e-06, + "loss": 0.1098, + "step": 5270 + }, + { + "epoch": 1.7, + "learning_rate": 8.378043037136533e-06, + "loss": 0.1136, + "step": 5271 + }, + { + "epoch": 1.7, + "learning_rate": 8.374615455270843e-06, + "loss": 0.1112, + "step": 5272 + }, + { + "epoch": 1.7, + "learning_rate": 8.371188069530965e-06, + "loss": 0.0921, + "step": 5273 + }, + { + "epoch": 1.7, + "learning_rate": 8.367760880330465e-06, + "loss": 0.1003, + "step": 5274 + }, + { + "epoch": 1.7, + "learning_rate": 8.36433388808288e-06, + "loss": 0.1094, + "step": 5275 + }, + { + "epoch": 1.7, + "learning_rate": 8.36090709320173e-06, + "loss": 0.1087, + "step": 5276 + }, + { + "epoch": 1.7, + "learning_rate": 8.357480496100498e-06, + "loss": 0.1184, + "step": 5277 + }, + { + "epoch": 1.7, + "learning_rate": 8.35405409719266e-06, + "loss": 0.1147, + "step": 5278 + }, + { + "epoch": 1.7, + "learning_rate": 8.350627896891661e-06, + "loss": 0.1118, + "step": 5279 + }, + { + "epoch": 1.7, + "learning_rate": 8.347201895610915e-06, + "loss": 0.1233, + "step": 5280 + }, + { + "epoch": 1.7, + "learning_rate": 8.343776093763817e-06, + "loss": 0.1112, + "step": 5281 + }, + { + "epoch": 1.7, + "learning_rate": 8.340350491763745e-06, + "loss": 0.1056, + "step": 5282 + }, + { + "epoch": 1.7, + "learning_rate": 8.336925090024039e-06, + "loss": 0.1205, + "step": 5283 + }, + { + "epoch": 1.7, + "learning_rate": 8.33349988895803e-06, + "loss": 0.0998, + "step": 5284 + }, + { + "epoch": 1.7, + "learning_rate": 8.330074888979016e-06, + "loss": 0.1124, + "step": 5285 + }, + { + "epoch": 1.7, + "learning_rate": 8.32665009050027e-06, + "loss": 0.1186, + "step": 5286 + }, + { + "epoch": 1.7, + "learning_rate": 8.32322549393504e-06, + "loss": 0.1049, + "step": 5287 + }, + { + "epoch": 1.7, + "learning_rate": 8.319801099696558e-06, + "loss": 0.1051, + "step": 5288 + }, + { + "epoch": 1.7, + "learning_rate": 8.316376908198023e-06, + "loss": 0.1128, + "step": 5289 + }, + { + "epoch": 1.7, + "learning_rate": 8.312952919852612e-06, + "loss": 0.1123, + "step": 5290 + }, + { + "epoch": 1.7, + "learning_rate": 8.309529135073481e-06, + "loss": 0.117, + "step": 5291 + }, + { + "epoch": 1.7, + "learning_rate": 8.306105554273758e-06, + "loss": 0.1153, + "step": 5292 + }, + { + "epoch": 1.7, + "learning_rate": 8.302682177866543e-06, + "loss": 0.1364, + "step": 5293 + }, + { + "epoch": 1.7, + "learning_rate": 8.299259006264915e-06, + "loss": 0.1011, + "step": 5294 + }, + { + "epoch": 1.7, + "learning_rate": 8.295836039881934e-06, + "loss": 0.1174, + "step": 5295 + }, + { + "epoch": 1.7, + "learning_rate": 8.292413279130625e-06, + "loss": 0.1118, + "step": 5296 + }, + { + "epoch": 1.7, + "learning_rate": 8.288990724423997e-06, + "loss": 0.1052, + "step": 5297 + }, + { + "epoch": 1.7, + "learning_rate": 8.285568376175026e-06, + "loss": 0.1158, + "step": 5298 + }, + { + "epoch": 1.7, + "learning_rate": 8.28214623479667e-06, + "loss": 0.1081, + "step": 5299 + }, + { + "epoch": 1.71, + "learning_rate": 8.278724300701855e-06, + "loss": 0.1108, + "step": 5300 + }, + { + "epoch": 1.71, + "learning_rate": 8.27530257430349e-06, + "loss": 0.1105, + "step": 5301 + }, + { + "epoch": 1.71, + "learning_rate": 8.271881056014454e-06, + "loss": 0.105, + "step": 5302 + }, + { + "epoch": 1.71, + "learning_rate": 8.268459746247602e-06, + "loss": 0.1102, + "step": 5303 + }, + { + "epoch": 1.71, + "learning_rate": 8.265038645415764e-06, + "loss": 0.1152, + "step": 5304 + }, + { + "epoch": 1.71, + "learning_rate": 8.261617753931747e-06, + "loss": 0.1078, + "step": 5305 + }, + { + "epoch": 1.71, + "learning_rate": 8.258197072208322e-06, + "loss": 0.1179, + "step": 5306 + }, + { + "epoch": 1.71, + "learning_rate": 8.254776600658253e-06, + "loss": 0.1139, + "step": 5307 + }, + { + "epoch": 1.71, + "learning_rate": 8.25135633969426e-06, + "loss": 0.1197, + "step": 5308 + }, + { + "epoch": 1.71, + "learning_rate": 8.247936289729052e-06, + "loss": 0.1093, + "step": 5309 + }, + { + "epoch": 1.71, + "learning_rate": 8.244516451175307e-06, + "loss": 0.1068, + "step": 5310 + }, + { + "epoch": 1.71, + "learning_rate": 8.241096824445677e-06, + "loss": 0.1086, + "step": 5311 + }, + { + "epoch": 1.71, + "learning_rate": 8.237677409952784e-06, + "loss": 0.1158, + "step": 5312 + }, + { + "epoch": 1.71, + "learning_rate": 8.234258208109234e-06, + "loss": 0.1057, + "step": 5313 + }, + { + "epoch": 1.71, + "learning_rate": 8.2308392193276e-06, + "loss": 0.1162, + "step": 5314 + }, + { + "epoch": 1.71, + "learning_rate": 8.22742044402043e-06, + "loss": 0.1149, + "step": 5315 + }, + { + "epoch": 1.71, + "learning_rate": 8.224001882600254e-06, + "loss": 0.1068, + "step": 5316 + }, + { + "epoch": 1.71, + "learning_rate": 8.220583535479564e-06, + "loss": 0.1045, + "step": 5317 + }, + { + "epoch": 1.71, + "learning_rate": 8.21716540307084e-06, + "loss": 0.114, + "step": 5318 + }, + { + "epoch": 1.71, + "learning_rate": 8.21374748578652e-06, + "loss": 0.1131, + "step": 5319 + }, + { + "epoch": 1.71, + "learning_rate": 8.210329784039028e-06, + "loss": 0.1055, + "step": 5320 + }, + { + "epoch": 1.71, + "learning_rate": 8.206912298240757e-06, + "loss": 0.1087, + "step": 5321 + }, + { + "epoch": 1.71, + "learning_rate": 8.203495028804079e-06, + "loss": 0.1112, + "step": 5322 + }, + { + "epoch": 1.71, + "learning_rate": 8.200077976141331e-06, + "loss": 0.1053, + "step": 5323 + }, + { + "epoch": 1.71, + "learning_rate": 8.196661140664836e-06, + "loss": 0.1097, + "step": 5324 + }, + { + "epoch": 1.71, + "learning_rate": 8.193244522786877e-06, + "loss": 0.1144, + "step": 5325 + }, + { + "epoch": 1.71, + "learning_rate": 8.189828122919721e-06, + "loss": 0.1036, + "step": 5326 + }, + { + "epoch": 1.71, + "learning_rate": 8.186411941475603e-06, + "loss": 0.1194, + "step": 5327 + }, + { + "epoch": 1.71, + "learning_rate": 8.182995978866738e-06, + "loss": 0.1098, + "step": 5328 + }, + { + "epoch": 1.71, + "learning_rate": 8.17958023550531e-06, + "loss": 0.1147, + "step": 5329 + }, + { + "epoch": 1.71, + "learning_rate": 8.176164711803474e-06, + "loss": 0.112, + "step": 5330 + }, + { + "epoch": 1.72, + "learning_rate": 8.172749408173362e-06, + "loss": 0.1029, + "step": 5331 + }, + { + "epoch": 1.72, + "learning_rate": 8.169334325027084e-06, + "loss": 0.1041, + "step": 5332 + }, + { + "epoch": 1.72, + "learning_rate": 8.16591946277671e-06, + "loss": 0.106, + "step": 5333 + }, + { + "epoch": 1.72, + "learning_rate": 8.162504821834296e-06, + "loss": 0.1136, + "step": 5334 + }, + { + "epoch": 1.72, + "learning_rate": 8.15909040261187e-06, + "loss": 0.1125, + "step": 5335 + }, + { + "epoch": 1.72, + "learning_rate": 8.155676205521428e-06, + "loss": 0.0974, + "step": 5336 + }, + { + "epoch": 1.72, + "learning_rate": 8.152262230974945e-06, + "loss": 0.1065, + "step": 5337 + }, + { + "epoch": 1.72, + "learning_rate": 8.14884847938436e-06, + "loss": 0.1008, + "step": 5338 + }, + { + "epoch": 1.72, + "learning_rate": 8.14543495116159e-06, + "loss": 0.107, + "step": 5339 + }, + { + "epoch": 1.72, + "learning_rate": 8.14202164671853e-06, + "loss": 0.1127, + "step": 5340 + }, + { + "epoch": 1.72, + "learning_rate": 8.138608566467043e-06, + "loss": 0.1164, + "step": 5341 + }, + { + "epoch": 1.72, + "learning_rate": 8.135195710818966e-06, + "loss": 0.0981, + "step": 5342 + }, + { + "epoch": 1.72, + "learning_rate": 8.131783080186111e-06, + "loss": 0.1153, + "step": 5343 + }, + { + "epoch": 1.72, + "learning_rate": 8.128370674980255e-06, + "loss": 0.1116, + "step": 5344 + }, + { + "epoch": 1.72, + "learning_rate": 8.124958495613154e-06, + "loss": 0.109, + "step": 5345 + }, + { + "epoch": 1.72, + "learning_rate": 8.12154654249654e-06, + "loss": 0.1149, + "step": 5346 + }, + { + "epoch": 1.72, + "learning_rate": 8.118134816042111e-06, + "loss": 0.1052, + "step": 5347 + }, + { + "epoch": 1.72, + "learning_rate": 8.114723316661541e-06, + "loss": 0.1048, + "step": 5348 + }, + { + "epoch": 1.72, + "learning_rate": 8.111312044766477e-06, + "loss": 0.1112, + "step": 5349 + }, + { + "epoch": 1.72, + "learning_rate": 8.10790100076854e-06, + "loss": 0.101, + "step": 5350 + }, + { + "epoch": 1.72, + "learning_rate": 8.104490185079316e-06, + "loss": 0.1026, + "step": 5351 + }, + { + "epoch": 1.72, + "learning_rate": 8.101079598110368e-06, + "loss": 0.1122, + "step": 5352 + }, + { + "epoch": 1.72, + "learning_rate": 8.097669240273237e-06, + "loss": 0.1156, + "step": 5353 + }, + { + "epoch": 1.72, + "learning_rate": 8.094259111979427e-06, + "loss": 0.1147, + "step": 5354 + }, + { + "epoch": 1.72, + "learning_rate": 8.09084921364042e-06, + "loss": 0.1069, + "step": 5355 + }, + { + "epoch": 1.72, + "learning_rate": 8.087439545667674e-06, + "loss": 0.1273, + "step": 5356 + }, + { + "epoch": 1.72, + "learning_rate": 8.084030108472606e-06, + "loss": 0.1208, + "step": 5357 + }, + { + "epoch": 1.72, + "learning_rate": 8.080620902466615e-06, + "loss": 0.1002, + "step": 5358 + }, + { + "epoch": 1.72, + "learning_rate": 8.077211928061072e-06, + "loss": 0.1119, + "step": 5359 + }, + { + "epoch": 1.72, + "learning_rate": 8.07380318566732e-06, + "loss": 0.107, + "step": 5360 + }, + { + "epoch": 1.72, + "learning_rate": 8.070394675696666e-06, + "loss": 0.1132, + "step": 5361 + }, + { + "epoch": 1.73, + "learning_rate": 8.066986398560405e-06, + "loss": 0.1096, + "step": 5362 + }, + { + "epoch": 1.73, + "learning_rate": 8.06357835466979e-06, + "loss": 0.1177, + "step": 5363 + }, + { + "epoch": 1.73, + "learning_rate": 8.060170544436045e-06, + "loss": 0.1131, + "step": 5364 + }, + { + "epoch": 1.73, + "learning_rate": 8.056762968270374e-06, + "loss": 0.1146, + "step": 5365 + }, + { + "epoch": 1.73, + "learning_rate": 8.053355626583954e-06, + "loss": 0.1162, + "step": 5366 + }, + { + "epoch": 1.73, + "learning_rate": 8.049948519787923e-06, + "loss": 0.1173, + "step": 5367 + }, + { + "epoch": 1.73, + "learning_rate": 8.046541648293402e-06, + "loss": 0.1091, + "step": 5368 + }, + { + "epoch": 1.73, + "learning_rate": 8.043135012511478e-06, + "loss": 0.1049, + "step": 5369 + }, + { + "epoch": 1.73, + "learning_rate": 8.039728612853206e-06, + "loss": 0.1205, + "step": 5370 + }, + { + "epoch": 1.73, + "learning_rate": 8.036322449729619e-06, + "loss": 0.1111, + "step": 5371 + }, + { + "epoch": 1.73, + "learning_rate": 8.03291652355172e-06, + "loss": 0.116, + "step": 5372 + }, + { + "epoch": 1.73, + "learning_rate": 8.029510834730482e-06, + "loss": 0.1063, + "step": 5373 + }, + { + "epoch": 1.73, + "learning_rate": 8.026105383676851e-06, + "loss": 0.1071, + "step": 5374 + }, + { + "epoch": 1.73, + "learning_rate": 8.022700170801741e-06, + "loss": 0.1081, + "step": 5375 + }, + { + "epoch": 1.73, + "learning_rate": 8.019295196516044e-06, + "loss": 0.1004, + "step": 5376 + }, + { + "epoch": 1.73, + "learning_rate": 8.01589046123061e-06, + "loss": 0.1009, + "step": 5377 + }, + { + "epoch": 1.73, + "learning_rate": 8.012485965356278e-06, + "loss": 0.1048, + "step": 5378 + }, + { + "epoch": 1.73, + "learning_rate": 8.009081709303842e-06, + "loss": 0.1076, + "step": 5379 + }, + { + "epoch": 1.73, + "learning_rate": 8.005677693484077e-06, + "loss": 0.1028, + "step": 5380 + }, + { + "epoch": 1.73, + "learning_rate": 8.00227391830773e-06, + "loss": 0.1105, + "step": 5381 + }, + { + "epoch": 1.73, + "learning_rate": 7.99887038418551e-06, + "loss": 0.1081, + "step": 5382 + }, + { + "epoch": 1.73, + "learning_rate": 7.995467091528101e-06, + "loss": 0.1117, + "step": 5383 + }, + { + "epoch": 1.73, + "learning_rate": 7.992064040746163e-06, + "loss": 0.1123, + "step": 5384 + }, + { + "epoch": 1.73, + "learning_rate": 7.988661232250319e-06, + "loss": 0.1131, + "step": 5385 + }, + { + "epoch": 1.73, + "learning_rate": 7.985258666451166e-06, + "loss": 0.1126, + "step": 5386 + }, + { + "epoch": 1.73, + "learning_rate": 7.981856343759277e-06, + "loss": 0.1115, + "step": 5387 + }, + { + "epoch": 1.73, + "learning_rate": 7.978454264585185e-06, + "loss": 0.132, + "step": 5388 + }, + { + "epoch": 1.73, + "learning_rate": 7.975052429339406e-06, + "loss": 0.1021, + "step": 5389 + }, + { + "epoch": 1.73, + "learning_rate": 7.971650838432414e-06, + "loss": 0.1119, + "step": 5390 + }, + { + "epoch": 1.73, + "learning_rate": 7.96824949227466e-06, + "loss": 0.118, + "step": 5391 + }, + { + "epoch": 1.73, + "learning_rate": 7.964848391276565e-06, + "loss": 0.1184, + "step": 5392 + }, + { + "epoch": 1.73, + "learning_rate": 7.961447535848523e-06, + "loss": 0.1153, + "step": 5393 + }, + { + "epoch": 1.74, + "learning_rate": 7.958046926400893e-06, + "loss": 0.1131, + "step": 5394 + }, + { + "epoch": 1.74, + "learning_rate": 7.954646563344013e-06, + "loss": 0.1091, + "step": 5395 + }, + { + "epoch": 1.74, + "learning_rate": 7.951246447088174e-06, + "loss": 0.1178, + "step": 5396 + }, + { + "epoch": 1.74, + "learning_rate": 7.947846578043658e-06, + "loss": 0.1154, + "step": 5397 + }, + { + "epoch": 1.74, + "learning_rate": 7.944446956620703e-06, + "loss": 0.1072, + "step": 5398 + }, + { + "epoch": 1.74, + "learning_rate": 7.941047583229526e-06, + "loss": 0.1155, + "step": 5399 + }, + { + "epoch": 1.74, + "learning_rate": 7.937648458280305e-06, + "loss": 0.1149, + "step": 5400 + }, + { + "epoch": 1.74, + "learning_rate": 7.934249582183194e-06, + "loss": 0.1187, + "step": 5401 + }, + { + "epoch": 1.74, + "learning_rate": 7.930850955348321e-06, + "loss": 0.1078, + "step": 5402 + }, + { + "epoch": 1.74, + "learning_rate": 7.927452578185774e-06, + "loss": 0.114, + "step": 5403 + }, + { + "epoch": 1.74, + "learning_rate": 7.924054451105614e-06, + "loss": 0.1045, + "step": 5404 + }, + { + "epoch": 1.74, + "learning_rate": 7.920656574517877e-06, + "loss": 0.1084, + "step": 5405 + }, + { + "epoch": 1.74, + "learning_rate": 7.917258948832563e-06, + "loss": 0.1089, + "step": 5406 + }, + { + "epoch": 1.74, + "learning_rate": 7.913861574459644e-06, + "loss": 0.1082, + "step": 5407 + }, + { + "epoch": 1.74, + "learning_rate": 7.910464451809068e-06, + "loss": 0.1129, + "step": 5408 + }, + { + "epoch": 1.74, + "learning_rate": 7.907067581290738e-06, + "loss": 0.1268, + "step": 5409 + }, + { + "epoch": 1.74, + "learning_rate": 7.903670963314536e-06, + "loss": 0.1045, + "step": 5410 + }, + { + "epoch": 1.74, + "learning_rate": 7.900274598290316e-06, + "loss": 0.11, + "step": 5411 + }, + { + "epoch": 1.74, + "learning_rate": 7.896878486627894e-06, + "loss": 0.1173, + "step": 5412 + }, + { + "epoch": 1.74, + "learning_rate": 7.893482628737062e-06, + "loss": 0.1172, + "step": 5413 + }, + { + "epoch": 1.74, + "learning_rate": 7.89008702502758e-06, + "loss": 0.1059, + "step": 5414 + }, + { + "epoch": 1.74, + "learning_rate": 7.886691675909175e-06, + "loss": 0.1158, + "step": 5415 + }, + { + "epoch": 1.74, + "learning_rate": 7.88329658179154e-06, + "loss": 0.1232, + "step": 5416 + }, + { + "epoch": 1.74, + "learning_rate": 7.879901743084346e-06, + "loss": 0.1211, + "step": 5417 + }, + { + "epoch": 1.74, + "learning_rate": 7.876507160197226e-06, + "loss": 0.1091, + "step": 5418 + }, + { + "epoch": 1.74, + "learning_rate": 7.873112833539787e-06, + "loss": 0.1123, + "step": 5419 + }, + { + "epoch": 1.74, + "learning_rate": 7.869718763521604e-06, + "loss": 0.104, + "step": 5420 + }, + { + "epoch": 1.74, + "learning_rate": 7.866324950552219e-06, + "loss": 0.1107, + "step": 5421 + }, + { + "epoch": 1.74, + "learning_rate": 7.862931395041139e-06, + "loss": 0.1259, + "step": 5422 + }, + { + "epoch": 1.74, + "learning_rate": 7.85953809739785e-06, + "loss": 0.1137, + "step": 5423 + }, + { + "epoch": 1.74, + "learning_rate": 7.856145058031802e-06, + "loss": 0.1143, + "step": 5424 + }, + { + "epoch": 1.75, + "learning_rate": 7.85275227735241e-06, + "loss": 0.1161, + "step": 5425 + }, + { + "epoch": 1.75, + "learning_rate": 7.849359755769063e-06, + "loss": 0.1156, + "step": 5426 + }, + { + "epoch": 1.75, + "learning_rate": 7.84596749369112e-06, + "loss": 0.1067, + "step": 5427 + }, + { + "epoch": 1.75, + "learning_rate": 7.842575491527903e-06, + "loss": 0.1079, + "step": 5428 + }, + { + "epoch": 1.75, + "learning_rate": 7.839183749688705e-06, + "loss": 0.103, + "step": 5429 + }, + { + "epoch": 1.75, + "learning_rate": 7.835792268582789e-06, + "loss": 0.1149, + "step": 5430 + }, + { + "epoch": 1.75, + "learning_rate": 7.832401048619385e-06, + "loss": 0.1063, + "step": 5431 + }, + { + "epoch": 1.75, + "learning_rate": 7.82901009020769e-06, + "loss": 0.1062, + "step": 5432 + }, + { + "epoch": 1.75, + "learning_rate": 7.825619393756878e-06, + "loss": 0.1082, + "step": 5433 + }, + { + "epoch": 1.75, + "learning_rate": 7.822228959676082e-06, + "loss": 0.1048, + "step": 5434 + }, + { + "epoch": 1.75, + "learning_rate": 7.8188387883744e-06, + "loss": 0.111, + "step": 5435 + }, + { + "epoch": 1.75, + "learning_rate": 7.815448880260912e-06, + "loss": 0.1185, + "step": 5436 + }, + { + "epoch": 1.75, + "learning_rate": 7.812059235744654e-06, + "loss": 0.1189, + "step": 5437 + }, + { + "epoch": 1.75, + "learning_rate": 7.808669855234636e-06, + "loss": 0.1119, + "step": 5438 + }, + { + "epoch": 1.75, + "learning_rate": 7.80528073913984e-06, + "loss": 0.1097, + "step": 5439 + }, + { + "epoch": 1.75, + "learning_rate": 7.801891887869205e-06, + "loss": 0.1123, + "step": 5440 + }, + { + "epoch": 1.75, + "learning_rate": 7.798503301831647e-06, + "loss": 0.1006, + "step": 5441 + }, + { + "epoch": 1.75, + "learning_rate": 7.795114981436048e-06, + "loss": 0.1099, + "step": 5442 + }, + { + "epoch": 1.75, + "learning_rate": 7.791726927091254e-06, + "loss": 0.1097, + "step": 5443 + }, + { + "epoch": 1.75, + "learning_rate": 7.788339139206083e-06, + "loss": 0.0996, + "step": 5444 + }, + { + "epoch": 1.75, + "learning_rate": 7.784951618189322e-06, + "loss": 0.1139, + "step": 5445 + }, + { + "epoch": 1.75, + "learning_rate": 7.78156436444972e-06, + "loss": 0.1039, + "step": 5446 + }, + { + "epoch": 1.75, + "learning_rate": 7.778177378396006e-06, + "loss": 0.1049, + "step": 5447 + }, + { + "epoch": 1.75, + "learning_rate": 7.774790660436857e-06, + "loss": 0.1122, + "step": 5448 + }, + { + "epoch": 1.75, + "learning_rate": 7.771404210980936e-06, + "loss": 0.1158, + "step": 5449 + }, + { + "epoch": 1.75, + "learning_rate": 7.768018030436862e-06, + "loss": 0.1146, + "step": 5450 + }, + { + "epoch": 1.75, + "learning_rate": 7.76463211921323e-06, + "loss": 0.1118, + "step": 5451 + }, + { + "epoch": 1.75, + "learning_rate": 7.761246477718595e-06, + "loss": 0.1156, + "step": 5452 + }, + { + "epoch": 1.75, + "learning_rate": 7.757861106361486e-06, + "loss": 0.1124, + "step": 5453 + }, + { + "epoch": 1.75, + "learning_rate": 7.754476005550396e-06, + "loss": 0.1121, + "step": 5454 + }, + { + "epoch": 1.75, + "learning_rate": 7.751091175693784e-06, + "loss": 0.1093, + "step": 5455 + }, + { + "epoch": 1.76, + "learning_rate": 7.747706617200078e-06, + "loss": 0.1097, + "step": 5456 + }, + { + "epoch": 1.76, + "learning_rate": 7.744322330477673e-06, + "loss": 0.1224, + "step": 5457 + }, + { + "epoch": 1.76, + "learning_rate": 7.740938315934932e-06, + "loss": 0.1134, + "step": 5458 + }, + { + "epoch": 1.76, + "learning_rate": 7.737554573980182e-06, + "loss": 0.0905, + "step": 5459 + }, + { + "epoch": 1.76, + "learning_rate": 7.73417110502173e-06, + "loss": 0.1057, + "step": 5460 + }, + { + "epoch": 1.76, + "learning_rate": 7.730787909467829e-06, + "loss": 0.104, + "step": 5461 + }, + { + "epoch": 1.76, + "learning_rate": 7.72740498772671e-06, + "loss": 0.1083, + "step": 5462 + }, + { + "epoch": 1.76, + "learning_rate": 7.724022340206575e-06, + "loss": 0.1062, + "step": 5463 + }, + { + "epoch": 1.76, + "learning_rate": 7.720639967315588e-06, + "loss": 0.1162, + "step": 5464 + }, + { + "epoch": 1.76, + "learning_rate": 7.717257869461879e-06, + "loss": 0.1191, + "step": 5465 + }, + { + "epoch": 1.76, + "learning_rate": 7.713876047053548e-06, + "loss": 0.1124, + "step": 5466 + }, + { + "epoch": 1.76, + "learning_rate": 7.710494500498662e-06, + "loss": 0.1073, + "step": 5467 + }, + { + "epoch": 1.76, + "learning_rate": 7.707113230205247e-06, + "loss": 0.1087, + "step": 5468 + }, + { + "epoch": 1.76, + "learning_rate": 7.703732236581303e-06, + "loss": 0.1111, + "step": 5469 + }, + { + "epoch": 1.76, + "learning_rate": 7.700351520034798e-06, + "loss": 0.1088, + "step": 5470 + }, + { + "epoch": 1.76, + "learning_rate": 7.696971080973661e-06, + "loss": 0.1083, + "step": 5471 + }, + { + "epoch": 1.76, + "learning_rate": 7.693590919805796e-06, + "loss": 0.1278, + "step": 5472 + }, + { + "epoch": 1.76, + "learning_rate": 7.69021103693906e-06, + "loss": 0.1066, + "step": 5473 + }, + { + "epoch": 1.76, + "learning_rate": 7.686831432781288e-06, + "loss": 0.1121, + "step": 5474 + }, + { + "epoch": 1.76, + "learning_rate": 7.683452107740274e-06, + "loss": 0.111, + "step": 5475 + }, + { + "epoch": 1.76, + "learning_rate": 7.680073062223786e-06, + "loss": 0.1103, + "step": 5476 + }, + { + "epoch": 1.76, + "learning_rate": 7.676694296639552e-06, + "loss": 0.1077, + "step": 5477 + }, + { + "epoch": 1.76, + "learning_rate": 7.673315811395265e-06, + "loss": 0.1071, + "step": 5478 + }, + { + "epoch": 1.76, + "learning_rate": 7.669937606898596e-06, + "loss": 0.1205, + "step": 5479 + }, + { + "epoch": 1.76, + "learning_rate": 7.666559683557169e-06, + "loss": 0.1069, + "step": 5480 + }, + { + "epoch": 1.76, + "learning_rate": 7.663182041778574e-06, + "loss": 0.1104, + "step": 5481 + }, + { + "epoch": 1.76, + "learning_rate": 7.659804681970378e-06, + "loss": 0.1204, + "step": 5482 + }, + { + "epoch": 1.76, + "learning_rate": 7.656427604540104e-06, + "loss": 0.1077, + "step": 5483 + }, + { + "epoch": 1.76, + "learning_rate": 7.653050809895245e-06, + "loss": 0.1097, + "step": 5484 + }, + { + "epoch": 1.76, + "learning_rate": 7.649674298443261e-06, + "loss": 0.1094, + "step": 5485 + }, + { + "epoch": 1.76, + "learning_rate": 7.646298070591578e-06, + "loss": 0.1043, + "step": 5486 + }, + { + "epoch": 1.77, + "learning_rate": 7.642922126747579e-06, + "loss": 0.119, + "step": 5487 + }, + { + "epoch": 1.77, + "learning_rate": 7.639546467318629e-06, + "loss": 0.1001, + "step": 5488 + }, + { + "epoch": 1.77, + "learning_rate": 7.63617109271204e-06, + "loss": 0.1113, + "step": 5489 + }, + { + "epoch": 1.77, + "learning_rate": 7.632796003335104e-06, + "loss": 0.1121, + "step": 5490 + }, + { + "epoch": 1.77, + "learning_rate": 7.629421199595073e-06, + "loss": 0.099, + "step": 5491 + }, + { + "epoch": 1.77, + "learning_rate": 7.626046681899171e-06, + "loss": 0.112, + "step": 5492 + }, + { + "epoch": 1.77, + "learning_rate": 7.6226724506545715e-06, + "loss": 0.1058, + "step": 5493 + }, + { + "epoch": 1.77, + "learning_rate": 7.619298506268427e-06, + "loss": 0.109, + "step": 5494 + }, + { + "epoch": 1.77, + "learning_rate": 7.6159248491478556e-06, + "loss": 0.1048, + "step": 5495 + }, + { + "epoch": 1.77, + "learning_rate": 7.612551479699933e-06, + "loss": 0.1144, + "step": 5496 + }, + { + "epoch": 1.77, + "learning_rate": 7.609178398331707e-06, + "loss": 0.1107, + "step": 5497 + }, + { + "epoch": 1.77, + "learning_rate": 7.605805605450186e-06, + "loss": 0.1125, + "step": 5498 + }, + { + "epoch": 1.77, + "learning_rate": 7.602433101462351e-06, + "loss": 0.1016, + "step": 5499 + }, + { + "epoch": 1.77, + "learning_rate": 7.599060886775134e-06, + "loss": 0.1045, + "step": 5500 + }, + { + "epoch": 1.77, + "learning_rate": 7.595688961795445e-06, + "loss": 0.1083, + "step": 5501 + }, + { + "epoch": 1.77, + "learning_rate": 7.5923173269301545e-06, + "loss": 0.1031, + "step": 5502 + }, + { + "epoch": 1.77, + "learning_rate": 7.5889459825861e-06, + "loss": 0.1082, + "step": 5503 + }, + { + "epoch": 1.77, + "learning_rate": 7.58557492917008e-06, + "loss": 0.1161, + "step": 5504 + }, + { + "epoch": 1.77, + "learning_rate": 7.582204167088864e-06, + "loss": 0.103, + "step": 5505 + }, + { + "epoch": 1.77, + "learning_rate": 7.578833696749175e-06, + "loss": 0.1198, + "step": 5506 + }, + { + "epoch": 1.77, + "learning_rate": 7.575463518557717e-06, + "loss": 0.1169, + "step": 5507 + }, + { + "epoch": 1.77, + "learning_rate": 7.572093632921141e-06, + "loss": 0.1043, + "step": 5508 + }, + { + "epoch": 1.77, + "learning_rate": 7.56872404024608e-06, + "loss": 0.1171, + "step": 5509 + }, + { + "epoch": 1.77, + "learning_rate": 7.5653547409391175e-06, + "loss": 0.1101, + "step": 5510 + }, + { + "epoch": 1.77, + "learning_rate": 7.561985735406809e-06, + "loss": 0.1078, + "step": 5511 + }, + { + "epoch": 1.77, + "learning_rate": 7.5586170240556796e-06, + "loss": 0.1087, + "step": 5512 + }, + { + "epoch": 1.77, + "learning_rate": 7.555248607292205e-06, + "loss": 0.1116, + "step": 5513 + }, + { + "epoch": 1.77, + "learning_rate": 7.551880485522831e-06, + "loss": 0.1082, + "step": 5514 + }, + { + "epoch": 1.77, + "learning_rate": 7.548512659153973e-06, + "loss": 0.1119, + "step": 5515 + }, + { + "epoch": 1.77, + "learning_rate": 7.545145128592009e-06, + "loss": 0.1053, + "step": 5516 + }, + { + "epoch": 1.77, + "learning_rate": 7.541777894243276e-06, + "loss": 0.1114, + "step": 5517 + }, + { + "epoch": 1.78, + "learning_rate": 7.538410956514086e-06, + "loss": 0.1051, + "step": 5518 + }, + { + "epoch": 1.78, + "learning_rate": 7.535044315810699e-06, + "loss": 0.1086, + "step": 5519 + }, + { + "epoch": 1.78, + "learning_rate": 7.5316779725393495e-06, + "loss": 0.0996, + "step": 5520 + }, + { + "epoch": 1.78, + "learning_rate": 7.5283119271062376e-06, + "loss": 0.1081, + "step": 5521 + }, + { + "epoch": 1.78, + "learning_rate": 7.524946179917527e-06, + "loss": 0.108, + "step": 5522 + }, + { + "epoch": 1.78, + "learning_rate": 7.521580731379337e-06, + "loss": 0.0949, + "step": 5523 + }, + { + "epoch": 1.78, + "learning_rate": 7.518215581897763e-06, + "loss": 0.101, + "step": 5524 + }, + { + "epoch": 1.78, + "learning_rate": 7.514850731878854e-06, + "loss": 0.1154, + "step": 5525 + }, + { + "epoch": 1.78, + "learning_rate": 7.51148618172863e-06, + "loss": 0.114, + "step": 5526 + }, + { + "epoch": 1.78, + "learning_rate": 7.508121931853067e-06, + "loss": 0.1175, + "step": 5527 + }, + { + "epoch": 1.78, + "learning_rate": 7.504757982658117e-06, + "loss": 0.1157, + "step": 5528 + }, + { + "epoch": 1.78, + "learning_rate": 7.501394334549681e-06, + "loss": 0.1155, + "step": 5529 + }, + { + "epoch": 1.78, + "learning_rate": 7.498030987933635e-06, + "loss": 0.1016, + "step": 5530 + }, + { + "epoch": 1.78, + "learning_rate": 7.494667943215819e-06, + "loss": 0.1158, + "step": 5531 + }, + { + "epoch": 1.78, + "learning_rate": 7.491305200802026e-06, + "loss": 0.108, + "step": 5532 + }, + { + "epoch": 1.78, + "learning_rate": 7.487942761098017e-06, + "loss": 0.1237, + "step": 5533 + }, + { + "epoch": 1.78, + "learning_rate": 7.4845806245095245e-06, + "loss": 0.1009, + "step": 5534 + }, + { + "epoch": 1.78, + "learning_rate": 7.481218791442233e-06, + "loss": 0.1177, + "step": 5535 + }, + { + "epoch": 1.78, + "learning_rate": 7.477857262301799e-06, + "loss": 0.1032, + "step": 5536 + }, + { + "epoch": 1.78, + "learning_rate": 7.474496037493839e-06, + "loss": 0.1128, + "step": 5537 + }, + { + "epoch": 1.78, + "learning_rate": 7.471135117423935e-06, + "loss": 0.1068, + "step": 5538 + }, + { + "epoch": 1.78, + "learning_rate": 7.46777450249762e-06, + "loss": 0.108, + "step": 5539 + }, + { + "epoch": 1.78, + "learning_rate": 7.464414193120411e-06, + "loss": 0.0996, + "step": 5540 + }, + { + "epoch": 1.78, + "learning_rate": 7.46105418969777e-06, + "loss": 0.106, + "step": 5541 + }, + { + "epoch": 1.78, + "learning_rate": 7.457694492635132e-06, + "loss": 0.1117, + "step": 5542 + }, + { + "epoch": 1.78, + "learning_rate": 7.454335102337895e-06, + "loss": 0.1089, + "step": 5543 + }, + { + "epoch": 1.78, + "learning_rate": 7.450976019211416e-06, + "loss": 0.1158, + "step": 5544 + }, + { + "epoch": 1.78, + "learning_rate": 7.447617243661011e-06, + "loss": 0.1222, + "step": 5545 + }, + { + "epoch": 1.78, + "learning_rate": 7.4442587760919696e-06, + "loss": 0.1161, + "step": 5546 + }, + { + "epoch": 1.78, + "learning_rate": 7.440900616909539e-06, + "loss": 0.111, + "step": 5547 + }, + { + "epoch": 1.78, + "learning_rate": 7.4375427665189244e-06, + "loss": 0.1043, + "step": 5548 + }, + { + "epoch": 1.79, + "learning_rate": 7.434185225325305e-06, + "loss": 0.1076, + "step": 5549 + }, + { + "epoch": 1.79, + "learning_rate": 7.430827993733809e-06, + "loss": 0.1116, + "step": 5550 + }, + { + "epoch": 1.79, + "learning_rate": 7.427471072149542e-06, + "loss": 0.1024, + "step": 5551 + }, + { + "epoch": 1.79, + "learning_rate": 7.424114460977555e-06, + "loss": 0.1108, + "step": 5552 + }, + { + "epoch": 1.79, + "learning_rate": 7.420758160622878e-06, + "loss": 0.098, + "step": 5553 + }, + { + "epoch": 1.79, + "learning_rate": 7.417402171490495e-06, + "loss": 0.1159, + "step": 5554 + }, + { + "epoch": 1.79, + "learning_rate": 7.414046493985353e-06, + "loss": 0.1098, + "step": 5555 + }, + { + "epoch": 1.79, + "learning_rate": 7.410691128512362e-06, + "loss": 0.1139, + "step": 5556 + }, + { + "epoch": 1.79, + "learning_rate": 7.4073360754763975e-06, + "loss": 0.1036, + "step": 5557 + }, + { + "epoch": 1.79, + "learning_rate": 7.40398133528229e-06, + "loss": 0.1174, + "step": 5558 + }, + { + "epoch": 1.79, + "learning_rate": 7.40062690833484e-06, + "loss": 0.1076, + "step": 5559 + }, + { + "epoch": 1.79, + "learning_rate": 7.397272795038806e-06, + "loss": 0.1055, + "step": 5560 + }, + { + "epoch": 1.79, + "learning_rate": 7.393918995798908e-06, + "loss": 0.114, + "step": 5561 + }, + { + "epoch": 1.79, + "learning_rate": 7.390565511019834e-06, + "loss": 0.1116, + "step": 5562 + }, + { + "epoch": 1.79, + "learning_rate": 7.387212341106224e-06, + "loss": 0.1137, + "step": 5563 + }, + { + "epoch": 1.79, + "learning_rate": 7.383859486462693e-06, + "loss": 0.1033, + "step": 5564 + }, + { + "epoch": 1.79, + "learning_rate": 7.380506947493805e-06, + "loss": 0.1108, + "step": 5565 + }, + { + "epoch": 1.79, + "learning_rate": 7.377154724604091e-06, + "loss": 0.1105, + "step": 5566 + }, + { + "epoch": 1.79, + "learning_rate": 7.373802818198046e-06, + "loss": 0.1169, + "step": 5567 + }, + { + "epoch": 1.79, + "learning_rate": 7.370451228680129e-06, + "loss": 0.1163, + "step": 5568 + }, + { + "epoch": 1.79, + "learning_rate": 7.36709995645475e-06, + "loss": 0.1146, + "step": 5569 + }, + { + "epoch": 1.79, + "learning_rate": 7.363749001926298e-06, + "loss": 0.1115, + "step": 5570 + }, + { + "epoch": 1.79, + "learning_rate": 7.360398365499104e-06, + "loss": 0.1138, + "step": 5571 + }, + { + "epoch": 1.79, + "learning_rate": 7.357048047577471e-06, + "loss": 0.1103, + "step": 5572 + }, + { + "epoch": 1.79, + "learning_rate": 7.353698048565665e-06, + "loss": 0.1109, + "step": 5573 + }, + { + "epoch": 1.79, + "learning_rate": 7.350348368867912e-06, + "loss": 0.1106, + "step": 5574 + }, + { + "epoch": 1.79, + "learning_rate": 7.346999008888397e-06, + "loss": 0.1062, + "step": 5575 + }, + { + "epoch": 1.79, + "learning_rate": 7.343649969031268e-06, + "loss": 0.1087, + "step": 5576 + }, + { + "epoch": 1.79, + "learning_rate": 7.340301249700639e-06, + "loss": 0.1164, + "step": 5577 + }, + { + "epoch": 1.79, + "learning_rate": 7.336952851300574e-06, + "loss": 0.101, + "step": 5578 + }, + { + "epoch": 1.79, + "learning_rate": 7.333604774235106e-06, + "loss": 0.1019, + "step": 5579 + }, + { + "epoch": 1.8, + "learning_rate": 7.330257018908231e-06, + "loss": 0.1143, + "step": 5580 + }, + { + "epoch": 1.8, + "learning_rate": 7.326909585723901e-06, + "loss": 0.1089, + "step": 5581 + }, + { + "epoch": 1.8, + "learning_rate": 7.323562475086033e-06, + "loss": 0.1068, + "step": 5582 + }, + { + "epoch": 1.8, + "learning_rate": 7.3202156873985075e-06, + "loss": 0.1194, + "step": 5583 + }, + { + "epoch": 1.8, + "learning_rate": 7.316869223065156e-06, + "loss": 0.1105, + "step": 5584 + }, + { + "epoch": 1.8, + "learning_rate": 7.313523082489777e-06, + "loss": 0.1134, + "step": 5585 + }, + { + "epoch": 1.8, + "learning_rate": 7.310177266076134e-06, + "loss": 0.1096, + "step": 5586 + }, + { + "epoch": 1.8, + "learning_rate": 7.306831774227944e-06, + "loss": 0.1092, + "step": 5587 + }, + { + "epoch": 1.8, + "learning_rate": 7.3034866073488885e-06, + "loss": 0.1198, + "step": 5588 + }, + { + "epoch": 1.8, + "learning_rate": 7.300141765842614e-06, + "loss": 0.1227, + "step": 5589 + }, + { + "epoch": 1.8, + "learning_rate": 7.296797250112724e-06, + "loss": 0.1135, + "step": 5590 + }, + { + "epoch": 1.8, + "learning_rate": 7.293453060562772e-06, + "loss": 0.1016, + "step": 5591 + }, + { + "epoch": 1.8, + "learning_rate": 7.290109197596289e-06, + "loss": 0.1122, + "step": 5592 + }, + { + "epoch": 1.8, + "learning_rate": 7.286765661616761e-06, + "loss": 0.1176, + "step": 5593 + }, + { + "epoch": 1.8, + "learning_rate": 7.283422453027631e-06, + "loss": 0.1129, + "step": 5594 + }, + { + "epoch": 1.8, + "learning_rate": 7.2800795722323055e-06, + "loss": 0.1044, + "step": 5595 + }, + { + "epoch": 1.8, + "learning_rate": 7.276737019634155e-06, + "loss": 0.1263, + "step": 5596 + }, + { + "epoch": 1.8, + "learning_rate": 7.273394795636495e-06, + "loss": 0.1087, + "step": 5597 + }, + { + "epoch": 1.8, + "learning_rate": 7.270052900642619e-06, + "loss": 0.1131, + "step": 5598 + }, + { + "epoch": 1.8, + "learning_rate": 7.2667113350557775e-06, + "loss": 0.1124, + "step": 5599 + }, + { + "epoch": 1.8, + "learning_rate": 7.263370099279173e-06, + "loss": 0.1013, + "step": 5600 + }, + { + "epoch": 1.8, + "learning_rate": 7.260029193715975e-06, + "loss": 0.1092, + "step": 5601 + }, + { + "epoch": 1.8, + "learning_rate": 7.256688618769311e-06, + "loss": 0.1034, + "step": 5602 + }, + { + "epoch": 1.8, + "learning_rate": 7.253348374842273e-06, + "loss": 0.1169, + "step": 5603 + }, + { + "epoch": 1.8, + "learning_rate": 7.2500084623379005e-06, + "loss": 0.118, + "step": 5604 + }, + { + "epoch": 1.8, + "learning_rate": 7.2466688816592115e-06, + "loss": 0.1044, + "step": 5605 + }, + { + "epoch": 1.8, + "learning_rate": 7.243329633209164e-06, + "loss": 0.1138, + "step": 5606 + }, + { + "epoch": 1.8, + "learning_rate": 7.239990717390695e-06, + "loss": 0.1045, + "step": 5607 + }, + { + "epoch": 1.8, + "learning_rate": 7.236652134606684e-06, + "loss": 0.1075, + "step": 5608 + }, + { + "epoch": 1.8, + "learning_rate": 7.2333138852599884e-06, + "loss": 0.1168, + "step": 5609 + }, + { + "epoch": 1.8, + "learning_rate": 7.229975969753406e-06, + "loss": 0.1088, + "step": 5610 + }, + { + "epoch": 1.81, + "learning_rate": 7.226638388489708e-06, + "loss": 0.1126, + "step": 5611 + }, + { + "epoch": 1.81, + "learning_rate": 7.223301141871621e-06, + "loss": 0.1063, + "step": 5612 + }, + { + "epoch": 1.81, + "learning_rate": 7.219964230301831e-06, + "loss": 0.1133, + "step": 5613 + }, + { + "epoch": 1.81, + "learning_rate": 7.2166276541829864e-06, + "loss": 0.11, + "step": 5614 + }, + { + "epoch": 1.81, + "learning_rate": 7.213291413917689e-06, + "loss": 0.1049, + "step": 5615 + }, + { + "epoch": 1.81, + "learning_rate": 7.209955509908508e-06, + "loss": 0.1187, + "step": 5616 + }, + { + "epoch": 1.81, + "learning_rate": 7.2066199425579655e-06, + "loss": 0.103, + "step": 5617 + }, + { + "epoch": 1.81, + "learning_rate": 7.203284712268541e-06, + "loss": 0.1037, + "step": 5618 + }, + { + "epoch": 1.81, + "learning_rate": 7.199949819442682e-06, + "loss": 0.1054, + "step": 5619 + }, + { + "epoch": 1.81, + "learning_rate": 7.196615264482791e-06, + "loss": 0.1104, + "step": 5620 + }, + { + "epoch": 1.81, + "learning_rate": 7.193281047791229e-06, + "loss": 0.1051, + "step": 5621 + }, + { + "epoch": 1.81, + "learning_rate": 7.18994716977032e-06, + "loss": 0.1128, + "step": 5622 + }, + { + "epoch": 1.81, + "learning_rate": 7.1866136308223365e-06, + "loss": 0.1044, + "step": 5623 + }, + { + "epoch": 1.81, + "learning_rate": 7.183280431349524e-06, + "loss": 0.1247, + "step": 5624 + }, + { + "epoch": 1.81, + "learning_rate": 7.179947571754076e-06, + "loss": 0.11, + "step": 5625 + }, + { + "epoch": 1.81, + "learning_rate": 7.1766150524381515e-06, + "loss": 0.1198, + "step": 5626 + }, + { + "epoch": 1.81, + "learning_rate": 7.173282873803866e-06, + "loss": 0.107, + "step": 5627 + }, + { + "epoch": 1.81, + "learning_rate": 7.169951036253296e-06, + "loss": 0.1255, + "step": 5628 + }, + { + "epoch": 1.81, + "learning_rate": 7.166619540188478e-06, + "loss": 0.1071, + "step": 5629 + }, + { + "epoch": 1.81, + "learning_rate": 7.163288386011397e-06, + "loss": 0.1107, + "step": 5630 + }, + { + "epoch": 1.81, + "learning_rate": 7.159957574124007e-06, + "loss": 0.1131, + "step": 5631 + }, + { + "epoch": 1.81, + "learning_rate": 7.15662710492822e-06, + "loss": 0.1037, + "step": 5632 + }, + { + "epoch": 1.81, + "learning_rate": 7.153296978825903e-06, + "loss": 0.1113, + "step": 5633 + }, + { + "epoch": 1.81, + "learning_rate": 7.149967196218882e-06, + "loss": 0.1055, + "step": 5634 + }, + { + "epoch": 1.81, + "learning_rate": 7.14663775750895e-06, + "loss": 0.1178, + "step": 5635 + }, + { + "epoch": 1.81, + "learning_rate": 7.143308663097841e-06, + "loss": 0.114, + "step": 5636 + }, + { + "epoch": 1.81, + "learning_rate": 7.1399799133872615e-06, + "loss": 0.1183, + "step": 5637 + }, + { + "epoch": 1.81, + "learning_rate": 7.136651508778876e-06, + "loss": 0.1087, + "step": 5638 + }, + { + "epoch": 1.81, + "learning_rate": 7.133323449674299e-06, + "loss": 0.1107, + "step": 5639 + }, + { + "epoch": 1.81, + "learning_rate": 7.1299957364751085e-06, + "loss": 0.1037, + "step": 5640 + }, + { + "epoch": 1.81, + "learning_rate": 7.126668369582849e-06, + "loss": 0.1164, + "step": 5641 + }, + { + "epoch": 1.82, + "learning_rate": 7.123341349399006e-06, + "loss": 0.1082, + "step": 5642 + }, + { + "epoch": 1.82, + "learning_rate": 7.120014676325031e-06, + "loss": 0.1113, + "step": 5643 + }, + { + "epoch": 1.82, + "learning_rate": 7.116688350762338e-06, + "loss": 0.1045, + "step": 5644 + }, + { + "epoch": 1.82, + "learning_rate": 7.113362373112298e-06, + "loss": 0.113, + "step": 5645 + }, + { + "epoch": 1.82, + "learning_rate": 7.110036743776234e-06, + "loss": 0.1336, + "step": 5646 + }, + { + "epoch": 1.82, + "learning_rate": 7.106711463155431e-06, + "loss": 0.1153, + "step": 5647 + }, + { + "epoch": 1.82, + "learning_rate": 7.103386531651137e-06, + "loss": 0.0945, + "step": 5648 + }, + { + "epoch": 1.82, + "learning_rate": 7.100061949664542e-06, + "loss": 0.1013, + "step": 5649 + }, + { + "epoch": 1.82, + "learning_rate": 7.096737717596809e-06, + "loss": 0.111, + "step": 5650 + }, + { + "epoch": 1.82, + "learning_rate": 7.0934138358490565e-06, + "loss": 0.1063, + "step": 5651 + }, + { + "epoch": 1.82, + "learning_rate": 7.090090304822356e-06, + "loss": 0.1066, + "step": 5652 + }, + { + "epoch": 1.82, + "learning_rate": 7.086767124917739e-06, + "loss": 0.1177, + "step": 5653 + }, + { + "epoch": 1.82, + "learning_rate": 7.083444296536199e-06, + "loss": 0.1165, + "step": 5654 + }, + { + "epoch": 1.82, + "learning_rate": 7.080121820078673e-06, + "loss": 0.1012, + "step": 5655 + }, + { + "epoch": 1.82, + "learning_rate": 7.0767996959460714e-06, + "loss": 0.1243, + "step": 5656 + }, + { + "epoch": 1.82, + "learning_rate": 7.073477924539256e-06, + "loss": 0.1162, + "step": 5657 + }, + { + "epoch": 1.82, + "learning_rate": 7.070156506259044e-06, + "loss": 0.1029, + "step": 5658 + }, + { + "epoch": 1.82, + "learning_rate": 7.066835441506212e-06, + "loss": 0.1245, + "step": 5659 + }, + { + "epoch": 1.82, + "learning_rate": 7.0635147306814965e-06, + "loss": 0.1082, + "step": 5660 + }, + { + "epoch": 1.82, + "learning_rate": 7.060194374185589e-06, + "loss": 0.1164, + "step": 5661 + }, + { + "epoch": 1.82, + "learning_rate": 7.056874372419131e-06, + "loss": 0.111, + "step": 5662 + }, + { + "epoch": 1.82, + "learning_rate": 7.053554725782734e-06, + "loss": 0.1109, + "step": 5663 + }, + { + "epoch": 1.82, + "learning_rate": 7.0502354346769575e-06, + "loss": 0.1117, + "step": 5664 + }, + { + "epoch": 1.82, + "learning_rate": 7.046916499502323e-06, + "loss": 0.1188, + "step": 5665 + }, + { + "epoch": 1.82, + "learning_rate": 7.043597920659308e-06, + "loss": 0.1246, + "step": 5666 + }, + { + "epoch": 1.82, + "learning_rate": 7.0402796985483505e-06, + "loss": 0.1068, + "step": 5667 + }, + { + "epoch": 1.82, + "learning_rate": 7.036961833569831e-06, + "loss": 0.1003, + "step": 5668 + }, + { + "epoch": 1.82, + "learning_rate": 7.033644326124104e-06, + "loss": 0.1269, + "step": 5669 + }, + { + "epoch": 1.82, + "learning_rate": 7.030327176611471e-06, + "loss": 0.1144, + "step": 5670 + }, + { + "epoch": 1.82, + "learning_rate": 7.027010385432196e-06, + "loss": 0.1151, + "step": 5671 + }, + { + "epoch": 1.82, + "learning_rate": 7.023693952986496e-06, + "loss": 0.1185, + "step": 5672 + }, + { + "epoch": 1.83, + "learning_rate": 7.020377879674546e-06, + "loss": 0.1115, + "step": 5673 + }, + { + "epoch": 1.83, + "learning_rate": 7.01706216589648e-06, + "loss": 0.1041, + "step": 5674 + }, + { + "epoch": 1.83, + "learning_rate": 7.013746812052381e-06, + "loss": 0.0946, + "step": 5675 + }, + { + "epoch": 1.83, + "learning_rate": 7.010431818542298e-06, + "loss": 0.116, + "step": 5676 + }, + { + "epoch": 1.83, + "learning_rate": 7.007117185766228e-06, + "loss": 0.1052, + "step": 5677 + }, + { + "epoch": 1.83, + "learning_rate": 7.0038029141241335e-06, + "loss": 0.1112, + "step": 5678 + }, + { + "epoch": 1.83, + "learning_rate": 7.0004890040159256e-06, + "loss": 0.0988, + "step": 5679 + }, + { + "epoch": 1.83, + "learning_rate": 6.997175455841478e-06, + "loss": 0.0978, + "step": 5680 + }, + { + "epoch": 1.83, + "learning_rate": 6.993862270000613e-06, + "loss": 0.1, + "step": 5681 + }, + { + "epoch": 1.83, + "learning_rate": 6.990549446893116e-06, + "loss": 0.0989, + "step": 5682 + }, + { + "epoch": 1.83, + "learning_rate": 6.987236986918725e-06, + "loss": 0.1046, + "step": 5683 + }, + { + "epoch": 1.83, + "learning_rate": 6.983924890477138e-06, + "loss": 0.1011, + "step": 5684 + }, + { + "epoch": 1.83, + "learning_rate": 6.9806131579680035e-06, + "loss": 0.1111, + "step": 5685 + }, + { + "epoch": 1.83, + "learning_rate": 6.977301789790931e-06, + "loss": 0.1099, + "step": 5686 + }, + { + "epoch": 1.83, + "learning_rate": 6.973990786345489e-06, + "loss": 0.1024, + "step": 5687 + }, + { + "epoch": 1.83, + "learning_rate": 6.970680148031191e-06, + "loss": 0.1098, + "step": 5688 + }, + { + "epoch": 1.83, + "learning_rate": 6.967369875247512e-06, + "loss": 0.1138, + "step": 5689 + }, + { + "epoch": 1.83, + "learning_rate": 6.9640599683938835e-06, + "loss": 0.1057, + "step": 5690 + }, + { + "epoch": 1.83, + "learning_rate": 6.960750427869701e-06, + "loss": 0.1105, + "step": 5691 + }, + { + "epoch": 1.83, + "learning_rate": 6.957441254074297e-06, + "loss": 0.1137, + "step": 5692 + }, + { + "epoch": 1.83, + "learning_rate": 6.95413244740698e-06, + "loss": 0.115, + "step": 5693 + }, + { + "epoch": 1.83, + "learning_rate": 6.9508240082669995e-06, + "loss": 0.1059, + "step": 5694 + }, + { + "epoch": 1.83, + "learning_rate": 6.9475159370535635e-06, + "loss": 0.0983, + "step": 5695 + }, + { + "epoch": 1.83, + "learning_rate": 6.944208234165843e-06, + "loss": 0.1058, + "step": 5696 + }, + { + "epoch": 1.83, + "learning_rate": 6.940900900002957e-06, + "loss": 0.1065, + "step": 5697 + }, + { + "epoch": 1.83, + "learning_rate": 6.9375939349639825e-06, + "loss": 0.1165, + "step": 5698 + }, + { + "epoch": 1.83, + "learning_rate": 6.934287339447954e-06, + "loss": 0.1271, + "step": 5699 + }, + { + "epoch": 1.83, + "learning_rate": 6.930981113853861e-06, + "loss": 0.1103, + "step": 5700 + }, + { + "epoch": 1.83, + "learning_rate": 6.927675258580637e-06, + "loss": 0.1017, + "step": 5701 + }, + { + "epoch": 1.83, + "learning_rate": 6.924369774027189e-06, + "loss": 0.0996, + "step": 5702 + }, + { + "epoch": 1.83, + "learning_rate": 6.9210646605923715e-06, + "loss": 0.11, + "step": 5703 + }, + { + "epoch": 1.84, + "learning_rate": 6.917759918674988e-06, + "loss": 0.1074, + "step": 5704 + }, + { + "epoch": 1.84, + "learning_rate": 6.9144555486738085e-06, + "loss": 0.1158, + "step": 5705 + }, + { + "epoch": 1.84, + "learning_rate": 6.91115155098755e-06, + "loss": 0.1078, + "step": 5706 + }, + { + "epoch": 1.84, + "learning_rate": 6.9078479260148855e-06, + "loss": 0.1057, + "step": 5707 + }, + { + "epoch": 1.84, + "learning_rate": 6.904544674154443e-06, + "loss": 0.1203, + "step": 5708 + }, + { + "epoch": 1.84, + "learning_rate": 6.901241795804813e-06, + "loss": 0.1027, + "step": 5709 + }, + { + "epoch": 1.84, + "learning_rate": 6.897939291364528e-06, + "loss": 0.1221, + "step": 5710 + }, + { + "epoch": 1.84, + "learning_rate": 6.894637161232086e-06, + "loss": 0.1014, + "step": 5711 + }, + { + "epoch": 1.84, + "learning_rate": 6.8913354058059374e-06, + "loss": 0.1089, + "step": 5712 + }, + { + "epoch": 1.84, + "learning_rate": 6.888034025484487e-06, + "loss": 0.1082, + "step": 5713 + }, + { + "epoch": 1.84, + "learning_rate": 6.884733020666086e-06, + "loss": 0.0993, + "step": 5714 + }, + { + "epoch": 1.84, + "learning_rate": 6.881432391749054e-06, + "loss": 0.1022, + "step": 5715 + }, + { + "epoch": 1.84, + "learning_rate": 6.878132139131657e-06, + "loss": 0.119, + "step": 5716 + }, + { + "epoch": 1.84, + "learning_rate": 6.874832263212117e-06, + "loss": 0.1045, + "step": 5717 + }, + { + "epoch": 1.84, + "learning_rate": 6.871532764388613e-06, + "loss": 0.1105, + "step": 5718 + }, + { + "epoch": 1.84, + "learning_rate": 6.868233643059277e-06, + "loss": 0.1043, + "step": 5719 + }, + { + "epoch": 1.84, + "learning_rate": 6.864934899622191e-06, + "loss": 0.1141, + "step": 5720 + }, + { + "epoch": 1.84, + "learning_rate": 6.861636534475396e-06, + "loss": 0.1011, + "step": 5721 + }, + { + "epoch": 1.84, + "learning_rate": 6.858338548016892e-06, + "loss": 0.1038, + "step": 5722 + }, + { + "epoch": 1.84, + "learning_rate": 6.855040940644622e-06, + "loss": 0.1134, + "step": 5723 + }, + { + "epoch": 1.84, + "learning_rate": 6.851743712756494e-06, + "loss": 0.1024, + "step": 5724 + }, + { + "epoch": 1.84, + "learning_rate": 6.848446864750362e-06, + "loss": 0.1113, + "step": 5725 + }, + { + "epoch": 1.84, + "learning_rate": 6.845150397024043e-06, + "loss": 0.0957, + "step": 5726 + }, + { + "epoch": 1.84, + "learning_rate": 6.841854309975296e-06, + "loss": 0.106, + "step": 5727 + }, + { + "epoch": 1.84, + "learning_rate": 6.838558604001843e-06, + "loss": 0.1014, + "step": 5728 + }, + { + "epoch": 1.84, + "learning_rate": 6.83526327950136e-06, + "loss": 0.105, + "step": 5729 + }, + { + "epoch": 1.84, + "learning_rate": 6.831968336871475e-06, + "loss": 0.1143, + "step": 5730 + }, + { + "epoch": 1.84, + "learning_rate": 6.8286737765097666e-06, + "loss": 0.1153, + "step": 5731 + }, + { + "epoch": 1.84, + "learning_rate": 6.825379598813776e-06, + "loss": 0.1064, + "step": 5732 + }, + { + "epoch": 1.84, + "learning_rate": 6.822085804180985e-06, + "loss": 0.1132, + "step": 5733 + }, + { + "epoch": 1.84, + "learning_rate": 6.818792393008844e-06, + "loss": 0.0982, + "step": 5734 + }, + { + "epoch": 1.84, + "learning_rate": 6.815499365694744e-06, + "loss": 0.1161, + "step": 5735 + }, + { + "epoch": 1.85, + "learning_rate": 6.8122067226360415e-06, + "loss": 0.1093, + "step": 5736 + }, + { + "epoch": 1.85, + "learning_rate": 6.808914464230034e-06, + "loss": 0.1155, + "step": 5737 + }, + { + "epoch": 1.85, + "learning_rate": 6.805622590873985e-06, + "loss": 0.1096, + "step": 5738 + }, + { + "epoch": 1.85, + "learning_rate": 6.802331102965108e-06, + "loss": 0.1136, + "step": 5739 + }, + { + "epoch": 1.85, + "learning_rate": 6.799040000900563e-06, + "loss": 0.1091, + "step": 5740 + }, + { + "epoch": 1.85, + "learning_rate": 6.795749285077466e-06, + "loss": 0.1037, + "step": 5741 + }, + { + "epoch": 1.85, + "learning_rate": 6.792458955892895e-06, + "loss": 0.0971, + "step": 5742 + }, + { + "epoch": 1.85, + "learning_rate": 6.789169013743872e-06, + "loss": 0.1151, + "step": 5743 + }, + { + "epoch": 1.85, + "learning_rate": 6.785879459027376e-06, + "loss": 0.1192, + "step": 5744 + }, + { + "epoch": 1.85, + "learning_rate": 6.782590292140341e-06, + "loss": 0.1123, + "step": 5745 + }, + { + "epoch": 1.85, + "learning_rate": 6.779301513479648e-06, + "loss": 0.1016, + "step": 5746 + }, + { + "epoch": 1.85, + "learning_rate": 6.7760131234421345e-06, + "loss": 0.1076, + "step": 5747 + }, + { + "epoch": 1.85, + "learning_rate": 6.772725122424594e-06, + "loss": 0.1018, + "step": 5748 + }, + { + "epoch": 1.85, + "learning_rate": 6.7694375108237716e-06, + "loss": 0.1041, + "step": 5749 + }, + { + "epoch": 1.85, + "learning_rate": 6.7661502890363615e-06, + "loss": 0.1081, + "step": 5750 + }, + { + "epoch": 1.85, + "learning_rate": 6.762863457459019e-06, + "loss": 0.113, + "step": 5751 + }, + { + "epoch": 1.85, + "learning_rate": 6.759577016488343e-06, + "loss": 0.1202, + "step": 5752 + }, + { + "epoch": 1.85, + "learning_rate": 6.7562909665208885e-06, + "loss": 0.1121, + "step": 5753 + }, + { + "epoch": 1.85, + "learning_rate": 6.7530053079531664e-06, + "loss": 0.1059, + "step": 5754 + }, + { + "epoch": 1.85, + "learning_rate": 6.749720041181638e-06, + "loss": 0.1068, + "step": 5755 + }, + { + "epoch": 1.85, + "learning_rate": 6.746435166602716e-06, + "loss": 0.1254, + "step": 5756 + }, + { + "epoch": 1.85, + "learning_rate": 6.743150684612769e-06, + "loss": 0.1153, + "step": 5757 + }, + { + "epoch": 1.85, + "learning_rate": 6.739866595608121e-06, + "loss": 0.1127, + "step": 5758 + }, + { + "epoch": 1.85, + "learning_rate": 6.736582899985038e-06, + "loss": 0.1021, + "step": 5759 + }, + { + "epoch": 1.85, + "learning_rate": 6.733299598139742e-06, + "loss": 0.1193, + "step": 5760 + }, + { + "epoch": 1.85, + "learning_rate": 6.730016690468417e-06, + "loss": 0.1056, + "step": 5761 + }, + { + "epoch": 1.85, + "learning_rate": 6.726734177367189e-06, + "loss": 0.1198, + "step": 5762 + }, + { + "epoch": 1.85, + "learning_rate": 6.72345205923214e-06, + "loss": 0.112, + "step": 5763 + }, + { + "epoch": 1.85, + "learning_rate": 6.720170336459308e-06, + "loss": 0.1039, + "step": 5764 + }, + { + "epoch": 1.85, + "learning_rate": 6.716889009444678e-06, + "loss": 0.1016, + "step": 5765 + }, + { + "epoch": 1.85, + "learning_rate": 6.713608078584184e-06, + "loss": 0.1097, + "step": 5766 + }, + { + "epoch": 1.86, + "learning_rate": 6.710327544273724e-06, + "loss": 0.0952, + "step": 5767 + }, + { + "epoch": 1.86, + "learning_rate": 6.707047406909135e-06, + "loss": 0.1039, + "step": 5768 + }, + { + "epoch": 1.86, + "learning_rate": 6.703767666886215e-06, + "loss": 0.1038, + "step": 5769 + }, + { + "epoch": 1.86, + "learning_rate": 6.700488324600715e-06, + "loss": 0.1133, + "step": 5770 + }, + { + "epoch": 1.86, + "learning_rate": 6.697209380448333e-06, + "loss": 0.1199, + "step": 5771 + }, + { + "epoch": 1.86, + "learning_rate": 6.693930834824713e-06, + "loss": 0.1071, + "step": 5772 + }, + { + "epoch": 1.86, + "learning_rate": 6.6906526881254655e-06, + "loss": 0.1082, + "step": 5773 + }, + { + "epoch": 1.86, + "learning_rate": 6.687374940746145e-06, + "loss": 0.106, + "step": 5774 + }, + { + "epoch": 1.86, + "learning_rate": 6.684097593082256e-06, + "loss": 0.1089, + "step": 5775 + }, + { + "epoch": 1.86, + "learning_rate": 6.68082064552926e-06, + "loss": 0.1106, + "step": 5776 + }, + { + "epoch": 1.86, + "learning_rate": 6.6775440984825645e-06, + "loss": 0.1096, + "step": 5777 + }, + { + "epoch": 1.86, + "learning_rate": 6.674267952337538e-06, + "loss": 0.1257, + "step": 5778 + }, + { + "epoch": 1.86, + "learning_rate": 6.670992207489484e-06, + "loss": 0.1096, + "step": 5779 + }, + { + "epoch": 1.86, + "learning_rate": 6.667716864333675e-06, + "loss": 0.1138, + "step": 5780 + }, + { + "epoch": 1.86, + "learning_rate": 6.664441923265326e-06, + "loss": 0.1088, + "step": 5781 + }, + { + "epoch": 1.86, + "learning_rate": 6.661167384679605e-06, + "loss": 0.1061, + "step": 5782 + }, + { + "epoch": 1.86, + "learning_rate": 6.657893248971631e-06, + "loss": 0.1137, + "step": 5783 + }, + { + "epoch": 1.86, + "learning_rate": 6.65461951653648e-06, + "loss": 0.1119, + "step": 5784 + }, + { + "epoch": 1.86, + "learning_rate": 6.651346187769168e-06, + "loss": 0.1053, + "step": 5785 + }, + { + "epoch": 1.86, + "learning_rate": 6.648073263064672e-06, + "loss": 0.1019, + "step": 5786 + }, + { + "epoch": 1.86, + "learning_rate": 6.644800742817914e-06, + "loss": 0.1025, + "step": 5787 + }, + { + "epoch": 1.86, + "learning_rate": 6.6415286274237744e-06, + "loss": 0.1106, + "step": 5788 + }, + { + "epoch": 1.86, + "learning_rate": 6.6382569172770805e-06, + "loss": 0.1155, + "step": 5789 + }, + { + "epoch": 1.86, + "learning_rate": 6.634985612772611e-06, + "loss": 0.1075, + "step": 5790 + }, + { + "epoch": 1.86, + "learning_rate": 6.631714714305089e-06, + "loss": 0.1152, + "step": 5791 + }, + { + "epoch": 1.86, + "learning_rate": 6.628444222269203e-06, + "loss": 0.1107, + "step": 5792 + }, + { + "epoch": 1.86, + "learning_rate": 6.625174137059578e-06, + "loss": 0.1107, + "step": 5793 + }, + { + "epoch": 1.86, + "learning_rate": 6.621904459070798e-06, + "loss": 0.1063, + "step": 5794 + }, + { + "epoch": 1.86, + "learning_rate": 6.618635188697402e-06, + "loss": 0.1111, + "step": 5795 + }, + { + "epoch": 1.86, + "learning_rate": 6.615366326333868e-06, + "loss": 0.1034, + "step": 5796 + }, + { + "epoch": 1.86, + "learning_rate": 6.6120978723746356e-06, + "loss": 0.1137, + "step": 5797 + }, + { + "epoch": 1.87, + "learning_rate": 6.608829827214085e-06, + "loss": 0.1173, + "step": 5798 + }, + { + "epoch": 1.87, + "learning_rate": 6.605562191246554e-06, + "loss": 0.1073, + "step": 5799 + }, + { + "epoch": 1.87, + "learning_rate": 6.6022949648663294e-06, + "loss": 0.1007, + "step": 5800 + }, + { + "epoch": 1.87, + "learning_rate": 6.599028148467652e-06, + "loss": 0.11, + "step": 5801 + }, + { + "epoch": 1.87, + "learning_rate": 6.595761742444705e-06, + "loss": 0.1074, + "step": 5802 + }, + { + "epoch": 1.87, + "learning_rate": 6.592495747191634e-06, + "loss": 0.1033, + "step": 5803 + }, + { + "epoch": 1.87, + "learning_rate": 6.589230163102518e-06, + "loss": 0.1187, + "step": 5804 + }, + { + "epoch": 1.87, + "learning_rate": 6.585964990571403e-06, + "loss": 0.1071, + "step": 5805 + }, + { + "epoch": 1.87, + "learning_rate": 6.582700229992276e-06, + "loss": 0.1017, + "step": 5806 + }, + { + "epoch": 1.87, + "learning_rate": 6.57943588175908e-06, + "loss": 0.1098, + "step": 5807 + }, + { + "epoch": 1.87, + "learning_rate": 6.576171946265699e-06, + "loss": 0.1142, + "step": 5808 + }, + { + "epoch": 1.87, + "learning_rate": 6.57290842390598e-06, + "loss": 0.1049, + "step": 5809 + }, + { + "epoch": 1.87, + "learning_rate": 6.569645315073713e-06, + "loss": 0.1122, + "step": 5810 + }, + { + "epoch": 1.87, + "learning_rate": 6.566382620162635e-06, + "loss": 0.1039, + "step": 5811 + }, + { + "epoch": 1.87, + "learning_rate": 6.563120339566436e-06, + "loss": 0.1093, + "step": 5812 + }, + { + "epoch": 1.87, + "learning_rate": 6.55985847367876e-06, + "loss": 0.122, + "step": 5813 + }, + { + "epoch": 1.87, + "learning_rate": 6.556597022893194e-06, + "loss": 0.1073, + "step": 5814 + }, + { + "epoch": 1.87, + "learning_rate": 6.553335987603282e-06, + "loss": 0.1049, + "step": 5815 + }, + { + "epoch": 1.87, + "learning_rate": 6.550075368202518e-06, + "loss": 0.1119, + "step": 5816 + }, + { + "epoch": 1.87, + "learning_rate": 6.5468151650843336e-06, + "loss": 0.1132, + "step": 5817 + }, + { + "epoch": 1.87, + "learning_rate": 6.543555378642121e-06, + "loss": 0.1198, + "step": 5818 + }, + { + "epoch": 1.87, + "learning_rate": 6.540296009269223e-06, + "loss": 0.0989, + "step": 5819 + }, + { + "epoch": 1.87, + "learning_rate": 6.537037057358928e-06, + "loss": 0.1166, + "step": 5820 + }, + { + "epoch": 1.87, + "learning_rate": 6.533778523304473e-06, + "loss": 0.1061, + "step": 5821 + }, + { + "epoch": 1.87, + "learning_rate": 6.530520407499049e-06, + "loss": 0.1048, + "step": 5822 + }, + { + "epoch": 1.87, + "learning_rate": 6.527262710335797e-06, + "loss": 0.1066, + "step": 5823 + }, + { + "epoch": 1.87, + "learning_rate": 6.524005432207795e-06, + "loss": 0.1102, + "step": 5824 + }, + { + "epoch": 1.87, + "learning_rate": 6.520748573508087e-06, + "loss": 0.1101, + "step": 5825 + }, + { + "epoch": 1.87, + "learning_rate": 6.517492134629659e-06, + "loss": 0.1074, + "step": 5826 + }, + { + "epoch": 1.87, + "learning_rate": 6.514236115965446e-06, + "loss": 0.1127, + "step": 5827 + }, + { + "epoch": 1.87, + "learning_rate": 6.510980517908334e-06, + "loss": 0.1077, + "step": 5828 + }, + { + "epoch": 1.88, + "learning_rate": 6.507725340851158e-06, + "loss": 0.1136, + "step": 5829 + }, + { + "epoch": 1.88, + "learning_rate": 6.504470585186696e-06, + "loss": 0.1149, + "step": 5830 + }, + { + "epoch": 1.88, + "learning_rate": 6.501216251307685e-06, + "loss": 0.1108, + "step": 5831 + }, + { + "epoch": 1.88, + "learning_rate": 6.4979623396068095e-06, + "loss": 0.11, + "step": 5832 + }, + { + "epoch": 1.88, + "learning_rate": 6.494708850476692e-06, + "loss": 0.1317, + "step": 5833 + }, + { + "epoch": 1.88, + "learning_rate": 6.491455784309923e-06, + "loss": 0.1073, + "step": 5834 + }, + { + "epoch": 1.88, + "learning_rate": 6.488203141499021e-06, + "loss": 0.1074, + "step": 5835 + }, + { + "epoch": 1.88, + "learning_rate": 6.484950922436475e-06, + "loss": 0.1029, + "step": 5836 + }, + { + "epoch": 1.88, + "learning_rate": 6.481699127514699e-06, + "loss": 0.1043, + "step": 5837 + }, + { + "epoch": 1.88, + "learning_rate": 6.4784477571260775e-06, + "loss": 0.0992, + "step": 5838 + }, + { + "epoch": 1.88, + "learning_rate": 6.475196811662929e-06, + "loss": 0.0941, + "step": 5839 + }, + { + "epoch": 1.88, + "learning_rate": 6.4719462915175305e-06, + "loss": 0.1226, + "step": 5840 + }, + { + "epoch": 1.88, + "learning_rate": 6.468696197082103e-06, + "loss": 0.1059, + "step": 5841 + }, + { + "epoch": 1.88, + "learning_rate": 6.465446528748818e-06, + "loss": 0.1038, + "step": 5842 + }, + { + "epoch": 1.88, + "learning_rate": 6.462197286909789e-06, + "loss": 0.1126, + "step": 5843 + }, + { + "epoch": 1.88, + "learning_rate": 6.458948471957088e-06, + "loss": 0.1049, + "step": 5844 + }, + { + "epoch": 1.88, + "learning_rate": 6.4557000842827275e-06, + "loss": 0.1091, + "step": 5845 + }, + { + "epoch": 1.88, + "learning_rate": 6.452452124278674e-06, + "loss": 0.0989, + "step": 5846 + }, + { + "epoch": 1.88, + "learning_rate": 6.449204592336842e-06, + "loss": 0.0995, + "step": 5847 + }, + { + "epoch": 1.88, + "learning_rate": 6.445957488849088e-06, + "loss": 0.1069, + "step": 5848 + }, + { + "epoch": 1.88, + "learning_rate": 6.442710814207228e-06, + "loss": 0.1101, + "step": 5849 + }, + { + "epoch": 1.88, + "learning_rate": 6.439464568803014e-06, + "loss": 0.1032, + "step": 5850 + }, + { + "epoch": 1.88, + "learning_rate": 6.4362187530281495e-06, + "loss": 0.1075, + "step": 5851 + }, + { + "epoch": 1.88, + "learning_rate": 6.432973367274292e-06, + "loss": 0.1018, + "step": 5852 + }, + { + "epoch": 1.88, + "learning_rate": 6.429728411933047e-06, + "loss": 0.1108, + "step": 5853 + }, + { + "epoch": 1.88, + "learning_rate": 6.426483887395958e-06, + "loss": 0.1135, + "step": 5854 + }, + { + "epoch": 1.88, + "learning_rate": 6.42323979405453e-06, + "loss": 0.1074, + "step": 5855 + }, + { + "epoch": 1.88, + "learning_rate": 6.419996132300203e-06, + "loss": 0.1067, + "step": 5856 + }, + { + "epoch": 1.88, + "learning_rate": 6.416752902524375e-06, + "loss": 0.1049, + "step": 5857 + }, + { + "epoch": 1.88, + "learning_rate": 6.413510105118383e-06, + "loss": 0.1166, + "step": 5858 + }, + { + "epoch": 1.88, + "learning_rate": 6.410267740473523e-06, + "loss": 0.1118, + "step": 5859 + }, + { + "epoch": 1.89, + "learning_rate": 6.407025808981028e-06, + "loss": 0.1027, + "step": 5860 + }, + { + "epoch": 1.89, + "learning_rate": 6.403784311032084e-06, + "loss": 0.1001, + "step": 5861 + }, + { + "epoch": 1.89, + "learning_rate": 6.40054324701783e-06, + "loss": 0.1249, + "step": 5862 + }, + { + "epoch": 1.89, + "learning_rate": 6.3973026173293385e-06, + "loss": 0.112, + "step": 5863 + }, + { + "epoch": 1.89, + "learning_rate": 6.394062422357638e-06, + "loss": 0.1236, + "step": 5864 + }, + { + "epoch": 1.89, + "learning_rate": 6.390822662493709e-06, + "loss": 0.1065, + "step": 5865 + }, + { + "epoch": 1.89, + "learning_rate": 6.387583338128471e-06, + "loss": 0.1019, + "step": 5866 + }, + { + "epoch": 1.89, + "learning_rate": 6.384344449652795e-06, + "loss": 0.1043, + "step": 5867 + }, + { + "epoch": 1.89, + "learning_rate": 6.3811059974575065e-06, + "loss": 0.1128, + "step": 5868 + }, + { + "epoch": 1.89, + "learning_rate": 6.377867981933361e-06, + "loss": 0.1016, + "step": 5869 + }, + { + "epoch": 1.89, + "learning_rate": 6.374630403471072e-06, + "loss": 0.1027, + "step": 5870 + }, + { + "epoch": 1.89, + "learning_rate": 6.371393262461303e-06, + "loss": 0.1108, + "step": 5871 + }, + { + "epoch": 1.89, + "learning_rate": 6.368156559294662e-06, + "loss": 0.1132, + "step": 5872 + }, + { + "epoch": 1.89, + "learning_rate": 6.364920294361701e-06, + "loss": 0.1001, + "step": 5873 + }, + { + "epoch": 1.89, + "learning_rate": 6.361684468052925e-06, + "loss": 0.1083, + "step": 5874 + }, + { + "epoch": 1.89, + "learning_rate": 6.35844908075878e-06, + "loss": 0.1066, + "step": 5875 + }, + { + "epoch": 1.89, + "learning_rate": 6.355214132869658e-06, + "loss": 0.1212, + "step": 5876 + }, + { + "epoch": 1.89, + "learning_rate": 6.351979624775906e-06, + "loss": 0.1186, + "step": 5877 + }, + { + "epoch": 1.89, + "learning_rate": 6.348745556867814e-06, + "loss": 0.1202, + "step": 5878 + }, + { + "epoch": 1.89, + "learning_rate": 6.3455119295356145e-06, + "loss": 0.107, + "step": 5879 + }, + { + "epoch": 1.89, + "learning_rate": 6.3422787431694954e-06, + "loss": 0.0929, + "step": 5880 + }, + { + "epoch": 1.89, + "learning_rate": 6.339045998159588e-06, + "loss": 0.1071, + "step": 5881 + }, + { + "epoch": 1.89, + "learning_rate": 6.33581369489596e-06, + "loss": 0.101, + "step": 5882 + }, + { + "epoch": 1.89, + "learning_rate": 6.332581833768641e-06, + "loss": 0.1099, + "step": 5883 + }, + { + "epoch": 1.89, + "learning_rate": 6.3293504151676026e-06, + "loss": 0.1119, + "step": 5884 + }, + { + "epoch": 1.89, + "learning_rate": 6.3261194394827565e-06, + "loss": 0.1027, + "step": 5885 + }, + { + "epoch": 1.89, + "learning_rate": 6.3228889071039705e-06, + "loss": 0.1017, + "step": 5886 + }, + { + "epoch": 1.89, + "learning_rate": 6.319658818421055e-06, + "loss": 0.1044, + "step": 5887 + }, + { + "epoch": 1.89, + "learning_rate": 6.316429173823765e-06, + "loss": 0.1007, + "step": 5888 + }, + { + "epoch": 1.89, + "learning_rate": 6.313199973701798e-06, + "loss": 0.1097, + "step": 5889 + }, + { + "epoch": 1.89, + "learning_rate": 6.30997121844481e-06, + "loss": 0.1116, + "step": 5890 + }, + { + "epoch": 1.9, + "learning_rate": 6.306742908442393e-06, + "loss": 0.1106, + "step": 5891 + }, + { + "epoch": 1.9, + "learning_rate": 6.303515044084087e-06, + "loss": 0.1065, + "step": 5892 + }, + { + "epoch": 1.9, + "learning_rate": 6.3002876257593845e-06, + "loss": 0.1163, + "step": 5893 + }, + { + "epoch": 1.9, + "learning_rate": 6.2970606538577205e-06, + "loss": 0.1139, + "step": 5894 + }, + { + "epoch": 1.9, + "learning_rate": 6.293834128768467e-06, + "loss": 0.109, + "step": 5895 + }, + { + "epoch": 1.9, + "learning_rate": 6.290608050880957e-06, + "loss": 0.1077, + "step": 5896 + }, + { + "epoch": 1.9, + "learning_rate": 6.287382420584458e-06, + "loss": 0.1094, + "step": 5897 + }, + { + "epoch": 1.9, + "learning_rate": 6.284157238268193e-06, + "loss": 0.1029, + "step": 5898 + }, + { + "epoch": 1.9, + "learning_rate": 6.280932504321324e-06, + "loss": 0.1025, + "step": 5899 + }, + { + "epoch": 1.9, + "learning_rate": 6.277708219132961e-06, + "loss": 0.1051, + "step": 5900 + }, + { + "epoch": 1.9, + "learning_rate": 6.274484383092165e-06, + "loss": 0.1041, + "step": 5901 + }, + { + "epoch": 1.9, + "learning_rate": 6.271260996587928e-06, + "loss": 0.1084, + "step": 5902 + }, + { + "epoch": 1.9, + "learning_rate": 6.268038060009205e-06, + "loss": 0.1181, + "step": 5903 + }, + { + "epoch": 1.9, + "learning_rate": 6.264815573744884e-06, + "loss": 0.108, + "step": 5904 + }, + { + "epoch": 1.9, + "learning_rate": 6.26159353818381e-06, + "loss": 0.1034, + "step": 5905 + }, + { + "epoch": 1.9, + "learning_rate": 6.258371953714762e-06, + "loss": 0.111, + "step": 5906 + }, + { + "epoch": 1.9, + "learning_rate": 6.255150820726477e-06, + "loss": 0.1136, + "step": 5907 + }, + { + "epoch": 1.9, + "learning_rate": 6.251930139607621e-06, + "loss": 0.1091, + "step": 5908 + }, + { + "epoch": 1.9, + "learning_rate": 6.248709910746823e-06, + "loss": 0.0997, + "step": 5909 + }, + { + "epoch": 1.9, + "learning_rate": 6.245490134532644e-06, + "loss": 0.1121, + "step": 5910 + }, + { + "epoch": 1.9, + "learning_rate": 6.242270811353601e-06, + "loss": 0.0978, + "step": 5911 + }, + { + "epoch": 1.9, + "learning_rate": 6.239051941598147e-06, + "loss": 0.11, + "step": 5912 + }, + { + "epoch": 1.9, + "learning_rate": 6.235833525654686e-06, + "loss": 0.1132, + "step": 5913 + }, + { + "epoch": 1.9, + "learning_rate": 6.23261556391157e-06, + "loss": 0.1131, + "step": 5914 + }, + { + "epoch": 1.9, + "learning_rate": 6.229398056757085e-06, + "loss": 0.1122, + "step": 5915 + }, + { + "epoch": 1.9, + "learning_rate": 6.226181004579472e-06, + "loss": 0.0997, + "step": 5916 + }, + { + "epoch": 1.9, + "learning_rate": 6.2229644077669135e-06, + "loss": 0.1119, + "step": 5917 + }, + { + "epoch": 1.9, + "learning_rate": 6.2197482667075405e-06, + "loss": 0.1055, + "step": 5918 + }, + { + "epoch": 1.9, + "learning_rate": 6.216532581789423e-06, + "loss": 0.1038, + "step": 5919 + }, + { + "epoch": 1.9, + "learning_rate": 6.213317353400583e-06, + "loss": 0.1103, + "step": 5920 + }, + { + "epoch": 1.9, + "learning_rate": 6.210102581928981e-06, + "loss": 0.1158, + "step": 5921 + }, + { + "epoch": 1.91, + "learning_rate": 6.206888267762522e-06, + "loss": 0.1064, + "step": 5922 + }, + { + "epoch": 1.91, + "learning_rate": 6.203674411289063e-06, + "loss": 0.1102, + "step": 5923 + }, + { + "epoch": 1.91, + "learning_rate": 6.200461012896401e-06, + "loss": 0.1103, + "step": 5924 + }, + { + "epoch": 1.91, + "learning_rate": 6.197248072972277e-06, + "loss": 0.1131, + "step": 5925 + }, + { + "epoch": 1.91, + "learning_rate": 6.194035591904382e-06, + "loss": 0.1068, + "step": 5926 + }, + { + "epoch": 1.91, + "learning_rate": 6.190823570080344e-06, + "loss": 0.1099, + "step": 5927 + }, + { + "epoch": 1.91, + "learning_rate": 6.187612007887738e-06, + "loss": 0.1134, + "step": 5928 + }, + { + "epoch": 1.91, + "learning_rate": 6.184400905714085e-06, + "loss": 0.1048, + "step": 5929 + }, + { + "epoch": 1.91, + "learning_rate": 6.181190263946856e-06, + "loss": 0.103, + "step": 5930 + }, + { + "epoch": 1.91, + "learning_rate": 6.177980082973453e-06, + "loss": 0.1078, + "step": 5931 + }, + { + "epoch": 1.91, + "learning_rate": 6.174770363181235e-06, + "loss": 0.1186, + "step": 5932 + }, + { + "epoch": 1.91, + "learning_rate": 6.1715611049574994e-06, + "loss": 0.1032, + "step": 5933 + }, + { + "epoch": 1.91, + "learning_rate": 6.168352308689489e-06, + "loss": 0.1156, + "step": 5934 + }, + { + "epoch": 1.91, + "learning_rate": 6.165143974764389e-06, + "loss": 0.1105, + "step": 5935 + }, + { + "epoch": 1.91, + "learning_rate": 6.161936103569333e-06, + "loss": 0.1063, + "step": 5936 + }, + { + "epoch": 1.91, + "learning_rate": 6.158728695491394e-06, + "loss": 0.0983, + "step": 5937 + }, + { + "epoch": 1.91, + "learning_rate": 6.155521750917593e-06, + "loss": 0.1048, + "step": 5938 + }, + { + "epoch": 1.91, + "learning_rate": 6.152315270234897e-06, + "loss": 0.1051, + "step": 5939 + }, + { + "epoch": 1.91, + "learning_rate": 6.149109253830208e-06, + "loss": 0.1026, + "step": 5940 + }, + { + "epoch": 1.91, + "learning_rate": 6.1459037020903775e-06, + "loss": 0.107, + "step": 5941 + }, + { + "epoch": 1.91, + "learning_rate": 6.142698615402205e-06, + "loss": 0.1195, + "step": 5942 + }, + { + "epoch": 1.91, + "learning_rate": 6.139493994152428e-06, + "loss": 0.102, + "step": 5943 + }, + { + "epoch": 1.91, + "learning_rate": 6.136289838727727e-06, + "loss": 0.1059, + "step": 5944 + }, + { + "epoch": 1.91, + "learning_rate": 6.133086149514735e-06, + "loss": 0.1069, + "step": 5945 + }, + { + "epoch": 1.91, + "learning_rate": 6.129882926900024e-06, + "loss": 0.114, + "step": 5946 + }, + { + "epoch": 1.91, + "learning_rate": 6.126680171270096e-06, + "loss": 0.1129, + "step": 5947 + }, + { + "epoch": 1.91, + "learning_rate": 6.123477883011422e-06, + "loss": 0.109, + "step": 5948 + }, + { + "epoch": 1.91, + "learning_rate": 6.120276062510395e-06, + "loss": 0.115, + "step": 5949 + }, + { + "epoch": 1.91, + "learning_rate": 6.117074710153366e-06, + "loss": 0.1036, + "step": 5950 + }, + { + "epoch": 1.91, + "learning_rate": 6.113873826326623e-06, + "loss": 0.1169, + "step": 5951 + }, + { + "epoch": 1.91, + "learning_rate": 6.110673411416399e-06, + "loss": 0.0979, + "step": 5952 + }, + { + "epoch": 1.92, + "learning_rate": 6.107473465808864e-06, + "loss": 0.0941, + "step": 5953 + }, + { + "epoch": 1.92, + "learning_rate": 6.1042739898901416e-06, + "loss": 0.1026, + "step": 5954 + }, + { + "epoch": 1.92, + "learning_rate": 6.101074984046296e-06, + "loss": 0.1012, + "step": 5955 + }, + { + "epoch": 1.92, + "learning_rate": 6.0978764486633265e-06, + "loss": 0.1117, + "step": 5956 + }, + { + "epoch": 1.92, + "learning_rate": 6.09467838412719e-06, + "loss": 0.1056, + "step": 5957 + }, + { + "epoch": 1.92, + "learning_rate": 6.091480790823772e-06, + "loss": 0.1054, + "step": 5958 + }, + { + "epoch": 1.92, + "learning_rate": 6.088283669138913e-06, + "loss": 0.1067, + "step": 5959 + }, + { + "epoch": 1.92, + "learning_rate": 6.0850870194583854e-06, + "loss": 0.112, + "step": 5960 + }, + { + "epoch": 1.92, + "learning_rate": 6.081890842167916e-06, + "loss": 0.1025, + "step": 5961 + }, + { + "epoch": 1.92, + "learning_rate": 6.078695137653164e-06, + "loss": 0.0977, + "step": 5962 + }, + { + "epoch": 1.92, + "learning_rate": 6.075499906299742e-06, + "loss": 0.1131, + "step": 5963 + }, + { + "epoch": 1.92, + "learning_rate": 6.072305148493195e-06, + "loss": 0.0966, + "step": 5964 + }, + { + "epoch": 1.92, + "learning_rate": 6.069110864619023e-06, + "loss": 0.1113, + "step": 5965 + }, + { + "epoch": 1.92, + "learning_rate": 6.065917055062654e-06, + "loss": 0.1123, + "step": 5966 + }, + { + "epoch": 1.92, + "learning_rate": 6.062723720209472e-06, + "loss": 0.1058, + "step": 5967 + }, + { + "epoch": 1.92, + "learning_rate": 6.0595308604447955e-06, + "loss": 0.1088, + "step": 5968 + }, + { + "epoch": 1.92, + "learning_rate": 6.056338476153889e-06, + "loss": 0.1127, + "step": 5969 + }, + { + "epoch": 1.92, + "learning_rate": 6.0531465677219615e-06, + "loss": 0.1102, + "step": 5970 + }, + { + "epoch": 1.92, + "learning_rate": 6.049955135534159e-06, + "loss": 0.1043, + "step": 5971 + }, + { + "epoch": 1.92, + "learning_rate": 6.0467641799755796e-06, + "loss": 0.11, + "step": 5972 + }, + { + "epoch": 1.92, + "learning_rate": 6.043573701431249e-06, + "loss": 0.0994, + "step": 5973 + }, + { + "epoch": 1.92, + "learning_rate": 6.040383700286148e-06, + "loss": 0.1136, + "step": 5974 + }, + { + "epoch": 1.92, + "learning_rate": 6.037194176925194e-06, + "loss": 0.1083, + "step": 5975 + }, + { + "epoch": 1.92, + "learning_rate": 6.034005131733253e-06, + "loss": 0.1175, + "step": 5976 + }, + { + "epoch": 1.92, + "learning_rate": 6.030816565095123e-06, + "loss": 0.1062, + "step": 5977 + }, + { + "epoch": 1.92, + "learning_rate": 6.027628477395557e-06, + "loss": 0.102, + "step": 5978 + }, + { + "epoch": 1.92, + "learning_rate": 6.024440869019237e-06, + "loss": 0.1221, + "step": 5979 + }, + { + "epoch": 1.92, + "learning_rate": 6.021253740350793e-06, + "loss": 0.1038, + "step": 5980 + }, + { + "epoch": 1.92, + "learning_rate": 6.0180670917748e-06, + "loss": 0.11, + "step": 5981 + }, + { + "epoch": 1.92, + "learning_rate": 6.014880923675774e-06, + "loss": 0.1039, + "step": 5982 + }, + { + "epoch": 1.92, + "learning_rate": 6.01169523643817e-06, + "loss": 0.1116, + "step": 5983 + }, + { + "epoch": 1.93, + "learning_rate": 6.008510030446385e-06, + "loss": 0.1042, + "step": 5984 + }, + { + "epoch": 1.93, + "learning_rate": 6.005325306084766e-06, + "loss": 0.1274, + "step": 5985 + }, + { + "epoch": 1.93, + "learning_rate": 6.002141063737588e-06, + "loss": 0.1009, + "step": 5986 + }, + { + "epoch": 1.93, + "learning_rate": 5.998957303789075e-06, + "loss": 0.1021, + "step": 5987 + }, + { + "epoch": 1.93, + "learning_rate": 5.9957740266233975e-06, + "loss": 0.1179, + "step": 5988 + }, + { + "epoch": 1.93, + "learning_rate": 5.99259123262466e-06, + "loss": 0.1164, + "step": 5989 + }, + { + "epoch": 1.93, + "learning_rate": 5.989408922176916e-06, + "loss": 0.1157, + "step": 5990 + }, + { + "epoch": 1.93, + "learning_rate": 5.986227095664155e-06, + "loss": 0.1101, + "step": 5991 + }, + { + "epoch": 1.93, + "learning_rate": 5.983045753470308e-06, + "loss": 0.1032, + "step": 5992 + }, + { + "epoch": 1.93, + "learning_rate": 5.979864895979248e-06, + "loss": 0.1105, + "step": 5993 + }, + { + "epoch": 1.93, + "learning_rate": 5.976684523574795e-06, + "loss": 0.0995, + "step": 5994 + }, + { + "epoch": 1.93, + "learning_rate": 5.973504636640702e-06, + "loss": 0.1205, + "step": 5995 + }, + { + "epoch": 1.93, + "learning_rate": 5.97032523556067e-06, + "loss": 0.1008, + "step": 5996 + }, + { + "epoch": 1.93, + "learning_rate": 5.96714632071834e-06, + "loss": 0.1032, + "step": 5997 + }, + { + "epoch": 1.93, + "learning_rate": 5.9639678924972956e-06, + "loss": 0.108, + "step": 5998 + }, + { + "epoch": 1.93, + "learning_rate": 5.960789951281052e-06, + "loss": 0.1083, + "step": 5999 + }, + { + "epoch": 1.93, + "learning_rate": 5.957612497453078e-06, + "loss": 0.1065, + "step": 6000 + }, + { + "epoch": 1.93, + "learning_rate": 5.9544355313967785e-06, + "loss": 0.1006, + "step": 6001 + }, + { + "epoch": 1.93, + "learning_rate": 5.951259053495497e-06, + "loss": 0.1135, + "step": 6002 + }, + { + "epoch": 1.93, + "learning_rate": 5.948083064132526e-06, + "loss": 0.1012, + "step": 6003 + }, + { + "epoch": 1.93, + "learning_rate": 5.944907563691093e-06, + "loss": 0.1213, + "step": 6004 + }, + { + "epoch": 1.93, + "learning_rate": 5.941732552554362e-06, + "loss": 0.0963, + "step": 6005 + }, + { + "epoch": 1.93, + "learning_rate": 5.9385580311054455e-06, + "loss": 0.11, + "step": 6006 + }, + { + "epoch": 1.93, + "learning_rate": 5.935383999727398e-06, + "loss": 0.0994, + "step": 6007 + }, + { + "epoch": 1.93, + "learning_rate": 5.932210458803207e-06, + "loss": 0.1016, + "step": 6008 + }, + { + "epoch": 1.93, + "learning_rate": 5.929037408715812e-06, + "loss": 0.1146, + "step": 6009 + }, + { + "epoch": 1.93, + "learning_rate": 5.92586484984808e-06, + "loss": 0.1109, + "step": 6010 + }, + { + "epoch": 1.93, + "learning_rate": 5.922692782582831e-06, + "loss": 0.1055, + "step": 6011 + }, + { + "epoch": 1.93, + "learning_rate": 5.9195212073028145e-06, + "loss": 0.1103, + "step": 6012 + }, + { + "epoch": 1.93, + "learning_rate": 5.916350124390731e-06, + "loss": 0.1053, + "step": 6013 + }, + { + "epoch": 1.93, + "learning_rate": 5.913179534229213e-06, + "loss": 0.0938, + "step": 6014 + }, + { + "epoch": 1.94, + "learning_rate": 5.910009437200839e-06, + "loss": 0.1065, + "step": 6015 + }, + { + "epoch": 1.94, + "learning_rate": 5.9068398336881295e-06, + "loss": 0.1058, + "step": 6016 + }, + { + "epoch": 1.94, + "learning_rate": 5.90367072407354e-06, + "loss": 0.114, + "step": 6017 + }, + { + "epoch": 1.94, + "learning_rate": 5.900502108739466e-06, + "loss": 0.0981, + "step": 6018 + }, + { + "epoch": 1.94, + "learning_rate": 5.89733398806825e-06, + "loss": 0.1129, + "step": 6019 + }, + { + "epoch": 1.94, + "learning_rate": 5.894166362442167e-06, + "loss": 0.1038, + "step": 6020 + }, + { + "epoch": 1.94, + "learning_rate": 5.8909992322434396e-06, + "loss": 0.1112, + "step": 6021 + }, + { + "epoch": 1.94, + "learning_rate": 5.887832597854226e-06, + "loss": 0.1028, + "step": 6022 + }, + { + "epoch": 1.94, + "learning_rate": 5.884666459656626e-06, + "loss": 0.0975, + "step": 6023 + }, + { + "epoch": 1.94, + "learning_rate": 5.8815008180326825e-06, + "loss": 0.1053, + "step": 6024 + }, + { + "epoch": 1.94, + "learning_rate": 5.87833567336437e-06, + "loss": 0.1079, + "step": 6025 + }, + { + "epoch": 1.94, + "learning_rate": 5.875171026033609e-06, + "loss": 0.1083, + "step": 6026 + }, + { + "epoch": 1.94, + "learning_rate": 5.87200687642226e-06, + "loss": 0.1053, + "step": 6027 + }, + { + "epoch": 1.94, + "learning_rate": 5.868843224912126e-06, + "loss": 0.1216, + "step": 6028 + }, + { + "epoch": 1.94, + "learning_rate": 5.865680071884942e-06, + "loss": 0.105, + "step": 6029 + }, + { + "epoch": 1.94, + "learning_rate": 5.862517417722393e-06, + "loss": 0.1018, + "step": 6030 + }, + { + "epoch": 1.94, + "learning_rate": 5.85935526280609e-06, + "loss": 0.0916, + "step": 6031 + }, + { + "epoch": 1.94, + "learning_rate": 5.856193607517602e-06, + "loss": 0.0997, + "step": 6032 + }, + { + "epoch": 1.94, + "learning_rate": 5.853032452238419e-06, + "loss": 0.1002, + "step": 6033 + }, + { + "epoch": 1.94, + "learning_rate": 5.8498717973499845e-06, + "loss": 0.1201, + "step": 6034 + }, + { + "epoch": 1.94, + "learning_rate": 5.846711643233674e-06, + "loss": 0.1092, + "step": 6035 + }, + { + "epoch": 1.94, + "learning_rate": 5.843551990270805e-06, + "loss": 0.0928, + "step": 6036 + }, + { + "epoch": 1.94, + "learning_rate": 5.840392838842642e-06, + "loss": 0.1104, + "step": 6037 + }, + { + "epoch": 1.94, + "learning_rate": 5.83723418933037e-06, + "loss": 0.1028, + "step": 6038 + }, + { + "epoch": 1.94, + "learning_rate": 5.834076042115133e-06, + "loss": 0.1161, + "step": 6039 + }, + { + "epoch": 1.94, + "learning_rate": 5.830918397578005e-06, + "loss": 0.1014, + "step": 6040 + }, + { + "epoch": 1.94, + "learning_rate": 5.827761256099996e-06, + "loss": 0.1052, + "step": 6041 + }, + { + "epoch": 1.94, + "learning_rate": 5.824604618062069e-06, + "loss": 0.1131, + "step": 6042 + }, + { + "epoch": 1.94, + "learning_rate": 5.821448483845116e-06, + "loss": 0.1022, + "step": 6043 + }, + { + "epoch": 1.94, + "learning_rate": 5.818292853829957e-06, + "loss": 0.1023, + "step": 6044 + }, + { + "epoch": 1.94, + "learning_rate": 5.815137728397378e-06, + "loss": 0.1107, + "step": 6045 + }, + { + "epoch": 1.95, + "learning_rate": 5.811983107928085e-06, + "loss": 0.1123, + "step": 6046 + }, + { + "epoch": 1.95, + "learning_rate": 5.808828992802723e-06, + "loss": 0.1086, + "step": 6047 + }, + { + "epoch": 1.95, + "learning_rate": 5.8056753834018895e-06, + "loss": 0.0975, + "step": 6048 + }, + { + "epoch": 1.95, + "learning_rate": 5.802522280106109e-06, + "loss": 0.1243, + "step": 6049 + }, + { + "epoch": 1.95, + "learning_rate": 5.799369683295848e-06, + "loss": 0.1088, + "step": 6050 + }, + { + "epoch": 1.95, + "learning_rate": 5.796217593351512e-06, + "loss": 0.1136, + "step": 6051 + }, + { + "epoch": 1.95, + "learning_rate": 5.793066010653447e-06, + "loss": 0.1098, + "step": 6052 + }, + { + "epoch": 1.95, + "learning_rate": 5.789914935581929e-06, + "loss": 0.1055, + "step": 6053 + }, + { + "epoch": 1.95, + "learning_rate": 5.786764368517191e-06, + "loss": 0.0932, + "step": 6054 + }, + { + "epoch": 1.95, + "learning_rate": 5.7836143098393894e-06, + "loss": 0.1016, + "step": 6055 + }, + { + "epoch": 1.95, + "learning_rate": 5.780464759928623e-06, + "loss": 0.1041, + "step": 6056 + }, + { + "epoch": 1.95, + "learning_rate": 5.7773157191649305e-06, + "loss": 0.1118, + "step": 6057 + }, + { + "epoch": 1.95, + "learning_rate": 5.77416718792829e-06, + "loss": 0.1052, + "step": 6058 + }, + { + "epoch": 1.95, + "learning_rate": 5.771019166598608e-06, + "loss": 0.1008, + "step": 6059 + }, + { + "epoch": 1.95, + "learning_rate": 5.7678716555557515e-06, + "loss": 0.1051, + "step": 6060 + }, + { + "epoch": 1.95, + "learning_rate": 5.764724655179505e-06, + "loss": 0.1098, + "step": 6061 + }, + { + "epoch": 1.95, + "learning_rate": 5.761578165849596e-06, + "loss": 0.1162, + "step": 6062 + }, + { + "epoch": 1.95, + "learning_rate": 5.758432187945706e-06, + "loss": 0.1325, + "step": 6063 + }, + { + "epoch": 1.95, + "learning_rate": 5.7552867218474296e-06, + "loss": 0.1098, + "step": 6064 + }, + { + "epoch": 1.95, + "learning_rate": 5.752141767934312e-06, + "loss": 0.1024, + "step": 6065 + }, + { + "epoch": 1.95, + "learning_rate": 5.748997326585844e-06, + "loss": 0.1159, + "step": 6066 + }, + { + "epoch": 1.95, + "learning_rate": 5.745853398181445e-06, + "loss": 0.1023, + "step": 6067 + }, + { + "epoch": 1.95, + "learning_rate": 5.7427099831004695e-06, + "loss": 0.1081, + "step": 6068 + }, + { + "epoch": 1.95, + "learning_rate": 5.739567081722227e-06, + "loss": 0.0957, + "step": 6069 + }, + { + "epoch": 1.95, + "learning_rate": 5.736424694425943e-06, + "loss": 0.1044, + "step": 6070 + }, + { + "epoch": 1.95, + "learning_rate": 5.7332828215907895e-06, + "loss": 0.1033, + "step": 6071 + }, + { + "epoch": 1.95, + "learning_rate": 5.730141463595887e-06, + "loss": 0.1104, + "step": 6072 + }, + { + "epoch": 1.95, + "learning_rate": 5.7270006208202825e-06, + "loss": 0.1035, + "step": 6073 + }, + { + "epoch": 1.95, + "learning_rate": 5.7238602936429575e-06, + "loss": 0.1196, + "step": 6074 + }, + { + "epoch": 1.95, + "learning_rate": 5.7207204824428454e-06, + "loss": 0.1072, + "step": 6075 + }, + { + "epoch": 1.95, + "learning_rate": 5.71758118759881e-06, + "loss": 0.1048, + "step": 6076 + }, + { + "epoch": 1.95, + "learning_rate": 5.714442409489639e-06, + "loss": 0.1059, + "step": 6077 + }, + { + "epoch": 1.96, + "learning_rate": 5.711304148494084e-06, + "loss": 0.118, + "step": 6078 + }, + { + "epoch": 1.96, + "learning_rate": 5.7081664049908155e-06, + "loss": 0.1046, + "step": 6079 + }, + { + "epoch": 1.96, + "learning_rate": 5.705029179358445e-06, + "loss": 0.1083, + "step": 6080 + }, + { + "epoch": 1.96, + "learning_rate": 5.7018924719755294e-06, + "loss": 0.1032, + "step": 6081 + }, + { + "epoch": 1.96, + "learning_rate": 5.698756283220559e-06, + "loss": 0.106, + "step": 6082 + }, + { + "epoch": 1.96, + "learning_rate": 5.695620613471946e-06, + "loss": 0.0947, + "step": 6083 + }, + { + "epoch": 1.96, + "learning_rate": 5.692485463108066e-06, + "loss": 0.0923, + "step": 6084 + }, + { + "epoch": 1.96, + "learning_rate": 5.6893508325072165e-06, + "loss": 0.1071, + "step": 6085 + }, + { + "epoch": 1.96, + "learning_rate": 5.6862167220476305e-06, + "loss": 0.1048, + "step": 6086 + }, + { + "epoch": 1.96, + "learning_rate": 5.683083132107489e-06, + "loss": 0.1012, + "step": 6087 + }, + { + "epoch": 1.96, + "learning_rate": 5.679950063064906e-06, + "loss": 0.1228, + "step": 6088 + }, + { + "epoch": 1.96, + "learning_rate": 5.6768175152979274e-06, + "loss": 0.1138, + "step": 6089 + }, + { + "epoch": 1.96, + "learning_rate": 5.6736854891845394e-06, + "loss": 0.1081, + "step": 6090 + }, + { + "epoch": 1.96, + "learning_rate": 5.6705539851026644e-06, + "loss": 0.1119, + "step": 6091 + }, + { + "epoch": 1.96, + "learning_rate": 5.667423003430164e-06, + "loss": 0.1128, + "step": 6092 + }, + { + "epoch": 1.96, + "learning_rate": 5.664292544544838e-06, + "loss": 0.1135, + "step": 6093 + }, + { + "epoch": 1.96, + "learning_rate": 5.66116260882442e-06, + "loss": 0.1134, + "step": 6094 + }, + { + "epoch": 1.96, + "learning_rate": 5.6580331966465805e-06, + "loss": 0.1111, + "step": 6095 + }, + { + "epoch": 1.96, + "learning_rate": 5.6549043083889285e-06, + "loss": 0.0986, + "step": 6096 + }, + { + "epoch": 1.96, + "learning_rate": 5.6517759444290084e-06, + "loss": 0.0995, + "step": 6097 + }, + { + "epoch": 1.96, + "learning_rate": 5.648648105144299e-06, + "loss": 0.1057, + "step": 6098 + }, + { + "epoch": 1.96, + "learning_rate": 5.645520790912224e-06, + "loss": 0.101, + "step": 6099 + }, + { + "epoch": 1.96, + "learning_rate": 5.642394002110137e-06, + "loss": 0.1042, + "step": 6100 + }, + { + "epoch": 1.96, + "learning_rate": 5.639267739115327e-06, + "loss": 0.101, + "step": 6101 + }, + { + "epoch": 1.96, + "learning_rate": 5.636142002305025e-06, + "loss": 0.1018, + "step": 6102 + }, + { + "epoch": 1.96, + "learning_rate": 5.633016792056391e-06, + "loss": 0.1024, + "step": 6103 + }, + { + "epoch": 1.96, + "learning_rate": 5.629892108746533e-06, + "loss": 0.1034, + "step": 6104 + }, + { + "epoch": 1.96, + "learning_rate": 5.626767952752485e-06, + "loss": 0.1011, + "step": 6105 + }, + { + "epoch": 1.96, + "learning_rate": 5.623644324451223e-06, + "loss": 0.1037, + "step": 6106 + }, + { + "epoch": 1.96, + "learning_rate": 5.620521224219651e-06, + "loss": 0.1036, + "step": 6107 + }, + { + "epoch": 1.96, + "learning_rate": 5.617398652434628e-06, + "loss": 0.1061, + "step": 6108 + }, + { + "epoch": 1.97, + "learning_rate": 5.6142766094729204e-06, + "loss": 0.1039, + "step": 6109 + }, + { + "epoch": 1.97, + "learning_rate": 5.61115509571126e-06, + "loss": 0.1044, + "step": 6110 + }, + { + "epoch": 1.97, + "learning_rate": 5.608034111526298e-06, + "loss": 0.1, + "step": 6111 + }, + { + "epoch": 1.97, + "learning_rate": 5.604913657294625e-06, + "loss": 0.0914, + "step": 6112 + }, + { + "epoch": 1.97, + "learning_rate": 5.6017937333927644e-06, + "loss": 0.107, + "step": 6113 + }, + { + "epoch": 1.97, + "learning_rate": 5.598674340197192e-06, + "loss": 0.1069, + "step": 6114 + }, + { + "epoch": 1.97, + "learning_rate": 5.59555547808429e-06, + "loss": 0.1054, + "step": 6115 + }, + { + "epoch": 1.97, + "learning_rate": 5.592437147430407e-06, + "loss": 0.1012, + "step": 6116 + }, + { + "epoch": 1.97, + "learning_rate": 5.5893193486118105e-06, + "loss": 0.1043, + "step": 6117 + }, + { + "epoch": 1.97, + "learning_rate": 5.586202082004703e-06, + "loss": 0.1054, + "step": 6118 + }, + { + "epoch": 1.97, + "learning_rate": 5.583085347985228e-06, + "loss": 0.1055, + "step": 6119 + }, + { + "epoch": 1.97, + "learning_rate": 5.579969146929472e-06, + "loss": 0.1037, + "step": 6120 + }, + { + "epoch": 1.97, + "learning_rate": 5.576853479213441e-06, + "loss": 0.1247, + "step": 6121 + }, + { + "epoch": 1.97, + "learning_rate": 5.573738345213087e-06, + "loss": 0.1227, + "step": 6122 + }, + { + "epoch": 1.97, + "learning_rate": 5.570623745304298e-06, + "loss": 0.0976, + "step": 6123 + }, + { + "epoch": 1.97, + "learning_rate": 5.567509679862886e-06, + "loss": 0.1113, + "step": 6124 + }, + { + "epoch": 1.97, + "learning_rate": 5.564396149264617e-06, + "loss": 0.101, + "step": 6125 + }, + { + "epoch": 1.97, + "learning_rate": 5.5612831538851806e-06, + "loss": 0.1058, + "step": 6126 + }, + { + "epoch": 1.97, + "learning_rate": 5.5581706941002025e-06, + "loss": 0.0954, + "step": 6127 + }, + { + "epoch": 1.97, + "learning_rate": 5.5550587702852465e-06, + "loss": 0.1084, + "step": 6128 + }, + { + "epoch": 1.97, + "learning_rate": 5.55194738281581e-06, + "loss": 0.1023, + "step": 6129 + }, + { + "epoch": 1.97, + "learning_rate": 5.548836532067321e-06, + "loss": 0.1098, + "step": 6130 + }, + { + "epoch": 1.97, + "learning_rate": 5.545726218415157e-06, + "loss": 0.0997, + "step": 6131 + }, + { + "epoch": 1.97, + "learning_rate": 5.542616442234618e-06, + "loss": 0.1032, + "step": 6132 + }, + { + "epoch": 1.97, + "learning_rate": 5.539507203900942e-06, + "loss": 0.1022, + "step": 6133 + }, + { + "epoch": 1.97, + "learning_rate": 5.536398503789305e-06, + "loss": 0.1036, + "step": 6134 + }, + { + "epoch": 1.97, + "learning_rate": 5.533290342274814e-06, + "loss": 0.1034, + "step": 6135 + }, + { + "epoch": 1.97, + "learning_rate": 5.530182719732509e-06, + "loss": 0.1052, + "step": 6136 + }, + { + "epoch": 1.97, + "learning_rate": 5.527075636537378e-06, + "loss": 0.0991, + "step": 6137 + }, + { + "epoch": 1.97, + "learning_rate": 5.52396909306433e-06, + "loss": 0.1048, + "step": 6138 + }, + { + "epoch": 1.97, + "learning_rate": 5.52086308968821e-06, + "loss": 0.106, + "step": 6139 + }, + { + "epoch": 1.98, + "learning_rate": 5.517757626783813e-06, + "loss": 0.1054, + "step": 6140 + }, + { + "epoch": 1.98, + "learning_rate": 5.5146527047258465e-06, + "loss": 0.1102, + "step": 6141 + }, + { + "epoch": 1.98, + "learning_rate": 5.511548323888964e-06, + "loss": 0.1, + "step": 6142 + }, + { + "epoch": 1.98, + "learning_rate": 5.508444484647759e-06, + "loss": 0.1004, + "step": 6143 + }, + { + "epoch": 1.98, + "learning_rate": 5.505341187376753e-06, + "loss": 0.1122, + "step": 6144 + }, + { + "epoch": 1.98, + "learning_rate": 5.502238432450396e-06, + "loss": 0.1105, + "step": 6145 + }, + { + "epoch": 1.98, + "learning_rate": 5.49913622024309e-06, + "loss": 0.1026, + "step": 6146 + }, + { + "epoch": 1.98, + "learning_rate": 5.496034551129159e-06, + "loss": 0.1038, + "step": 6147 + }, + { + "epoch": 1.98, + "learning_rate": 5.492933425482854e-06, + "loss": 0.0982, + "step": 6148 + }, + { + "epoch": 1.98, + "learning_rate": 5.4898328436783795e-06, + "loss": 0.1121, + "step": 6149 + }, + { + "epoch": 1.98, + "learning_rate": 5.486732806089863e-06, + "loss": 0.1088, + "step": 6150 + }, + { + "epoch": 1.98, + "learning_rate": 5.483633313091363e-06, + "loss": 0.1035, + "step": 6151 + }, + { + "epoch": 1.98, + "learning_rate": 5.480534365056886e-06, + "loss": 0.096, + "step": 6152 + }, + { + "epoch": 1.98, + "learning_rate": 5.477435962360364e-06, + "loss": 0.1068, + "step": 6153 + }, + { + "epoch": 1.98, + "learning_rate": 5.474338105375651e-06, + "loss": 0.1058, + "step": 6154 + }, + { + "epoch": 1.98, + "learning_rate": 5.4712407944765625e-06, + "loss": 0.1016, + "step": 6155 + }, + { + "epoch": 1.98, + "learning_rate": 5.468144030036824e-06, + "loss": 0.1055, + "step": 6156 + }, + { + "epoch": 1.98, + "learning_rate": 5.465047812430104e-06, + "loss": 0.1122, + "step": 6157 + }, + { + "epoch": 1.98, + "learning_rate": 5.4619521420300136e-06, + "loss": 0.1126, + "step": 6158 + }, + { + "epoch": 1.98, + "learning_rate": 5.458857019210083e-06, + "loss": 0.1073, + "step": 6159 + }, + { + "epoch": 1.98, + "learning_rate": 5.455762444343785e-06, + "loss": 0.111, + "step": 6160 + }, + { + "epoch": 1.98, + "learning_rate": 5.452668417804523e-06, + "loss": 0.0991, + "step": 6161 + }, + { + "epoch": 1.98, + "learning_rate": 5.449574939965637e-06, + "loss": 0.0976, + "step": 6162 + }, + { + "epoch": 1.98, + "learning_rate": 5.446482011200394e-06, + "loss": 0.1046, + "step": 6163 + }, + { + "epoch": 1.98, + "learning_rate": 5.4433896318820065e-06, + "loss": 0.1094, + "step": 6164 + }, + { + "epoch": 1.98, + "learning_rate": 5.4402978023836124e-06, + "loss": 0.101, + "step": 6165 + }, + { + "epoch": 1.98, + "learning_rate": 5.437206523078285e-06, + "loss": 0.0938, + "step": 6166 + }, + { + "epoch": 1.98, + "learning_rate": 5.43411579433903e-06, + "loss": 0.1, + "step": 6167 + }, + { + "epoch": 1.98, + "learning_rate": 5.431025616538787e-06, + "loss": 0.102, + "step": 6168 + }, + { + "epoch": 1.98, + "learning_rate": 5.4279359900504294e-06, + "loss": 0.1123, + "step": 6169 + }, + { + "epoch": 1.98, + "learning_rate": 5.42484691524677e-06, + "loss": 0.1187, + "step": 6170 + }, + { + "epoch": 1.99, + "learning_rate": 5.421758392500546e-06, + "loss": 0.1049, + "step": 6171 + }, + { + "epoch": 1.99, + "learning_rate": 5.418670422184428e-06, + "loss": 0.1127, + "step": 6172 + }, + { + "epoch": 1.99, + "learning_rate": 5.415583004671036e-06, + "loss": 0.1257, + "step": 6173 + }, + { + "epoch": 1.99, + "learning_rate": 5.412496140332898e-06, + "loss": 0.0969, + "step": 6174 + }, + { + "epoch": 1.99, + "learning_rate": 5.409409829542489e-06, + "loss": 0.1044, + "step": 6175 + }, + { + "epoch": 1.99, + "learning_rate": 5.406324072672224e-06, + "loss": 0.1084, + "step": 6176 + }, + { + "epoch": 1.99, + "learning_rate": 5.403238870094441e-06, + "loss": 0.1128, + "step": 6177 + }, + { + "epoch": 1.99, + "learning_rate": 5.400154222181407e-06, + "loss": 0.1125, + "step": 6178 + }, + { + "epoch": 1.99, + "learning_rate": 5.397070129305343e-06, + "loss": 0.1119, + "step": 6179 + }, + { + "epoch": 1.99, + "learning_rate": 5.393986591838376e-06, + "loss": 0.1048, + "step": 6180 + }, + { + "epoch": 1.99, + "learning_rate": 5.390903610152578e-06, + "loss": 0.1039, + "step": 6181 + }, + { + "epoch": 1.99, + "learning_rate": 5.387821184619964e-06, + "loss": 0.1083, + "step": 6182 + }, + { + "epoch": 1.99, + "learning_rate": 5.384739315612466e-06, + "loss": 0.1096, + "step": 6183 + }, + { + "epoch": 1.99, + "learning_rate": 5.381658003501955e-06, + "loss": 0.1026, + "step": 6184 + }, + { + "epoch": 1.99, + "learning_rate": 5.378577248660242e-06, + "loss": 0.102, + "step": 6185 + }, + { + "epoch": 1.99, + "learning_rate": 5.375497051459058e-06, + "loss": 0.1009, + "step": 6186 + }, + { + "epoch": 1.99, + "learning_rate": 5.372417412270075e-06, + "loss": 0.1202, + "step": 6187 + }, + { + "epoch": 1.99, + "learning_rate": 5.369338331464895e-06, + "loss": 0.0983, + "step": 6188 + }, + { + "epoch": 1.99, + "learning_rate": 5.366259809415053e-06, + "loss": 0.1006, + "step": 6189 + }, + { + "epoch": 1.99, + "learning_rate": 5.363181846492012e-06, + "loss": 0.0999, + "step": 6190 + }, + { + "epoch": 1.99, + "learning_rate": 5.360104443067179e-06, + "loss": 0.1144, + "step": 6191 + }, + { + "epoch": 1.99, + "learning_rate": 5.3570275995118844e-06, + "loss": 0.1043, + "step": 6192 + }, + { + "epoch": 1.99, + "learning_rate": 5.353951316197393e-06, + "loss": 0.1073, + "step": 6193 + }, + { + "epoch": 1.99, + "learning_rate": 5.350875593494902e-06, + "loss": 0.1011, + "step": 6194 + }, + { + "epoch": 1.99, + "learning_rate": 5.34780043177554e-06, + "loss": 0.1002, + "step": 6195 + }, + { + "epoch": 1.99, + "learning_rate": 5.344725831410369e-06, + "loss": 0.1108, + "step": 6196 + }, + { + "epoch": 1.99, + "learning_rate": 5.341651792770386e-06, + "loss": 0.1083, + "step": 6197 + }, + { + "epoch": 1.99, + "learning_rate": 5.338578316226517e-06, + "loss": 0.1033, + "step": 6198 + }, + { + "epoch": 1.99, + "learning_rate": 5.33550540214962e-06, + "loss": 0.1073, + "step": 6199 + }, + { + "epoch": 1.99, + "learning_rate": 5.332433050910486e-06, + "loss": 0.1138, + "step": 6200 + }, + { + "epoch": 1.99, + "learning_rate": 5.329361262879833e-06, + "loss": 0.1032, + "step": 6201 + }, + { + "epoch": 2.0, + "learning_rate": 5.326290038428326e-06, + "loss": 0.1014, + "step": 6202 + }, + { + "epoch": 2.0, + "learning_rate": 5.323219377926545e-06, + "loss": 0.109, + "step": 6203 + }, + { + "epoch": 2.0, + "learning_rate": 5.3201492817450125e-06, + "loss": 0.1033, + "step": 6204 + }, + { + "epoch": 2.0, + "learning_rate": 5.317079750254177e-06, + "loss": 0.0959, + "step": 6205 + }, + { + "epoch": 2.0, + "learning_rate": 5.3140107838244215e-06, + "loss": 0.1066, + "step": 6206 + }, + { + "epoch": 2.0, + "learning_rate": 5.310942382826058e-06, + "loss": 0.1023, + "step": 6207 + }, + { + "epoch": 2.0, + "learning_rate": 5.307874547629339e-06, + "loss": 0.0988, + "step": 6208 + }, + { + "epoch": 2.0, + "learning_rate": 5.304807278604438e-06, + "loss": 0.1099, + "step": 6209 + }, + { + "epoch": 2.0, + "learning_rate": 5.301740576121468e-06, + "loss": 0.1068, + "step": 6210 + }, + { + "epoch": 2.0, + "learning_rate": 5.298674440550463e-06, + "loss": 0.102, + "step": 6211 + }, + { + "epoch": 2.0, + "learning_rate": 5.295608872261411e-06, + "loss": 0.1133, + "step": 6212 + }, + { + "epoch": 2.0, + "learning_rate": 5.2925438716241965e-06, + "loss": 0.0913, + "step": 6213 + }, + { + "epoch": 2.0, + "learning_rate": 5.2894794390086715e-06, + "loss": 0.0982, + "step": 6214 + }, + { + "epoch": 2.0, + "learning_rate": 5.286415574784597e-06, + "loss": 0.0935, + "step": 6215 + }, + { + "epoch": 2.0, + "learning_rate": 5.283352279321673e-06, + "loss": 0.1108, + "step": 6216 + }, + { + "epoch": 2.0, + "learning_rate": 5.280289552989526e-06, + "loss": 0.1043, + "step": 6217 + }, + { + "epoch": 2.0, + "learning_rate": 5.2772273961577285e-06, + "loss": 0.0798, + "step": 6218 + }, + { + "epoch": 2.0, + "learning_rate": 5.274165809195757e-06, + "loss": 0.0775, + "step": 6219 + }, + { + "epoch": 2.0, + "learning_rate": 5.27110479247305e-06, + "loss": 0.0711, + "step": 6220 + }, + { + "epoch": 2.0, + "learning_rate": 5.268044346358957e-06, + "loss": 0.0721, + "step": 6221 + }, + { + "epoch": 2.0, + "learning_rate": 5.264984471222761e-06, + "loss": 0.0895, + "step": 6222 + }, + { + "epoch": 2.0, + "learning_rate": 5.261925167433688e-06, + "loss": 0.0806, + "step": 6223 + }, + { + "epoch": 2.0, + "learning_rate": 5.258866435360881e-06, + "loss": 0.0712, + "step": 6224 + }, + { + "epoch": 2.0, + "learning_rate": 5.25580827537342e-06, + "loss": 0.078, + "step": 6225 + }, + { + "epoch": 2.0, + "learning_rate": 5.252750687840317e-06, + "loss": 0.0743, + "step": 6226 + }, + { + "epoch": 2.0, + "learning_rate": 5.249693673130512e-06, + "loss": 0.073, + "step": 6227 + }, + { + "epoch": 2.0, + "learning_rate": 5.246637231612874e-06, + "loss": 0.0676, + "step": 6228 + }, + { + "epoch": 2.0, + "learning_rate": 5.243581363656216e-06, + "loss": 0.0732, + "step": 6229 + }, + { + "epoch": 2.0, + "learning_rate": 5.240526069629265e-06, + "loss": 0.0685, + "step": 6230 + }, + { + "epoch": 2.0, + "learning_rate": 5.237471349900687e-06, + "loss": 0.0713, + "step": 6231 + }, + { + "epoch": 2.0, + "learning_rate": 5.234417204839079e-06, + "loss": 0.0784, + "step": 6232 + }, + { + "epoch": 2.01, + "learning_rate": 5.231363634812965e-06, + "loss": 0.0751, + "step": 6233 + }, + { + "epoch": 2.01, + "learning_rate": 5.228310640190798e-06, + "loss": 0.0769, + "step": 6234 + }, + { + "epoch": 2.01, + "learning_rate": 5.2252582213409745e-06, + "loss": 0.071, + "step": 6235 + }, + { + "epoch": 2.01, + "learning_rate": 5.222206378631807e-06, + "loss": 0.0713, + "step": 6236 + }, + { + "epoch": 2.01, + "learning_rate": 5.219155112431544e-06, + "loss": 0.0722, + "step": 6237 + }, + { + "epoch": 2.01, + "learning_rate": 5.2161044231083656e-06, + "loss": 0.0762, + "step": 6238 + }, + { + "epoch": 2.01, + "learning_rate": 5.2130543110303815e-06, + "loss": 0.0787, + "step": 6239 + }, + { + "epoch": 2.01, + "learning_rate": 5.210004776565624e-06, + "loss": 0.069, + "step": 6240 + }, + { + "epoch": 2.01, + "learning_rate": 5.206955820082074e-06, + "loss": 0.0796, + "step": 6241 + }, + { + "epoch": 2.01, + "learning_rate": 5.2039074419476245e-06, + "loss": 0.0754, + "step": 6242 + }, + { + "epoch": 2.01, + "learning_rate": 5.200859642530105e-06, + "loss": 0.0691, + "step": 6243 + }, + { + "epoch": 2.01, + "learning_rate": 5.197812422197286e-06, + "loss": 0.0712, + "step": 6244 + }, + { + "epoch": 2.01, + "learning_rate": 5.194765781316846e-06, + "loss": 0.0684, + "step": 6245 + }, + { + "epoch": 2.01, + "learning_rate": 5.191719720256407e-06, + "loss": 0.0739, + "step": 6246 + }, + { + "epoch": 2.01, + "learning_rate": 5.188674239383527e-06, + "loss": 0.0772, + "step": 6247 + }, + { + "epoch": 2.01, + "learning_rate": 5.185629339065681e-06, + "loss": 0.0736, + "step": 6248 + }, + { + "epoch": 2.01, + "learning_rate": 5.1825850196702785e-06, + "loss": 0.0655, + "step": 6249 + }, + { + "epoch": 2.01, + "learning_rate": 5.1795412815646705e-06, + "loss": 0.0771, + "step": 6250 + }, + { + "epoch": 2.01, + "learning_rate": 5.1764981251161166e-06, + "loss": 0.0717, + "step": 6251 + }, + { + "epoch": 2.01, + "learning_rate": 5.173455550691815e-06, + "loss": 0.0867, + "step": 6252 + }, + { + "epoch": 2.01, + "learning_rate": 5.170413558658904e-06, + "loss": 0.0765, + "step": 6253 + }, + { + "epoch": 2.01, + "learning_rate": 5.167372149384442e-06, + "loss": 0.0685, + "step": 6254 + }, + { + "epoch": 2.01, + "learning_rate": 5.164331323235411e-06, + "loss": 0.0631, + "step": 6255 + }, + { + "epoch": 2.01, + "learning_rate": 5.161291080578739e-06, + "loss": 0.0752, + "step": 6256 + }, + { + "epoch": 2.01, + "learning_rate": 5.158251421781276e-06, + "loss": 0.0753, + "step": 6257 + }, + { + "epoch": 2.01, + "learning_rate": 5.155212347209788e-06, + "loss": 0.0727, + "step": 6258 + }, + { + "epoch": 2.01, + "learning_rate": 5.152173857230992e-06, + "loss": 0.0788, + "step": 6259 + }, + { + "epoch": 2.01, + "learning_rate": 5.149135952211524e-06, + "loss": 0.0727, + "step": 6260 + }, + { + "epoch": 2.01, + "learning_rate": 5.146098632517945e-06, + "loss": 0.081, + "step": 6261 + }, + { + "epoch": 2.01, + "learning_rate": 5.143061898516759e-06, + "loss": 0.0801, + "step": 6262 + }, + { + "epoch": 2.01, + "learning_rate": 5.140025750574394e-06, + "loss": 0.0698, + "step": 6263 + }, + { + "epoch": 2.02, + "learning_rate": 5.136990189057187e-06, + "loss": 0.0679, + "step": 6264 + }, + { + "epoch": 2.02, + "learning_rate": 5.133955214331439e-06, + "loss": 0.0832, + "step": 6265 + }, + { + "epoch": 2.02, + "learning_rate": 5.130920826763357e-06, + "loss": 0.0704, + "step": 6266 + }, + { + "epoch": 2.02, + "learning_rate": 5.127887026719078e-06, + "loss": 0.0785, + "step": 6267 + }, + { + "epoch": 2.02, + "learning_rate": 5.124853814564683e-06, + "loss": 0.0696, + "step": 6268 + }, + { + "epoch": 2.02, + "learning_rate": 5.1218211906661675e-06, + "loss": 0.0696, + "step": 6269 + }, + { + "epoch": 2.02, + "learning_rate": 5.118789155389461e-06, + "loss": 0.0812, + "step": 6270 + }, + { + "epoch": 2.02, + "learning_rate": 5.115757709100421e-06, + "loss": 0.0619, + "step": 6271 + }, + { + "epoch": 2.02, + "learning_rate": 5.112726852164836e-06, + "loss": 0.075, + "step": 6272 + }, + { + "epoch": 2.02, + "learning_rate": 5.109696584948417e-06, + "loss": 0.0744, + "step": 6273 + }, + { + "epoch": 2.02, + "learning_rate": 5.106666907816818e-06, + "loss": 0.0674, + "step": 6274 + }, + { + "epoch": 2.02, + "learning_rate": 5.103637821135608e-06, + "loss": 0.0681, + "step": 6275 + }, + { + "epoch": 2.02, + "learning_rate": 5.100609325270288e-06, + "loss": 0.0755, + "step": 6276 + }, + { + "epoch": 2.02, + "learning_rate": 5.097581420586292e-06, + "loss": 0.07, + "step": 6277 + }, + { + "epoch": 2.02, + "learning_rate": 5.094554107448979e-06, + "loss": 0.0703, + "step": 6278 + }, + { + "epoch": 2.02, + "learning_rate": 5.091527386223632e-06, + "loss": 0.0745, + "step": 6279 + }, + { + "epoch": 2.02, + "learning_rate": 5.0885012572754774e-06, + "loss": 0.0654, + "step": 6280 + }, + { + "epoch": 2.02, + "learning_rate": 5.085475720969657e-06, + "loss": 0.0769, + "step": 6281 + }, + { + "epoch": 2.02, + "learning_rate": 5.082450777671242e-06, + "loss": 0.0705, + "step": 6282 + }, + { + "epoch": 2.02, + "learning_rate": 5.079426427745244e-06, + "loss": 0.0781, + "step": 6283 + }, + { + "epoch": 2.02, + "learning_rate": 5.076402671556578e-06, + "loss": 0.0648, + "step": 6284 + }, + { + "epoch": 2.02, + "learning_rate": 5.073379509470117e-06, + "loss": 0.0652, + "step": 6285 + }, + { + "epoch": 2.02, + "learning_rate": 5.070356941850646e-06, + "loss": 0.0722, + "step": 6286 + }, + { + "epoch": 2.02, + "learning_rate": 5.0673349690628785e-06, + "loss": 0.0684, + "step": 6287 + }, + { + "epoch": 2.02, + "learning_rate": 5.064313591471455e-06, + "loss": 0.0724, + "step": 6288 + }, + { + "epoch": 2.02, + "learning_rate": 5.061292809440958e-06, + "loss": 0.0693, + "step": 6289 + }, + { + "epoch": 2.02, + "learning_rate": 5.058272623335877e-06, + "loss": 0.0677, + "step": 6290 + }, + { + "epoch": 2.02, + "learning_rate": 5.055253033520648e-06, + "loss": 0.0753, + "step": 6291 + }, + { + "epoch": 2.02, + "learning_rate": 5.052234040359625e-06, + "loss": 0.069, + "step": 6292 + }, + { + "epoch": 2.02, + "learning_rate": 5.0492156442170914e-06, + "loss": 0.0738, + "step": 6293 + }, + { + "epoch": 2.02, + "learning_rate": 5.046197845457258e-06, + "loss": 0.0726, + "step": 6294 + }, + { + "epoch": 2.03, + "learning_rate": 5.043180644444272e-06, + "loss": 0.0715, + "step": 6295 + }, + { + "epoch": 2.03, + "learning_rate": 5.040164041542197e-06, + "loss": 0.0775, + "step": 6296 + }, + { + "epoch": 2.03, + "learning_rate": 5.03714803711503e-06, + "loss": 0.0714, + "step": 6297 + }, + { + "epoch": 2.03, + "learning_rate": 5.034132631526696e-06, + "loss": 0.0704, + "step": 6298 + }, + { + "epoch": 2.03, + "learning_rate": 5.03111782514104e-06, + "loss": 0.0745, + "step": 6299 + }, + { + "epoch": 2.03, + "learning_rate": 5.028103618321851e-06, + "loss": 0.0799, + "step": 6300 + }, + { + "epoch": 2.03, + "learning_rate": 5.025090011432832e-06, + "loss": 0.0703, + "step": 6301 + }, + { + "epoch": 2.03, + "learning_rate": 5.022077004837618e-06, + "loss": 0.0721, + "step": 6302 + }, + { + "epoch": 2.03, + "learning_rate": 5.01906459889977e-06, + "loss": 0.0742, + "step": 6303 + }, + { + "epoch": 2.03, + "learning_rate": 5.01605279398278e-06, + "loss": 0.0744, + "step": 6304 + }, + { + "epoch": 2.03, + "learning_rate": 5.013041590450057e-06, + "loss": 0.0846, + "step": 6305 + }, + { + "epoch": 2.03, + "learning_rate": 5.010030988664958e-06, + "loss": 0.0682, + "step": 6306 + }, + { + "epoch": 2.03, + "learning_rate": 5.007020988990748e-06, + "loss": 0.0772, + "step": 6307 + }, + { + "epoch": 2.03, + "learning_rate": 5.00401159179063e-06, + "loss": 0.0731, + "step": 6308 + }, + { + "epoch": 2.03, + "learning_rate": 5.001002797427725e-06, + "loss": 0.0796, + "step": 6309 + }, + { + "epoch": 2.03, + "learning_rate": 4.997994606265092e-06, + "loss": 0.0758, + "step": 6310 + }, + { + "epoch": 2.03, + "learning_rate": 4.994987018665707e-06, + "loss": 0.0757, + "step": 6311 + }, + { + "epoch": 2.03, + "learning_rate": 4.991980034992484e-06, + "loss": 0.0762, + "step": 6312 + }, + { + "epoch": 2.03, + "learning_rate": 4.988973655608257e-06, + "loss": 0.0776, + "step": 6313 + }, + { + "epoch": 2.03, + "learning_rate": 4.985967880875787e-06, + "loss": 0.0708, + "step": 6314 + }, + { + "epoch": 2.03, + "learning_rate": 4.982962711157765e-06, + "loss": 0.0684, + "step": 6315 + }, + { + "epoch": 2.03, + "learning_rate": 4.979958146816807e-06, + "loss": 0.0717, + "step": 6316 + }, + { + "epoch": 2.03, + "learning_rate": 4.9769541882154515e-06, + "loss": 0.0652, + "step": 6317 + }, + { + "epoch": 2.03, + "learning_rate": 4.973950835716178e-06, + "loss": 0.0703, + "step": 6318 + }, + { + "epoch": 2.03, + "learning_rate": 4.970948089681379e-06, + "loss": 0.0747, + "step": 6319 + }, + { + "epoch": 2.03, + "learning_rate": 4.967945950473377e-06, + "loss": 0.0649, + "step": 6320 + }, + { + "epoch": 2.03, + "learning_rate": 4.964944418454426e-06, + "loss": 0.0721, + "step": 6321 + }, + { + "epoch": 2.03, + "learning_rate": 4.961943493986709e-06, + "loss": 0.0761, + "step": 6322 + }, + { + "epoch": 2.03, + "learning_rate": 4.958943177432315e-06, + "loss": 0.0709, + "step": 6323 + }, + { + "epoch": 2.03, + "learning_rate": 4.955943469153287e-06, + "loss": 0.0733, + "step": 6324 + }, + { + "epoch": 2.03, + "learning_rate": 4.952944369511581e-06, + "loss": 0.0721, + "step": 6325 + }, + { + "epoch": 2.04, + "learning_rate": 4.949945878869075e-06, + "loss": 0.0728, + "step": 6326 + }, + { + "epoch": 2.04, + "learning_rate": 4.946947997587588e-06, + "loss": 0.074, + "step": 6327 + }, + { + "epoch": 2.04, + "learning_rate": 4.9439507260288565e-06, + "loss": 0.0771, + "step": 6328 + }, + { + "epoch": 2.04, + "learning_rate": 4.940954064554534e-06, + "loss": 0.076, + "step": 6329 + }, + { + "epoch": 2.04, + "learning_rate": 4.937958013526222e-06, + "loss": 0.0689, + "step": 6330 + }, + { + "epoch": 2.04, + "learning_rate": 4.934962573305431e-06, + "loss": 0.0713, + "step": 6331 + }, + { + "epoch": 2.04, + "learning_rate": 4.931967744253601e-06, + "loss": 0.0765, + "step": 6332 + }, + { + "epoch": 2.04, + "learning_rate": 4.928973526732107e-06, + "loss": 0.069, + "step": 6333 + }, + { + "epoch": 2.04, + "learning_rate": 4.925979921102243e-06, + "loss": 0.0713, + "step": 6334 + }, + { + "epoch": 2.04, + "learning_rate": 4.922986927725229e-06, + "loss": 0.0713, + "step": 6335 + }, + { + "epoch": 2.04, + "learning_rate": 4.91999454696221e-06, + "loss": 0.0712, + "step": 6336 + }, + { + "epoch": 2.04, + "learning_rate": 4.917002779174262e-06, + "loss": 0.0698, + "step": 6337 + }, + { + "epoch": 2.04, + "learning_rate": 4.9140116247223816e-06, + "loss": 0.0709, + "step": 6338 + }, + { + "epoch": 2.04, + "learning_rate": 4.9110210839674976e-06, + "loss": 0.0718, + "step": 6339 + }, + { + "epoch": 2.04, + "learning_rate": 4.908031157270462e-06, + "loss": 0.0725, + "step": 6340 + }, + { + "epoch": 2.04, + "learning_rate": 4.90504184499205e-06, + "loss": 0.0727, + "step": 6341 + }, + { + "epoch": 2.04, + "learning_rate": 4.902053147492964e-06, + "loss": 0.0687, + "step": 6342 + }, + { + "epoch": 2.04, + "learning_rate": 4.8990650651338355e-06, + "loss": 0.0798, + "step": 6343 + }, + { + "epoch": 2.04, + "learning_rate": 4.896077598275213e-06, + "loss": 0.0664, + "step": 6344 + }, + { + "epoch": 2.04, + "learning_rate": 4.893090747277585e-06, + "loss": 0.0755, + "step": 6345 + }, + { + "epoch": 2.04, + "learning_rate": 4.890104512501355e-06, + "loss": 0.0732, + "step": 6346 + }, + { + "epoch": 2.04, + "learning_rate": 4.887118894306849e-06, + "loss": 0.0737, + "step": 6347 + }, + { + "epoch": 2.04, + "learning_rate": 4.884133893054339e-06, + "loss": 0.0707, + "step": 6348 + }, + { + "epoch": 2.04, + "learning_rate": 4.881149509103993e-06, + "loss": 0.0717, + "step": 6349 + }, + { + "epoch": 2.04, + "learning_rate": 4.878165742815922e-06, + "loss": 0.0707, + "step": 6350 + }, + { + "epoch": 2.04, + "learning_rate": 4.875182594550166e-06, + "loss": 0.0699, + "step": 6351 + }, + { + "epoch": 2.04, + "learning_rate": 4.872200064666681e-06, + "loss": 0.0868, + "step": 6352 + }, + { + "epoch": 2.04, + "learning_rate": 4.869218153525349e-06, + "loss": 0.0766, + "step": 6353 + }, + { + "epoch": 2.04, + "learning_rate": 4.86623686148599e-06, + "loss": 0.0747, + "step": 6354 + }, + { + "epoch": 2.04, + "learning_rate": 4.863256188908329e-06, + "loss": 0.0714, + "step": 6355 + }, + { + "epoch": 2.04, + "learning_rate": 4.860276136152026e-06, + "loss": 0.0733, + "step": 6356 + }, + { + "epoch": 2.05, + "learning_rate": 4.857296703576675e-06, + "loss": 0.0707, + "step": 6357 + }, + { + "epoch": 2.05, + "learning_rate": 4.854317891541782e-06, + "loss": 0.072, + "step": 6358 + }, + { + "epoch": 2.05, + "learning_rate": 4.851339700406781e-06, + "loss": 0.0701, + "step": 6359 + }, + { + "epoch": 2.05, + "learning_rate": 4.848362130531039e-06, + "loss": 0.0754, + "step": 6360 + }, + { + "epoch": 2.05, + "learning_rate": 4.845385182273844e-06, + "loss": 0.0745, + "step": 6361 + }, + { + "epoch": 2.05, + "learning_rate": 4.842408855994395e-06, + "loss": 0.0756, + "step": 6362 + }, + { + "epoch": 2.05, + "learning_rate": 4.83943315205184e-06, + "loss": 0.0717, + "step": 6363 + }, + { + "epoch": 2.05, + "learning_rate": 4.836458070805235e-06, + "loss": 0.0689, + "step": 6364 + }, + { + "epoch": 2.05, + "learning_rate": 4.833483612613564e-06, + "loss": 0.0686, + "step": 6365 + }, + { + "epoch": 2.05, + "learning_rate": 4.8305097778357445e-06, + "loss": 0.0735, + "step": 6366 + }, + { + "epoch": 2.05, + "learning_rate": 4.827536566830609e-06, + "loss": 0.0712, + "step": 6367 + }, + { + "epoch": 2.05, + "learning_rate": 4.824563979956915e-06, + "loss": 0.0701, + "step": 6368 + }, + { + "epoch": 2.05, + "learning_rate": 4.821592017573351e-06, + "loss": 0.0741, + "step": 6369 + }, + { + "epoch": 2.05, + "learning_rate": 4.818620680038525e-06, + "loss": 0.0742, + "step": 6370 + }, + { + "epoch": 2.05, + "learning_rate": 4.815649967710967e-06, + "loss": 0.0737, + "step": 6371 + }, + { + "epoch": 2.05, + "learning_rate": 4.812679880949145e-06, + "loss": 0.0723, + "step": 6372 + }, + { + "epoch": 2.05, + "learning_rate": 4.8097104201114365e-06, + "loss": 0.0813, + "step": 6373 + }, + { + "epoch": 2.05, + "learning_rate": 4.806741585556151e-06, + "loss": 0.0703, + "step": 6374 + }, + { + "epoch": 2.05, + "learning_rate": 4.80377337764152e-06, + "loss": 0.0713, + "step": 6375 + }, + { + "epoch": 2.05, + "learning_rate": 4.800805796725699e-06, + "loss": 0.0847, + "step": 6376 + }, + { + "epoch": 2.05, + "learning_rate": 4.797838843166768e-06, + "loss": 0.0691, + "step": 6377 + }, + { + "epoch": 2.05, + "learning_rate": 4.794872517322737e-06, + "loss": 0.0703, + "step": 6378 + }, + { + "epoch": 2.05, + "learning_rate": 4.791906819551533e-06, + "loss": 0.0684, + "step": 6379 + }, + { + "epoch": 2.05, + "learning_rate": 4.7889417502110095e-06, + "loss": 0.0721, + "step": 6380 + }, + { + "epoch": 2.05, + "learning_rate": 4.7859773096589435e-06, + "loss": 0.073, + "step": 6381 + }, + { + "epoch": 2.05, + "learning_rate": 4.783013498253035e-06, + "loss": 0.0748, + "step": 6382 + }, + { + "epoch": 2.05, + "learning_rate": 4.780050316350916e-06, + "loss": 0.0734, + "step": 6383 + }, + { + "epoch": 2.05, + "learning_rate": 4.777087764310134e-06, + "loss": 0.0742, + "step": 6384 + }, + { + "epoch": 2.05, + "learning_rate": 4.774125842488163e-06, + "loss": 0.066, + "step": 6385 + }, + { + "epoch": 2.05, + "learning_rate": 4.771164551242401e-06, + "loss": 0.0713, + "step": 6386 + }, + { + "epoch": 2.05, + "learning_rate": 4.768203890930169e-06, + "loss": 0.0725, + "step": 6387 + }, + { + "epoch": 2.06, + "learning_rate": 4.765243861908711e-06, + "loss": 0.0836, + "step": 6388 + }, + { + "epoch": 2.06, + "learning_rate": 4.762284464535202e-06, + "loss": 0.0677, + "step": 6389 + }, + { + "epoch": 2.06, + "learning_rate": 4.759325699166734e-06, + "loss": 0.0707, + "step": 6390 + }, + { + "epoch": 2.06, + "learning_rate": 4.7563675661603215e-06, + "loss": 0.072, + "step": 6391 + }, + { + "epoch": 2.06, + "learning_rate": 4.753410065872904e-06, + "loss": 0.0667, + "step": 6392 + }, + { + "epoch": 2.06, + "learning_rate": 4.750453198661357e-06, + "loss": 0.0701, + "step": 6393 + }, + { + "epoch": 2.06, + "learning_rate": 4.747496964882452e-06, + "loss": 0.0758, + "step": 6394 + }, + { + "epoch": 2.06, + "learning_rate": 4.744541364892914e-06, + "loss": 0.0612, + "step": 6395 + }, + { + "epoch": 2.06, + "learning_rate": 4.7415863990493735e-06, + "loss": 0.0757, + "step": 6396 + }, + { + "epoch": 2.06, + "learning_rate": 4.7386320677083866e-06, + "loss": 0.0747, + "step": 6397 + }, + { + "epoch": 2.06, + "learning_rate": 4.7356783712264405e-06, + "loss": 0.0706, + "step": 6398 + }, + { + "epoch": 2.06, + "learning_rate": 4.732725309959945e-06, + "loss": 0.0692, + "step": 6399 + }, + { + "epoch": 2.06, + "learning_rate": 4.729772884265212e-06, + "loss": 0.0782, + "step": 6400 + }, + { + "epoch": 2.06, + "learning_rate": 4.72682109449851e-06, + "loss": 0.0737, + "step": 6401 + }, + { + "epoch": 2.06, + "learning_rate": 4.72386994101601e-06, + "loss": 0.0747, + "step": 6402 + }, + { + "epoch": 2.06, + "learning_rate": 4.7209194241738045e-06, + "loss": 0.0725, + "step": 6403 + }, + { + "epoch": 2.06, + "learning_rate": 4.717969544327924e-06, + "loss": 0.0721, + "step": 6404 + }, + { + "epoch": 2.06, + "learning_rate": 4.715020301834311e-06, + "loss": 0.0798, + "step": 6405 + }, + { + "epoch": 2.06, + "learning_rate": 4.712071697048833e-06, + "loss": 0.0734, + "step": 6406 + }, + { + "epoch": 2.06, + "learning_rate": 4.70912373032728e-06, + "loss": 0.0736, + "step": 6407 + }, + { + "epoch": 2.06, + "learning_rate": 4.7061764020253675e-06, + "loss": 0.0715, + "step": 6408 + }, + { + "epoch": 2.06, + "learning_rate": 4.703229712498728e-06, + "loss": 0.0735, + "step": 6409 + }, + { + "epoch": 2.06, + "learning_rate": 4.700283662102931e-06, + "loss": 0.0695, + "step": 6410 + }, + { + "epoch": 2.06, + "learning_rate": 4.697338251193454e-06, + "loss": 0.0771, + "step": 6411 + }, + { + "epoch": 2.06, + "learning_rate": 4.694393480125703e-06, + "loss": 0.0725, + "step": 6412 + }, + { + "epoch": 2.06, + "learning_rate": 4.691449349255008e-06, + "loss": 0.0707, + "step": 6413 + }, + { + "epoch": 2.06, + "learning_rate": 4.688505858936618e-06, + "loss": 0.0682, + "step": 6414 + }, + { + "epoch": 2.06, + "learning_rate": 4.685563009525705e-06, + "loss": 0.0705, + "step": 6415 + }, + { + "epoch": 2.06, + "learning_rate": 4.682620801377371e-06, + "loss": 0.0741, + "step": 6416 + }, + { + "epoch": 2.06, + "learning_rate": 4.679679234846636e-06, + "loss": 0.0733, + "step": 6417 + }, + { + "epoch": 2.06, + "learning_rate": 4.676738310288433e-06, + "loss": 0.0789, + "step": 6418 + }, + { + "epoch": 2.06, + "learning_rate": 4.673798028057642e-06, + "loss": 0.0686, + "step": 6419 + }, + { + "epoch": 2.07, + "learning_rate": 4.670858388509038e-06, + "loss": 0.0718, + "step": 6420 + }, + { + "epoch": 2.07, + "learning_rate": 4.6679193919973275e-06, + "loss": 0.0747, + "step": 6421 + }, + { + "epoch": 2.07, + "learning_rate": 4.664981038877152e-06, + "loss": 0.0755, + "step": 6422 + }, + { + "epoch": 2.07, + "learning_rate": 4.662043329503062e-06, + "loss": 0.073, + "step": 6423 + }, + { + "epoch": 2.07, + "learning_rate": 4.6591062642295305e-06, + "loss": 0.0712, + "step": 6424 + }, + { + "epoch": 2.07, + "learning_rate": 4.6561698434109655e-06, + "loss": 0.0726, + "step": 6425 + }, + { + "epoch": 2.07, + "learning_rate": 4.65323406740168e-06, + "loss": 0.0721, + "step": 6426 + }, + { + "epoch": 2.07, + "learning_rate": 4.650298936555913e-06, + "loss": 0.072, + "step": 6427 + }, + { + "epoch": 2.07, + "learning_rate": 4.647364451227841e-06, + "loss": 0.0722, + "step": 6428 + }, + { + "epoch": 2.07, + "learning_rate": 4.644430611771548e-06, + "loss": 0.0749, + "step": 6429 + }, + { + "epoch": 2.07, + "learning_rate": 4.641497418541038e-06, + "loss": 0.0703, + "step": 6430 + }, + { + "epoch": 2.07, + "learning_rate": 4.638564871890249e-06, + "loss": 0.0748, + "step": 6431 + }, + { + "epoch": 2.07, + "learning_rate": 4.635632972173036e-06, + "loss": 0.0756, + "step": 6432 + }, + { + "epoch": 2.07, + "learning_rate": 4.632701719743164e-06, + "loss": 0.0724, + "step": 6433 + }, + { + "epoch": 2.07, + "learning_rate": 4.629771114954341e-06, + "loss": 0.0737, + "step": 6434 + }, + { + "epoch": 2.07, + "learning_rate": 4.626841158160182e-06, + "loss": 0.0673, + "step": 6435 + }, + { + "epoch": 2.07, + "learning_rate": 4.623911849714226e-06, + "loss": 0.0691, + "step": 6436 + }, + { + "epoch": 2.07, + "learning_rate": 4.62098318996994e-06, + "loss": 0.0725, + "step": 6437 + }, + { + "epoch": 2.07, + "learning_rate": 4.618055179280712e-06, + "loss": 0.0659, + "step": 6438 + }, + { + "epoch": 2.07, + "learning_rate": 4.615127817999835e-06, + "loss": 0.065, + "step": 6439 + }, + { + "epoch": 2.07, + "learning_rate": 4.612201106480548e-06, + "loss": 0.071, + "step": 6440 + }, + { + "epoch": 2.07, + "learning_rate": 4.609275045075998e-06, + "loss": 0.0712, + "step": 6441 + }, + { + "epoch": 2.07, + "learning_rate": 4.606349634139251e-06, + "loss": 0.0719, + "step": 6442 + }, + { + "epoch": 2.07, + "learning_rate": 4.603424874023308e-06, + "loss": 0.0767, + "step": 6443 + }, + { + "epoch": 2.07, + "learning_rate": 4.600500765081079e-06, + "loss": 0.0665, + "step": 6444 + }, + { + "epoch": 2.07, + "learning_rate": 4.597577307665399e-06, + "loss": 0.0724, + "step": 6445 + }, + { + "epoch": 2.07, + "learning_rate": 4.594654502129026e-06, + "loss": 0.0744, + "step": 6446 + }, + { + "epoch": 2.07, + "learning_rate": 4.591732348824637e-06, + "loss": 0.0667, + "step": 6447 + }, + { + "epoch": 2.07, + "learning_rate": 4.5888108481048275e-06, + "loss": 0.0715, + "step": 6448 + }, + { + "epoch": 2.07, + "learning_rate": 4.585890000322126e-06, + "loss": 0.0751, + "step": 6449 + }, + { + "epoch": 2.07, + "learning_rate": 4.582969805828972e-06, + "loss": 0.077, + "step": 6450 + }, + { + "epoch": 2.08, + "learning_rate": 4.5800502649777255e-06, + "loss": 0.0692, + "step": 6451 + }, + { + "epoch": 2.08, + "learning_rate": 4.577131378120674e-06, + "loss": 0.0645, + "step": 6452 + }, + { + "epoch": 2.08, + "learning_rate": 4.574213145610021e-06, + "loss": 0.0752, + "step": 6453 + }, + { + "epoch": 2.08, + "learning_rate": 4.571295567797891e-06, + "loss": 0.0728, + "step": 6454 + }, + { + "epoch": 2.08, + "learning_rate": 4.568378645036336e-06, + "loss": 0.0723, + "step": 6455 + }, + { + "epoch": 2.08, + "learning_rate": 4.565462377677322e-06, + "loss": 0.0667, + "step": 6456 + }, + { + "epoch": 2.08, + "learning_rate": 4.562546766072734e-06, + "loss": 0.0785, + "step": 6457 + }, + { + "epoch": 2.08, + "learning_rate": 4.559631810574395e-06, + "loss": 0.0705, + "step": 6458 + }, + { + "epoch": 2.08, + "learning_rate": 4.556717511534023e-06, + "loss": 0.0675, + "step": 6459 + }, + { + "epoch": 2.08, + "learning_rate": 4.553803869303271e-06, + "loss": 0.0746, + "step": 6460 + }, + { + "epoch": 2.08, + "learning_rate": 4.550890884233716e-06, + "loss": 0.0749, + "step": 6461 + }, + { + "epoch": 2.08, + "learning_rate": 4.547978556676852e-06, + "loss": 0.0737, + "step": 6462 + }, + { + "epoch": 2.08, + "learning_rate": 4.545066886984086e-06, + "loss": 0.0761, + "step": 6463 + }, + { + "epoch": 2.08, + "learning_rate": 4.542155875506763e-06, + "loss": 0.0691, + "step": 6464 + }, + { + "epoch": 2.08, + "learning_rate": 4.539245522596125e-06, + "loss": 0.0752, + "step": 6465 + }, + { + "epoch": 2.08, + "learning_rate": 4.5363358286033585e-06, + "loss": 0.0776, + "step": 6466 + }, + { + "epoch": 2.08, + "learning_rate": 4.533426793879556e-06, + "loss": 0.0659, + "step": 6467 + }, + { + "epoch": 2.08, + "learning_rate": 4.530518418775734e-06, + "loss": 0.0817, + "step": 6468 + }, + { + "epoch": 2.08, + "learning_rate": 4.527610703642824e-06, + "loss": 0.0708, + "step": 6469 + }, + { + "epoch": 2.08, + "learning_rate": 4.524703648831692e-06, + "loss": 0.0749, + "step": 6470 + }, + { + "epoch": 2.08, + "learning_rate": 4.521797254693112e-06, + "loss": 0.0732, + "step": 6471 + }, + { + "epoch": 2.08, + "learning_rate": 4.518891521577783e-06, + "loss": 0.0677, + "step": 6472 + }, + { + "epoch": 2.08, + "learning_rate": 4.5159864498363204e-06, + "loss": 0.0771, + "step": 6473 + }, + { + "epoch": 2.08, + "learning_rate": 4.5130820398192645e-06, + "loss": 0.07, + "step": 6474 + }, + { + "epoch": 2.08, + "learning_rate": 4.5101782918770685e-06, + "loss": 0.068, + "step": 6475 + }, + { + "epoch": 2.08, + "learning_rate": 4.507275206360121e-06, + "loss": 0.0706, + "step": 6476 + }, + { + "epoch": 2.08, + "learning_rate": 4.504372783618715e-06, + "loss": 0.0667, + "step": 6477 + }, + { + "epoch": 2.08, + "learning_rate": 4.501471024003068e-06, + "loss": 0.07, + "step": 6478 + }, + { + "epoch": 2.08, + "learning_rate": 4.498569927863321e-06, + "loss": 0.0845, + "step": 6479 + }, + { + "epoch": 2.08, + "learning_rate": 4.495669495549529e-06, + "loss": 0.0657, + "step": 6480 + }, + { + "epoch": 2.08, + "learning_rate": 4.492769727411676e-06, + "loss": 0.0654, + "step": 6481 + }, + { + "epoch": 2.09, + "learning_rate": 4.489870623799658e-06, + "loss": 0.068, + "step": 6482 + }, + { + "epoch": 2.09, + "learning_rate": 4.486972185063293e-06, + "loss": 0.075, + "step": 6483 + }, + { + "epoch": 2.09, + "learning_rate": 4.4840744115523184e-06, + "loss": 0.0702, + "step": 6484 + }, + { + "epoch": 2.09, + "learning_rate": 4.481177303616393e-06, + "loss": 0.0709, + "step": 6485 + }, + { + "epoch": 2.09, + "learning_rate": 4.478280861605089e-06, + "loss": 0.076, + "step": 6486 + }, + { + "epoch": 2.09, + "learning_rate": 4.475385085867912e-06, + "loss": 0.0734, + "step": 6487 + }, + { + "epoch": 2.09, + "learning_rate": 4.472489976754274e-06, + "loss": 0.0691, + "step": 6488 + }, + { + "epoch": 2.09, + "learning_rate": 4.4695955346135115e-06, + "loss": 0.0688, + "step": 6489 + }, + { + "epoch": 2.09, + "learning_rate": 4.46670175979488e-06, + "loss": 0.0697, + "step": 6490 + }, + { + "epoch": 2.09, + "learning_rate": 4.463808652647557e-06, + "loss": 0.0806, + "step": 6491 + }, + { + "epoch": 2.09, + "learning_rate": 4.46091621352063e-06, + "loss": 0.0728, + "step": 6492 + }, + { + "epoch": 2.09, + "learning_rate": 4.4580244427631215e-06, + "loss": 0.0636, + "step": 6493 + }, + { + "epoch": 2.09, + "learning_rate": 4.455133340723962e-06, + "loss": 0.0716, + "step": 6494 + }, + { + "epoch": 2.09, + "learning_rate": 4.452242907752e-06, + "loss": 0.0654, + "step": 6495 + }, + { + "epoch": 2.09, + "learning_rate": 4.449353144196015e-06, + "loss": 0.0711, + "step": 6496 + }, + { + "epoch": 2.09, + "learning_rate": 4.4464640504046975e-06, + "loss": 0.0664, + "step": 6497 + }, + { + "epoch": 2.09, + "learning_rate": 4.443575626726647e-06, + "loss": 0.0778, + "step": 6498 + }, + { + "epoch": 2.09, + "learning_rate": 4.440687873510405e-06, + "loss": 0.075, + "step": 6499 + }, + { + "epoch": 2.09, + "learning_rate": 4.437800791104416e-06, + "loss": 0.0733, + "step": 6500 + }, + { + "epoch": 2.09, + "learning_rate": 4.434914379857043e-06, + "loss": 0.0669, + "step": 6501 + }, + { + "epoch": 2.09, + "learning_rate": 4.432028640116581e-06, + "loss": 0.0764, + "step": 6502 + }, + { + "epoch": 2.09, + "learning_rate": 4.429143572231235e-06, + "loss": 0.0693, + "step": 6503 + }, + { + "epoch": 2.09, + "learning_rate": 4.426259176549119e-06, + "loss": 0.0848, + "step": 6504 + }, + { + "epoch": 2.09, + "learning_rate": 4.423375453418288e-06, + "loss": 0.0748, + "step": 6505 + }, + { + "epoch": 2.09, + "learning_rate": 4.4204924031866995e-06, + "loss": 0.0736, + "step": 6506 + }, + { + "epoch": 2.09, + "learning_rate": 4.417610026202231e-06, + "loss": 0.0734, + "step": 6507 + }, + { + "epoch": 2.09, + "learning_rate": 4.414728322812691e-06, + "loss": 0.0698, + "step": 6508 + }, + { + "epoch": 2.09, + "learning_rate": 4.411847293365793e-06, + "loss": 0.0792, + "step": 6509 + }, + { + "epoch": 2.09, + "learning_rate": 4.4089669382091746e-06, + "loss": 0.0682, + "step": 6510 + }, + { + "epoch": 2.09, + "learning_rate": 4.406087257690393e-06, + "loss": 0.0651, + "step": 6511 + }, + { + "epoch": 2.09, + "learning_rate": 4.403208252156921e-06, + "loss": 0.071, + "step": 6512 + }, + { + "epoch": 2.1, + "learning_rate": 4.400329921956148e-06, + "loss": 0.0751, + "step": 6513 + }, + { + "epoch": 2.1, + "learning_rate": 4.397452267435394e-06, + "loss": 0.0704, + "step": 6514 + }, + { + "epoch": 2.1, + "learning_rate": 4.394575288941885e-06, + "loss": 0.0742, + "step": 6515 + }, + { + "epoch": 2.1, + "learning_rate": 4.391698986822769e-06, + "loss": 0.0698, + "step": 6516 + }, + { + "epoch": 2.1, + "learning_rate": 4.388823361425113e-06, + "loss": 0.0745, + "step": 6517 + }, + { + "epoch": 2.1, + "learning_rate": 4.385948413095903e-06, + "loss": 0.0737, + "step": 6518 + }, + { + "epoch": 2.1, + "learning_rate": 4.3830741421820376e-06, + "loss": 0.0762, + "step": 6519 + }, + { + "epoch": 2.1, + "learning_rate": 4.380200549030348e-06, + "loss": 0.0626, + "step": 6520 + }, + { + "epoch": 2.1, + "learning_rate": 4.377327633987567e-06, + "loss": 0.0712, + "step": 6521 + }, + { + "epoch": 2.1, + "learning_rate": 4.374455397400352e-06, + "loss": 0.0744, + "step": 6522 + }, + { + "epoch": 2.1, + "learning_rate": 4.37158383961529e-06, + "loss": 0.0698, + "step": 6523 + }, + { + "epoch": 2.1, + "learning_rate": 4.368712960978863e-06, + "loss": 0.0721, + "step": 6524 + }, + { + "epoch": 2.1, + "learning_rate": 4.365842761837485e-06, + "loss": 0.0707, + "step": 6525 + }, + { + "epoch": 2.1, + "learning_rate": 4.362973242537493e-06, + "loss": 0.0758, + "step": 6526 + }, + { + "epoch": 2.1, + "learning_rate": 4.360104403425131e-06, + "loss": 0.0763, + "step": 6527 + }, + { + "epoch": 2.1, + "learning_rate": 4.357236244846562e-06, + "loss": 0.0706, + "step": 6528 + }, + { + "epoch": 2.1, + "learning_rate": 4.354368767147882e-06, + "loss": 0.0755, + "step": 6529 + }, + { + "epoch": 2.1, + "learning_rate": 4.351501970675082e-06, + "loss": 0.0697, + "step": 6530 + }, + { + "epoch": 2.1, + "learning_rate": 4.348635855774082e-06, + "loss": 0.0748, + "step": 6531 + }, + { + "epoch": 2.1, + "learning_rate": 4.345770422790725e-06, + "loss": 0.0755, + "step": 6532 + }, + { + "epoch": 2.1, + "learning_rate": 4.342905672070765e-06, + "loss": 0.0714, + "step": 6533 + }, + { + "epoch": 2.1, + "learning_rate": 4.340041603959871e-06, + "loss": 0.0754, + "step": 6534 + }, + { + "epoch": 2.1, + "learning_rate": 4.33717821880364e-06, + "loss": 0.0702, + "step": 6535 + }, + { + "epoch": 2.1, + "learning_rate": 4.33431551694758e-06, + "loss": 0.072, + "step": 6536 + }, + { + "epoch": 2.1, + "learning_rate": 4.331453498737107e-06, + "loss": 0.0669, + "step": 6537 + }, + { + "epoch": 2.1, + "learning_rate": 4.3285921645175756e-06, + "loss": 0.0745, + "step": 6538 + }, + { + "epoch": 2.1, + "learning_rate": 4.32573151463424e-06, + "loss": 0.0698, + "step": 6539 + }, + { + "epoch": 2.1, + "learning_rate": 4.32287154943228e-06, + "loss": 0.0682, + "step": 6540 + }, + { + "epoch": 2.1, + "learning_rate": 4.320012269256793e-06, + "loss": 0.0718, + "step": 6541 + }, + { + "epoch": 2.1, + "learning_rate": 4.31715367445279e-06, + "loss": 0.0779, + "step": 6542 + }, + { + "epoch": 2.1, + "learning_rate": 4.314295765365203e-06, + "loss": 0.0674, + "step": 6543 + }, + { + "epoch": 2.11, + "learning_rate": 4.311438542338879e-06, + "loss": 0.0675, + "step": 6544 + }, + { + "epoch": 2.11, + "learning_rate": 4.308582005718579e-06, + "loss": 0.0711, + "step": 6545 + }, + { + "epoch": 2.11, + "learning_rate": 4.305726155848986e-06, + "loss": 0.0692, + "step": 6546 + }, + { + "epoch": 2.11, + "learning_rate": 4.302870993074705e-06, + "loss": 0.0685, + "step": 6547 + }, + { + "epoch": 2.11, + "learning_rate": 4.300016517740247e-06, + "loss": 0.0853, + "step": 6548 + }, + { + "epoch": 2.11, + "learning_rate": 4.297162730190046e-06, + "loss": 0.0669, + "step": 6549 + }, + { + "epoch": 2.11, + "learning_rate": 4.294309630768452e-06, + "loss": 0.066, + "step": 6550 + }, + { + "epoch": 2.11, + "learning_rate": 4.2914572198197325e-06, + "loss": 0.0727, + "step": 6551 + }, + { + "epoch": 2.11, + "learning_rate": 4.2886054976880676e-06, + "loss": 0.0745, + "step": 6552 + }, + { + "epoch": 2.11, + "learning_rate": 4.285754464717565e-06, + "loss": 0.072, + "step": 6553 + }, + { + "epoch": 2.11, + "learning_rate": 4.282904121252241e-06, + "loss": 0.0704, + "step": 6554 + }, + { + "epoch": 2.11, + "learning_rate": 4.280054467636027e-06, + "loss": 0.07, + "step": 6555 + }, + { + "epoch": 2.11, + "learning_rate": 4.2772055042127755e-06, + "loss": 0.0752, + "step": 6556 + }, + { + "epoch": 2.11, + "learning_rate": 4.274357231326256e-06, + "loss": 0.0756, + "step": 6557 + }, + { + "epoch": 2.11, + "learning_rate": 4.271509649320148e-06, + "loss": 0.0703, + "step": 6558 + }, + { + "epoch": 2.11, + "learning_rate": 4.268662758538062e-06, + "loss": 0.0724, + "step": 6559 + }, + { + "epoch": 2.11, + "learning_rate": 4.265816559323509e-06, + "loss": 0.0677, + "step": 6560 + }, + { + "epoch": 2.11, + "learning_rate": 4.2629710520199266e-06, + "loss": 0.0847, + "step": 6561 + }, + { + "epoch": 2.11, + "learning_rate": 4.260126236970664e-06, + "loss": 0.0755, + "step": 6562 + }, + { + "epoch": 2.11, + "learning_rate": 4.2572821145189856e-06, + "loss": 0.0864, + "step": 6563 + }, + { + "epoch": 2.11, + "learning_rate": 4.254438685008083e-06, + "loss": 0.0698, + "step": 6564 + }, + { + "epoch": 2.11, + "learning_rate": 4.2515959487810535e-06, + "loss": 0.0676, + "step": 6565 + }, + { + "epoch": 2.11, + "learning_rate": 4.248753906180913e-06, + "loss": 0.0739, + "step": 6566 + }, + { + "epoch": 2.11, + "learning_rate": 4.245912557550589e-06, + "loss": 0.0694, + "step": 6567 + }, + { + "epoch": 2.11, + "learning_rate": 4.243071903232944e-06, + "loss": 0.0675, + "step": 6568 + }, + { + "epoch": 2.11, + "learning_rate": 4.2402319435707274e-06, + "loss": 0.0687, + "step": 6569 + }, + { + "epoch": 2.11, + "learning_rate": 4.237392678906633e-06, + "loss": 0.082, + "step": 6570 + }, + { + "epoch": 2.11, + "learning_rate": 4.234554109583255e-06, + "loss": 0.0692, + "step": 6571 + }, + { + "epoch": 2.11, + "learning_rate": 4.231716235943106e-06, + "loss": 0.0725, + "step": 6572 + }, + { + "epoch": 2.11, + "learning_rate": 4.2288790583286135e-06, + "loss": 0.0745, + "step": 6573 + }, + { + "epoch": 2.11, + "learning_rate": 4.226042577082133e-06, + "loss": 0.0725, + "step": 6574 + }, + { + "epoch": 2.12, + "learning_rate": 4.223206792545914e-06, + "loss": 0.0708, + "step": 6575 + }, + { + "epoch": 2.12, + "learning_rate": 4.220371705062143e-06, + "loss": 0.0748, + "step": 6576 + }, + { + "epoch": 2.12, + "learning_rate": 4.217537314972911e-06, + "loss": 0.0674, + "step": 6577 + }, + { + "epoch": 2.12, + "learning_rate": 4.214703622620223e-06, + "loss": 0.066, + "step": 6578 + }, + { + "epoch": 2.12, + "learning_rate": 4.211870628346013e-06, + "loss": 0.0706, + "step": 6579 + }, + { + "epoch": 2.12, + "learning_rate": 4.209038332492118e-06, + "loss": 0.0822, + "step": 6580 + }, + { + "epoch": 2.12, + "learning_rate": 4.206206735400296e-06, + "loss": 0.0725, + "step": 6581 + }, + { + "epoch": 2.12, + "learning_rate": 4.203375837412217e-06, + "loss": 0.0689, + "step": 6582 + }, + { + "epoch": 2.12, + "learning_rate": 4.200545638869471e-06, + "loss": 0.0661, + "step": 6583 + }, + { + "epoch": 2.12, + "learning_rate": 4.197716140113558e-06, + "loss": 0.0686, + "step": 6584 + }, + { + "epoch": 2.12, + "learning_rate": 4.194887341485904e-06, + "loss": 0.0693, + "step": 6585 + }, + { + "epoch": 2.12, + "learning_rate": 4.19205924332784e-06, + "loss": 0.0805, + "step": 6586 + }, + { + "epoch": 2.12, + "learning_rate": 4.189231845980618e-06, + "loss": 0.0678, + "step": 6587 + }, + { + "epoch": 2.12, + "learning_rate": 4.186405149785403e-06, + "loss": 0.0718, + "step": 6588 + }, + { + "epoch": 2.12, + "learning_rate": 4.183579155083276e-06, + "loss": 0.0797, + "step": 6589 + }, + { + "epoch": 2.12, + "learning_rate": 4.180753862215229e-06, + "loss": 0.072, + "step": 6590 + }, + { + "epoch": 2.12, + "learning_rate": 4.177929271522183e-06, + "loss": 0.0673, + "step": 6591 + }, + { + "epoch": 2.12, + "learning_rate": 4.17510538334496e-06, + "loss": 0.076, + "step": 6592 + }, + { + "epoch": 2.12, + "learning_rate": 4.172282198024299e-06, + "loss": 0.0754, + "step": 6593 + }, + { + "epoch": 2.12, + "learning_rate": 4.169459715900869e-06, + "loss": 0.0692, + "step": 6594 + }, + { + "epoch": 2.12, + "learning_rate": 4.166637937315231e-06, + "loss": 0.072, + "step": 6595 + }, + { + "epoch": 2.12, + "learning_rate": 4.163816862607874e-06, + "loss": 0.0697, + "step": 6596 + }, + { + "epoch": 2.12, + "learning_rate": 4.160996492119208e-06, + "loss": 0.0738, + "step": 6597 + }, + { + "epoch": 2.12, + "learning_rate": 4.158176826189545e-06, + "loss": 0.08, + "step": 6598 + }, + { + "epoch": 2.12, + "learning_rate": 4.155357865159118e-06, + "loss": 0.0696, + "step": 6599 + }, + { + "epoch": 2.12, + "learning_rate": 4.152539609368083e-06, + "loss": 0.0736, + "step": 6600 + }, + { + "epoch": 2.12, + "learning_rate": 4.149722059156491e-06, + "loss": 0.068, + "step": 6601 + }, + { + "epoch": 2.12, + "learning_rate": 4.146905214864323e-06, + "loss": 0.0709, + "step": 6602 + }, + { + "epoch": 2.12, + "learning_rate": 4.144089076831476e-06, + "loss": 0.0737, + "step": 6603 + }, + { + "epoch": 2.12, + "learning_rate": 4.1412736453977545e-06, + "loss": 0.0707, + "step": 6604 + }, + { + "epoch": 2.12, + "learning_rate": 4.138458920902876e-06, + "loss": 0.0767, + "step": 6605 + }, + { + "epoch": 2.13, + "learning_rate": 4.135644903686485e-06, + "loss": 0.078, + "step": 6606 + }, + { + "epoch": 2.13, + "learning_rate": 4.132831594088134e-06, + "loss": 0.0725, + "step": 6607 + }, + { + "epoch": 2.13, + "learning_rate": 4.130018992447276e-06, + "loss": 0.0701, + "step": 6608 + }, + { + "epoch": 2.13, + "learning_rate": 4.127207099103304e-06, + "loss": 0.0767, + "step": 6609 + }, + { + "epoch": 2.13, + "learning_rate": 4.124395914395509e-06, + "loss": 0.0723, + "step": 6610 + }, + { + "epoch": 2.13, + "learning_rate": 4.121585438663096e-06, + "loss": 0.074, + "step": 6611 + }, + { + "epoch": 2.13, + "learning_rate": 4.118775672245197e-06, + "loss": 0.0768, + "step": 6612 + }, + { + "epoch": 2.13, + "learning_rate": 4.1159666154808485e-06, + "loss": 0.0754, + "step": 6613 + }, + { + "epoch": 2.13, + "learning_rate": 4.113158268708996e-06, + "loss": 0.0667, + "step": 6614 + }, + { + "epoch": 2.13, + "learning_rate": 4.110350632268514e-06, + "loss": 0.0726, + "step": 6615 + }, + { + "epoch": 2.13, + "learning_rate": 4.1075437064981824e-06, + "loss": 0.0749, + "step": 6616 + }, + { + "epoch": 2.13, + "learning_rate": 4.104737491736692e-06, + "loss": 0.0671, + "step": 6617 + }, + { + "epoch": 2.13, + "learning_rate": 4.10193198832266e-06, + "loss": 0.0704, + "step": 6618 + }, + { + "epoch": 2.13, + "learning_rate": 4.099127196594608e-06, + "loss": 0.0792, + "step": 6619 + }, + { + "epoch": 2.13, + "learning_rate": 4.096323116890972e-06, + "loss": 0.0849, + "step": 6620 + }, + { + "epoch": 2.13, + "learning_rate": 4.093519749550107e-06, + "loss": 0.069, + "step": 6621 + }, + { + "epoch": 2.13, + "learning_rate": 4.090717094910276e-06, + "loss": 0.0749, + "step": 6622 + }, + { + "epoch": 2.13, + "learning_rate": 4.087915153309657e-06, + "loss": 0.0701, + "step": 6623 + }, + { + "epoch": 2.13, + "learning_rate": 4.085113925086352e-06, + "loss": 0.0741, + "step": 6624 + }, + { + "epoch": 2.13, + "learning_rate": 4.082313410578366e-06, + "loss": 0.0789, + "step": 6625 + }, + { + "epoch": 2.13, + "learning_rate": 4.079513610123619e-06, + "loss": 0.0723, + "step": 6626 + }, + { + "epoch": 2.13, + "learning_rate": 4.076714524059948e-06, + "loss": 0.0697, + "step": 6627 + }, + { + "epoch": 2.13, + "learning_rate": 4.073916152725104e-06, + "loss": 0.0693, + "step": 6628 + }, + { + "epoch": 2.13, + "learning_rate": 4.071118496456743e-06, + "loss": 0.0678, + "step": 6629 + }, + { + "epoch": 2.13, + "learning_rate": 4.068321555592453e-06, + "loss": 0.0741, + "step": 6630 + }, + { + "epoch": 2.13, + "learning_rate": 4.065525330469719e-06, + "loss": 0.0734, + "step": 6631 + }, + { + "epoch": 2.13, + "learning_rate": 4.062729821425944e-06, + "loss": 0.0746, + "step": 6632 + }, + { + "epoch": 2.13, + "learning_rate": 4.059935028798455e-06, + "loss": 0.0696, + "step": 6633 + }, + { + "epoch": 2.13, + "learning_rate": 4.057140952924473e-06, + "loss": 0.0719, + "step": 6634 + }, + { + "epoch": 2.13, + "learning_rate": 4.054347594141144e-06, + "loss": 0.0716, + "step": 6635 + }, + { + "epoch": 2.13, + "learning_rate": 4.051554952785531e-06, + "loss": 0.0714, + "step": 6636 + }, + { + "epoch": 2.14, + "learning_rate": 4.048763029194606e-06, + "loss": 0.0689, + "step": 6637 + }, + { + "epoch": 2.14, + "learning_rate": 4.045971823705249e-06, + "loss": 0.065, + "step": 6638 + }, + { + "epoch": 2.14, + "learning_rate": 4.043181336654271e-06, + "loss": 0.0726, + "step": 6639 + }, + { + "epoch": 2.14, + "learning_rate": 4.0403915683783664e-06, + "loss": 0.0696, + "step": 6640 + }, + { + "epoch": 2.14, + "learning_rate": 4.0376025192141744e-06, + "loss": 0.0715, + "step": 6641 + }, + { + "epoch": 2.14, + "learning_rate": 4.034814189498229e-06, + "loss": 0.0707, + "step": 6642 + }, + { + "epoch": 2.14, + "learning_rate": 4.0320265795669815e-06, + "loss": 0.0721, + "step": 6643 + }, + { + "epoch": 2.14, + "learning_rate": 4.029239689756793e-06, + "loss": 0.0653, + "step": 6644 + }, + { + "epoch": 2.14, + "learning_rate": 4.026453520403949e-06, + "loss": 0.0678, + "step": 6645 + }, + { + "epoch": 2.14, + "learning_rate": 4.0236680718446376e-06, + "loss": 0.0688, + "step": 6646 + }, + { + "epoch": 2.14, + "learning_rate": 4.020883344414963e-06, + "loss": 0.0673, + "step": 6647 + }, + { + "epoch": 2.14, + "learning_rate": 4.0180993384509405e-06, + "loss": 0.0697, + "step": 6648 + }, + { + "epoch": 2.14, + "learning_rate": 4.0153160542885015e-06, + "loss": 0.0728, + "step": 6649 + }, + { + "epoch": 2.14, + "learning_rate": 4.012533492263485e-06, + "loss": 0.0682, + "step": 6650 + }, + { + "epoch": 2.14, + "learning_rate": 4.009751652711654e-06, + "loss": 0.0765, + "step": 6651 + }, + { + "epoch": 2.14, + "learning_rate": 4.006970535968674e-06, + "loss": 0.0714, + "step": 6652 + }, + { + "epoch": 2.14, + "learning_rate": 4.004190142370126e-06, + "loss": 0.077, + "step": 6653 + }, + { + "epoch": 2.14, + "learning_rate": 4.001410472251504e-06, + "loss": 0.0716, + "step": 6654 + }, + { + "epoch": 2.14, + "learning_rate": 3.998631525948214e-06, + "loss": 0.0676, + "step": 6655 + }, + { + "epoch": 2.14, + "learning_rate": 3.995853303795573e-06, + "loss": 0.0675, + "step": 6656 + }, + { + "epoch": 2.14, + "learning_rate": 3.99307580612882e-06, + "loss": 0.0726, + "step": 6657 + }, + { + "epoch": 2.14, + "learning_rate": 3.990299033283096e-06, + "loss": 0.0669, + "step": 6658 + }, + { + "epoch": 2.14, + "learning_rate": 3.987522985593459e-06, + "loss": 0.0679, + "step": 6659 + }, + { + "epoch": 2.14, + "learning_rate": 3.9847476633948765e-06, + "loss": 0.0707, + "step": 6660 + }, + { + "epoch": 2.14, + "learning_rate": 3.98197306702223e-06, + "loss": 0.069, + "step": 6661 + }, + { + "epoch": 2.14, + "learning_rate": 3.979199196810319e-06, + "loss": 0.0701, + "step": 6662 + }, + { + "epoch": 2.14, + "learning_rate": 3.976426053093849e-06, + "loss": 0.0684, + "step": 6663 + }, + { + "epoch": 2.14, + "learning_rate": 3.973653636207437e-06, + "loss": 0.0777, + "step": 6664 + }, + { + "epoch": 2.14, + "learning_rate": 3.970881946485617e-06, + "loss": 0.0741, + "step": 6665 + }, + { + "epoch": 2.14, + "learning_rate": 3.96811098426283e-06, + "loss": 0.0664, + "step": 6666 + }, + { + "epoch": 2.14, + "learning_rate": 3.965340749873432e-06, + "loss": 0.0765, + "step": 6667 + }, + { + "epoch": 2.15, + "learning_rate": 3.962571243651695e-06, + "loss": 0.0704, + "step": 6668 + }, + { + "epoch": 2.15, + "learning_rate": 3.959802465931798e-06, + "loss": 0.0712, + "step": 6669 + }, + { + "epoch": 2.15, + "learning_rate": 3.957034417047832e-06, + "loss": 0.0646, + "step": 6670 + }, + { + "epoch": 2.15, + "learning_rate": 3.9542670973338e-06, + "loss": 0.0683, + "step": 6671 + }, + { + "epoch": 2.15, + "learning_rate": 3.9515005071236274e-06, + "loss": 0.0701, + "step": 6672 + }, + { + "epoch": 2.15, + "learning_rate": 3.94873464675113e-06, + "loss": 0.0749, + "step": 6673 + }, + { + "epoch": 2.15, + "learning_rate": 3.945969516550058e-06, + "loss": 0.0769, + "step": 6674 + }, + { + "epoch": 2.15, + "learning_rate": 3.94320511685406e-06, + "loss": 0.0644, + "step": 6675 + }, + { + "epoch": 2.15, + "learning_rate": 3.9404414479966966e-06, + "loss": 0.0693, + "step": 6676 + }, + { + "epoch": 2.15, + "learning_rate": 3.937678510311452e-06, + "loss": 0.0704, + "step": 6677 + }, + { + "epoch": 2.15, + "learning_rate": 3.934916304131714e-06, + "loss": 0.0654, + "step": 6678 + }, + { + "epoch": 2.15, + "learning_rate": 3.932154829790771e-06, + "loss": 0.075, + "step": 6679 + }, + { + "epoch": 2.15, + "learning_rate": 3.929394087621844e-06, + "loss": 0.0759, + "step": 6680 + }, + { + "epoch": 2.15, + "learning_rate": 3.926634077958053e-06, + "loss": 0.0782, + "step": 6681 + }, + { + "epoch": 2.15, + "learning_rate": 3.92387480113243e-06, + "loss": 0.0783, + "step": 6682 + }, + { + "epoch": 2.15, + "learning_rate": 3.921116257477927e-06, + "loss": 0.0651, + "step": 6683 + }, + { + "epoch": 2.15, + "learning_rate": 3.918358447327399e-06, + "loss": 0.0778, + "step": 6684 + }, + { + "epoch": 2.15, + "learning_rate": 3.915601371013612e-06, + "loss": 0.0714, + "step": 6685 + }, + { + "epoch": 2.15, + "learning_rate": 3.912845028869251e-06, + "loss": 0.0757, + "step": 6686 + }, + { + "epoch": 2.15, + "learning_rate": 3.910089421226905e-06, + "loss": 0.0716, + "step": 6687 + }, + { + "epoch": 2.15, + "learning_rate": 3.907334548419076e-06, + "loss": 0.0736, + "step": 6688 + }, + { + "epoch": 2.15, + "learning_rate": 3.904580410778185e-06, + "loss": 0.0755, + "step": 6689 + }, + { + "epoch": 2.15, + "learning_rate": 3.901827008636553e-06, + "loss": 0.0743, + "step": 6690 + }, + { + "epoch": 2.15, + "learning_rate": 3.899074342326418e-06, + "loss": 0.0787, + "step": 6691 + }, + { + "epoch": 2.15, + "learning_rate": 3.896322412179929e-06, + "loss": 0.0727, + "step": 6692 + }, + { + "epoch": 2.15, + "learning_rate": 3.893571218529146e-06, + "loss": 0.0754, + "step": 6693 + }, + { + "epoch": 2.15, + "learning_rate": 3.890820761706034e-06, + "loss": 0.0689, + "step": 6694 + }, + { + "epoch": 2.15, + "learning_rate": 3.888071042042484e-06, + "loss": 0.0762, + "step": 6695 + }, + { + "epoch": 2.15, + "learning_rate": 3.885322059870284e-06, + "loss": 0.0747, + "step": 6696 + }, + { + "epoch": 2.15, + "learning_rate": 3.882573815521139e-06, + "loss": 0.0824, + "step": 6697 + }, + { + "epoch": 2.15, + "learning_rate": 3.879826309326664e-06, + "loss": 0.0734, + "step": 6698 + }, + { + "epoch": 2.16, + "learning_rate": 3.877079541618384e-06, + "loss": 0.0651, + "step": 6699 + }, + { + "epoch": 2.16, + "learning_rate": 3.874333512727732e-06, + "loss": 0.0839, + "step": 6700 + }, + { + "epoch": 2.16, + "learning_rate": 3.8715882229860626e-06, + "loss": 0.0742, + "step": 6701 + }, + { + "epoch": 2.16, + "learning_rate": 3.86884367272463e-06, + "loss": 0.071, + "step": 6702 + }, + { + "epoch": 2.16, + "learning_rate": 3.866099862274603e-06, + "loss": 0.0733, + "step": 6703 + }, + { + "epoch": 2.16, + "learning_rate": 3.863356791967069e-06, + "loss": 0.075, + "step": 6704 + }, + { + "epoch": 2.16, + "learning_rate": 3.860614462133008e-06, + "loss": 0.0666, + "step": 6705 + }, + { + "epoch": 2.16, + "learning_rate": 3.857872873103322e-06, + "loss": 0.0708, + "step": 6706 + }, + { + "epoch": 2.16, + "learning_rate": 3.855132025208829e-06, + "loss": 0.0739, + "step": 6707 + }, + { + "epoch": 2.16, + "learning_rate": 3.852391918780251e-06, + "loss": 0.0718, + "step": 6708 + }, + { + "epoch": 2.16, + "learning_rate": 3.849652554148213e-06, + "loss": 0.0673, + "step": 6709 + }, + { + "epoch": 2.16, + "learning_rate": 3.846913931643271e-06, + "loss": 0.0727, + "step": 6710 + }, + { + "epoch": 2.16, + "learning_rate": 3.844176051595869e-06, + "loss": 0.0743, + "step": 6711 + }, + { + "epoch": 2.16, + "learning_rate": 3.841438914336369e-06, + "loss": 0.0699, + "step": 6712 + }, + { + "epoch": 2.16, + "learning_rate": 3.838702520195056e-06, + "loss": 0.0805, + "step": 6713 + }, + { + "epoch": 2.16, + "learning_rate": 3.835966869502108e-06, + "loss": 0.07, + "step": 6714 + }, + { + "epoch": 2.16, + "learning_rate": 3.8332319625876195e-06, + "loss": 0.071, + "step": 6715 + }, + { + "epoch": 2.16, + "learning_rate": 3.830497799781601e-06, + "loss": 0.0717, + "step": 6716 + }, + { + "epoch": 2.16, + "learning_rate": 3.827764381413969e-06, + "loss": 0.0737, + "step": 6717 + }, + { + "epoch": 2.16, + "learning_rate": 3.82503170781454e-06, + "loss": 0.072, + "step": 6718 + }, + { + "epoch": 2.16, + "learning_rate": 3.822299779313058e-06, + "loss": 0.0745, + "step": 6719 + }, + { + "epoch": 2.16, + "learning_rate": 3.819568596239167e-06, + "loss": 0.0744, + "step": 6720 + }, + { + "epoch": 2.16, + "learning_rate": 3.81683815892242e-06, + "loss": 0.0674, + "step": 6721 + }, + { + "epoch": 2.16, + "learning_rate": 3.81410846769229e-06, + "loss": 0.0671, + "step": 6722 + }, + { + "epoch": 2.16, + "learning_rate": 3.81137952287815e-06, + "loss": 0.0693, + "step": 6723 + }, + { + "epoch": 2.16, + "learning_rate": 3.808651324809285e-06, + "loss": 0.0664, + "step": 6724 + }, + { + "epoch": 2.16, + "learning_rate": 3.805923873814892e-06, + "loss": 0.0712, + "step": 6725 + }, + { + "epoch": 2.16, + "learning_rate": 3.8031971702240766e-06, + "loss": 0.0699, + "step": 6726 + }, + { + "epoch": 2.16, + "learning_rate": 3.8004712143658507e-06, + "loss": 0.0766, + "step": 6727 + }, + { + "epoch": 2.16, + "learning_rate": 3.7977460065691463e-06, + "loss": 0.0703, + "step": 6728 + }, + { + "epoch": 2.16, + "learning_rate": 3.795021547162796e-06, + "loss": 0.0672, + "step": 6729 + }, + { + "epoch": 2.17, + "learning_rate": 3.792297836475545e-06, + "loss": 0.0663, + "step": 6730 + }, + { + "epoch": 2.17, + "learning_rate": 3.7895748748360463e-06, + "loss": 0.0756, + "step": 6731 + }, + { + "epoch": 2.17, + "learning_rate": 3.786852662572865e-06, + "loss": 0.0709, + "step": 6732 + }, + { + "epoch": 2.17, + "learning_rate": 3.7841312000144703e-06, + "loss": 0.0693, + "step": 6733 + }, + { + "epoch": 2.17, + "learning_rate": 3.7814104874892542e-06, + "loss": 0.0667, + "step": 6734 + }, + { + "epoch": 2.17, + "learning_rate": 3.7786905253255056e-06, + "loss": 0.0639, + "step": 6735 + }, + { + "epoch": 2.17, + "learning_rate": 3.775971313851425e-06, + "loss": 0.0722, + "step": 6736 + }, + { + "epoch": 2.17, + "learning_rate": 3.7732528533951264e-06, + "loss": 0.0722, + "step": 6737 + }, + { + "epoch": 2.17, + "learning_rate": 3.770535144284625e-06, + "loss": 0.0646, + "step": 6738 + }, + { + "epoch": 2.17, + "learning_rate": 3.767818186847859e-06, + "loss": 0.074, + "step": 6739 + }, + { + "epoch": 2.17, + "learning_rate": 3.7651019814126656e-06, + "loss": 0.0783, + "step": 6740 + }, + { + "epoch": 2.17, + "learning_rate": 3.762386528306793e-06, + "loss": 0.0762, + "step": 6741 + }, + { + "epoch": 2.17, + "learning_rate": 3.7596718278578946e-06, + "loss": 0.0735, + "step": 6742 + }, + { + "epoch": 2.17, + "learning_rate": 3.7569578803935504e-06, + "loss": 0.0781, + "step": 6743 + }, + { + "epoch": 2.17, + "learning_rate": 3.7542446862412207e-06, + "loss": 0.0672, + "step": 6744 + }, + { + "epoch": 2.17, + "learning_rate": 3.7515322457283043e-06, + "loss": 0.0796, + "step": 6745 + }, + { + "epoch": 2.17, + "learning_rate": 3.7488205591820894e-06, + "loss": 0.0781, + "step": 6746 + }, + { + "epoch": 2.17, + "learning_rate": 3.7461096269297804e-06, + "loss": 0.069, + "step": 6747 + }, + { + "epoch": 2.17, + "learning_rate": 3.743399449298488e-06, + "loss": 0.0729, + "step": 6748 + }, + { + "epoch": 2.17, + "learning_rate": 3.7406900266152424e-06, + "loss": 0.0703, + "step": 6749 + }, + { + "epoch": 2.17, + "learning_rate": 3.7379813592069614e-06, + "loss": 0.0767, + "step": 6750 + }, + { + "epoch": 2.17, + "learning_rate": 3.7352734474004937e-06, + "loss": 0.0697, + "step": 6751 + }, + { + "epoch": 2.17, + "learning_rate": 3.7325662915225836e-06, + "loss": 0.0715, + "step": 6752 + }, + { + "epoch": 2.17, + "learning_rate": 3.7298598918998907e-06, + "loss": 0.0721, + "step": 6753 + }, + { + "epoch": 2.17, + "learning_rate": 3.727154248858974e-06, + "loss": 0.0748, + "step": 6754 + }, + { + "epoch": 2.17, + "learning_rate": 3.724449362726318e-06, + "loss": 0.0696, + "step": 6755 + }, + { + "epoch": 2.17, + "learning_rate": 3.721745233828299e-06, + "loss": 0.0767, + "step": 6756 + }, + { + "epoch": 2.17, + "learning_rate": 3.7190418624912104e-06, + "loss": 0.0633, + "step": 6757 + }, + { + "epoch": 2.17, + "learning_rate": 3.716339249041253e-06, + "loss": 0.0665, + "step": 6758 + }, + { + "epoch": 2.17, + "learning_rate": 3.7136373938045313e-06, + "loss": 0.0779, + "step": 6759 + }, + { + "epoch": 2.17, + "learning_rate": 3.7109362971070707e-06, + "loss": 0.0721, + "step": 6760 + }, + { + "epoch": 2.17, + "learning_rate": 3.7082359592747906e-06, + "loss": 0.0701, + "step": 6761 + }, + { + "epoch": 2.18, + "learning_rate": 3.7055363806335287e-06, + "loss": 0.0744, + "step": 6762 + }, + { + "epoch": 2.18, + "learning_rate": 3.702837561509027e-06, + "loss": 0.0671, + "step": 6763 + }, + { + "epoch": 2.18, + "learning_rate": 3.700139502226935e-06, + "loss": 0.0664, + "step": 6764 + }, + { + "epoch": 2.18, + "learning_rate": 3.697442203112809e-06, + "loss": 0.0755, + "step": 6765 + }, + { + "epoch": 2.18, + "learning_rate": 3.694745664492124e-06, + "loss": 0.0728, + "step": 6766 + }, + { + "epoch": 2.18, + "learning_rate": 3.692049886690252e-06, + "loss": 0.0764, + "step": 6767 + }, + { + "epoch": 2.18, + "learning_rate": 3.6893548700324775e-06, + "loss": 0.0742, + "step": 6768 + }, + { + "epoch": 2.18, + "learning_rate": 3.6866606148439932e-06, + "loss": 0.0719, + "step": 6769 + }, + { + "epoch": 2.18, + "learning_rate": 3.683967121449897e-06, + "loss": 0.0771, + "step": 6770 + }, + { + "epoch": 2.18, + "learning_rate": 3.6812743901751968e-06, + "loss": 0.0782, + "step": 6771 + }, + { + "epoch": 2.18, + "learning_rate": 3.678582421344814e-06, + "loss": 0.07, + "step": 6772 + }, + { + "epoch": 2.18, + "learning_rate": 3.6758912152835704e-06, + "loss": 0.0745, + "step": 6773 + }, + { + "epoch": 2.18, + "learning_rate": 3.6732007723161933e-06, + "loss": 0.0681, + "step": 6774 + }, + { + "epoch": 2.18, + "learning_rate": 3.670511092767336e-06, + "loss": 0.0674, + "step": 6775 + }, + { + "epoch": 2.18, + "learning_rate": 3.6678221769615343e-06, + "loss": 0.0624, + "step": 6776 + }, + { + "epoch": 2.18, + "learning_rate": 3.6651340252232448e-06, + "loss": 0.0725, + "step": 6777 + }, + { + "epoch": 2.18, + "learning_rate": 3.6624466378768387e-06, + "loss": 0.0714, + "step": 6778 + }, + { + "epoch": 2.18, + "learning_rate": 3.659760015246584e-06, + "loss": 0.0738, + "step": 6779 + }, + { + "epoch": 2.18, + "learning_rate": 3.657074157656656e-06, + "loss": 0.0727, + "step": 6780 + }, + { + "epoch": 2.18, + "learning_rate": 3.654389065431149e-06, + "loss": 0.0739, + "step": 6781 + }, + { + "epoch": 2.18, + "learning_rate": 3.6517047388940574e-06, + "loss": 0.0773, + "step": 6782 + }, + { + "epoch": 2.18, + "learning_rate": 3.649021178369273e-06, + "loss": 0.0752, + "step": 6783 + }, + { + "epoch": 2.18, + "learning_rate": 3.6463383841806167e-06, + "loss": 0.0758, + "step": 6784 + }, + { + "epoch": 2.18, + "learning_rate": 3.6436563566518025e-06, + "loss": 0.0797, + "step": 6785 + }, + { + "epoch": 2.18, + "learning_rate": 3.6409750961064507e-06, + "loss": 0.0692, + "step": 6786 + }, + { + "epoch": 2.18, + "learning_rate": 3.638294602868101e-06, + "loss": 0.0733, + "step": 6787 + }, + { + "epoch": 2.18, + "learning_rate": 3.635614877260193e-06, + "loss": 0.0725, + "step": 6788 + }, + { + "epoch": 2.18, + "learning_rate": 3.6329359196060643e-06, + "loss": 0.0659, + "step": 6789 + }, + { + "epoch": 2.18, + "learning_rate": 3.630257730228979e-06, + "loss": 0.0693, + "step": 6790 + }, + { + "epoch": 2.18, + "learning_rate": 3.6275803094520944e-06, + "loss": 0.0729, + "step": 6791 + }, + { + "epoch": 2.18, + "learning_rate": 3.6249036575984773e-06, + "loss": 0.0588, + "step": 6792 + }, + { + "epoch": 2.19, + "learning_rate": 3.6222277749911115e-06, + "loss": 0.0705, + "step": 6793 + }, + { + "epoch": 2.19, + "learning_rate": 3.6195526619528754e-06, + "loss": 0.0697, + "step": 6794 + }, + { + "epoch": 2.19, + "learning_rate": 3.6168783188065604e-06, + "loss": 0.0712, + "step": 6795 + }, + { + "epoch": 2.19, + "learning_rate": 3.614204745874863e-06, + "loss": 0.0753, + "step": 6796 + }, + { + "epoch": 2.19, + "learning_rate": 3.6115319434803897e-06, + "loss": 0.0701, + "step": 6797 + }, + { + "epoch": 2.19, + "learning_rate": 3.608859911945647e-06, + "loss": 0.0764, + "step": 6798 + }, + { + "epoch": 2.19, + "learning_rate": 3.606188651593061e-06, + "loss": 0.0719, + "step": 6799 + }, + { + "epoch": 2.19, + "learning_rate": 3.6035181627449544e-06, + "loss": 0.0752, + "step": 6800 + }, + { + "epoch": 2.19, + "learning_rate": 3.6008484457235603e-06, + "loss": 0.0734, + "step": 6801 + }, + { + "epoch": 2.19, + "learning_rate": 3.598179500851017e-06, + "loss": 0.0704, + "step": 6802 + }, + { + "epoch": 2.19, + "learning_rate": 3.59551132844937e-06, + "loss": 0.0712, + "step": 6803 + }, + { + "epoch": 2.19, + "learning_rate": 3.5928439288405703e-06, + "loss": 0.0671, + "step": 6804 + }, + { + "epoch": 2.19, + "learning_rate": 3.5901773023464847e-06, + "loss": 0.0592, + "step": 6805 + }, + { + "epoch": 2.19, + "learning_rate": 3.5875114492888754e-06, + "loss": 0.0788, + "step": 6806 + }, + { + "epoch": 2.19, + "learning_rate": 3.584846369989413e-06, + "loss": 0.0712, + "step": 6807 + }, + { + "epoch": 2.19, + "learning_rate": 3.582182064769687e-06, + "loss": 0.0715, + "step": 6808 + }, + { + "epoch": 2.19, + "learning_rate": 3.5795185339511727e-06, + "loss": 0.0683, + "step": 6809 + }, + { + "epoch": 2.19, + "learning_rate": 3.576855777855265e-06, + "loss": 0.0647, + "step": 6810 + }, + { + "epoch": 2.19, + "learning_rate": 3.574193796803268e-06, + "loss": 0.0684, + "step": 6811 + }, + { + "epoch": 2.19, + "learning_rate": 3.5715325911163868e-06, + "loss": 0.078, + "step": 6812 + }, + { + "epoch": 2.19, + "learning_rate": 3.5688721611157284e-06, + "loss": 0.071, + "step": 6813 + }, + { + "epoch": 2.19, + "learning_rate": 3.5662125071223217e-06, + "loss": 0.0746, + "step": 6814 + }, + { + "epoch": 2.19, + "learning_rate": 3.563553629457084e-06, + "loss": 0.0688, + "step": 6815 + }, + { + "epoch": 2.19, + "learning_rate": 3.560895528440844e-06, + "loss": 0.0707, + "step": 6816 + }, + { + "epoch": 2.19, + "learning_rate": 3.5582382043943474e-06, + "loss": 0.0676, + "step": 6817 + }, + { + "epoch": 2.19, + "learning_rate": 3.5555816576382353e-06, + "loss": 0.0672, + "step": 6818 + }, + { + "epoch": 2.19, + "learning_rate": 3.552925888493054e-06, + "loss": 0.0717, + "step": 6819 + }, + { + "epoch": 2.19, + "learning_rate": 3.5502708972792675e-06, + "loss": 0.0729, + "step": 6820 + }, + { + "epoch": 2.19, + "learning_rate": 3.5476166843172334e-06, + "loss": 0.0695, + "step": 6821 + }, + { + "epoch": 2.19, + "learning_rate": 3.5449632499272215e-06, + "loss": 0.0695, + "step": 6822 + }, + { + "epoch": 2.19, + "learning_rate": 3.5423105944294055e-06, + "loss": 0.0741, + "step": 6823 + }, + { + "epoch": 2.2, + "learning_rate": 3.539658718143868e-06, + "loss": 0.0734, + "step": 6824 + }, + { + "epoch": 2.2, + "learning_rate": 3.5370076213905904e-06, + "loss": 0.0741, + "step": 6825 + }, + { + "epoch": 2.2, + "learning_rate": 3.534357304489473e-06, + "loss": 0.0767, + "step": 6826 + }, + { + "epoch": 2.2, + "learning_rate": 3.5317077677603107e-06, + "loss": 0.0747, + "step": 6827 + }, + { + "epoch": 2.2, + "learning_rate": 3.5290590115228072e-06, + "loss": 0.0696, + "step": 6828 + }, + { + "epoch": 2.2, + "learning_rate": 3.526411036096574e-06, + "loss": 0.0645, + "step": 6829 + }, + { + "epoch": 2.2, + "learning_rate": 3.5237638418011255e-06, + "loss": 0.0649, + "step": 6830 + }, + { + "epoch": 2.2, + "learning_rate": 3.5211174289558815e-06, + "loss": 0.0686, + "step": 6831 + }, + { + "epoch": 2.2, + "learning_rate": 3.5184717978801764e-06, + "loss": 0.0714, + "step": 6832 + }, + { + "epoch": 2.2, + "learning_rate": 3.5158269488932384e-06, + "loss": 0.0696, + "step": 6833 + }, + { + "epoch": 2.2, + "learning_rate": 3.5131828823142065e-06, + "loss": 0.0799, + "step": 6834 + }, + { + "epoch": 2.2, + "learning_rate": 3.510539598462127e-06, + "loss": 0.08, + "step": 6835 + }, + { + "epoch": 2.2, + "learning_rate": 3.5078970976559434e-06, + "loss": 0.0753, + "step": 6836 + }, + { + "epoch": 2.2, + "learning_rate": 3.5052553802145205e-06, + "loss": 0.0712, + "step": 6837 + }, + { + "epoch": 2.2, + "learning_rate": 3.502614446456615e-06, + "loss": 0.074, + "step": 6838 + }, + { + "epoch": 2.2, + "learning_rate": 3.499974296700891e-06, + "loss": 0.0752, + "step": 6839 + }, + { + "epoch": 2.2, + "learning_rate": 3.497334931265923e-06, + "loss": 0.0657, + "step": 6840 + }, + { + "epoch": 2.2, + "learning_rate": 3.4946963504701883e-06, + "loss": 0.0705, + "step": 6841 + }, + { + "epoch": 2.2, + "learning_rate": 3.492058554632063e-06, + "loss": 0.0699, + "step": 6842 + }, + { + "epoch": 2.2, + "learning_rate": 3.4894215440698444e-06, + "loss": 0.0779, + "step": 6843 + }, + { + "epoch": 2.2, + "learning_rate": 3.486785319101721e-06, + "loss": 0.0684, + "step": 6844 + }, + { + "epoch": 2.2, + "learning_rate": 3.4841498800457897e-06, + "loss": 0.069, + "step": 6845 + }, + { + "epoch": 2.2, + "learning_rate": 3.4815152272200557e-06, + "loss": 0.0774, + "step": 6846 + }, + { + "epoch": 2.2, + "learning_rate": 3.4788813609424266e-06, + "loss": 0.0755, + "step": 6847 + }, + { + "epoch": 2.2, + "learning_rate": 3.476248281530712e-06, + "loss": 0.0668, + "step": 6848 + }, + { + "epoch": 2.2, + "learning_rate": 3.4736159893026376e-06, + "loss": 0.0697, + "step": 6849 + }, + { + "epoch": 2.2, + "learning_rate": 3.4709844845758244e-06, + "loss": 0.0663, + "step": 6850 + }, + { + "epoch": 2.2, + "learning_rate": 3.4683537676678e-06, + "loss": 0.0746, + "step": 6851 + }, + { + "epoch": 2.2, + "learning_rate": 3.4657238388959957e-06, + "loss": 0.0638, + "step": 6852 + }, + { + "epoch": 2.2, + "learning_rate": 3.463094698577759e-06, + "loss": 0.0717, + "step": 6853 + }, + { + "epoch": 2.2, + "learning_rate": 3.460466347030319e-06, + "loss": 0.0679, + "step": 6854 + }, + { + "epoch": 2.21, + "learning_rate": 3.457838784570835e-06, + "loss": 0.0708, + "step": 6855 + }, + { + "epoch": 2.21, + "learning_rate": 3.455212011516357e-06, + "loss": 0.0683, + "step": 6856 + }, + { + "epoch": 2.21, + "learning_rate": 3.4525860281838374e-06, + "loss": 0.0694, + "step": 6857 + }, + { + "epoch": 2.21, + "learning_rate": 3.449960834890147e-06, + "loss": 0.0654, + "step": 6858 + }, + { + "epoch": 2.21, + "learning_rate": 3.447336431952052e-06, + "loss": 0.0736, + "step": 6859 + }, + { + "epoch": 2.21, + "learning_rate": 3.4447128196862134e-06, + "loss": 0.0787, + "step": 6860 + }, + { + "epoch": 2.21, + "learning_rate": 3.4420899984092194e-06, + "loss": 0.0729, + "step": 6861 + }, + { + "epoch": 2.21, + "learning_rate": 3.4394679684375456e-06, + "loss": 0.0744, + "step": 6862 + }, + { + "epoch": 2.21, + "learning_rate": 3.436846730087574e-06, + "loss": 0.0657, + "step": 6863 + }, + { + "epoch": 2.21, + "learning_rate": 3.4342262836756013e-06, + "loss": 0.0698, + "step": 6864 + }, + { + "epoch": 2.21, + "learning_rate": 3.4316066295178197e-06, + "loss": 0.0721, + "step": 6865 + }, + { + "epoch": 2.21, + "learning_rate": 3.4289877679303263e-06, + "loss": 0.0722, + "step": 6866 + }, + { + "epoch": 2.21, + "learning_rate": 3.426369699229124e-06, + "loss": 0.0722, + "step": 6867 + }, + { + "epoch": 2.21, + "learning_rate": 3.4237524237301213e-06, + "loss": 0.0699, + "step": 6868 + }, + { + "epoch": 2.21, + "learning_rate": 3.4211359417491265e-06, + "loss": 0.071, + "step": 6869 + }, + { + "epoch": 2.21, + "learning_rate": 3.41852025360186e-06, + "loss": 0.0722, + "step": 6870 + }, + { + "epoch": 2.21, + "learning_rate": 3.4159053596039403e-06, + "loss": 0.0716, + "step": 6871 + }, + { + "epoch": 2.21, + "learning_rate": 3.4132912600708925e-06, + "loss": 0.0686, + "step": 6872 + }, + { + "epoch": 2.21, + "learning_rate": 3.4106779553181426e-06, + "loss": 0.0649, + "step": 6873 + }, + { + "epoch": 2.21, + "learning_rate": 3.408065445661024e-06, + "loss": 0.076, + "step": 6874 + }, + { + "epoch": 2.21, + "learning_rate": 3.4054537314147707e-06, + "loss": 0.0743, + "step": 6875 + }, + { + "epoch": 2.21, + "learning_rate": 3.402842812894529e-06, + "loss": 0.0697, + "step": 6876 + }, + { + "epoch": 2.21, + "learning_rate": 3.4002326904153403e-06, + "loss": 0.0686, + "step": 6877 + }, + { + "epoch": 2.21, + "learning_rate": 3.397623364292151e-06, + "loss": 0.0688, + "step": 6878 + }, + { + "epoch": 2.21, + "learning_rate": 3.3950148348398204e-06, + "loss": 0.0698, + "step": 6879 + }, + { + "epoch": 2.21, + "learning_rate": 3.3924071023730986e-06, + "loss": 0.066, + "step": 6880 + }, + { + "epoch": 2.21, + "learning_rate": 3.3898001672066426e-06, + "loss": 0.0707, + "step": 6881 + }, + { + "epoch": 2.21, + "learning_rate": 3.3871940296550256e-06, + "loss": 0.0662, + "step": 6882 + }, + { + "epoch": 2.21, + "learning_rate": 3.3845886900327086e-06, + "loss": 0.0705, + "step": 6883 + }, + { + "epoch": 2.21, + "learning_rate": 3.381984148654063e-06, + "loss": 0.0674, + "step": 6884 + }, + { + "epoch": 2.21, + "learning_rate": 3.3793804058333713e-06, + "loss": 0.0755, + "step": 6885 + }, + { + "epoch": 2.22, + "learning_rate": 3.376777461884805e-06, + "loss": 0.0738, + "step": 6886 + }, + { + "epoch": 2.22, + "learning_rate": 3.3741753171224432e-06, + "loss": 0.0753, + "step": 6887 + }, + { + "epoch": 2.22, + "learning_rate": 3.3715739718602803e-06, + "loss": 0.07, + "step": 6888 + }, + { + "epoch": 2.22, + "learning_rate": 3.3689734264122008e-06, + "loss": 0.0748, + "step": 6889 + }, + { + "epoch": 2.22, + "learning_rate": 3.366373681091997e-06, + "loss": 0.0761, + "step": 6890 + }, + { + "epoch": 2.22, + "learning_rate": 3.36377473621337e-06, + "loss": 0.0735, + "step": 6891 + }, + { + "epoch": 2.22, + "learning_rate": 3.361176592089919e-06, + "loss": 0.0767, + "step": 6892 + }, + { + "epoch": 2.22, + "learning_rate": 3.3585792490351387e-06, + "loss": 0.0718, + "step": 6893 + }, + { + "epoch": 2.22, + "learning_rate": 3.355982707362444e-06, + "loss": 0.0721, + "step": 6894 + }, + { + "epoch": 2.22, + "learning_rate": 3.3533869673851427e-06, + "loss": 0.0826, + "step": 6895 + }, + { + "epoch": 2.22, + "learning_rate": 3.350792029416444e-06, + "loss": 0.0713, + "step": 6896 + }, + { + "epoch": 2.22, + "learning_rate": 3.3481978937694704e-06, + "loss": 0.0744, + "step": 6897 + }, + { + "epoch": 2.22, + "learning_rate": 3.3456045607572418e-06, + "loss": 0.0657, + "step": 6898 + }, + { + "epoch": 2.22, + "learning_rate": 3.3430120306926716e-06, + "loss": 0.0678, + "step": 6899 + }, + { + "epoch": 2.22, + "learning_rate": 3.340420303888594e-06, + "loss": 0.0772, + "step": 6900 + }, + { + "epoch": 2.22, + "learning_rate": 3.3378293806577345e-06, + "loss": 0.07, + "step": 6901 + }, + { + "epoch": 2.22, + "learning_rate": 3.3352392613127227e-06, + "loss": 0.0752, + "step": 6902 + }, + { + "epoch": 2.22, + "learning_rate": 3.3326499461660987e-06, + "loss": 0.0656, + "step": 6903 + }, + { + "epoch": 2.22, + "learning_rate": 3.3300614355302986e-06, + "loss": 0.065, + "step": 6904 + }, + { + "epoch": 2.22, + "learning_rate": 3.327473729717662e-06, + "loss": 0.0701, + "step": 6905 + }, + { + "epoch": 2.22, + "learning_rate": 3.3248868290404336e-06, + "loss": 0.0748, + "step": 6906 + }, + { + "epoch": 2.22, + "learning_rate": 3.3223007338107583e-06, + "loss": 0.0711, + "step": 6907 + }, + { + "epoch": 2.22, + "learning_rate": 3.319715444340682e-06, + "loss": 0.0673, + "step": 6908 + }, + { + "epoch": 2.22, + "learning_rate": 3.3171309609421654e-06, + "loss": 0.0754, + "step": 6909 + }, + { + "epoch": 2.22, + "learning_rate": 3.3145472839270575e-06, + "loss": 0.076, + "step": 6910 + }, + { + "epoch": 2.22, + "learning_rate": 3.311964413607117e-06, + "loss": 0.0636, + "step": 6911 + }, + { + "epoch": 2.22, + "learning_rate": 3.309382350294005e-06, + "loss": 0.0658, + "step": 6912 + }, + { + "epoch": 2.22, + "learning_rate": 3.3068010942992824e-06, + "loss": 0.0694, + "step": 6913 + }, + { + "epoch": 2.22, + "learning_rate": 3.304220645934412e-06, + "loss": 0.0728, + "step": 6914 + }, + { + "epoch": 2.22, + "learning_rate": 3.3016410055107683e-06, + "loss": 0.0676, + "step": 6915 + }, + { + "epoch": 2.22, + "learning_rate": 3.299062173339619e-06, + "loss": 0.0716, + "step": 6916 + }, + { + "epoch": 2.23, + "learning_rate": 3.296484149732132e-06, + "loss": 0.0738, + "step": 6917 + }, + { + "epoch": 2.23, + "learning_rate": 3.2939069349993946e-06, + "loss": 0.0689, + "step": 6918 + }, + { + "epoch": 2.23, + "learning_rate": 3.2913305294523688e-06, + "loss": 0.0718, + "step": 6919 + }, + { + "epoch": 2.23, + "learning_rate": 3.2887549334019477e-06, + "loss": 0.0686, + "step": 6920 + }, + { + "epoch": 2.23, + "learning_rate": 3.2861801471589073e-06, + "loss": 0.0765, + "step": 6921 + }, + { + "epoch": 2.23, + "learning_rate": 3.283606171033934e-06, + "loss": 0.0711, + "step": 6922 + }, + { + "epoch": 2.23, + "learning_rate": 3.2810330053376114e-06, + "loss": 0.0676, + "step": 6923 + }, + { + "epoch": 2.23, + "learning_rate": 3.2784606503804383e-06, + "loss": 0.0699, + "step": 6924 + }, + { + "epoch": 2.23, + "learning_rate": 3.2758891064727917e-06, + "loss": 0.0707, + "step": 6925 + }, + { + "epoch": 2.23, + "learning_rate": 3.273318373924976e-06, + "loss": 0.07, + "step": 6926 + }, + { + "epoch": 2.23, + "learning_rate": 3.2707484530471846e-06, + "loss": 0.0701, + "step": 6927 + }, + { + "epoch": 2.23, + "learning_rate": 3.2681793441495123e-06, + "loss": 0.0655, + "step": 6928 + }, + { + "epoch": 2.23, + "learning_rate": 3.2656110475419576e-06, + "loss": 0.0764, + "step": 6929 + }, + { + "epoch": 2.23, + "learning_rate": 3.2630435635344283e-06, + "loss": 0.0803, + "step": 6930 + }, + { + "epoch": 2.23, + "learning_rate": 3.2604768924367234e-06, + "loss": 0.071, + "step": 6931 + }, + { + "epoch": 2.23, + "learning_rate": 3.25791103455855e-06, + "loss": 0.0717, + "step": 6932 + }, + { + "epoch": 2.23, + "learning_rate": 3.255345990209514e-06, + "loss": 0.0704, + "step": 6933 + }, + { + "epoch": 2.23, + "learning_rate": 3.252781759699123e-06, + "loss": 0.0691, + "step": 6934 + }, + { + "epoch": 2.23, + "learning_rate": 3.250218343336793e-06, + "loss": 0.0679, + "step": 6935 + }, + { + "epoch": 2.23, + "learning_rate": 3.247655741431833e-06, + "loss": 0.0679, + "step": 6936 + }, + { + "epoch": 2.23, + "learning_rate": 3.24509395429346e-06, + "loss": 0.0687, + "step": 6937 + }, + { + "epoch": 2.23, + "learning_rate": 3.2425329822307884e-06, + "loss": 0.0841, + "step": 6938 + }, + { + "epoch": 2.23, + "learning_rate": 3.239972825552835e-06, + "loss": 0.0754, + "step": 6939 + }, + { + "epoch": 2.23, + "learning_rate": 3.237413484568518e-06, + "loss": 0.0692, + "step": 6940 + }, + { + "epoch": 2.23, + "learning_rate": 3.234854959586663e-06, + "loss": 0.0719, + "step": 6941 + }, + { + "epoch": 2.23, + "learning_rate": 3.232297250915991e-06, + "loss": 0.0741, + "step": 6942 + }, + { + "epoch": 2.23, + "learning_rate": 3.2297403588651254e-06, + "loss": 0.0708, + "step": 6943 + }, + { + "epoch": 2.23, + "learning_rate": 3.2271842837425917e-06, + "loss": 0.0701, + "step": 6944 + }, + { + "epoch": 2.23, + "learning_rate": 3.224629025856816e-06, + "loss": 0.0676, + "step": 6945 + }, + { + "epoch": 2.23, + "learning_rate": 3.2220745855161226e-06, + "loss": 0.0687, + "step": 6946 + }, + { + "epoch": 2.23, + "learning_rate": 3.2195209630287514e-06, + "loss": 0.0696, + "step": 6947 + }, + { + "epoch": 2.24, + "learning_rate": 3.2169681587028266e-06, + "loss": 0.0677, + "step": 6948 + }, + { + "epoch": 2.24, + "learning_rate": 3.2144161728463806e-06, + "loss": 0.0648, + "step": 6949 + }, + { + "epoch": 2.24, + "learning_rate": 3.2118650057673493e-06, + "loss": 0.068, + "step": 6950 + }, + { + "epoch": 2.24, + "learning_rate": 3.2093146577735656e-06, + "loss": 0.0733, + "step": 6951 + }, + { + "epoch": 2.24, + "learning_rate": 3.206765129172762e-06, + "loss": 0.0705, + "step": 6952 + }, + { + "epoch": 2.24, + "learning_rate": 3.204216420272581e-06, + "loss": 0.0756, + "step": 6953 + }, + { + "epoch": 2.24, + "learning_rate": 3.2016685313805605e-06, + "loss": 0.0713, + "step": 6954 + }, + { + "epoch": 2.24, + "learning_rate": 3.1991214628041347e-06, + "loss": 0.0724, + "step": 6955 + }, + { + "epoch": 2.24, + "learning_rate": 3.196575214850649e-06, + "loss": 0.0806, + "step": 6956 + }, + { + "epoch": 2.24, + "learning_rate": 3.194029787827346e-06, + "loss": 0.075, + "step": 6957 + }, + { + "epoch": 2.24, + "learning_rate": 3.1914851820413573e-06, + "loss": 0.076, + "step": 6958 + }, + { + "epoch": 2.24, + "learning_rate": 3.1889413977997365e-06, + "loss": 0.0657, + "step": 6959 + }, + { + "epoch": 2.24, + "learning_rate": 3.1863984354094236e-06, + "loss": 0.0745, + "step": 6960 + }, + { + "epoch": 2.24, + "learning_rate": 3.18385629517726e-06, + "loss": 0.0732, + "step": 6961 + }, + { + "epoch": 2.24, + "learning_rate": 3.1813149774099973e-06, + "loss": 0.0721, + "step": 6962 + }, + { + "epoch": 2.24, + "learning_rate": 3.178774482414282e-06, + "loss": 0.0732, + "step": 6963 + }, + { + "epoch": 2.24, + "learning_rate": 3.176234810496651e-06, + "loss": 0.0713, + "step": 6964 + }, + { + "epoch": 2.24, + "learning_rate": 3.1736959619635634e-06, + "loss": 0.0788, + "step": 6965 + }, + { + "epoch": 2.24, + "learning_rate": 3.171157937121362e-06, + "loss": 0.0723, + "step": 6966 + }, + { + "epoch": 2.24, + "learning_rate": 3.168620736276293e-06, + "loss": 0.0731, + "step": 6967 + }, + { + "epoch": 2.24, + "learning_rate": 3.1660843597345137e-06, + "loss": 0.0778, + "step": 6968 + }, + { + "epoch": 2.24, + "learning_rate": 3.1635488078020695e-06, + "loss": 0.0716, + "step": 6969 + }, + { + "epoch": 2.24, + "learning_rate": 3.16101408078491e-06, + "loss": 0.0678, + "step": 6970 + }, + { + "epoch": 2.24, + "learning_rate": 3.1584801789888887e-06, + "loss": 0.0762, + "step": 6971 + }, + { + "epoch": 2.24, + "learning_rate": 3.155947102719754e-06, + "loss": 0.073, + "step": 6972 + }, + { + "epoch": 2.24, + "learning_rate": 3.1534148522831574e-06, + "loss": 0.0734, + "step": 6973 + }, + { + "epoch": 2.24, + "learning_rate": 3.150883427984656e-06, + "loss": 0.0746, + "step": 6974 + }, + { + "epoch": 2.24, + "learning_rate": 3.1483528301296985e-06, + "loss": 0.072, + "step": 6975 + }, + { + "epoch": 2.24, + "learning_rate": 3.145823059023638e-06, + "loss": 0.0741, + "step": 6976 + }, + { + "epoch": 2.24, + "learning_rate": 3.1432941149717277e-06, + "loss": 0.0728, + "step": 6977 + }, + { + "epoch": 2.24, + "learning_rate": 3.1407659982791204e-06, + "loss": 0.0691, + "step": 6978 + }, + { + "epoch": 2.25, + "learning_rate": 3.138238709250867e-06, + "loss": 0.0738, + "step": 6979 + }, + { + "epoch": 2.25, + "learning_rate": 3.1357122481919265e-06, + "loss": 0.0776, + "step": 6980 + }, + { + "epoch": 2.25, + "learning_rate": 3.13318661540715e-06, + "loss": 0.0774, + "step": 6981 + }, + { + "epoch": 2.25, + "learning_rate": 3.1306618112012876e-06, + "loss": 0.0679, + "step": 6982 + }, + { + "epoch": 2.25, + "learning_rate": 3.1281378358790015e-06, + "loss": 0.0654, + "step": 6983 + }, + { + "epoch": 2.25, + "learning_rate": 3.1256146897448367e-06, + "loss": 0.0643, + "step": 6984 + }, + { + "epoch": 2.25, + "learning_rate": 3.123092373103248e-06, + "loss": 0.073, + "step": 6985 + }, + { + "epoch": 2.25, + "learning_rate": 3.1205708862585927e-06, + "loss": 0.0734, + "step": 6986 + }, + { + "epoch": 2.25, + "learning_rate": 3.1180502295151214e-06, + "loss": 0.0762, + "step": 6987 + }, + { + "epoch": 2.25, + "learning_rate": 3.115530403176986e-06, + "loss": 0.0744, + "step": 6988 + }, + { + "epoch": 2.25, + "learning_rate": 3.1130114075482477e-06, + "loss": 0.0667, + "step": 6989 + }, + { + "epoch": 2.25, + "learning_rate": 3.110493242932849e-06, + "loss": 0.0728, + "step": 6990 + }, + { + "epoch": 2.25, + "learning_rate": 3.107975909634644e-06, + "loss": 0.0785, + "step": 6991 + }, + { + "epoch": 2.25, + "learning_rate": 3.1054594079573907e-06, + "loss": 0.0756, + "step": 6992 + }, + { + "epoch": 2.25, + "learning_rate": 3.1029437382047368e-06, + "loss": 0.0725, + "step": 6993 + }, + { + "epoch": 2.25, + "learning_rate": 3.1004289006802304e-06, + "loss": 0.0675, + "step": 6994 + }, + { + "epoch": 2.25, + "learning_rate": 3.097914895687333e-06, + "loss": 0.0696, + "step": 6995 + }, + { + "epoch": 2.25, + "learning_rate": 3.0954017235293863e-06, + "loss": 0.072, + "step": 6996 + }, + { + "epoch": 2.25, + "learning_rate": 3.0928893845096386e-06, + "loss": 0.077, + "step": 6997 + }, + { + "epoch": 2.25, + "learning_rate": 3.0903778789312468e-06, + "loss": 0.0742, + "step": 6998 + }, + { + "epoch": 2.25, + "learning_rate": 3.0878672070972557e-06, + "loss": 0.0717, + "step": 6999 + }, + { + "epoch": 2.25, + "learning_rate": 3.085357369310612e-06, + "loss": 0.0668, + "step": 7000 + }, + { + "epoch": 2.25, + "learning_rate": 3.0828483658741693e-06, + "loss": 0.0704, + "step": 7001 + }, + { + "epoch": 2.25, + "learning_rate": 3.0803401970906698e-06, + "loss": 0.0711, + "step": 7002 + }, + { + "epoch": 2.25, + "learning_rate": 3.0778328632627618e-06, + "loss": 0.0776, + "step": 7003 + }, + { + "epoch": 2.25, + "learning_rate": 3.075326364692991e-06, + "loss": 0.068, + "step": 7004 + }, + { + "epoch": 2.25, + "learning_rate": 3.072820701683801e-06, + "loss": 0.0852, + "step": 7005 + }, + { + "epoch": 2.25, + "learning_rate": 3.0703158745375316e-06, + "loss": 0.0753, + "step": 7006 + }, + { + "epoch": 2.25, + "learning_rate": 3.067811883556435e-06, + "loss": 0.0689, + "step": 7007 + }, + { + "epoch": 2.25, + "learning_rate": 3.0653087290426484e-06, + "loss": 0.0699, + "step": 7008 + }, + { + "epoch": 2.25, + "learning_rate": 3.062806411298214e-06, + "loss": 0.0726, + "step": 7009 + }, + { + "epoch": 2.26, + "learning_rate": 3.0603049306250697e-06, + "loss": 0.0762, + "step": 7010 + }, + { + "epoch": 2.26, + "learning_rate": 3.0578042873250592e-06, + "loss": 0.072, + "step": 7011 + }, + { + "epoch": 2.26, + "learning_rate": 3.0553044816999133e-06, + "loss": 0.0683, + "step": 7012 + }, + { + "epoch": 2.26, + "learning_rate": 3.052805514051278e-06, + "loss": 0.0714, + "step": 7013 + }, + { + "epoch": 2.26, + "learning_rate": 3.0503073846806854e-06, + "loss": 0.0739, + "step": 7014 + }, + { + "epoch": 2.26, + "learning_rate": 3.0478100938895705e-06, + "loss": 0.0755, + "step": 7015 + }, + { + "epoch": 2.26, + "learning_rate": 3.045313641979267e-06, + "loss": 0.0669, + "step": 7016 + }, + { + "epoch": 2.26, + "learning_rate": 3.0428180292510055e-06, + "loss": 0.066, + "step": 7017 + }, + { + "epoch": 2.26, + "learning_rate": 3.0403232560059216e-06, + "loss": 0.0672, + "step": 7018 + }, + { + "epoch": 2.26, + "learning_rate": 3.0378293225450426e-06, + "loss": 0.0737, + "step": 7019 + }, + { + "epoch": 2.26, + "learning_rate": 3.035336229169297e-06, + "loss": 0.0719, + "step": 7020 + }, + { + "epoch": 2.26, + "learning_rate": 3.0328439761795137e-06, + "loss": 0.0715, + "step": 7021 + }, + { + "epoch": 2.26, + "learning_rate": 3.0303525638764163e-06, + "loss": 0.0799, + "step": 7022 + }, + { + "epoch": 2.26, + "learning_rate": 3.0278619925606266e-06, + "loss": 0.0728, + "step": 7023 + }, + { + "epoch": 2.26, + "learning_rate": 3.0253722625326756e-06, + "loss": 0.069, + "step": 7024 + }, + { + "epoch": 2.26, + "learning_rate": 3.0228833740929796e-06, + "loss": 0.0702, + "step": 7025 + }, + { + "epoch": 2.26, + "learning_rate": 3.0203953275418584e-06, + "loss": 0.0675, + "step": 7026 + }, + { + "epoch": 2.26, + "learning_rate": 3.0179081231795293e-06, + "loss": 0.0776, + "step": 7027 + }, + { + "epoch": 2.26, + "learning_rate": 3.0154217613061165e-06, + "loss": 0.0655, + "step": 7028 + }, + { + "epoch": 2.26, + "learning_rate": 3.0129362422216223e-06, + "loss": 0.0671, + "step": 7029 + }, + { + "epoch": 2.26, + "learning_rate": 3.0104515662259703e-06, + "loss": 0.0734, + "step": 7030 + }, + { + "epoch": 2.26, + "learning_rate": 3.0079677336189685e-06, + "loss": 0.0678, + "step": 7031 + }, + { + "epoch": 2.26, + "learning_rate": 3.0054847447003243e-06, + "loss": 0.0673, + "step": 7032 + }, + { + "epoch": 2.26, + "learning_rate": 3.003002599769651e-06, + "loss": 0.0663, + "step": 7033 + }, + { + "epoch": 2.26, + "learning_rate": 3.0005212991264554e-06, + "loss": 0.0671, + "step": 7034 + }, + { + "epoch": 2.26, + "learning_rate": 2.9980408430701326e-06, + "loss": 0.0691, + "step": 7035 + }, + { + "epoch": 2.26, + "learning_rate": 2.9955612318999927e-06, + "loss": 0.0706, + "step": 7036 + }, + { + "epoch": 2.26, + "learning_rate": 2.9930824659152348e-06, + "loss": 0.0718, + "step": 7037 + }, + { + "epoch": 2.26, + "learning_rate": 2.9906045454149535e-06, + "loss": 0.0732, + "step": 7038 + }, + { + "epoch": 2.26, + "learning_rate": 2.9881274706981522e-06, + "loss": 0.0717, + "step": 7039 + }, + { + "epoch": 2.26, + "learning_rate": 2.9856512420637206e-06, + "loss": 0.0701, + "step": 7040 + }, + { + "epoch": 2.27, + "learning_rate": 2.9831758598104533e-06, + "loss": 0.0758, + "step": 7041 + }, + { + "epoch": 2.27, + "learning_rate": 2.9807013242370386e-06, + "loss": 0.07, + "step": 7042 + }, + { + "epoch": 2.27, + "learning_rate": 2.9782276356420646e-06, + "loss": 0.0714, + "step": 7043 + }, + { + "epoch": 2.27, + "learning_rate": 2.975754794324015e-06, + "loss": 0.0745, + "step": 7044 + }, + { + "epoch": 2.27, + "learning_rate": 2.9732828005812787e-06, + "loss": 0.0649, + "step": 7045 + }, + { + "epoch": 2.27, + "learning_rate": 2.9708116547121333e-06, + "loss": 0.071, + "step": 7046 + }, + { + "epoch": 2.27, + "learning_rate": 2.96834135701476e-06, + "loss": 0.0734, + "step": 7047 + }, + { + "epoch": 2.27, + "learning_rate": 2.9658719077872334e-06, + "loss": 0.0652, + "step": 7048 + }, + { + "epoch": 2.27, + "learning_rate": 2.9634033073275283e-06, + "loss": 0.0682, + "step": 7049 + }, + { + "epoch": 2.27, + "learning_rate": 2.960935555933514e-06, + "loss": 0.0736, + "step": 7050 + }, + { + "epoch": 2.27, + "learning_rate": 2.958468653902965e-06, + "loss": 0.0768, + "step": 7051 + }, + { + "epoch": 2.27, + "learning_rate": 2.9560026015335462e-06, + "loss": 0.0712, + "step": 7052 + }, + { + "epoch": 2.27, + "learning_rate": 2.9535373991228177e-06, + "loss": 0.0685, + "step": 7053 + }, + { + "epoch": 2.27, + "learning_rate": 2.9510730469682525e-06, + "loss": 0.0697, + "step": 7054 + }, + { + "epoch": 2.27, + "learning_rate": 2.948609545367198e-06, + "loss": 0.069, + "step": 7055 + }, + { + "epoch": 2.27, + "learning_rate": 2.9461468946169137e-06, + "loss": 0.0646, + "step": 7056 + }, + { + "epoch": 2.27, + "learning_rate": 2.943685095014557e-06, + "loss": 0.0727, + "step": 7057 + }, + { + "epoch": 2.27, + "learning_rate": 2.9412241468571766e-06, + "loss": 0.0643, + "step": 7058 + }, + { + "epoch": 2.27, + "learning_rate": 2.938764050441719e-06, + "loss": 0.0722, + "step": 7059 + }, + { + "epoch": 2.27, + "learning_rate": 2.936304806065039e-06, + "loss": 0.066, + "step": 7060 + }, + { + "epoch": 2.27, + "learning_rate": 2.9338464140238687e-06, + "loss": 0.0754, + "step": 7061 + }, + { + "epoch": 2.27, + "learning_rate": 2.93138887461485e-06, + "loss": 0.0706, + "step": 7062 + }, + { + "epoch": 2.27, + "learning_rate": 2.9289321881345257e-06, + "loss": 0.0701, + "step": 7063 + }, + { + "epoch": 2.27, + "learning_rate": 2.9264763548793263e-06, + "loss": 0.0728, + "step": 7064 + }, + { + "epoch": 2.27, + "learning_rate": 2.9240213751455804e-06, + "loss": 0.0717, + "step": 7065 + }, + { + "epoch": 2.27, + "learning_rate": 2.9215672492295233e-06, + "loss": 0.077, + "step": 7066 + }, + { + "epoch": 2.27, + "learning_rate": 2.919113977427278e-06, + "loss": 0.0668, + "step": 7067 + }, + { + "epoch": 2.27, + "learning_rate": 2.9166615600348603e-06, + "loss": 0.0657, + "step": 7068 + }, + { + "epoch": 2.27, + "learning_rate": 2.9142099973481963e-06, + "loss": 0.0719, + "step": 7069 + }, + { + "epoch": 2.27, + "learning_rate": 2.911759289663101e-06, + "loss": 0.0744, + "step": 7070 + }, + { + "epoch": 2.27, + "learning_rate": 2.9093094372752815e-06, + "loss": 0.0577, + "step": 7071 + }, + { + "epoch": 2.28, + "learning_rate": 2.906860440480356e-06, + "loss": 0.0771, + "step": 7072 + }, + { + "epoch": 2.28, + "learning_rate": 2.9044122995738287e-06, + "loss": 0.0751, + "step": 7073 + }, + { + "epoch": 2.28, + "learning_rate": 2.901965014851096e-06, + "loss": 0.0701, + "step": 7074 + }, + { + "epoch": 2.28, + "learning_rate": 2.8995185866074647e-06, + "loss": 0.075, + "step": 7075 + }, + { + "epoch": 2.28, + "learning_rate": 2.897073015138129e-06, + "loss": 0.0659, + "step": 7076 + }, + { + "epoch": 2.28, + "learning_rate": 2.8946283007381794e-06, + "loss": 0.0718, + "step": 7077 + }, + { + "epoch": 2.28, + "learning_rate": 2.8921844437026113e-06, + "loss": 0.0815, + "step": 7078 + }, + { + "epoch": 2.28, + "learning_rate": 2.889741444326307e-06, + "loss": 0.0714, + "step": 7079 + }, + { + "epoch": 2.28, + "learning_rate": 2.8872993029040506e-06, + "loss": 0.0723, + "step": 7080 + }, + { + "epoch": 2.28, + "learning_rate": 2.8848580197305197e-06, + "loss": 0.0723, + "step": 7081 + }, + { + "epoch": 2.28, + "learning_rate": 2.8824175951002918e-06, + "loss": 0.0676, + "step": 7082 + }, + { + "epoch": 2.28, + "learning_rate": 2.879978029307835e-06, + "loss": 0.0754, + "step": 7083 + }, + { + "epoch": 2.28, + "learning_rate": 2.8775393226475225e-06, + "loss": 0.0682, + "step": 7084 + }, + { + "epoch": 2.28, + "learning_rate": 2.8751014754136174e-06, + "loss": 0.0707, + "step": 7085 + }, + { + "epoch": 2.28, + "learning_rate": 2.8726644879002817e-06, + "loss": 0.0726, + "step": 7086 + }, + { + "epoch": 2.28, + "learning_rate": 2.8702283604015703e-06, + "loss": 0.0702, + "step": 7087 + }, + { + "epoch": 2.28, + "learning_rate": 2.867793093211438e-06, + "loss": 0.0694, + "step": 7088 + }, + { + "epoch": 2.28, + "learning_rate": 2.8653586866237313e-06, + "loss": 0.0755, + "step": 7089 + }, + { + "epoch": 2.28, + "learning_rate": 2.8629251409322024e-06, + "loss": 0.0731, + "step": 7090 + }, + { + "epoch": 2.28, + "learning_rate": 2.8604924564304894e-06, + "loss": 0.0733, + "step": 7091 + }, + { + "epoch": 2.28, + "learning_rate": 2.8580606334121286e-06, + "loss": 0.073, + "step": 7092 + }, + { + "epoch": 2.28, + "learning_rate": 2.8556296721705622e-06, + "loss": 0.071, + "step": 7093 + }, + { + "epoch": 2.28, + "learning_rate": 2.853199572999111e-06, + "loss": 0.0692, + "step": 7094 + }, + { + "epoch": 2.28, + "learning_rate": 2.8507703361910024e-06, + "loss": 0.0706, + "step": 7095 + }, + { + "epoch": 2.28, + "learning_rate": 2.8483419620393636e-06, + "loss": 0.0708, + "step": 7096 + }, + { + "epoch": 2.28, + "learning_rate": 2.8459144508372093e-06, + "loss": 0.0622, + "step": 7097 + }, + { + "epoch": 2.28, + "learning_rate": 2.8434878028774503e-06, + "loss": 0.0725, + "step": 7098 + }, + { + "epoch": 2.28, + "learning_rate": 2.841062018452907e-06, + "loss": 0.0713, + "step": 7099 + }, + { + "epoch": 2.28, + "learning_rate": 2.8386370978562707e-06, + "loss": 0.0701, + "step": 7100 + }, + { + "epoch": 2.28, + "learning_rate": 2.8362130413801524e-06, + "loss": 0.0757, + "step": 7101 + }, + { + "epoch": 2.28, + "learning_rate": 2.833789849317046e-06, + "loss": 0.0677, + "step": 7102 + }, + { + "epoch": 2.29, + "learning_rate": 2.831367521959345e-06, + "loss": 0.0677, + "step": 7103 + }, + { + "epoch": 2.29, + "learning_rate": 2.8289460595993334e-06, + "loss": 0.0707, + "step": 7104 + }, + { + "epoch": 2.29, + "learning_rate": 2.8265254625292014e-06, + "loss": 0.0691, + "step": 7105 + }, + { + "epoch": 2.29, + "learning_rate": 2.824105731041027e-06, + "loss": 0.0654, + "step": 7106 + }, + { + "epoch": 2.29, + "learning_rate": 2.8216868654267836e-06, + "loss": 0.0704, + "step": 7107 + }, + { + "epoch": 2.29, + "learning_rate": 2.819268865978343e-06, + "loss": 0.0824, + "step": 7108 + }, + { + "epoch": 2.29, + "learning_rate": 2.81685173298747e-06, + "loss": 0.0747, + "step": 7109 + }, + { + "epoch": 2.29, + "learning_rate": 2.814435466745824e-06, + "loss": 0.0714, + "step": 7110 + }, + { + "epoch": 2.29, + "learning_rate": 2.8120200675449683e-06, + "loss": 0.0718, + "step": 7111 + }, + { + "epoch": 2.29, + "learning_rate": 2.809605535676352e-06, + "loss": 0.0747, + "step": 7112 + }, + { + "epoch": 2.29, + "learning_rate": 2.8071918714313217e-06, + "loss": 0.0685, + "step": 7113 + }, + { + "epoch": 2.29, + "learning_rate": 2.8047790751011216e-06, + "loss": 0.0716, + "step": 7114 + }, + { + "epoch": 2.29, + "learning_rate": 2.8023671469768866e-06, + "loss": 0.0714, + "step": 7115 + }, + { + "epoch": 2.29, + "learning_rate": 2.799956087349657e-06, + "loss": 0.0741, + "step": 7116 + }, + { + "epoch": 2.29, + "learning_rate": 2.7975458965103564e-06, + "loss": 0.0711, + "step": 7117 + }, + { + "epoch": 2.29, + "learning_rate": 2.7951365747498114e-06, + "loss": 0.0702, + "step": 7118 + }, + { + "epoch": 2.29, + "learning_rate": 2.792728122358739e-06, + "loss": 0.0822, + "step": 7119 + }, + { + "epoch": 2.29, + "learning_rate": 2.7903205396277546e-06, + "loss": 0.0681, + "step": 7120 + }, + { + "epoch": 2.29, + "learning_rate": 2.787913826847363e-06, + "loss": 0.0697, + "step": 7121 + }, + { + "epoch": 2.29, + "learning_rate": 2.785507984307975e-06, + "loss": 0.071, + "step": 7122 + }, + { + "epoch": 2.29, + "learning_rate": 2.783103012299887e-06, + "loss": 0.0708, + "step": 7123 + }, + { + "epoch": 2.29, + "learning_rate": 2.780698911113293e-06, + "loss": 0.0696, + "step": 7124 + }, + { + "epoch": 2.29, + "learning_rate": 2.7782956810382834e-06, + "loss": 0.0718, + "step": 7125 + }, + { + "epoch": 2.29, + "learning_rate": 2.77589332236484e-06, + "loss": 0.0732, + "step": 7126 + }, + { + "epoch": 2.29, + "learning_rate": 2.773491835382839e-06, + "loss": 0.0678, + "step": 7127 + }, + { + "epoch": 2.29, + "learning_rate": 2.7710912203820618e-06, + "loss": 0.0676, + "step": 7128 + }, + { + "epoch": 2.29, + "learning_rate": 2.768691477652171e-06, + "loss": 0.0756, + "step": 7129 + }, + { + "epoch": 2.29, + "learning_rate": 2.76629260748273e-06, + "loss": 0.0683, + "step": 7130 + }, + { + "epoch": 2.29, + "learning_rate": 2.7638946101632004e-06, + "loss": 0.0695, + "step": 7131 + }, + { + "epoch": 2.29, + "learning_rate": 2.761497485982936e-06, + "loss": 0.0658, + "step": 7132 + }, + { + "epoch": 2.29, + "learning_rate": 2.759101235231173e-06, + "loss": 0.0733, + "step": 7133 + }, + { + "epoch": 2.29, + "learning_rate": 2.7567058581970653e-06, + "loss": 0.069, + "step": 7134 + }, + { + "epoch": 2.3, + "learning_rate": 2.754311355169643e-06, + "loss": 0.0709, + "step": 7135 + }, + { + "epoch": 2.3, + "learning_rate": 2.7519177264378373e-06, + "loss": 0.069, + "step": 7136 + }, + { + "epoch": 2.3, + "learning_rate": 2.7495249722904773e-06, + "loss": 0.0757, + "step": 7137 + }, + { + "epoch": 2.3, + "learning_rate": 2.7471330930162834e-06, + "loss": 0.0705, + "step": 7138 + }, + { + "epoch": 2.3, + "learning_rate": 2.744742088903861e-06, + "loss": 0.0734, + "step": 7139 + }, + { + "epoch": 2.3, + "learning_rate": 2.742351960241728e-06, + "loss": 0.075, + "step": 7140 + }, + { + "epoch": 2.3, + "learning_rate": 2.7399627073182844e-06, + "loss": 0.0672, + "step": 7141 + }, + { + "epoch": 2.3, + "learning_rate": 2.737574330421825e-06, + "loss": 0.0739, + "step": 7142 + }, + { + "epoch": 2.3, + "learning_rate": 2.735186829840547e-06, + "loss": 0.0695, + "step": 7143 + }, + { + "epoch": 2.3, + "learning_rate": 2.732800205862536e-06, + "loss": 0.0723, + "step": 7144 + }, + { + "epoch": 2.3, + "learning_rate": 2.7304144587757643e-06, + "loss": 0.0733, + "step": 7145 + }, + { + "epoch": 2.3, + "learning_rate": 2.728029588868114e-06, + "loss": 0.0668, + "step": 7146 + }, + { + "epoch": 2.3, + "learning_rate": 2.725645596427352e-06, + "loss": 0.069, + "step": 7147 + }, + { + "epoch": 2.3, + "learning_rate": 2.723262481741138e-06, + "loss": 0.0775, + "step": 7148 + }, + { + "epoch": 2.3, + "learning_rate": 2.720880245097033e-06, + "loss": 0.0691, + "step": 7149 + }, + { + "epoch": 2.3, + "learning_rate": 2.718498886782486e-06, + "loss": 0.0717, + "step": 7150 + }, + { + "epoch": 2.3, + "learning_rate": 2.7161184070848423e-06, + "loss": 0.0613, + "step": 7151 + }, + { + "epoch": 2.3, + "learning_rate": 2.7137388062913394e-06, + "loss": 0.0744, + "step": 7152 + }, + { + "epoch": 2.3, + "learning_rate": 2.7113600846891117e-06, + "loss": 0.0755, + "step": 7153 + }, + { + "epoch": 2.3, + "learning_rate": 2.7089822425651814e-06, + "loss": 0.077, + "step": 7154 + }, + { + "epoch": 2.3, + "learning_rate": 2.706605280206476e-06, + "loss": 0.069, + "step": 7155 + }, + { + "epoch": 2.3, + "learning_rate": 2.704229197899806e-06, + "loss": 0.0712, + "step": 7156 + }, + { + "epoch": 2.3, + "learning_rate": 2.7018539959318802e-06, + "loss": 0.0677, + "step": 7157 + }, + { + "epoch": 2.3, + "learning_rate": 2.6994796745893e-06, + "loss": 0.0663, + "step": 7158 + }, + { + "epoch": 2.3, + "learning_rate": 2.697106234158563e-06, + "loss": 0.0727, + "step": 7159 + }, + { + "epoch": 2.3, + "learning_rate": 2.6947336749260533e-06, + "loss": 0.0709, + "step": 7160 + }, + { + "epoch": 2.3, + "learning_rate": 2.692361997178061e-06, + "loss": 0.0738, + "step": 7161 + }, + { + "epoch": 2.3, + "learning_rate": 2.68999120120076e-06, + "loss": 0.0658, + "step": 7162 + }, + { + "epoch": 2.3, + "learning_rate": 2.6876212872802164e-06, + "loss": 0.0751, + "step": 7163 + }, + { + "epoch": 2.3, + "learning_rate": 2.685252255702405e-06, + "loss": 0.0698, + "step": 7164 + }, + { + "epoch": 2.3, + "learning_rate": 2.682884106753174e-06, + "loss": 0.0734, + "step": 7165 + }, + { + "epoch": 2.31, + "learning_rate": 2.6805168407182726e-06, + "loss": 0.0735, + "step": 7166 + }, + { + "epoch": 2.31, + "learning_rate": 2.678150457883352e-06, + "loss": 0.0636, + "step": 7167 + }, + { + "epoch": 2.31, + "learning_rate": 2.675784958533948e-06, + "loss": 0.0723, + "step": 7168 + }, + { + "epoch": 2.31, + "learning_rate": 2.673420342955487e-06, + "loss": 0.0741, + "step": 7169 + }, + { + "epoch": 2.31, + "learning_rate": 2.6710566114333048e-06, + "loss": 0.0779, + "step": 7170 + }, + { + "epoch": 2.31, + "learning_rate": 2.668693764252608e-06, + "loss": 0.0767, + "step": 7171 + }, + { + "epoch": 2.31, + "learning_rate": 2.66633180169851e-06, + "loss": 0.0697, + "step": 7172 + }, + { + "epoch": 2.31, + "learning_rate": 2.6639707240560184e-06, + "loss": 0.0641, + "step": 7173 + }, + { + "epoch": 2.31, + "learning_rate": 2.6616105316100317e-06, + "loss": 0.0705, + "step": 7174 + }, + { + "epoch": 2.31, + "learning_rate": 2.659251224645334e-06, + "loss": 0.0675, + "step": 7175 + }, + { + "epoch": 2.31, + "learning_rate": 2.6568928034466178e-06, + "loss": 0.0696, + "step": 7176 + }, + { + "epoch": 2.31, + "learning_rate": 2.654535268298457e-06, + "loss": 0.0718, + "step": 7177 + }, + { + "epoch": 2.31, + "learning_rate": 2.6521786194853205e-06, + "loss": 0.0707, + "step": 7178 + }, + { + "epoch": 2.31, + "learning_rate": 2.649822857291573e-06, + "loss": 0.0687, + "step": 7179 + }, + { + "epoch": 2.31, + "learning_rate": 2.6474679820014694e-06, + "loss": 0.0709, + "step": 7180 + }, + { + "epoch": 2.31, + "learning_rate": 2.6451139938991565e-06, + "loss": 0.0823, + "step": 7181 + }, + { + "epoch": 2.31, + "learning_rate": 2.642760893268684e-06, + "loss": 0.0755, + "step": 7182 + }, + { + "epoch": 2.31, + "learning_rate": 2.640408680393982e-06, + "loss": 0.0712, + "step": 7183 + }, + { + "epoch": 2.31, + "learning_rate": 2.6380573555588797e-06, + "loss": 0.0745, + "step": 7184 + }, + { + "epoch": 2.31, + "learning_rate": 2.635706919047096e-06, + "loss": 0.0694, + "step": 7185 + }, + { + "epoch": 2.31, + "learning_rate": 2.633357371142248e-06, + "loss": 0.0706, + "step": 7186 + }, + { + "epoch": 2.31, + "learning_rate": 2.631008712127836e-06, + "loss": 0.0738, + "step": 7187 + }, + { + "epoch": 2.31, + "learning_rate": 2.6286609422872665e-06, + "loss": 0.0709, + "step": 7188 + }, + { + "epoch": 2.31, + "learning_rate": 2.6263140619038284e-06, + "loss": 0.0715, + "step": 7189 + }, + { + "epoch": 2.31, + "learning_rate": 2.6239680712607075e-06, + "loss": 0.0669, + "step": 7190 + }, + { + "epoch": 2.31, + "learning_rate": 2.621622970640978e-06, + "loss": 0.0741, + "step": 7191 + }, + { + "epoch": 2.31, + "learning_rate": 2.6192787603276125e-06, + "loss": 0.0665, + "step": 7192 + }, + { + "epoch": 2.31, + "learning_rate": 2.6169354406034687e-06, + "loss": 0.0656, + "step": 7193 + }, + { + "epoch": 2.31, + "learning_rate": 2.6145930117513086e-06, + "loss": 0.0737, + "step": 7194 + }, + { + "epoch": 2.31, + "learning_rate": 2.612251474053775e-06, + "loss": 0.0787, + "step": 7195 + }, + { + "epoch": 2.31, + "learning_rate": 2.6099108277934105e-06, + "loss": 0.0735, + "step": 7196 + }, + { + "epoch": 2.32, + "learning_rate": 2.6075710732526448e-06, + "loss": 0.0671, + "step": 7197 + }, + { + "epoch": 2.32, + "learning_rate": 2.6052322107138017e-06, + "loss": 0.0716, + "step": 7198 + }, + { + "epoch": 2.32, + "learning_rate": 2.602894240459103e-06, + "loss": 0.0745, + "step": 7199 + }, + { + "epoch": 2.32, + "learning_rate": 2.600557162770655e-06, + "loss": 0.0671, + "step": 7200 + }, + { + "epoch": 2.32, + "learning_rate": 2.598220977930461e-06, + "loss": 0.0743, + "step": 7201 + }, + { + "epoch": 2.32, + "learning_rate": 2.5958856862204098e-06, + "loss": 0.0684, + "step": 7202 + }, + { + "epoch": 2.32, + "learning_rate": 2.593551287922298e-06, + "loss": 0.0764, + "step": 7203 + }, + { + "epoch": 2.32, + "learning_rate": 2.591217783317792e-06, + "loss": 0.0885, + "step": 7204 + }, + { + "epoch": 2.32, + "learning_rate": 2.5888851726884712e-06, + "loss": 0.0717, + "step": 7205 + }, + { + "epoch": 2.32, + "learning_rate": 2.5865534563157956e-06, + "loss": 0.0642, + "step": 7206 + }, + { + "epoch": 2.32, + "learning_rate": 2.5842226344811193e-06, + "loss": 0.0802, + "step": 7207 + }, + { + "epoch": 2.32, + "learning_rate": 2.5818927074656874e-06, + "loss": 0.0695, + "step": 7208 + }, + { + "epoch": 2.32, + "learning_rate": 2.579563675550646e-06, + "loss": 0.0653, + "step": 7209 + }, + { + "epoch": 2.32, + "learning_rate": 2.5772355390170155e-06, + "loss": 0.0788, + "step": 7210 + }, + { + "epoch": 2.32, + "learning_rate": 2.5749082981457273e-06, + "loss": 0.069, + "step": 7211 + }, + { + "epoch": 2.32, + "learning_rate": 2.572581953217592e-06, + "loss": 0.0721, + "step": 7212 + }, + { + "epoch": 2.32, + "learning_rate": 2.5702565045133167e-06, + "loss": 0.0894, + "step": 7213 + }, + { + "epoch": 2.32, + "learning_rate": 2.5679319523135016e-06, + "loss": 0.0705, + "step": 7214 + }, + { + "epoch": 2.32, + "learning_rate": 2.5656082968986372e-06, + "loss": 0.0745, + "step": 7215 + }, + { + "epoch": 2.32, + "learning_rate": 2.563285538549104e-06, + "loss": 0.0727, + "step": 7216 + }, + { + "epoch": 2.32, + "learning_rate": 2.5609636775451762e-06, + "loss": 0.0679, + "step": 7217 + }, + { + "epoch": 2.32, + "learning_rate": 2.558642714167021e-06, + "loss": 0.067, + "step": 7218 + }, + { + "epoch": 2.32, + "learning_rate": 2.5563226486946902e-06, + "loss": 0.068, + "step": 7219 + }, + { + "epoch": 2.32, + "learning_rate": 2.5540034814081406e-06, + "loss": 0.0657, + "step": 7220 + }, + { + "epoch": 2.32, + "learning_rate": 2.551685212587209e-06, + "loss": 0.0801, + "step": 7221 + }, + { + "epoch": 2.32, + "learning_rate": 2.5493678425116275e-06, + "loss": 0.0717, + "step": 7222 + }, + { + "epoch": 2.32, + "learning_rate": 2.54705137146102e-06, + "loss": 0.0783, + "step": 7223 + }, + { + "epoch": 2.32, + "learning_rate": 2.5447357997149025e-06, + "loss": 0.0701, + "step": 7224 + }, + { + "epoch": 2.32, + "learning_rate": 2.5424211275526777e-06, + "loss": 0.0741, + "step": 7225 + }, + { + "epoch": 2.32, + "learning_rate": 2.5401073552536505e-06, + "loss": 0.0682, + "step": 7226 + }, + { + "epoch": 2.32, + "learning_rate": 2.537794483097007e-06, + "loss": 0.071, + "step": 7227 + }, + { + "epoch": 2.33, + "learning_rate": 2.5354825113618253e-06, + "loss": 0.076, + "step": 7228 + }, + { + "epoch": 2.33, + "learning_rate": 2.5331714403270867e-06, + "loss": 0.0684, + "step": 7229 + }, + { + "epoch": 2.33, + "learning_rate": 2.530861270271646e-06, + "loss": 0.0731, + "step": 7230 + }, + { + "epoch": 2.33, + "learning_rate": 2.528552001474258e-06, + "loss": 0.0655, + "step": 7231 + }, + { + "epoch": 2.33, + "learning_rate": 2.5262436342135753e-06, + "loss": 0.082, + "step": 7232 + }, + { + "epoch": 2.33, + "learning_rate": 2.523936168768132e-06, + "loss": 0.0646, + "step": 7233 + }, + { + "epoch": 2.33, + "learning_rate": 2.5216296054163547e-06, + "loss": 0.0671, + "step": 7234 + }, + { + "epoch": 2.33, + "learning_rate": 2.519323944436569e-06, + "loss": 0.0721, + "step": 7235 + }, + { + "epoch": 2.33, + "learning_rate": 2.5170191861069804e-06, + "loss": 0.0725, + "step": 7236 + }, + { + "epoch": 2.33, + "learning_rate": 2.514715330705689e-06, + "loss": 0.0747, + "step": 7237 + }, + { + "epoch": 2.33, + "learning_rate": 2.5124123785106945e-06, + "loss": 0.0693, + "step": 7238 + }, + { + "epoch": 2.33, + "learning_rate": 2.5101103297998774e-06, + "loss": 0.0663, + "step": 7239 + }, + { + "epoch": 2.33, + "learning_rate": 2.50780918485101e-06, + "loss": 0.0778, + "step": 7240 + }, + { + "epoch": 2.33, + "learning_rate": 2.5055089439417646e-06, + "loss": 0.0669, + "step": 7241 + }, + { + "epoch": 2.33, + "learning_rate": 2.503209607349697e-06, + "loss": 0.0711, + "step": 7242 + }, + { + "epoch": 2.33, + "learning_rate": 2.5009111753522463e-06, + "loss": 0.0697, + "step": 7243 + }, + { + "epoch": 2.33, + "learning_rate": 2.498613648226761e-06, + "loss": 0.0732, + "step": 7244 + }, + { + "epoch": 2.33, + "learning_rate": 2.4963170262504667e-06, + "loss": 0.0678, + "step": 7245 + }, + { + "epoch": 2.33, + "learning_rate": 2.4940213097004796e-06, + "loss": 0.0751, + "step": 7246 + }, + { + "epoch": 2.33, + "learning_rate": 2.4917264988538194e-06, + "loss": 0.0735, + "step": 7247 + }, + { + "epoch": 2.33, + "learning_rate": 2.4894325939873844e-06, + "loss": 0.0694, + "step": 7248 + }, + { + "epoch": 2.33, + "learning_rate": 2.4871395953779597e-06, + "loss": 0.0712, + "step": 7249 + }, + { + "epoch": 2.33, + "learning_rate": 2.484847503302238e-06, + "loss": 0.074, + "step": 7250 + }, + { + "epoch": 2.33, + "learning_rate": 2.4825563180367882e-06, + "loss": 0.0633, + "step": 7251 + }, + { + "epoch": 2.33, + "learning_rate": 2.4802660398580735e-06, + "loss": 0.077, + "step": 7252 + }, + { + "epoch": 2.33, + "learning_rate": 2.4779766690424522e-06, + "loss": 0.0712, + "step": 7253 + }, + { + "epoch": 2.33, + "learning_rate": 2.4756882058661693e-06, + "loss": 0.0709, + "step": 7254 + }, + { + "epoch": 2.33, + "learning_rate": 2.4734006506053574e-06, + "loss": 0.0743, + "step": 7255 + }, + { + "epoch": 2.33, + "learning_rate": 2.471114003536045e-06, + "loss": 0.0741, + "step": 7256 + }, + { + "epoch": 2.33, + "learning_rate": 2.4688282649341465e-06, + "loss": 0.0728, + "step": 7257 + }, + { + "epoch": 2.33, + "learning_rate": 2.466543435075468e-06, + "loss": 0.0718, + "step": 7258 + }, + { + "epoch": 2.34, + "learning_rate": 2.4642595142357116e-06, + "loss": 0.0756, + "step": 7259 + }, + { + "epoch": 2.34, + "learning_rate": 2.461976502690463e-06, + "loss": 0.0654, + "step": 7260 + }, + { + "epoch": 2.34, + "learning_rate": 2.459694400715199e-06, + "loss": 0.0683, + "step": 7261 + }, + { + "epoch": 2.34, + "learning_rate": 2.4574132085852888e-06, + "loss": 0.0693, + "step": 7262 + }, + { + "epoch": 2.34, + "learning_rate": 2.4551329265759894e-06, + "loss": 0.0728, + "step": 7263 + }, + { + "epoch": 2.34, + "learning_rate": 2.4528535549624464e-06, + "loss": 0.077, + "step": 7264 + }, + { + "epoch": 2.34, + "learning_rate": 2.4505750940197058e-06, + "loss": 0.07, + "step": 7265 + }, + { + "epoch": 2.34, + "learning_rate": 2.448297544022692e-06, + "loss": 0.0658, + "step": 7266 + }, + { + "epoch": 2.34, + "learning_rate": 2.4460209052462213e-06, + "loss": 0.0716, + "step": 7267 + }, + { + "epoch": 2.34, + "learning_rate": 2.4437451779650122e-06, + "loss": 0.0686, + "step": 7268 + }, + { + "epoch": 2.34, + "learning_rate": 2.4414703624536553e-06, + "loss": 0.0768, + "step": 7269 + }, + { + "epoch": 2.34, + "learning_rate": 2.439196458986638e-06, + "loss": 0.0727, + "step": 7270 + }, + { + "epoch": 2.34, + "learning_rate": 2.4369234678383445e-06, + "loss": 0.0756, + "step": 7271 + }, + { + "epoch": 2.34, + "learning_rate": 2.4346513892830427e-06, + "loss": 0.0734, + "step": 7272 + }, + { + "epoch": 2.34, + "learning_rate": 2.4323802235948866e-06, + "loss": 0.0731, + "step": 7273 + }, + { + "epoch": 2.34, + "learning_rate": 2.430109971047935e-06, + "loss": 0.073, + "step": 7274 + }, + { + "epoch": 2.34, + "learning_rate": 2.427840631916114e-06, + "loss": 0.0729, + "step": 7275 + }, + { + "epoch": 2.34, + "learning_rate": 2.425572206473259e-06, + "loss": 0.0704, + "step": 7276 + }, + { + "epoch": 2.34, + "learning_rate": 2.4233046949930873e-06, + "loss": 0.0684, + "step": 7277 + }, + { + "epoch": 2.34, + "learning_rate": 2.4210380977492055e-06, + "loss": 0.0667, + "step": 7278 + }, + { + "epoch": 2.34, + "learning_rate": 2.418772415015107e-06, + "loss": 0.072, + "step": 7279 + }, + { + "epoch": 2.34, + "learning_rate": 2.4165076470641856e-06, + "loss": 0.0663, + "step": 7280 + }, + { + "epoch": 2.34, + "learning_rate": 2.414243794169715e-06, + "loss": 0.0713, + "step": 7281 + }, + { + "epoch": 2.34, + "learning_rate": 2.4119808566048607e-06, + "loss": 0.0768, + "step": 7282 + }, + { + "epoch": 2.34, + "learning_rate": 2.4097188346426794e-06, + "loss": 0.0658, + "step": 7283 + }, + { + "epoch": 2.34, + "learning_rate": 2.407457728556115e-06, + "loss": 0.0712, + "step": 7284 + }, + { + "epoch": 2.34, + "learning_rate": 2.4051975386180004e-06, + "loss": 0.0725, + "step": 7285 + }, + { + "epoch": 2.34, + "learning_rate": 2.4029382651010647e-06, + "loss": 0.0736, + "step": 7286 + }, + { + "epoch": 2.34, + "learning_rate": 2.400679908277919e-06, + "loss": 0.0719, + "step": 7287 + }, + { + "epoch": 2.34, + "learning_rate": 2.3984224684210655e-06, + "loss": 0.0704, + "step": 7288 + }, + { + "epoch": 2.34, + "learning_rate": 2.3961659458028975e-06, + "loss": 0.0739, + "step": 7289 + }, + { + "epoch": 2.35, + "learning_rate": 2.3939103406956964e-06, + "loss": 0.075, + "step": 7290 + }, + { + "epoch": 2.35, + "learning_rate": 2.3916556533716296e-06, + "loss": 0.065, + "step": 7291 + }, + { + "epoch": 2.35, + "learning_rate": 2.3894018841027646e-06, + "loss": 0.0731, + "step": 7292 + }, + { + "epoch": 2.35, + "learning_rate": 2.387149033161046e-06, + "loss": 0.0703, + "step": 7293 + }, + { + "epoch": 2.35, + "learning_rate": 2.384897100818313e-06, + "loss": 0.0669, + "step": 7294 + }, + { + "epoch": 2.35, + "learning_rate": 2.382646087346293e-06, + "loss": 0.0747, + "step": 7295 + }, + { + "epoch": 2.35, + "learning_rate": 2.380395993016602e-06, + "loss": 0.0719, + "step": 7296 + }, + { + "epoch": 2.35, + "learning_rate": 2.3781468181007494e-06, + "loss": 0.0657, + "step": 7297 + }, + { + "epoch": 2.35, + "learning_rate": 2.3758985628701282e-06, + "loss": 0.0711, + "step": 7298 + }, + { + "epoch": 2.35, + "learning_rate": 2.3736512275960224e-06, + "loss": 0.067, + "step": 7299 + }, + { + "epoch": 2.35, + "learning_rate": 2.371404812549605e-06, + "loss": 0.0626, + "step": 7300 + }, + { + "epoch": 2.35, + "learning_rate": 2.369159318001937e-06, + "loss": 0.0695, + "step": 7301 + }, + { + "epoch": 2.35, + "learning_rate": 2.366914744223968e-06, + "loss": 0.0686, + "step": 7302 + }, + { + "epoch": 2.35, + "learning_rate": 2.364671091486543e-06, + "loss": 0.0723, + "step": 7303 + }, + { + "epoch": 2.35, + "learning_rate": 2.362428360060389e-06, + "loss": 0.0731, + "step": 7304 + }, + { + "epoch": 2.35, + "learning_rate": 2.3601865502161213e-06, + "loss": 0.0736, + "step": 7305 + }, + { + "epoch": 2.35, + "learning_rate": 2.357945662224248e-06, + "loss": 0.0717, + "step": 7306 + }, + { + "epoch": 2.35, + "learning_rate": 2.3557056963551627e-06, + "loss": 0.0771, + "step": 7307 + }, + { + "epoch": 2.35, + "learning_rate": 2.353466652879147e-06, + "loss": 0.0689, + "step": 7308 + }, + { + "epoch": 2.35, + "learning_rate": 2.351228532066381e-06, + "loss": 0.0714, + "step": 7309 + }, + { + "epoch": 2.35, + "learning_rate": 2.34899133418692e-06, + "loss": 0.0703, + "step": 7310 + }, + { + "epoch": 2.35, + "learning_rate": 2.346755059510711e-06, + "loss": 0.0627, + "step": 7311 + }, + { + "epoch": 2.35, + "learning_rate": 2.344519708307601e-06, + "loss": 0.0712, + "step": 7312 + }, + { + "epoch": 2.35, + "learning_rate": 2.342285280847314e-06, + "loss": 0.0686, + "step": 7313 + }, + { + "epoch": 2.35, + "learning_rate": 2.340051777399459e-06, + "loss": 0.0687, + "step": 7314 + }, + { + "epoch": 2.35, + "learning_rate": 2.3378191982335484e-06, + "loss": 0.0681, + "step": 7315 + }, + { + "epoch": 2.35, + "learning_rate": 2.3355875436189714e-06, + "loss": 0.0713, + "step": 7316 + }, + { + "epoch": 2.35, + "learning_rate": 2.333356813825005e-06, + "loss": 0.0709, + "step": 7317 + }, + { + "epoch": 2.35, + "learning_rate": 2.331127009120826e-06, + "loss": 0.0707, + "step": 7318 + }, + { + "epoch": 2.35, + "learning_rate": 2.328898129775491e-06, + "loss": 0.0749, + "step": 7319 + }, + { + "epoch": 2.35, + "learning_rate": 2.326670176057938e-06, + "loss": 0.0669, + "step": 7320 + }, + { + "epoch": 2.36, + "learning_rate": 2.32444314823701e-06, + "loss": 0.0761, + "step": 7321 + }, + { + "epoch": 2.36, + "learning_rate": 2.3222170465814265e-06, + "loss": 0.0738, + "step": 7322 + }, + { + "epoch": 2.36, + "learning_rate": 2.3199918713597947e-06, + "loss": 0.0662, + "step": 7323 + }, + { + "epoch": 2.36, + "learning_rate": 2.317767622840622e-06, + "loss": 0.0817, + "step": 7324 + }, + { + "epoch": 2.36, + "learning_rate": 2.3155443012922897e-06, + "loss": 0.0784, + "step": 7325 + }, + { + "epoch": 2.36, + "learning_rate": 2.3133219069830758e-06, + "loss": 0.0729, + "step": 7326 + }, + { + "epoch": 2.36, + "learning_rate": 2.311100440181142e-06, + "loss": 0.0728, + "step": 7327 + }, + { + "epoch": 2.36, + "learning_rate": 2.3088799011545414e-06, + "loss": 0.0685, + "step": 7328 + }, + { + "epoch": 2.36, + "learning_rate": 2.306660290171211e-06, + "loss": 0.0719, + "step": 7329 + }, + { + "epoch": 2.36, + "learning_rate": 2.3044416074989818e-06, + "loss": 0.0694, + "step": 7330 + }, + { + "epoch": 2.36, + "learning_rate": 2.3022238534055685e-06, + "loss": 0.0687, + "step": 7331 + }, + { + "epoch": 2.36, + "learning_rate": 2.3000070281585752e-06, + "loss": 0.0742, + "step": 7332 + }, + { + "epoch": 2.36, + "learning_rate": 2.297791132025492e-06, + "loss": 0.0688, + "step": 7333 + }, + { + "epoch": 2.36, + "learning_rate": 2.2955761652736996e-06, + "loss": 0.0752, + "step": 7334 + }, + { + "epoch": 2.36, + "learning_rate": 2.293362128170462e-06, + "loss": 0.0684, + "step": 7335 + }, + { + "epoch": 2.36, + "learning_rate": 2.29114902098294e-06, + "loss": 0.0669, + "step": 7336 + }, + { + "epoch": 2.36, + "learning_rate": 2.2889368439781733e-06, + "loss": 0.067, + "step": 7337 + }, + { + "epoch": 2.36, + "learning_rate": 2.2867255974230905e-06, + "loss": 0.0714, + "step": 7338 + }, + { + "epoch": 2.36, + "learning_rate": 2.2845152815845173e-06, + "loss": 0.0707, + "step": 7339 + }, + { + "epoch": 2.36, + "learning_rate": 2.2823058967291534e-06, + "loss": 0.0728, + "step": 7340 + }, + { + "epoch": 2.36, + "learning_rate": 2.28009744312359e-06, + "loss": 0.0675, + "step": 7341 + }, + { + "epoch": 2.36, + "learning_rate": 2.2778899210343163e-06, + "loss": 0.066, + "step": 7342 + }, + { + "epoch": 2.36, + "learning_rate": 2.275683330727697e-06, + "loss": 0.0761, + "step": 7343 + }, + { + "epoch": 2.36, + "learning_rate": 2.273477672469987e-06, + "loss": 0.0674, + "step": 7344 + }, + { + "epoch": 2.36, + "learning_rate": 2.2712729465273387e-06, + "loss": 0.0736, + "step": 7345 + }, + { + "epoch": 2.36, + "learning_rate": 2.2690691531657748e-06, + "loss": 0.0717, + "step": 7346 + }, + { + "epoch": 2.36, + "learning_rate": 2.266866292651214e-06, + "loss": 0.0713, + "step": 7347 + }, + { + "epoch": 2.36, + "learning_rate": 2.2646643652494693e-06, + "loss": 0.0713, + "step": 7348 + }, + { + "epoch": 2.36, + "learning_rate": 2.262463371226232e-06, + "loss": 0.074, + "step": 7349 + }, + { + "epoch": 2.36, + "learning_rate": 2.2602633108470794e-06, + "loss": 0.0729, + "step": 7350 + }, + { + "epoch": 2.36, + "learning_rate": 2.2580641843774863e-06, + "loss": 0.0725, + "step": 7351 + }, + { + "epoch": 2.37, + "learning_rate": 2.2558659920828095e-06, + "loss": 0.0708, + "step": 7352 + }, + { + "epoch": 2.37, + "learning_rate": 2.2536687342282826e-06, + "loss": 0.0724, + "step": 7353 + }, + { + "epoch": 2.37, + "learning_rate": 2.2514724110790455e-06, + "loss": 0.0659, + "step": 7354 + }, + { + "epoch": 2.37, + "learning_rate": 2.2492770229001128e-06, + "loss": 0.0674, + "step": 7355 + }, + { + "epoch": 2.37, + "learning_rate": 2.247082569956386e-06, + "loss": 0.0736, + "step": 7356 + }, + { + "epoch": 2.37, + "learning_rate": 2.2448890525126633e-06, + "loss": 0.072, + "step": 7357 + }, + { + "epoch": 2.37, + "learning_rate": 2.2426964708336196e-06, + "loss": 0.0722, + "step": 7358 + }, + { + "epoch": 2.37, + "learning_rate": 2.2405048251838235e-06, + "loss": 0.0717, + "step": 7359 + }, + { + "epoch": 2.37, + "learning_rate": 2.2383141158277277e-06, + "loss": 0.0699, + "step": 7360 + }, + { + "epoch": 2.37, + "learning_rate": 2.2361243430296707e-06, + "loss": 0.0659, + "step": 7361 + }, + { + "epoch": 2.37, + "learning_rate": 2.2339355070538794e-06, + "loss": 0.0695, + "step": 7362 + }, + { + "epoch": 2.37, + "learning_rate": 2.2317476081644727e-06, + "loss": 0.0792, + "step": 7363 + }, + { + "epoch": 2.37, + "learning_rate": 2.2295606466254483e-06, + "loss": 0.0745, + "step": 7364 + }, + { + "epoch": 2.37, + "learning_rate": 2.227374622700694e-06, + "loss": 0.07, + "step": 7365 + }, + { + "epoch": 2.37, + "learning_rate": 2.2251895366539866e-06, + "loss": 0.072, + "step": 7366 + }, + { + "epoch": 2.37, + "learning_rate": 2.2230053887489866e-06, + "loss": 0.0745, + "step": 7367 + }, + { + "epoch": 2.37, + "learning_rate": 2.22082217924924e-06, + "loss": 0.0671, + "step": 7368 + }, + { + "epoch": 2.37, + "learning_rate": 2.218639908418189e-06, + "loss": 0.0753, + "step": 7369 + }, + { + "epoch": 2.37, + "learning_rate": 2.2164585765191503e-06, + "loss": 0.0679, + "step": 7370 + }, + { + "epoch": 2.37, + "learning_rate": 2.214278183815334e-06, + "loss": 0.0648, + "step": 7371 + }, + { + "epoch": 2.37, + "learning_rate": 2.212098730569836e-06, + "loss": 0.0675, + "step": 7372 + }, + { + "epoch": 2.37, + "learning_rate": 2.2099202170456358e-06, + "loss": 0.0726, + "step": 7373 + }, + { + "epoch": 2.37, + "learning_rate": 2.207742643505606e-06, + "loss": 0.0738, + "step": 7374 + }, + { + "epoch": 2.37, + "learning_rate": 2.205566010212501e-06, + "loss": 0.0701, + "step": 7375 + }, + { + "epoch": 2.37, + "learning_rate": 2.2033903174289615e-06, + "loss": 0.0702, + "step": 7376 + }, + { + "epoch": 2.37, + "learning_rate": 2.2012155654175125e-06, + "loss": 0.0703, + "step": 7377 + }, + { + "epoch": 2.37, + "learning_rate": 2.1990417544405796e-06, + "loss": 0.0681, + "step": 7378 + }, + { + "epoch": 2.37, + "learning_rate": 2.1968688847604512e-06, + "loss": 0.07, + "step": 7379 + }, + { + "epoch": 2.37, + "learning_rate": 2.194696956639323e-06, + "loss": 0.069, + "step": 7380 + }, + { + "epoch": 2.37, + "learning_rate": 2.1925259703392678e-06, + "loss": 0.0664, + "step": 7381 + }, + { + "epoch": 2.37, + "learning_rate": 2.190355926122244e-06, + "loss": 0.0734, + "step": 7382 + }, + { + "epoch": 2.38, + "learning_rate": 2.1881868242500968e-06, + "loss": 0.0735, + "step": 7383 + }, + { + "epoch": 2.38, + "learning_rate": 2.186018664984568e-06, + "loss": 0.077, + "step": 7384 + }, + { + "epoch": 2.38, + "learning_rate": 2.1838514485872653e-06, + "loss": 0.0709, + "step": 7385 + }, + { + "epoch": 2.38, + "learning_rate": 2.1816851753197023e-06, + "loss": 0.0767, + "step": 7386 + }, + { + "epoch": 2.38, + "learning_rate": 2.179519845443269e-06, + "loss": 0.0721, + "step": 7387 + }, + { + "epoch": 2.38, + "learning_rate": 2.1773554592192426e-06, + "loss": 0.067, + "step": 7388 + }, + { + "epoch": 2.38, + "learning_rate": 2.1751920169087835e-06, + "loss": 0.069, + "step": 7389 + }, + { + "epoch": 2.38, + "learning_rate": 2.173029518772949e-06, + "loss": 0.0736, + "step": 7390 + }, + { + "epoch": 2.38, + "learning_rate": 2.1708679650726703e-06, + "loss": 0.0693, + "step": 7391 + }, + { + "epoch": 2.38, + "learning_rate": 2.1687073560687732e-06, + "loss": 0.0731, + "step": 7392 + }, + { + "epoch": 2.38, + "learning_rate": 2.166547692021963e-06, + "loss": 0.0779, + "step": 7393 + }, + { + "epoch": 2.38, + "learning_rate": 2.1643889731928327e-06, + "loss": 0.0676, + "step": 7394 + }, + { + "epoch": 2.38, + "learning_rate": 2.1622311998418665e-06, + "loss": 0.0672, + "step": 7395 + }, + { + "epoch": 2.38, + "learning_rate": 2.160074372229429e-06, + "loss": 0.0746, + "step": 7396 + }, + { + "epoch": 2.38, + "learning_rate": 2.1579184906157723e-06, + "loss": 0.0704, + "step": 7397 + }, + { + "epoch": 2.38, + "learning_rate": 2.1557635552610346e-06, + "loss": 0.0697, + "step": 7398 + }, + { + "epoch": 2.38, + "learning_rate": 2.153609566425238e-06, + "loss": 0.0689, + "step": 7399 + }, + { + "epoch": 2.38, + "learning_rate": 2.1514565243682894e-06, + "loss": 0.0726, + "step": 7400 + }, + { + "epoch": 2.38, + "learning_rate": 2.149304429349991e-06, + "loss": 0.0762, + "step": 7401 + }, + { + "epoch": 2.38, + "learning_rate": 2.14715328163002e-06, + "loss": 0.0716, + "step": 7402 + }, + { + "epoch": 2.38, + "learning_rate": 2.1450030814679436e-06, + "loss": 0.0744, + "step": 7403 + }, + { + "epoch": 2.38, + "learning_rate": 2.1428538291232128e-06, + "loss": 0.0721, + "step": 7404 + }, + { + "epoch": 2.38, + "learning_rate": 2.140705524855167e-06, + "loss": 0.0668, + "step": 7405 + }, + { + "epoch": 2.38, + "learning_rate": 2.1385581689230262e-06, + "loss": 0.0754, + "step": 7406 + }, + { + "epoch": 2.38, + "learning_rate": 2.136411761585906e-06, + "loss": 0.0741, + "step": 7407 + }, + { + "epoch": 2.38, + "learning_rate": 2.1342663031027967e-06, + "loss": 0.0697, + "step": 7408 + }, + { + "epoch": 2.38, + "learning_rate": 2.132121793732577e-06, + "loss": 0.0693, + "step": 7409 + }, + { + "epoch": 2.38, + "learning_rate": 2.1299782337340193e-06, + "loss": 0.0708, + "step": 7410 + }, + { + "epoch": 2.38, + "learning_rate": 2.1278356233657682e-06, + "loss": 0.0692, + "step": 7411 + }, + { + "epoch": 2.38, + "learning_rate": 2.1256939628863583e-06, + "loss": 0.0711, + "step": 7412 + }, + { + "epoch": 2.38, + "learning_rate": 2.123553252554219e-06, + "loss": 0.0716, + "step": 7413 + }, + { + "epoch": 2.39, + "learning_rate": 2.1214134926276543e-06, + "loss": 0.0621, + "step": 7414 + }, + { + "epoch": 2.39, + "learning_rate": 2.1192746833648526e-06, + "loss": 0.0678, + "step": 7415 + }, + { + "epoch": 2.39, + "learning_rate": 2.117136825023898e-06, + "loss": 0.0707, + "step": 7416 + }, + { + "epoch": 2.39, + "learning_rate": 2.1149999178627544e-06, + "loss": 0.068, + "step": 7417 + }, + { + "epoch": 2.39, + "learning_rate": 2.11286396213926e-06, + "loss": 0.0668, + "step": 7418 + }, + { + "epoch": 2.39, + "learning_rate": 2.110728958111159e-06, + "loss": 0.069, + "step": 7419 + }, + { + "epoch": 2.39, + "learning_rate": 2.1085949060360654e-06, + "loss": 0.0684, + "step": 7420 + }, + { + "epoch": 2.39, + "learning_rate": 2.1064618061714816e-06, + "loss": 0.068, + "step": 7421 + }, + { + "epoch": 2.39, + "learning_rate": 2.1043296587748e-06, + "loss": 0.0686, + "step": 7422 + }, + { + "epoch": 2.39, + "learning_rate": 2.102198464103297e-06, + "loss": 0.0693, + "step": 7423 + }, + { + "epoch": 2.39, + "learning_rate": 2.100068222414121e-06, + "loss": 0.0695, + "step": 7424 + }, + { + "epoch": 2.39, + "learning_rate": 2.097938933964326e-06, + "loss": 0.0752, + "step": 7425 + }, + { + "epoch": 2.39, + "learning_rate": 2.095810599010838e-06, + "loss": 0.0746, + "step": 7426 + }, + { + "epoch": 2.39, + "learning_rate": 2.093683217810468e-06, + "loss": 0.0675, + "step": 7427 + }, + { + "epoch": 2.39, + "learning_rate": 2.0915567906199207e-06, + "loss": 0.0704, + "step": 7428 + }, + { + "epoch": 2.39, + "learning_rate": 2.089431317695776e-06, + "loss": 0.0649, + "step": 7429 + }, + { + "epoch": 2.39, + "learning_rate": 2.087306799294504e-06, + "loss": 0.0699, + "step": 7430 + }, + { + "epoch": 2.39, + "learning_rate": 2.0851832356724567e-06, + "loss": 0.0749, + "step": 7431 + }, + { + "epoch": 2.39, + "learning_rate": 2.0830606270858735e-06, + "loss": 0.0743, + "step": 7432 + }, + { + "epoch": 2.39, + "learning_rate": 2.0809389737908735e-06, + "loss": 0.0732, + "step": 7433 + }, + { + "epoch": 2.39, + "learning_rate": 2.0788182760434707e-06, + "loss": 0.0713, + "step": 7434 + }, + { + "epoch": 2.39, + "learning_rate": 2.076698534099555e-06, + "loss": 0.07, + "step": 7435 + }, + { + "epoch": 2.39, + "learning_rate": 2.0745797482149022e-06, + "loss": 0.0678, + "step": 7436 + }, + { + "epoch": 2.39, + "learning_rate": 2.0724619186451755e-06, + "loss": 0.0716, + "step": 7437 + }, + { + "epoch": 2.39, + "learning_rate": 2.0703450456459208e-06, + "loss": 0.067, + "step": 7438 + }, + { + "epoch": 2.39, + "learning_rate": 2.0682291294725652e-06, + "loss": 0.0703, + "step": 7439 + }, + { + "epoch": 2.39, + "learning_rate": 2.0661141703804312e-06, + "loss": 0.0769, + "step": 7440 + }, + { + "epoch": 2.39, + "learning_rate": 2.064000168624716e-06, + "loss": 0.0661, + "step": 7441 + }, + { + "epoch": 2.39, + "learning_rate": 2.061887124460502e-06, + "loss": 0.0767, + "step": 7442 + }, + { + "epoch": 2.39, + "learning_rate": 2.0597750381427607e-06, + "loss": 0.0664, + "step": 7443 + }, + { + "epoch": 2.39, + "learning_rate": 2.057663909926343e-06, + "loss": 0.0757, + "step": 7444 + }, + { + "epoch": 2.4, + "learning_rate": 2.0555537400659853e-06, + "loss": 0.0701, + "step": 7445 + }, + { + "epoch": 2.4, + "learning_rate": 2.053444528816315e-06, + "loss": 0.069, + "step": 7446 + }, + { + "epoch": 2.4, + "learning_rate": 2.051336276431837e-06, + "loss": 0.0703, + "step": 7447 + }, + { + "epoch": 2.4, + "learning_rate": 2.0492289831669366e-06, + "loss": 0.0758, + "step": 7448 + }, + { + "epoch": 2.4, + "learning_rate": 2.047122649275898e-06, + "loss": 0.0699, + "step": 7449 + }, + { + "epoch": 2.4, + "learning_rate": 2.0450172750128728e-06, + "loss": 0.0638, + "step": 7450 + }, + { + "epoch": 2.4, + "learning_rate": 2.042912860631905e-06, + "loss": 0.0686, + "step": 7451 + }, + { + "epoch": 2.4, + "learning_rate": 2.0408094063869255e-06, + "loss": 0.0714, + "step": 7452 + }, + { + "epoch": 2.4, + "learning_rate": 2.038706912531746e-06, + "loss": 0.0682, + "step": 7453 + }, + { + "epoch": 2.4, + "learning_rate": 2.0366053793200567e-06, + "loss": 0.0706, + "step": 7454 + }, + { + "epoch": 2.4, + "learning_rate": 2.0345048070054485e-06, + "loss": 0.0743, + "step": 7455 + }, + { + "epoch": 2.4, + "learning_rate": 2.0324051958413728e-06, + "loss": 0.0708, + "step": 7456 + }, + { + "epoch": 2.4, + "learning_rate": 2.0303065460811865e-06, + "loss": 0.0733, + "step": 7457 + }, + { + "epoch": 2.4, + "learning_rate": 2.028208857978118e-06, + "loss": 0.0784, + "step": 7458 + }, + { + "epoch": 2.4, + "learning_rate": 2.026112131785285e-06, + "loss": 0.0717, + "step": 7459 + }, + { + "epoch": 2.4, + "learning_rate": 2.0240163677556823e-06, + "loss": 0.0678, + "step": 7460 + }, + { + "epoch": 2.4, + "learning_rate": 2.021921566142201e-06, + "loss": 0.0652, + "step": 7461 + }, + { + "epoch": 2.4, + "learning_rate": 2.019827727197605e-06, + "loss": 0.0753, + "step": 7462 + }, + { + "epoch": 2.4, + "learning_rate": 2.0177348511745477e-06, + "loss": 0.0742, + "step": 7463 + }, + { + "epoch": 2.4, + "learning_rate": 2.0156429383255617e-06, + "loss": 0.0732, + "step": 7464 + }, + { + "epoch": 2.4, + "learning_rate": 2.0135519889030676e-06, + "loss": 0.0727, + "step": 7465 + }, + { + "epoch": 2.4, + "learning_rate": 2.0114620031593646e-06, + "loss": 0.0655, + "step": 7466 + }, + { + "epoch": 2.4, + "learning_rate": 2.009372981346647e-06, + "loss": 0.0735, + "step": 7467 + }, + { + "epoch": 2.4, + "learning_rate": 2.0072849237169802e-06, + "loss": 0.0751, + "step": 7468 + }, + { + "epoch": 2.4, + "learning_rate": 2.0051978305223185e-06, + "loss": 0.0724, + "step": 7469 + }, + { + "epoch": 2.4, + "learning_rate": 2.0031117020144997e-06, + "loss": 0.0704, + "step": 7470 + }, + { + "epoch": 2.4, + "learning_rate": 2.001026538445242e-06, + "loss": 0.0751, + "step": 7471 + }, + { + "epoch": 2.4, + "learning_rate": 1.9989423400661557e-06, + "loss": 0.0662, + "step": 7472 + }, + { + "epoch": 2.4, + "learning_rate": 1.996859107128727e-06, + "loss": 0.0695, + "step": 7473 + }, + { + "epoch": 2.4, + "learning_rate": 1.994776839884326e-06, + "loss": 0.0681, + "step": 7474 + }, + { + "epoch": 2.4, + "learning_rate": 1.9926955385842093e-06, + "loss": 0.0649, + "step": 7475 + }, + { + "epoch": 2.4, + "learning_rate": 1.9906152034795158e-06, + "loss": 0.0703, + "step": 7476 + }, + { + "epoch": 2.41, + "learning_rate": 1.9885358348212635e-06, + "loss": 0.0746, + "step": 7477 + }, + { + "epoch": 2.41, + "learning_rate": 1.986457432860365e-06, + "loss": 0.0723, + "step": 7478 + }, + { + "epoch": 2.41, + "learning_rate": 1.984379997847604e-06, + "loss": 0.0703, + "step": 7479 + }, + { + "epoch": 2.41, + "learning_rate": 1.9823035300336547e-06, + "loss": 0.0653, + "step": 7480 + }, + { + "epoch": 2.41, + "learning_rate": 1.9802280296690722e-06, + "loss": 0.0703, + "step": 7481 + }, + { + "epoch": 2.41, + "learning_rate": 1.9781534970042946e-06, + "loss": 0.0669, + "step": 7482 + }, + { + "epoch": 2.41, + "learning_rate": 1.976079932289643e-06, + "loss": 0.0731, + "step": 7483 + }, + { + "epoch": 2.41, + "learning_rate": 1.974007335775324e-06, + "loss": 0.0804, + "step": 7484 + }, + { + "epoch": 2.41, + "learning_rate": 1.971935707711428e-06, + "loss": 0.0626, + "step": 7485 + }, + { + "epoch": 2.41, + "learning_rate": 1.9698650483479233e-06, + "loss": 0.0672, + "step": 7486 + }, + { + "epoch": 2.41, + "learning_rate": 1.967795357934662e-06, + "loss": 0.0772, + "step": 7487 + }, + { + "epoch": 2.41, + "learning_rate": 1.96572663672139e-06, + "loss": 0.0691, + "step": 7488 + }, + { + "epoch": 2.41, + "learning_rate": 1.963658884957719e-06, + "loss": 0.0626, + "step": 7489 + }, + { + "epoch": 2.41, + "learning_rate": 1.9615921028931585e-06, + "loss": 0.0711, + "step": 7490 + }, + { + "epoch": 2.41, + "learning_rate": 1.959526290777094e-06, + "loss": 0.0764, + "step": 7491 + }, + { + "epoch": 2.41, + "learning_rate": 1.9574614488587917e-06, + "loss": 0.0696, + "step": 7492 + }, + { + "epoch": 2.41, + "learning_rate": 1.955397577387409e-06, + "loss": 0.0683, + "step": 7493 + }, + { + "epoch": 2.41, + "learning_rate": 1.953334676611983e-06, + "loss": 0.077, + "step": 7494 + }, + { + "epoch": 2.41, + "learning_rate": 1.951272746781424e-06, + "loss": 0.0685, + "step": 7495 + }, + { + "epoch": 2.41, + "learning_rate": 1.9492117881445393e-06, + "loss": 0.0678, + "step": 7496 + }, + { + "epoch": 2.41, + "learning_rate": 1.9471518009500125e-06, + "loss": 0.0661, + "step": 7497 + }, + { + "epoch": 2.41, + "learning_rate": 1.9450927854464065e-06, + "loss": 0.0683, + "step": 7498 + }, + { + "epoch": 2.41, + "learning_rate": 1.943034741882176e-06, + "loss": 0.0707, + "step": 7499 + }, + { + "epoch": 2.41, + "learning_rate": 1.9409776705056514e-06, + "loss": 0.0715, + "step": 7500 + }, + { + "epoch": 2.41, + "learning_rate": 1.938921571565048e-06, + "loss": 0.0689, + "step": 7501 + }, + { + "epoch": 2.41, + "learning_rate": 1.9368664453084616e-06, + "loss": 0.0713, + "step": 7502 + }, + { + "epoch": 2.41, + "learning_rate": 1.934812291983874e-06, + "loss": 0.0717, + "step": 7503 + }, + { + "epoch": 2.41, + "learning_rate": 1.9327591118391466e-06, + "loss": 0.0727, + "step": 7504 + }, + { + "epoch": 2.41, + "learning_rate": 1.9307069051220273e-06, + "loss": 0.0691, + "step": 7505 + }, + { + "epoch": 2.41, + "learning_rate": 1.928655672080143e-06, + "loss": 0.0656, + "step": 7506 + }, + { + "epoch": 2.41, + "learning_rate": 1.9266054129610034e-06, + "loss": 0.069, + "step": 7507 + }, + { + "epoch": 2.42, + "learning_rate": 1.9245561280120027e-06, + "loss": 0.0703, + "step": 7508 + }, + { + "epoch": 2.42, + "learning_rate": 1.922507817480417e-06, + "loss": 0.0726, + "step": 7509 + }, + { + "epoch": 2.42, + "learning_rate": 1.9204604816133986e-06, + "loss": 0.0747, + "step": 7510 + }, + { + "epoch": 2.42, + "learning_rate": 1.918414120657995e-06, + "loss": 0.0729, + "step": 7511 + }, + { + "epoch": 2.42, + "learning_rate": 1.916368734861126e-06, + "loss": 0.0692, + "step": 7512 + }, + { + "epoch": 2.42, + "learning_rate": 1.914324324469594e-06, + "loss": 0.0723, + "step": 7513 + }, + { + "epoch": 2.42, + "learning_rate": 1.912280889730095e-06, + "loss": 0.0673, + "step": 7514 + }, + { + "epoch": 2.42, + "learning_rate": 1.9102384308891875e-06, + "loss": 0.0654, + "step": 7515 + }, + { + "epoch": 2.42, + "learning_rate": 1.908196948193327e-06, + "loss": 0.0716, + "step": 7516 + }, + { + "epoch": 2.42, + "learning_rate": 1.90615644188885e-06, + "loss": 0.0693, + "step": 7517 + }, + { + "epoch": 2.42, + "learning_rate": 1.9041169122219727e-06, + "loss": 0.0682, + "step": 7518 + }, + { + "epoch": 2.42, + "learning_rate": 1.9020783594387882e-06, + "loss": 0.0696, + "step": 7519 + }, + { + "epoch": 2.42, + "learning_rate": 1.9000407837852851e-06, + "loss": 0.0701, + "step": 7520 + }, + { + "epoch": 2.42, + "learning_rate": 1.8980041855073206e-06, + "loss": 0.0702, + "step": 7521 + }, + { + "epoch": 2.42, + "learning_rate": 1.8959685648506365e-06, + "loss": 0.0712, + "step": 7522 + }, + { + "epoch": 2.42, + "learning_rate": 1.8939339220608655e-06, + "loss": 0.0688, + "step": 7523 + }, + { + "epoch": 2.42, + "learning_rate": 1.8919002573835143e-06, + "loss": 0.0715, + "step": 7524 + }, + { + "epoch": 2.42, + "learning_rate": 1.8898675710639714e-06, + "loss": 0.0703, + "step": 7525 + }, + { + "epoch": 2.42, + "learning_rate": 1.8878358633475125e-06, + "loss": 0.0752, + "step": 7526 + }, + { + "epoch": 2.42, + "learning_rate": 1.8858051344792938e-06, + "loss": 0.0691, + "step": 7527 + }, + { + "epoch": 2.42, + "learning_rate": 1.8837753847043438e-06, + "loss": 0.0697, + "step": 7528 + }, + { + "epoch": 2.42, + "learning_rate": 1.8817466142675877e-06, + "loss": 0.0724, + "step": 7529 + }, + { + "epoch": 2.42, + "learning_rate": 1.879718823413823e-06, + "loss": 0.0671, + "step": 7530 + }, + { + "epoch": 2.42, + "learning_rate": 1.8776920123877307e-06, + "loss": 0.0778, + "step": 7531 + }, + { + "epoch": 2.42, + "learning_rate": 1.8756661814338773e-06, + "loss": 0.0741, + "step": 7532 + }, + { + "epoch": 2.42, + "learning_rate": 1.8736413307967094e-06, + "loss": 0.0716, + "step": 7533 + }, + { + "epoch": 2.42, + "learning_rate": 1.8716174607205462e-06, + "loss": 0.075, + "step": 7534 + }, + { + "epoch": 2.42, + "learning_rate": 1.8695945714496034e-06, + "loss": 0.0605, + "step": 7535 + }, + { + "epoch": 2.42, + "learning_rate": 1.86757266322797e-06, + "loss": 0.065, + "step": 7536 + }, + { + "epoch": 2.42, + "learning_rate": 1.8655517362996157e-06, + "loss": 0.0688, + "step": 7537 + }, + { + "epoch": 2.42, + "learning_rate": 1.8635317909083983e-06, + "loss": 0.071, + "step": 7538 + }, + { + "epoch": 2.43, + "learning_rate": 1.861512827298051e-06, + "loss": 0.0704, + "step": 7539 + }, + { + "epoch": 2.43, + "learning_rate": 1.8594948457121897e-06, + "loss": 0.071, + "step": 7540 + }, + { + "epoch": 2.43, + "learning_rate": 1.8574778463943145e-06, + "loss": 0.0707, + "step": 7541 + }, + { + "epoch": 2.43, + "learning_rate": 1.8554618295878023e-06, + "loss": 0.0683, + "step": 7542 + }, + { + "epoch": 2.43, + "learning_rate": 1.8534467955359147e-06, + "loss": 0.0713, + "step": 7543 + }, + { + "epoch": 2.43, + "learning_rate": 1.851432744481797e-06, + "loss": 0.0751, + "step": 7544 + }, + { + "epoch": 2.43, + "learning_rate": 1.8494196766684725e-06, + "loss": 0.0709, + "step": 7545 + }, + { + "epoch": 2.43, + "learning_rate": 1.8474075923388446e-06, + "loss": 0.0687, + "step": 7546 + }, + { + "epoch": 2.43, + "learning_rate": 1.8453964917357015e-06, + "loss": 0.0721, + "step": 7547 + }, + { + "epoch": 2.43, + "learning_rate": 1.843386375101711e-06, + "loss": 0.0675, + "step": 7548 + }, + { + "epoch": 2.43, + "learning_rate": 1.8413772426794196e-06, + "loss": 0.0779, + "step": 7549 + }, + { + "epoch": 2.43, + "learning_rate": 1.8393690947112619e-06, + "loss": 0.0746, + "step": 7550 + }, + { + "epoch": 2.43, + "learning_rate": 1.8373619314395485e-06, + "loss": 0.069, + "step": 7551 + }, + { + "epoch": 2.43, + "learning_rate": 1.8353557531064682e-06, + "loss": 0.0685, + "step": 7552 + }, + { + "epoch": 2.43, + "learning_rate": 1.8333505599541034e-06, + "loss": 0.0772, + "step": 7553 + }, + { + "epoch": 2.43, + "learning_rate": 1.8313463522244001e-06, + "loss": 0.0774, + "step": 7554 + }, + { + "epoch": 2.43, + "learning_rate": 1.8293431301592013e-06, + "loss": 0.0675, + "step": 7555 + }, + { + "epoch": 2.43, + "learning_rate": 1.8273408940002202e-06, + "loss": 0.0683, + "step": 7556 + }, + { + "epoch": 2.43, + "learning_rate": 1.825339643989058e-06, + "loss": 0.0713, + "step": 7557 + }, + { + "epoch": 2.43, + "learning_rate": 1.8233393803671895e-06, + "loss": 0.066, + "step": 7558 + }, + { + "epoch": 2.43, + "learning_rate": 1.8213401033759837e-06, + "loss": 0.07, + "step": 7559 + }, + { + "epoch": 2.43, + "learning_rate": 1.819341813256671e-06, + "loss": 0.0705, + "step": 7560 + }, + { + "epoch": 2.43, + "learning_rate": 1.8173445102503807e-06, + "loss": 0.0738, + "step": 7561 + }, + { + "epoch": 2.43, + "learning_rate": 1.8153481945981144e-06, + "loss": 0.0674, + "step": 7562 + }, + { + "epoch": 2.43, + "learning_rate": 1.8133528665407562e-06, + "loss": 0.0721, + "step": 7563 + }, + { + "epoch": 2.43, + "learning_rate": 1.811358526319068e-06, + "loss": 0.0745, + "step": 7564 + }, + { + "epoch": 2.43, + "learning_rate": 1.8093651741736994e-06, + "loss": 0.0695, + "step": 7565 + }, + { + "epoch": 2.43, + "learning_rate": 1.807372810345176e-06, + "loss": 0.0751, + "step": 7566 + }, + { + "epoch": 2.43, + "learning_rate": 1.805381435073904e-06, + "loss": 0.0705, + "step": 7567 + }, + { + "epoch": 2.43, + "learning_rate": 1.8033910486001704e-06, + "loss": 0.0623, + "step": 7568 + }, + { + "epoch": 2.43, + "learning_rate": 1.801401651164143e-06, + "loss": 0.0679, + "step": 7569 + }, + { + "epoch": 2.44, + "learning_rate": 1.7994132430058741e-06, + "loss": 0.0704, + "step": 7570 + }, + { + "epoch": 2.44, + "learning_rate": 1.7974258243652931e-06, + "loss": 0.0704, + "step": 7571 + }, + { + "epoch": 2.44, + "learning_rate": 1.795439395482208e-06, + "loss": 0.0709, + "step": 7572 + }, + { + "epoch": 2.44, + "learning_rate": 1.7934539565963115e-06, + "loss": 0.0746, + "step": 7573 + }, + { + "epoch": 2.44, + "learning_rate": 1.7914695079471744e-06, + "loss": 0.0667, + "step": 7574 + }, + { + "epoch": 2.44, + "learning_rate": 1.789486049774246e-06, + "loss": 0.0734, + "step": 7575 + }, + { + "epoch": 2.44, + "learning_rate": 1.7875035823168641e-06, + "loss": 0.07, + "step": 7576 + }, + { + "epoch": 2.44, + "learning_rate": 1.7855221058142402e-06, + "loss": 0.0689, + "step": 7577 + }, + { + "epoch": 2.44, + "learning_rate": 1.7835416205054656e-06, + "loss": 0.078, + "step": 7578 + }, + { + "epoch": 2.44, + "learning_rate": 1.7815621266295158e-06, + "loss": 0.0721, + "step": 7579 + }, + { + "epoch": 2.44, + "learning_rate": 1.779583624425244e-06, + "loss": 0.0714, + "step": 7580 + }, + { + "epoch": 2.44, + "learning_rate": 1.7776061141313827e-06, + "loss": 0.0721, + "step": 7581 + }, + { + "epoch": 2.44, + "learning_rate": 1.775629595986551e-06, + "loss": 0.0683, + "step": 7582 + }, + { + "epoch": 2.44, + "learning_rate": 1.7736540702292426e-06, + "loss": 0.069, + "step": 7583 + }, + { + "epoch": 2.44, + "learning_rate": 1.7716795370978311e-06, + "loss": 0.0684, + "step": 7584 + }, + { + "epoch": 2.44, + "learning_rate": 1.7697059968305752e-06, + "loss": 0.0703, + "step": 7585 + }, + { + "epoch": 2.44, + "learning_rate": 1.7677334496656073e-06, + "loss": 0.069, + "step": 7586 + }, + { + "epoch": 2.44, + "learning_rate": 1.7657618958409428e-06, + "loss": 0.0725, + "step": 7587 + }, + { + "epoch": 2.44, + "learning_rate": 1.7637913355944814e-06, + "loss": 0.0742, + "step": 7588 + }, + { + "epoch": 2.44, + "learning_rate": 1.7618217691639982e-06, + "loss": 0.0696, + "step": 7589 + }, + { + "epoch": 2.44, + "learning_rate": 1.7598531967871468e-06, + "loss": 0.0737, + "step": 7590 + }, + { + "epoch": 2.44, + "learning_rate": 1.7578856187014691e-06, + "loss": 0.0667, + "step": 7591 + }, + { + "epoch": 2.44, + "learning_rate": 1.7559190351443767e-06, + "loss": 0.0697, + "step": 7592 + }, + { + "epoch": 2.44, + "learning_rate": 1.7539534463531638e-06, + "loss": 0.0737, + "step": 7593 + }, + { + "epoch": 2.44, + "learning_rate": 1.751988852565013e-06, + "loss": 0.0658, + "step": 7594 + }, + { + "epoch": 2.44, + "learning_rate": 1.7500252540169782e-06, + "loss": 0.0751, + "step": 7595 + }, + { + "epoch": 2.44, + "learning_rate": 1.7480626509459919e-06, + "loss": 0.0754, + "step": 7596 + }, + { + "epoch": 2.44, + "learning_rate": 1.7461010435888749e-06, + "loss": 0.0733, + "step": 7597 + }, + { + "epoch": 2.44, + "learning_rate": 1.7441404321823252e-06, + "loss": 0.0689, + "step": 7598 + }, + { + "epoch": 2.44, + "learning_rate": 1.7421808169629084e-06, + "loss": 0.0769, + "step": 7599 + }, + { + "epoch": 2.44, + "learning_rate": 1.7402221981670897e-06, + "loss": 0.0779, + "step": 7600 + }, + { + "epoch": 2.45, + "learning_rate": 1.738264576031201e-06, + "loss": 0.073, + "step": 7601 + }, + { + "epoch": 2.45, + "learning_rate": 1.7363079507914537e-06, + "loss": 0.0758, + "step": 7602 + }, + { + "epoch": 2.45, + "learning_rate": 1.7343523226839498e-06, + "loss": 0.0734, + "step": 7603 + }, + { + "epoch": 2.45, + "learning_rate": 1.7323976919446617e-06, + "loss": 0.0754, + "step": 7604 + }, + { + "epoch": 2.45, + "learning_rate": 1.7304440588094373e-06, + "loss": 0.0674, + "step": 7605 + }, + { + "epoch": 2.45, + "learning_rate": 1.728491423514017e-06, + "loss": 0.0675, + "step": 7606 + }, + { + "epoch": 2.45, + "learning_rate": 1.7265397862940126e-06, + "loss": 0.0757, + "step": 7607 + }, + { + "epoch": 2.45, + "learning_rate": 1.724589147384913e-06, + "loss": 0.0702, + "step": 7608 + }, + { + "epoch": 2.45, + "learning_rate": 1.7226395070220958e-06, + "loss": 0.0714, + "step": 7609 + }, + { + "epoch": 2.45, + "learning_rate": 1.7206908654408117e-06, + "loss": 0.0672, + "step": 7610 + }, + { + "epoch": 2.45, + "learning_rate": 1.7187432228761902e-06, + "loss": 0.0705, + "step": 7611 + }, + { + "epoch": 2.45, + "learning_rate": 1.7167965795632447e-06, + "loss": 0.0718, + "step": 7612 + }, + { + "epoch": 2.45, + "learning_rate": 1.7148509357368626e-06, + "loss": 0.0696, + "step": 7613 + }, + { + "epoch": 2.45, + "learning_rate": 1.712906291631814e-06, + "loss": 0.0689, + "step": 7614 + }, + { + "epoch": 2.45, + "learning_rate": 1.71096264748275e-06, + "loss": 0.0697, + "step": 7615 + }, + { + "epoch": 2.45, + "learning_rate": 1.7090200035241977e-06, + "loss": 0.0674, + "step": 7616 + }, + { + "epoch": 2.45, + "learning_rate": 1.707078359990566e-06, + "loss": 0.0685, + "step": 7617 + }, + { + "epoch": 2.45, + "learning_rate": 1.7051377171161398e-06, + "loss": 0.0685, + "step": 7618 + }, + { + "epoch": 2.45, + "learning_rate": 1.7031980751350873e-06, + "loss": 0.069, + "step": 7619 + }, + { + "epoch": 2.45, + "learning_rate": 1.7012594342814493e-06, + "loss": 0.0707, + "step": 7620 + }, + { + "epoch": 2.45, + "learning_rate": 1.6993217947891571e-06, + "loss": 0.0696, + "step": 7621 + }, + { + "epoch": 2.45, + "learning_rate": 1.6973851568920119e-06, + "loss": 0.0721, + "step": 7622 + }, + { + "epoch": 2.45, + "learning_rate": 1.6954495208236944e-06, + "loss": 0.0667, + "step": 7623 + }, + { + "epoch": 2.45, + "learning_rate": 1.693514886817772e-06, + "loss": 0.0724, + "step": 7624 + }, + { + "epoch": 2.45, + "learning_rate": 1.6915812551076815e-06, + "loss": 0.0708, + "step": 7625 + }, + { + "epoch": 2.45, + "learning_rate": 1.6896486259267408e-06, + "loss": 0.0671, + "step": 7626 + }, + { + "epoch": 2.45, + "learning_rate": 1.6877169995081544e-06, + "loss": 0.0677, + "step": 7627 + }, + { + "epoch": 2.45, + "learning_rate": 1.685786376084999e-06, + "loss": 0.0717, + "step": 7628 + }, + { + "epoch": 2.45, + "learning_rate": 1.6838567558902297e-06, + "loss": 0.0731, + "step": 7629 + }, + { + "epoch": 2.45, + "learning_rate": 1.6819281391566889e-06, + "loss": 0.0713, + "step": 7630 + }, + { + "epoch": 2.45, + "learning_rate": 1.680000526117085e-06, + "loss": 0.0742, + "step": 7631 + }, + { + "epoch": 2.46, + "learning_rate": 1.6780739170040105e-06, + "loss": 0.0705, + "step": 7632 + }, + { + "epoch": 2.46, + "learning_rate": 1.6761483120499456e-06, + "loss": 0.0643, + "step": 7633 + }, + { + "epoch": 2.46, + "learning_rate": 1.6742237114872385e-06, + "loss": 0.084, + "step": 7634 + }, + { + "epoch": 2.46, + "learning_rate": 1.6723001155481167e-06, + "loss": 0.0706, + "step": 7635 + }, + { + "epoch": 2.46, + "learning_rate": 1.6703775244646947e-06, + "loss": 0.0699, + "step": 7636 + }, + { + "epoch": 2.46, + "learning_rate": 1.6684559384689581e-06, + "loss": 0.0668, + "step": 7637 + }, + { + "epoch": 2.46, + "learning_rate": 1.6665353577927744e-06, + "loss": 0.0703, + "step": 7638 + }, + { + "epoch": 2.46, + "learning_rate": 1.6646157826678878e-06, + "loss": 0.0706, + "step": 7639 + }, + { + "epoch": 2.46, + "learning_rate": 1.6626972133259223e-06, + "loss": 0.0614, + "step": 7640 + }, + { + "epoch": 2.46, + "learning_rate": 1.6607796499983798e-06, + "loss": 0.0665, + "step": 7641 + }, + { + "epoch": 2.46, + "learning_rate": 1.658863092916645e-06, + "loss": 0.0756, + "step": 7642 + }, + { + "epoch": 2.46, + "learning_rate": 1.6569475423119763e-06, + "loss": 0.0696, + "step": 7643 + }, + { + "epoch": 2.46, + "learning_rate": 1.655032998415511e-06, + "loss": 0.0729, + "step": 7644 + }, + { + "epoch": 2.46, + "learning_rate": 1.653119461458268e-06, + "loss": 0.0672, + "step": 7645 + }, + { + "epoch": 2.46, + "learning_rate": 1.651206931671141e-06, + "loss": 0.0703, + "step": 7646 + }, + { + "epoch": 2.46, + "learning_rate": 1.6492954092849033e-06, + "loss": 0.075, + "step": 7647 + }, + { + "epoch": 2.46, + "learning_rate": 1.6473848945302095e-06, + "loss": 0.0651, + "step": 7648 + }, + { + "epoch": 2.46, + "learning_rate": 1.6454753876375917e-06, + "loss": 0.074, + "step": 7649 + }, + { + "epoch": 2.46, + "learning_rate": 1.6435668888374557e-06, + "loss": 0.0724, + "step": 7650 + }, + { + "epoch": 2.46, + "learning_rate": 1.6416593983600904e-06, + "loss": 0.0641, + "step": 7651 + }, + { + "epoch": 2.46, + "learning_rate": 1.6397529164356606e-06, + "loss": 0.0745, + "step": 7652 + }, + { + "epoch": 2.46, + "learning_rate": 1.6378474432942127e-06, + "loss": 0.0708, + "step": 7653 + }, + { + "epoch": 2.46, + "learning_rate": 1.6359429791656689e-06, + "loss": 0.0717, + "step": 7654 + }, + { + "epoch": 2.46, + "learning_rate": 1.6340395242798302e-06, + "loss": 0.0674, + "step": 7655 + }, + { + "epoch": 2.46, + "learning_rate": 1.6321370788663737e-06, + "loss": 0.0697, + "step": 7656 + }, + { + "epoch": 2.46, + "learning_rate": 1.6302356431548572e-06, + "loss": 0.0683, + "step": 7657 + }, + { + "epoch": 2.46, + "learning_rate": 1.6283352173747148e-06, + "loss": 0.0699, + "step": 7658 + }, + { + "epoch": 2.46, + "learning_rate": 1.6264358017552628e-06, + "loss": 0.0785, + "step": 7659 + }, + { + "epoch": 2.46, + "learning_rate": 1.6245373965256917e-06, + "loss": 0.0672, + "step": 7660 + }, + { + "epoch": 2.46, + "learning_rate": 1.6226400019150712e-06, + "loss": 0.064, + "step": 7661 + }, + { + "epoch": 2.46, + "learning_rate": 1.6207436181523461e-06, + "loss": 0.0744, + "step": 7662 + }, + { + "epoch": 2.47, + "learning_rate": 1.6188482454663501e-06, + "loss": 0.0655, + "step": 7663 + }, + { + "epoch": 2.47, + "learning_rate": 1.6169538840857768e-06, + "loss": 0.0709, + "step": 7664 + }, + { + "epoch": 2.47, + "learning_rate": 1.6150605342392146e-06, + "loss": 0.069, + "step": 7665 + }, + { + "epoch": 2.47, + "learning_rate": 1.6131681961551215e-06, + "loss": 0.0664, + "step": 7666 + }, + { + "epoch": 2.47, + "learning_rate": 1.611276870061832e-06, + "loss": 0.0636, + "step": 7667 + }, + { + "epoch": 2.47, + "learning_rate": 1.6093865561875665e-06, + "loss": 0.0709, + "step": 7668 + }, + { + "epoch": 2.47, + "learning_rate": 1.6074972547604194e-06, + "loss": 0.0699, + "step": 7669 + }, + { + "epoch": 2.47, + "learning_rate": 1.6056089660083529e-06, + "loss": 0.0651, + "step": 7670 + }, + { + "epoch": 2.47, + "learning_rate": 1.6037216901592245e-06, + "loss": 0.0718, + "step": 7671 + }, + { + "epoch": 2.47, + "learning_rate": 1.6018354274407588e-06, + "loss": 0.0712, + "step": 7672 + }, + { + "epoch": 2.47, + "learning_rate": 1.599950178080556e-06, + "loss": 0.0725, + "step": 7673 + }, + { + "epoch": 2.47, + "learning_rate": 1.5980659423061051e-06, + "loss": 0.068, + "step": 7674 + }, + { + "epoch": 2.47, + "learning_rate": 1.5961827203447622e-06, + "loss": 0.0709, + "step": 7675 + }, + { + "epoch": 2.47, + "learning_rate": 1.5943005124237654e-06, + "loss": 0.0693, + "step": 7676 + }, + { + "epoch": 2.47, + "learning_rate": 1.5924193187702308e-06, + "loss": 0.0754, + "step": 7677 + }, + { + "epoch": 2.47, + "learning_rate": 1.5905391396111502e-06, + "loss": 0.0619, + "step": 7678 + }, + { + "epoch": 2.47, + "learning_rate": 1.5886599751733911e-06, + "loss": 0.0677, + "step": 7679 + }, + { + "epoch": 2.47, + "learning_rate": 1.5867818256837074e-06, + "loss": 0.0703, + "step": 7680 + }, + { + "epoch": 2.47, + "learning_rate": 1.5849046913687215e-06, + "loss": 0.0706, + "step": 7681 + }, + { + "epoch": 2.47, + "learning_rate": 1.5830285724549376e-06, + "loss": 0.0716, + "step": 7682 + }, + { + "epoch": 2.47, + "learning_rate": 1.5811534691687348e-06, + "loss": 0.0741, + "step": 7683 + }, + { + "epoch": 2.47, + "learning_rate": 1.5792793817363716e-06, + "loss": 0.0651, + "step": 7684 + }, + { + "epoch": 2.47, + "learning_rate": 1.577406310383981e-06, + "loss": 0.0653, + "step": 7685 + }, + { + "epoch": 2.47, + "learning_rate": 1.5755342553375797e-06, + "loss": 0.0702, + "step": 7686 + }, + { + "epoch": 2.47, + "learning_rate": 1.573663216823057e-06, + "loss": 0.0679, + "step": 7687 + }, + { + "epoch": 2.47, + "learning_rate": 1.571793195066177e-06, + "loss": 0.0685, + "step": 7688 + }, + { + "epoch": 2.47, + "learning_rate": 1.5699241902925921e-06, + "loss": 0.0683, + "step": 7689 + }, + { + "epoch": 2.47, + "learning_rate": 1.5680562027278156e-06, + "loss": 0.0665, + "step": 7690 + }, + { + "epoch": 2.47, + "learning_rate": 1.5661892325972493e-06, + "loss": 0.0639, + "step": 7691 + }, + { + "epoch": 2.47, + "learning_rate": 1.5643232801261731e-06, + "loss": 0.0743, + "step": 7692 + }, + { + "epoch": 2.47, + "learning_rate": 1.562458345539739e-06, + "loss": 0.0727, + "step": 7693 + }, + { + "epoch": 2.48, + "learning_rate": 1.560594429062975e-06, + "loss": 0.0657, + "step": 7694 + }, + { + "epoch": 2.48, + "learning_rate": 1.5587315309207973e-06, + "loss": 0.0617, + "step": 7695 + }, + { + "epoch": 2.48, + "learning_rate": 1.5568696513379822e-06, + "loss": 0.0656, + "step": 7696 + }, + { + "epoch": 2.48, + "learning_rate": 1.555008790539193e-06, + "loss": 0.0734, + "step": 7697 + }, + { + "epoch": 2.48, + "learning_rate": 1.5531489487489748e-06, + "loss": 0.0681, + "step": 7698 + }, + { + "epoch": 2.48, + "learning_rate": 1.5512901261917401e-06, + "loss": 0.066, + "step": 7699 + }, + { + "epoch": 2.48, + "learning_rate": 1.5494323230917807e-06, + "loss": 0.0705, + "step": 7700 + }, + { + "epoch": 2.48, + "learning_rate": 1.5475755396732706e-06, + "loss": 0.0693, + "step": 7701 + }, + { + "epoch": 2.48, + "learning_rate": 1.5457197761602593e-06, + "loss": 0.0668, + "step": 7702 + }, + { + "epoch": 2.48, + "learning_rate": 1.5438650327766635e-06, + "loss": 0.0696, + "step": 7703 + }, + { + "epoch": 2.48, + "learning_rate": 1.5420113097462897e-06, + "loss": 0.0711, + "step": 7704 + }, + { + "epoch": 2.48, + "learning_rate": 1.5401586072928165e-06, + "loss": 0.0756, + "step": 7705 + }, + { + "epoch": 2.48, + "learning_rate": 1.5383069256397932e-06, + "loss": 0.0669, + "step": 7706 + }, + { + "epoch": 2.48, + "learning_rate": 1.5364562650106596e-06, + "loss": 0.0653, + "step": 7707 + }, + { + "epoch": 2.48, + "learning_rate": 1.5346066256287217e-06, + "loss": 0.0748, + "step": 7708 + }, + { + "epoch": 2.48, + "learning_rate": 1.5327580077171589e-06, + "loss": 0.0739, + "step": 7709 + }, + { + "epoch": 2.48, + "learning_rate": 1.5309104114990403e-06, + "loss": 0.0679, + "step": 7710 + }, + { + "epoch": 2.48, + "learning_rate": 1.5290638371973033e-06, + "loss": 0.0672, + "step": 7711 + }, + { + "epoch": 2.48, + "learning_rate": 1.5272182850347583e-06, + "loss": 0.0716, + "step": 7712 + }, + { + "epoch": 2.48, + "learning_rate": 1.5253737552341053e-06, + "loss": 0.0704, + "step": 7713 + }, + { + "epoch": 2.48, + "learning_rate": 1.5235302480179092e-06, + "loss": 0.0698, + "step": 7714 + }, + { + "epoch": 2.48, + "learning_rate": 1.5216877636086158e-06, + "loss": 0.0697, + "step": 7715 + }, + { + "epoch": 2.48, + "learning_rate": 1.519846302228547e-06, + "loss": 0.071, + "step": 7716 + }, + { + "epoch": 2.48, + "learning_rate": 1.5180058640999008e-06, + "loss": 0.071, + "step": 7717 + }, + { + "epoch": 2.48, + "learning_rate": 1.5161664494447503e-06, + "loss": 0.0707, + "step": 7718 + }, + { + "epoch": 2.48, + "learning_rate": 1.5143280584850528e-06, + "loss": 0.0693, + "step": 7719 + }, + { + "epoch": 2.48, + "learning_rate": 1.5124906914426329e-06, + "loss": 0.0678, + "step": 7720 + }, + { + "epoch": 2.48, + "learning_rate": 1.5106543485391955e-06, + "loss": 0.0713, + "step": 7721 + }, + { + "epoch": 2.48, + "learning_rate": 1.5088190299963212e-06, + "loss": 0.0601, + "step": 7722 + }, + { + "epoch": 2.48, + "learning_rate": 1.5069847360354672e-06, + "loss": 0.0768, + "step": 7723 + }, + { + "epoch": 2.48, + "learning_rate": 1.5051514668779665e-06, + "loss": 0.0643, + "step": 7724 + }, + { + "epoch": 2.49, + "learning_rate": 1.503319222745032e-06, + "loss": 0.0699, + "step": 7725 + }, + { + "epoch": 2.49, + "learning_rate": 1.5014880038577485e-06, + "loss": 0.0739, + "step": 7726 + }, + { + "epoch": 2.49, + "learning_rate": 1.4996578104370752e-06, + "loss": 0.0744, + "step": 7727 + }, + { + "epoch": 2.49, + "learning_rate": 1.4978286427038602e-06, + "loss": 0.0664, + "step": 7728 + }, + { + "epoch": 2.49, + "learning_rate": 1.4960005008788103e-06, + "loss": 0.0705, + "step": 7729 + }, + { + "epoch": 2.49, + "learning_rate": 1.494173385182517e-06, + "loss": 0.0713, + "step": 7730 + }, + { + "epoch": 2.49, + "learning_rate": 1.4923472958354523e-06, + "loss": 0.0737, + "step": 7731 + }, + { + "epoch": 2.49, + "learning_rate": 1.4905222330579583e-06, + "loss": 0.0741, + "step": 7732 + }, + { + "epoch": 2.49, + "learning_rate": 1.4886981970702507e-06, + "loss": 0.0772, + "step": 7733 + }, + { + "epoch": 2.49, + "learning_rate": 1.486875188092435e-06, + "loss": 0.0716, + "step": 7734 + }, + { + "epoch": 2.49, + "learning_rate": 1.4850532063444723e-06, + "loss": 0.0731, + "step": 7735 + }, + { + "epoch": 2.49, + "learning_rate": 1.4832322520462183e-06, + "loss": 0.0672, + "step": 7736 + }, + { + "epoch": 2.49, + "learning_rate": 1.481412325417395e-06, + "loss": 0.0715, + "step": 7737 + }, + { + "epoch": 2.49, + "learning_rate": 1.4795934266776012e-06, + "loss": 0.0751, + "step": 7738 + }, + { + "epoch": 2.49, + "learning_rate": 1.4777755560463114e-06, + "loss": 0.0703, + "step": 7739 + }, + { + "epoch": 2.49, + "learning_rate": 1.4759587137428822e-06, + "loss": 0.0683, + "step": 7740 + }, + { + "epoch": 2.49, + "learning_rate": 1.4741428999865404e-06, + "loss": 0.0782, + "step": 7741 + }, + { + "epoch": 2.49, + "learning_rate": 1.4723281149963875e-06, + "loss": 0.07, + "step": 7742 + }, + { + "epoch": 2.49, + "learning_rate": 1.470514358991405e-06, + "loss": 0.0669, + "step": 7743 + }, + { + "epoch": 2.49, + "learning_rate": 1.468701632190447e-06, + "loss": 0.0703, + "step": 7744 + }, + { + "epoch": 2.49, + "learning_rate": 1.4668899348122434e-06, + "loss": 0.0677, + "step": 7745 + }, + { + "epoch": 2.49, + "learning_rate": 1.4650792670754065e-06, + "loss": 0.0684, + "step": 7746 + }, + { + "epoch": 2.49, + "learning_rate": 1.463269629198416e-06, + "loss": 0.0756, + "step": 7747 + }, + { + "epoch": 2.49, + "learning_rate": 1.4614610213996306e-06, + "loss": 0.077, + "step": 7748 + }, + { + "epoch": 2.49, + "learning_rate": 1.4596534438972853e-06, + "loss": 0.0776, + "step": 7749 + }, + { + "epoch": 2.49, + "learning_rate": 1.457846896909486e-06, + "loss": 0.0683, + "step": 7750 + }, + { + "epoch": 2.49, + "learning_rate": 1.456041380654225e-06, + "loss": 0.0661, + "step": 7751 + }, + { + "epoch": 2.49, + "learning_rate": 1.4542368953493601e-06, + "loss": 0.069, + "step": 7752 + }, + { + "epoch": 2.49, + "learning_rate": 1.4524334412126285e-06, + "loss": 0.0745, + "step": 7753 + }, + { + "epoch": 2.49, + "learning_rate": 1.4506310184616435e-06, + "loss": 0.0683, + "step": 7754 + }, + { + "epoch": 2.49, + "learning_rate": 1.4488296273138914e-06, + "loss": 0.0676, + "step": 7755 + }, + { + "epoch": 2.5, + "learning_rate": 1.4470292679867348e-06, + "loss": 0.0697, + "step": 7756 + }, + { + "epoch": 2.5, + "learning_rate": 1.4452299406974157e-06, + "loss": 0.0688, + "step": 7757 + }, + { + "epoch": 2.5, + "learning_rate": 1.4434316456630482e-06, + "loss": 0.0672, + "step": 7758 + }, + { + "epoch": 2.5, + "learning_rate": 1.4416343831006207e-06, + "loss": 0.0687, + "step": 7759 + }, + { + "epoch": 2.5, + "learning_rate": 1.4398381532270001e-06, + "loss": 0.0682, + "step": 7760 + }, + { + "epoch": 2.5, + "learning_rate": 1.438042956258926e-06, + "loss": 0.0738, + "step": 7761 + }, + { + "epoch": 2.5, + "learning_rate": 1.4362487924130131e-06, + "loss": 0.0673, + "step": 7762 + }, + { + "epoch": 2.5, + "learning_rate": 1.4344556619057558e-06, + "loss": 0.0735, + "step": 7763 + }, + { + "epoch": 2.5, + "learning_rate": 1.4326635649535203e-06, + "loss": 0.0712, + "step": 7764 + }, + { + "epoch": 2.5, + "learning_rate": 1.4308725017725445e-06, + "loss": 0.0709, + "step": 7765 + }, + { + "epoch": 2.5, + "learning_rate": 1.4290824725789542e-06, + "loss": 0.0696, + "step": 7766 + }, + { + "epoch": 2.5, + "learning_rate": 1.427293477588736e-06, + "loss": 0.0673, + "step": 7767 + }, + { + "epoch": 2.5, + "learning_rate": 1.4255055170177546e-06, + "loss": 0.0718, + "step": 7768 + }, + { + "epoch": 2.5, + "learning_rate": 1.4237185910817597e-06, + "loss": 0.0691, + "step": 7769 + }, + { + "epoch": 2.5, + "learning_rate": 1.4219326999963668e-06, + "loss": 0.0707, + "step": 7770 + }, + { + "epoch": 2.5, + "learning_rate": 1.4201478439770656e-06, + "loss": 0.0651, + "step": 7771 + }, + { + "epoch": 2.5, + "learning_rate": 1.4183640232392304e-06, + "loss": 0.0765, + "step": 7772 + }, + { + "epoch": 2.5, + "learning_rate": 1.4165812379981048e-06, + "loss": 0.0777, + "step": 7773 + }, + { + "epoch": 2.5, + "learning_rate": 1.4147994884687987e-06, + "loss": 0.0681, + "step": 7774 + }, + { + "epoch": 2.5, + "learning_rate": 1.4130187748663148e-06, + "loss": 0.0725, + "step": 7775 + }, + { + "epoch": 2.5, + "learning_rate": 1.4112390974055168e-06, + "loss": 0.0736, + "step": 7776 + }, + { + "epoch": 2.5, + "learning_rate": 1.409460456301147e-06, + "loss": 0.0707, + "step": 7777 + }, + { + "epoch": 2.5, + "learning_rate": 1.40768285176783e-06, + "loss": 0.068, + "step": 7778 + }, + { + "epoch": 2.5, + "learning_rate": 1.4059062840200555e-06, + "loss": 0.0686, + "step": 7779 + }, + { + "epoch": 2.5, + "learning_rate": 1.4041307532721882e-06, + "loss": 0.0698, + "step": 7780 + }, + { + "epoch": 2.5, + "learning_rate": 1.4023562597384765e-06, + "loss": 0.0757, + "step": 7781 + }, + { + "epoch": 2.5, + "learning_rate": 1.400582803633036e-06, + "loss": 0.0673, + "step": 7782 + }, + { + "epoch": 2.5, + "learning_rate": 1.3988103851698576e-06, + "loss": 0.0721, + "step": 7783 + }, + { + "epoch": 2.5, + "learning_rate": 1.3970390045628135e-06, + "loss": 0.0736, + "step": 7784 + }, + { + "epoch": 2.5, + "learning_rate": 1.395268662025643e-06, + "loss": 0.0678, + "step": 7785 + }, + { + "epoch": 2.5, + "learning_rate": 1.3934993577719646e-06, + "loss": 0.0677, + "step": 7786 + }, + { + "epoch": 2.51, + "learning_rate": 1.3917310920152683e-06, + "loss": 0.0698, + "step": 7787 + }, + { + "epoch": 2.51, + "learning_rate": 1.3899638649689219e-06, + "loss": 0.0671, + "step": 7788 + }, + { + "epoch": 2.51, + "learning_rate": 1.3881976768461636e-06, + "loss": 0.0712, + "step": 7789 + }, + { + "epoch": 2.51, + "learning_rate": 1.3864325278601142e-06, + "loss": 0.0665, + "step": 7790 + }, + { + "epoch": 2.51, + "learning_rate": 1.3846684182237614e-06, + "loss": 0.069, + "step": 7791 + }, + { + "epoch": 2.51, + "learning_rate": 1.382905348149971e-06, + "loss": 0.0683, + "step": 7792 + }, + { + "epoch": 2.51, + "learning_rate": 1.381143317851481e-06, + "loss": 0.0704, + "step": 7793 + }, + { + "epoch": 2.51, + "learning_rate": 1.3793823275409068e-06, + "loss": 0.0721, + "step": 7794 + }, + { + "epoch": 2.51, + "learning_rate": 1.377622377430734e-06, + "loss": 0.0688, + "step": 7795 + }, + { + "epoch": 2.51, + "learning_rate": 1.3758634677333304e-06, + "loss": 0.0687, + "step": 7796 + }, + { + "epoch": 2.51, + "learning_rate": 1.3741055986609308e-06, + "loss": 0.0726, + "step": 7797 + }, + { + "epoch": 2.51, + "learning_rate": 1.3723487704256466e-06, + "loss": 0.0672, + "step": 7798 + }, + { + "epoch": 2.51, + "learning_rate": 1.3705929832394693e-06, + "loss": 0.0653, + "step": 7799 + }, + { + "epoch": 2.51, + "learning_rate": 1.3688382373142529e-06, + "loss": 0.0718, + "step": 7800 + }, + { + "epoch": 2.51, + "learning_rate": 1.3670845328617332e-06, + "loss": 0.0743, + "step": 7801 + }, + { + "epoch": 2.51, + "learning_rate": 1.3653318700935237e-06, + "loss": 0.0677, + "step": 7802 + }, + { + "epoch": 2.51, + "learning_rate": 1.3635802492211059e-06, + "loss": 0.0689, + "step": 7803 + }, + { + "epoch": 2.51, + "learning_rate": 1.3618296704558364e-06, + "loss": 0.0654, + "step": 7804 + }, + { + "epoch": 2.51, + "learning_rate": 1.3600801340089532e-06, + "loss": 0.0678, + "step": 7805 + }, + { + "epoch": 2.51, + "learning_rate": 1.3583316400915568e-06, + "loss": 0.0696, + "step": 7806 + }, + { + "epoch": 2.51, + "learning_rate": 1.356584188914627e-06, + "loss": 0.0677, + "step": 7807 + }, + { + "epoch": 2.51, + "learning_rate": 1.3548377806890244e-06, + "loss": 0.0773, + "step": 7808 + }, + { + "epoch": 2.51, + "learning_rate": 1.3530924156254742e-06, + "loss": 0.07, + "step": 7809 + }, + { + "epoch": 2.51, + "learning_rate": 1.351348093934579e-06, + "loss": 0.0733, + "step": 7810 + }, + { + "epoch": 2.51, + "learning_rate": 1.3496048158268204e-06, + "loss": 0.0693, + "step": 7811 + }, + { + "epoch": 2.51, + "learning_rate": 1.3478625815125468e-06, + "loss": 0.069, + "step": 7812 + }, + { + "epoch": 2.51, + "learning_rate": 1.3461213912019832e-06, + "loss": 0.0667, + "step": 7813 + }, + { + "epoch": 2.51, + "learning_rate": 1.3443812451052297e-06, + "loss": 0.0689, + "step": 7814 + }, + { + "epoch": 2.51, + "learning_rate": 1.3426421434322589e-06, + "loss": 0.0721, + "step": 7815 + }, + { + "epoch": 2.51, + "learning_rate": 1.340904086392918e-06, + "loss": 0.0716, + "step": 7816 + }, + { + "epoch": 2.51, + "learning_rate": 1.339167074196931e-06, + "loss": 0.0702, + "step": 7817 + }, + { + "epoch": 2.51, + "learning_rate": 1.3374311070538914e-06, + "loss": 0.0654, + "step": 7818 + }, + { + "epoch": 2.52, + "learning_rate": 1.3356961851732686e-06, + "loss": 0.0717, + "step": 7819 + }, + { + "epoch": 2.52, + "learning_rate": 1.333962308764405e-06, + "loss": 0.0633, + "step": 7820 + }, + { + "epoch": 2.52, + "learning_rate": 1.3322294780365187e-06, + "loss": 0.0695, + "step": 7821 + }, + { + "epoch": 2.52, + "learning_rate": 1.3304976931986968e-06, + "loss": 0.0677, + "step": 7822 + }, + { + "epoch": 2.52, + "learning_rate": 1.3287669544599091e-06, + "loss": 0.0681, + "step": 7823 + }, + { + "epoch": 2.52, + "learning_rate": 1.327037262028993e-06, + "loss": 0.0688, + "step": 7824 + }, + { + "epoch": 2.52, + "learning_rate": 1.3253086161146577e-06, + "loss": 0.0654, + "step": 7825 + }, + { + "epoch": 2.52, + "learning_rate": 1.3235810169254903e-06, + "loss": 0.0721, + "step": 7826 + }, + { + "epoch": 2.52, + "learning_rate": 1.3218544646699505e-06, + "loss": 0.0706, + "step": 7827 + }, + { + "epoch": 2.52, + "learning_rate": 1.3201289595563693e-06, + "loss": 0.0707, + "step": 7828 + }, + { + "epoch": 2.52, + "learning_rate": 1.318404501792958e-06, + "loss": 0.0698, + "step": 7829 + }, + { + "epoch": 2.52, + "learning_rate": 1.3166810915877936e-06, + "loss": 0.0729, + "step": 7830 + }, + { + "epoch": 2.52, + "learning_rate": 1.314958729148832e-06, + "loss": 0.0738, + "step": 7831 + }, + { + "epoch": 2.52, + "learning_rate": 1.3132374146838988e-06, + "loss": 0.0675, + "step": 7832 + }, + { + "epoch": 2.52, + "learning_rate": 1.311517148400695e-06, + "loss": 0.0711, + "step": 7833 + }, + { + "epoch": 2.52, + "learning_rate": 1.3097979305067977e-06, + "loss": 0.0664, + "step": 7834 + }, + { + "epoch": 2.52, + "learning_rate": 1.308079761209654e-06, + "loss": 0.0676, + "step": 7835 + }, + { + "epoch": 2.52, + "learning_rate": 1.3063626407165853e-06, + "loss": 0.0781, + "step": 7836 + }, + { + "epoch": 2.52, + "learning_rate": 1.3046465692347842e-06, + "loss": 0.0786, + "step": 7837 + }, + { + "epoch": 2.52, + "learning_rate": 1.3029315469713267e-06, + "loss": 0.0662, + "step": 7838 + }, + { + "epoch": 2.52, + "learning_rate": 1.3012175741331446e-06, + "loss": 0.0707, + "step": 7839 + }, + { + "epoch": 2.52, + "learning_rate": 1.2995046509270593e-06, + "loss": 0.0712, + "step": 7840 + }, + { + "epoch": 2.52, + "learning_rate": 1.2977927775597599e-06, + "loss": 0.067, + "step": 7841 + }, + { + "epoch": 2.52, + "learning_rate": 1.2960819542378055e-06, + "loss": 0.0677, + "step": 7842 + }, + { + "epoch": 2.52, + "learning_rate": 1.2943721811676302e-06, + "loss": 0.077, + "step": 7843 + }, + { + "epoch": 2.52, + "learning_rate": 1.2926634585555498e-06, + "loss": 0.0771, + "step": 7844 + }, + { + "epoch": 2.52, + "learning_rate": 1.2909557866077361e-06, + "loss": 0.0715, + "step": 7845 + }, + { + "epoch": 2.52, + "learning_rate": 1.289249165530252e-06, + "loss": 0.0659, + "step": 7846 + }, + { + "epoch": 2.52, + "learning_rate": 1.2875435955290216e-06, + "loss": 0.063, + "step": 7847 + }, + { + "epoch": 2.52, + "learning_rate": 1.285839076809845e-06, + "loss": 0.0701, + "step": 7848 + }, + { + "epoch": 2.52, + "learning_rate": 1.284135609578402e-06, + "loss": 0.0729, + "step": 7849 + }, + { + "epoch": 2.53, + "learning_rate": 1.2824331940402367e-06, + "loss": 0.0653, + "step": 7850 + }, + { + "epoch": 2.53, + "learning_rate": 1.28073183040077e-06, + "loss": 0.0704, + "step": 7851 + }, + { + "epoch": 2.53, + "learning_rate": 1.2790315188652958e-06, + "loss": 0.0745, + "step": 7852 + }, + { + "epoch": 2.53, + "learning_rate": 1.2773322596389814e-06, + "loss": 0.0717, + "step": 7853 + }, + { + "epoch": 2.53, + "learning_rate": 1.275634052926864e-06, + "loss": 0.0701, + "step": 7854 + }, + { + "epoch": 2.53, + "learning_rate": 1.2739368989338608e-06, + "loss": 0.0667, + "step": 7855 + }, + { + "epoch": 2.53, + "learning_rate": 1.2722407978647556e-06, + "loss": 0.0671, + "step": 7856 + }, + { + "epoch": 2.53, + "learning_rate": 1.270545749924207e-06, + "loss": 0.0638, + "step": 7857 + }, + { + "epoch": 2.53, + "learning_rate": 1.2688517553167467e-06, + "loss": 0.0655, + "step": 7858 + }, + { + "epoch": 2.53, + "learning_rate": 1.2671588142467795e-06, + "loss": 0.0731, + "step": 7859 + }, + { + "epoch": 2.53, + "learning_rate": 1.2654669269185804e-06, + "loss": 0.0655, + "step": 7860 + }, + { + "epoch": 2.53, + "learning_rate": 1.2637760935363053e-06, + "loss": 0.0712, + "step": 7861 + }, + { + "epoch": 2.53, + "learning_rate": 1.262086314303973e-06, + "loss": 0.0616, + "step": 7862 + }, + { + "epoch": 2.53, + "learning_rate": 1.2603975894254783e-06, + "loss": 0.0707, + "step": 7863 + }, + { + "epoch": 2.53, + "learning_rate": 1.2587099191045971e-06, + "loss": 0.069, + "step": 7864 + }, + { + "epoch": 2.53, + "learning_rate": 1.2570233035449642e-06, + "loss": 0.0751, + "step": 7865 + }, + { + "epoch": 2.53, + "learning_rate": 1.2553377429500923e-06, + "loss": 0.0689, + "step": 7866 + }, + { + "epoch": 2.53, + "learning_rate": 1.2536532375233745e-06, + "loss": 0.0669, + "step": 7867 + }, + { + "epoch": 2.53, + "learning_rate": 1.251969787468068e-06, + "loss": 0.0739, + "step": 7868 + }, + { + "epoch": 2.53, + "learning_rate": 1.2502873929873015e-06, + "loss": 0.0694, + "step": 7869 + }, + { + "epoch": 2.53, + "learning_rate": 1.2486060542840882e-06, + "loss": 0.0711, + "step": 7870 + }, + { + "epoch": 2.53, + "learning_rate": 1.246925771561297e-06, + "loss": 0.0775, + "step": 7871 + }, + { + "epoch": 2.53, + "learning_rate": 1.24524654502168e-06, + "loss": 0.0692, + "step": 7872 + }, + { + "epoch": 2.53, + "learning_rate": 1.2435683748678629e-06, + "loss": 0.0676, + "step": 7873 + }, + { + "epoch": 2.53, + "learning_rate": 1.2418912613023392e-06, + "loss": 0.0744, + "step": 7874 + }, + { + "epoch": 2.53, + "learning_rate": 1.240215204527474e-06, + "loss": 0.0763, + "step": 7875 + }, + { + "epoch": 2.53, + "learning_rate": 1.2385402047455131e-06, + "loss": 0.07, + "step": 7876 + }, + { + "epoch": 2.53, + "learning_rate": 1.2368662621585669e-06, + "loss": 0.0653, + "step": 7877 + }, + { + "epoch": 2.53, + "learning_rate": 1.2351933769686154e-06, + "loss": 0.0698, + "step": 7878 + }, + { + "epoch": 2.53, + "learning_rate": 1.233521549377522e-06, + "loss": 0.0692, + "step": 7879 + }, + { + "epoch": 2.53, + "learning_rate": 1.2318507795870138e-06, + "loss": 0.0765, + "step": 7880 + }, + { + "epoch": 2.54, + "learning_rate": 1.2301810677986925e-06, + "loss": 0.0742, + "step": 7881 + }, + { + "epoch": 2.54, + "learning_rate": 1.2285124142140358e-06, + "loss": 0.0634, + "step": 7882 + }, + { + "epoch": 2.54, + "learning_rate": 1.2268448190343918e-06, + "loss": 0.0676, + "step": 7883 + }, + { + "epoch": 2.54, + "learning_rate": 1.2251782824609703e-06, + "loss": 0.0724, + "step": 7884 + }, + { + "epoch": 2.54, + "learning_rate": 1.2235128046948718e-06, + "loss": 0.067, + "step": 7885 + }, + { + "epoch": 2.54, + "learning_rate": 1.2218483859370577e-06, + "loss": 0.063, + "step": 7886 + }, + { + "epoch": 2.54, + "learning_rate": 1.2201850263883609e-06, + "loss": 0.0646, + "step": 7887 + }, + { + "epoch": 2.54, + "learning_rate": 1.2185227262494926e-06, + "loss": 0.0736, + "step": 7888 + }, + { + "epoch": 2.54, + "learning_rate": 1.2168614857210327e-06, + "loss": 0.0671, + "step": 7889 + }, + { + "epoch": 2.54, + "learning_rate": 1.2152013050034328e-06, + "loss": 0.0633, + "step": 7890 + }, + { + "epoch": 2.54, + "learning_rate": 1.2135421842970174e-06, + "loss": 0.0685, + "step": 7891 + }, + { + "epoch": 2.54, + "learning_rate": 1.211884123801984e-06, + "loss": 0.0768, + "step": 7892 + }, + { + "epoch": 2.54, + "learning_rate": 1.2102271237183971e-06, + "loss": 0.0731, + "step": 7893 + }, + { + "epoch": 2.54, + "learning_rate": 1.2085711842462033e-06, + "loss": 0.0683, + "step": 7894 + }, + { + "epoch": 2.54, + "learning_rate": 1.206916305585213e-06, + "loss": 0.071, + "step": 7895 + }, + { + "epoch": 2.54, + "learning_rate": 1.2052624879351105e-06, + "loss": 0.0702, + "step": 7896 + }, + { + "epoch": 2.54, + "learning_rate": 1.203609731495452e-06, + "loss": 0.0703, + "step": 7897 + }, + { + "epoch": 2.54, + "learning_rate": 1.2019580364656669e-06, + "loss": 0.0742, + "step": 7898 + }, + { + "epoch": 2.54, + "learning_rate": 1.2003074030450535e-06, + "loss": 0.0662, + "step": 7899 + }, + { + "epoch": 2.54, + "learning_rate": 1.198657831432788e-06, + "loss": 0.0674, + "step": 7900 + }, + { + "epoch": 2.54, + "learning_rate": 1.1970093218279133e-06, + "loss": 0.063, + "step": 7901 + }, + { + "epoch": 2.54, + "learning_rate": 1.195361874429345e-06, + "loss": 0.0692, + "step": 7902 + }, + { + "epoch": 2.54, + "learning_rate": 1.1937154894358716e-06, + "loss": 0.0692, + "step": 7903 + }, + { + "epoch": 2.54, + "learning_rate": 1.1920701670461532e-06, + "loss": 0.0717, + "step": 7904 + }, + { + "epoch": 2.54, + "learning_rate": 1.1904259074587176e-06, + "loss": 0.07, + "step": 7905 + }, + { + "epoch": 2.54, + "learning_rate": 1.1887827108719752e-06, + "loss": 0.0725, + "step": 7906 + }, + { + "epoch": 2.54, + "learning_rate": 1.187140577484196e-06, + "loss": 0.0723, + "step": 7907 + }, + { + "epoch": 2.54, + "learning_rate": 1.185499507493526e-06, + "loss": 0.0694, + "step": 7908 + }, + { + "epoch": 2.54, + "learning_rate": 1.1838595010979902e-06, + "loss": 0.0656, + "step": 7909 + }, + { + "epoch": 2.54, + "learning_rate": 1.1822205584954716e-06, + "loss": 0.0661, + "step": 7910 + }, + { + "epoch": 2.54, + "learning_rate": 1.180582679883735e-06, + "loss": 0.0742, + "step": 7911 + }, + { + "epoch": 2.55, + "learning_rate": 1.1789458654604146e-06, + "loss": 0.0681, + "step": 7912 + }, + { + "epoch": 2.55, + "learning_rate": 1.1773101154230139e-06, + "loss": 0.0761, + "step": 7913 + }, + { + "epoch": 2.55, + "learning_rate": 1.1756754299689078e-06, + "loss": 0.0692, + "step": 7914 + }, + { + "epoch": 2.55, + "learning_rate": 1.174041809295351e-06, + "loss": 0.0667, + "step": 7915 + }, + { + "epoch": 2.55, + "learning_rate": 1.1724092535994547e-06, + "loss": 0.0747, + "step": 7916 + }, + { + "epoch": 2.55, + "learning_rate": 1.1707777630782159e-06, + "loss": 0.0787, + "step": 7917 + }, + { + "epoch": 2.55, + "learning_rate": 1.1691473379284945e-06, + "loss": 0.0693, + "step": 7918 + }, + { + "epoch": 2.55, + "learning_rate": 1.1675179783470258e-06, + "loss": 0.069, + "step": 7919 + }, + { + "epoch": 2.55, + "learning_rate": 1.1658896845304124e-06, + "loss": 0.0635, + "step": 7920 + }, + { + "epoch": 2.55, + "learning_rate": 1.1642624566751359e-06, + "loss": 0.0731, + "step": 7921 + }, + { + "epoch": 2.55, + "learning_rate": 1.1626362949775428e-06, + "loss": 0.0707, + "step": 7922 + }, + { + "epoch": 2.55, + "learning_rate": 1.1610111996338513e-06, + "loss": 0.0688, + "step": 7923 + }, + { + "epoch": 2.55, + "learning_rate": 1.1593871708401528e-06, + "loss": 0.0673, + "step": 7924 + }, + { + "epoch": 2.55, + "learning_rate": 1.15776420879241e-06, + "loss": 0.0698, + "step": 7925 + }, + { + "epoch": 2.55, + "learning_rate": 1.1561423136864547e-06, + "loss": 0.0698, + "step": 7926 + }, + { + "epoch": 2.55, + "learning_rate": 1.1545214857179943e-06, + "loss": 0.066, + "step": 7927 + }, + { + "epoch": 2.55, + "learning_rate": 1.1529017250826035e-06, + "loss": 0.0733, + "step": 7928 + }, + { + "epoch": 2.55, + "learning_rate": 1.1512830319757306e-06, + "loss": 0.0685, + "step": 7929 + }, + { + "epoch": 2.55, + "learning_rate": 1.1496654065926927e-06, + "loss": 0.0692, + "step": 7930 + }, + { + "epoch": 2.55, + "learning_rate": 1.1480488491286767e-06, + "loss": 0.0651, + "step": 7931 + }, + { + "epoch": 2.55, + "learning_rate": 1.1464333597787491e-06, + "loss": 0.0695, + "step": 7932 + }, + { + "epoch": 2.55, + "learning_rate": 1.1448189387378395e-06, + "loss": 0.0799, + "step": 7933 + }, + { + "epoch": 2.55, + "learning_rate": 1.1432055862007496e-06, + "loss": 0.0687, + "step": 7934 + }, + { + "epoch": 2.55, + "learning_rate": 1.1415933023621539e-06, + "loss": 0.0656, + "step": 7935 + }, + { + "epoch": 2.55, + "learning_rate": 1.139982087416598e-06, + "loss": 0.0703, + "step": 7936 + }, + { + "epoch": 2.55, + "learning_rate": 1.138371941558495e-06, + "loss": 0.0673, + "step": 7937 + }, + { + "epoch": 2.55, + "learning_rate": 1.136762864982137e-06, + "loss": 0.069, + "step": 7938 + }, + { + "epoch": 2.55, + "learning_rate": 1.1351548578816795e-06, + "loss": 0.0716, + "step": 7939 + }, + { + "epoch": 2.55, + "learning_rate": 1.13354792045115e-06, + "loss": 0.0703, + "step": 7940 + }, + { + "epoch": 2.55, + "learning_rate": 1.1319420528844517e-06, + "loss": 0.0609, + "step": 7941 + }, + { + "epoch": 2.55, + "learning_rate": 1.1303372553753522e-06, + "loss": 0.071, + "step": 7942 + }, + { + "epoch": 2.56, + "learning_rate": 1.1287335281174938e-06, + "loss": 0.0717, + "step": 7943 + }, + { + "epoch": 2.56, + "learning_rate": 1.1271308713043904e-06, + "loss": 0.0733, + "step": 7944 + }, + { + "epoch": 2.56, + "learning_rate": 1.1255292851294263e-06, + "loss": 0.0729, + "step": 7945 + }, + { + "epoch": 2.56, + "learning_rate": 1.1239287697858513e-06, + "loss": 0.071, + "step": 7946 + }, + { + "epoch": 2.56, + "learning_rate": 1.1223293254667954e-06, + "loss": 0.0676, + "step": 7947 + }, + { + "epoch": 2.56, + "learning_rate": 1.1207309523652543e-06, + "loss": 0.0783, + "step": 7948 + }, + { + "epoch": 2.56, + "learning_rate": 1.119133650674088e-06, + "loss": 0.0726, + "step": 7949 + }, + { + "epoch": 2.56, + "learning_rate": 1.1175374205860412e-06, + "loss": 0.0677, + "step": 7950 + }, + { + "epoch": 2.56, + "learning_rate": 1.1159422622937178e-06, + "loss": 0.0682, + "step": 7951 + }, + { + "epoch": 2.56, + "learning_rate": 1.1143481759895958e-06, + "loss": 0.0736, + "step": 7952 + }, + { + "epoch": 2.56, + "learning_rate": 1.1127551618660281e-06, + "loss": 0.0692, + "step": 7953 + }, + { + "epoch": 2.56, + "learning_rate": 1.1111632201152344e-06, + "loss": 0.0674, + "step": 7954 + }, + { + "epoch": 2.56, + "learning_rate": 1.109572350929299e-06, + "loss": 0.0718, + "step": 7955 + }, + { + "epoch": 2.56, + "learning_rate": 1.1079825545001887e-06, + "loss": 0.0691, + "step": 7956 + }, + { + "epoch": 2.56, + "learning_rate": 1.1063938310197342e-06, + "loss": 0.0661, + "step": 7957 + }, + { + "epoch": 2.56, + "learning_rate": 1.104806180679635e-06, + "loss": 0.0707, + "step": 7958 + }, + { + "epoch": 2.56, + "learning_rate": 1.1032196036714682e-06, + "loss": 0.0677, + "step": 7959 + }, + { + "epoch": 2.56, + "learning_rate": 1.1016341001866738e-06, + "loss": 0.0676, + "step": 7960 + }, + { + "epoch": 2.56, + "learning_rate": 1.1000496704165664e-06, + "loss": 0.0678, + "step": 7961 + }, + { + "epoch": 2.56, + "learning_rate": 1.09846631455233e-06, + "loss": 0.0711, + "step": 7962 + }, + { + "epoch": 2.56, + "learning_rate": 1.0968840327850184e-06, + "loss": 0.0735, + "step": 7963 + }, + { + "epoch": 2.56, + "learning_rate": 1.0953028253055541e-06, + "loss": 0.0782, + "step": 7964 + }, + { + "epoch": 2.56, + "learning_rate": 1.0937226923047385e-06, + "loss": 0.067, + "step": 7965 + }, + { + "epoch": 2.56, + "learning_rate": 1.0921436339732327e-06, + "loss": 0.0651, + "step": 7966 + }, + { + "epoch": 2.56, + "learning_rate": 1.0905656505015738e-06, + "loss": 0.0685, + "step": 7967 + }, + { + "epoch": 2.56, + "learning_rate": 1.0889887420801671e-06, + "loss": 0.0687, + "step": 7968 + }, + { + "epoch": 2.56, + "learning_rate": 1.0874129088992913e-06, + "loss": 0.0712, + "step": 7969 + }, + { + "epoch": 2.56, + "learning_rate": 1.0858381511490878e-06, + "loss": 0.0686, + "step": 7970 + }, + { + "epoch": 2.56, + "learning_rate": 1.0842644690195814e-06, + "loss": 0.0671, + "step": 7971 + }, + { + "epoch": 2.56, + "learning_rate": 1.0826918627006544e-06, + "loss": 0.0626, + "step": 7972 + }, + { + "epoch": 2.56, + "learning_rate": 1.0811203323820629e-06, + "loss": 0.0724, + "step": 7973 + }, + { + "epoch": 2.57, + "learning_rate": 1.0795498782534398e-06, + "loss": 0.0682, + "step": 7974 + }, + { + "epoch": 2.57, + "learning_rate": 1.0779805005042786e-06, + "loss": 0.0773, + "step": 7975 + }, + { + "epoch": 2.57, + "learning_rate": 1.0764121993239463e-06, + "loss": 0.0738, + "step": 7976 + }, + { + "epoch": 2.57, + "learning_rate": 1.0748449749016832e-06, + "loss": 0.0679, + "step": 7977 + }, + { + "epoch": 2.57, + "learning_rate": 1.073278827426598e-06, + "loss": 0.069, + "step": 7978 + }, + { + "epoch": 2.57, + "learning_rate": 1.0717137570876646e-06, + "loss": 0.0654, + "step": 7979 + }, + { + "epoch": 2.57, + "learning_rate": 1.0701497640737378e-06, + "loss": 0.0706, + "step": 7980 + }, + { + "epoch": 2.57, + "learning_rate": 1.0685868485735296e-06, + "loss": 0.0694, + "step": 7981 + }, + { + "epoch": 2.57, + "learning_rate": 1.0670250107756285e-06, + "loss": 0.074, + "step": 7982 + }, + { + "epoch": 2.57, + "learning_rate": 1.0654642508684954e-06, + "loss": 0.0677, + "step": 7983 + }, + { + "epoch": 2.57, + "learning_rate": 1.063904569040457e-06, + "loss": 0.069, + "step": 7984 + }, + { + "epoch": 2.57, + "learning_rate": 1.0623459654797097e-06, + "loss": 0.0693, + "step": 7985 + }, + { + "epoch": 2.57, + "learning_rate": 1.060788440374324e-06, + "loss": 0.0717, + "step": 7986 + }, + { + "epoch": 2.57, + "learning_rate": 1.0592319939122387e-06, + "loss": 0.0736, + "step": 7987 + }, + { + "epoch": 2.57, + "learning_rate": 1.0576766262812532e-06, + "loss": 0.0659, + "step": 7988 + }, + { + "epoch": 2.57, + "learning_rate": 1.0561223376690533e-06, + "loss": 0.0647, + "step": 7989 + }, + { + "epoch": 2.57, + "learning_rate": 1.0545691282631832e-06, + "loss": 0.0705, + "step": 7990 + }, + { + "epoch": 2.57, + "learning_rate": 1.0530169982510563e-06, + "loss": 0.0697, + "step": 7991 + }, + { + "epoch": 2.57, + "learning_rate": 1.0514659478199652e-06, + "loss": 0.073, + "step": 7992 + }, + { + "epoch": 2.57, + "learning_rate": 1.0499159771570633e-06, + "loss": 0.0712, + "step": 7993 + }, + { + "epoch": 2.57, + "learning_rate": 1.0483670864493777e-06, + "loss": 0.064, + "step": 7994 + }, + { + "epoch": 2.57, + "learning_rate": 1.0468192758838025e-06, + "loss": 0.0729, + "step": 7995 + }, + { + "epoch": 2.57, + "learning_rate": 1.0452725456471035e-06, + "loss": 0.0668, + "step": 7996 + }, + { + "epoch": 2.57, + "learning_rate": 1.0437268959259151e-06, + "loss": 0.0637, + "step": 7997 + }, + { + "epoch": 2.57, + "learning_rate": 1.0421823269067443e-06, + "loss": 0.07, + "step": 7998 + }, + { + "epoch": 2.57, + "learning_rate": 1.0406388387759636e-06, + "loss": 0.0677, + "step": 7999 + }, + { + "epoch": 2.57, + "learning_rate": 1.0390964317198181e-06, + "loss": 0.0728, + "step": 8000 + }, + { + "epoch": 2.57, + "learning_rate": 1.0375551059244205e-06, + "loss": 0.0805, + "step": 8001 + }, + { + "epoch": 2.57, + "learning_rate": 1.0360148615757536e-06, + "loss": 0.0661, + "step": 8002 + }, + { + "epoch": 2.57, + "learning_rate": 1.0344756988596672e-06, + "loss": 0.0714, + "step": 8003 + }, + { + "epoch": 2.57, + "learning_rate": 1.0329376179618888e-06, + "loss": 0.0769, + "step": 8004 + }, + { + "epoch": 2.58, + "learning_rate": 1.031400619068006e-06, + "loss": 0.0661, + "step": 8005 + }, + { + "epoch": 2.58, + "learning_rate": 1.029864702363481e-06, + "loss": 0.0624, + "step": 8006 + }, + { + "epoch": 2.58, + "learning_rate": 1.028329868033644e-06, + "loss": 0.0714, + "step": 8007 + }, + { + "epoch": 2.58, + "learning_rate": 1.0267961162636919e-06, + "loss": 0.0712, + "step": 8008 + }, + { + "epoch": 2.58, + "learning_rate": 1.025263447238698e-06, + "loss": 0.0741, + "step": 8009 + }, + { + "epoch": 2.58, + "learning_rate": 1.0237318611435976e-06, + "loss": 0.0706, + "step": 8010 + }, + { + "epoch": 2.58, + "learning_rate": 1.0222013581632006e-06, + "loss": 0.0696, + "step": 8011 + }, + { + "epoch": 2.58, + "learning_rate": 1.0206719384821806e-06, + "loss": 0.0692, + "step": 8012 + }, + { + "epoch": 2.58, + "learning_rate": 1.0191436022850909e-06, + "loss": 0.0694, + "step": 8013 + }, + { + "epoch": 2.58, + "learning_rate": 1.0176163497563374e-06, + "loss": 0.0733, + "step": 8014 + }, + { + "epoch": 2.58, + "learning_rate": 1.0160901810802114e-06, + "loss": 0.0672, + "step": 8015 + }, + { + "epoch": 2.58, + "learning_rate": 1.014565096440866e-06, + "loss": 0.0734, + "step": 8016 + }, + { + "epoch": 2.58, + "learning_rate": 1.0130410960223236e-06, + "loss": 0.0683, + "step": 8017 + }, + { + "epoch": 2.58, + "learning_rate": 1.011518180008475e-06, + "loss": 0.0691, + "step": 8018 + }, + { + "epoch": 2.58, + "learning_rate": 1.0099963485830866e-06, + "loss": 0.0634, + "step": 8019 + }, + { + "epoch": 2.58, + "learning_rate": 1.0084756019297826e-06, + "loss": 0.0677, + "step": 8020 + }, + { + "epoch": 2.58, + "learning_rate": 1.0069559402320672e-06, + "loss": 0.0709, + "step": 8021 + }, + { + "epoch": 2.58, + "learning_rate": 1.0054373636733084e-06, + "loss": 0.0743, + "step": 8022 + }, + { + "epoch": 2.58, + "learning_rate": 1.0039198724367439e-06, + "loss": 0.0724, + "step": 8023 + }, + { + "epoch": 2.58, + "learning_rate": 1.0024034667054783e-06, + "loss": 0.0714, + "step": 8024 + }, + { + "epoch": 2.58, + "learning_rate": 1.000888146662492e-06, + "loss": 0.0715, + "step": 8025 + }, + { + "epoch": 2.58, + "learning_rate": 9.993739124906277e-07, + "loss": 0.0714, + "step": 8026 + }, + { + "epoch": 2.58, + "learning_rate": 9.978607643726002e-07, + "loss": 0.071, + "step": 8027 + }, + { + "epoch": 2.58, + "learning_rate": 9.963487024909902e-07, + "loss": 0.0648, + "step": 8028 + }, + { + "epoch": 2.58, + "learning_rate": 9.948377270282493e-07, + "loss": 0.0699, + "step": 8029 + }, + { + "epoch": 2.58, + "learning_rate": 9.933278381667023e-07, + "loss": 0.0662, + "step": 8030 + }, + { + "epoch": 2.58, + "learning_rate": 9.918190360885361e-07, + "loss": 0.0728, + "step": 8031 + }, + { + "epoch": 2.58, + "learning_rate": 9.903113209758098e-07, + "loss": 0.0691, + "step": 8032 + }, + { + "epoch": 2.58, + "learning_rate": 9.88804693010449e-07, + "loss": 0.0635, + "step": 8033 + }, + { + "epoch": 2.58, + "learning_rate": 9.872991523742526e-07, + "loss": 0.0721, + "step": 8034 + }, + { + "epoch": 2.58, + "learning_rate": 9.85794699248881e-07, + "loss": 0.0737, + "step": 8035 + }, + { + "epoch": 2.59, + "learning_rate": 9.842913338158732e-07, + "loss": 0.0673, + "step": 8036 + }, + { + "epoch": 2.59, + "learning_rate": 9.8278905625663e-07, + "loss": 0.0665, + "step": 8037 + }, + { + "epoch": 2.59, + "learning_rate": 9.812878667524218e-07, + "loss": 0.0672, + "step": 8038 + }, + { + "epoch": 2.59, + "learning_rate": 9.79787765484389e-07, + "loss": 0.0698, + "step": 8039 + }, + { + "epoch": 2.59, + "learning_rate": 9.782887526335405e-07, + "loss": 0.0645, + "step": 8040 + }, + { + "epoch": 2.59, + "learning_rate": 9.767908283807503e-07, + "loss": 0.0707, + "step": 8041 + }, + { + "epoch": 2.59, + "learning_rate": 9.752939929067695e-07, + "loss": 0.0732, + "step": 8042 + }, + { + "epoch": 2.59, + "learning_rate": 9.737982463922102e-07, + "loss": 0.0721, + "step": 8043 + }, + { + "epoch": 2.59, + "learning_rate": 9.723035890175524e-07, + "loss": 0.0668, + "step": 8044 + }, + { + "epoch": 2.59, + "learning_rate": 9.708100209631565e-07, + "loss": 0.0691, + "step": 8045 + }, + { + "epoch": 2.59, + "learning_rate": 9.69317542409235e-07, + "loss": 0.0675, + "step": 8046 + }, + { + "epoch": 2.59, + "learning_rate": 9.678261535358758e-07, + "loss": 0.065, + "step": 8047 + }, + { + "epoch": 2.59, + "learning_rate": 9.663358545230428e-07, + "loss": 0.0714, + "step": 8048 + }, + { + "epoch": 2.59, + "learning_rate": 9.648466455505578e-07, + "loss": 0.0776, + "step": 8049 + }, + { + "epoch": 2.59, + "learning_rate": 9.63358526798114e-07, + "loss": 0.0678, + "step": 8050 + }, + { + "epoch": 2.59, + "learning_rate": 9.618714984452793e-07, + "loss": 0.0761, + "step": 8051 + }, + { + "epoch": 2.59, + "learning_rate": 9.603855606714795e-07, + "loss": 0.0658, + "step": 8052 + }, + { + "epoch": 2.59, + "learning_rate": 9.589007136560147e-07, + "loss": 0.0737, + "step": 8053 + }, + { + "epoch": 2.59, + "learning_rate": 9.574169575780557e-07, + "loss": 0.0715, + "step": 8054 + }, + { + "epoch": 2.59, + "learning_rate": 9.559342926166371e-07, + "loss": 0.0636, + "step": 8055 + }, + { + "epoch": 2.59, + "learning_rate": 9.544527189506625e-07, + "loss": 0.0693, + "step": 8056 + }, + { + "epoch": 2.59, + "learning_rate": 9.529722367589079e-07, + "loss": 0.0672, + "step": 8057 + }, + { + "epoch": 2.59, + "learning_rate": 9.514928462200146e-07, + "loss": 0.0685, + "step": 8058 + }, + { + "epoch": 2.59, + "learning_rate": 9.500145475124866e-07, + "loss": 0.0654, + "step": 8059 + }, + { + "epoch": 2.59, + "learning_rate": 9.485373408147081e-07, + "loss": 0.0623, + "step": 8060 + }, + { + "epoch": 2.59, + "learning_rate": 9.470612263049217e-07, + "loss": 0.0702, + "step": 8061 + }, + { + "epoch": 2.59, + "learning_rate": 9.455862041612418e-07, + "loss": 0.0699, + "step": 8062 + }, + { + "epoch": 2.59, + "learning_rate": 9.441122745616527e-07, + "loss": 0.0674, + "step": 8063 + }, + { + "epoch": 2.59, + "learning_rate": 9.426394376840054e-07, + "loss": 0.07, + "step": 8064 + }, + { + "epoch": 2.59, + "learning_rate": 9.411676937060143e-07, + "loss": 0.0676, + "step": 8065 + }, + { + "epoch": 2.59, + "learning_rate": 9.396970428052698e-07, + "loss": 0.0727, + "step": 8066 + }, + { + "epoch": 2.6, + "learning_rate": 9.382274851592277e-07, + "loss": 0.0663, + "step": 8067 + }, + { + "epoch": 2.6, + "learning_rate": 9.367590209452071e-07, + "loss": 0.0692, + "step": 8068 + }, + { + "epoch": 2.6, + "learning_rate": 9.352916503404042e-07, + "loss": 0.0704, + "step": 8069 + }, + { + "epoch": 2.6, + "learning_rate": 9.33825373521875e-07, + "loss": 0.0711, + "step": 8070 + }, + { + "epoch": 2.6, + "learning_rate": 9.323601906665469e-07, + "loss": 0.0671, + "step": 8071 + }, + { + "epoch": 2.6, + "learning_rate": 9.308961019512164e-07, + "loss": 0.0699, + "step": 8072 + }, + { + "epoch": 2.6, + "learning_rate": 9.294331075525453e-07, + "loss": 0.0707, + "step": 8073 + }, + { + "epoch": 2.6, + "learning_rate": 9.279712076470637e-07, + "loss": 0.0726, + "step": 8074 + }, + { + "epoch": 2.6, + "learning_rate": 9.265104024111737e-07, + "loss": 0.0735, + "step": 8075 + }, + { + "epoch": 2.6, + "learning_rate": 9.250506920211422e-07, + "loss": 0.0714, + "step": 8076 + }, + { + "epoch": 2.6, + "learning_rate": 9.235920766531015e-07, + "loss": 0.0721, + "step": 8077 + }, + { + "epoch": 2.6, + "learning_rate": 9.221345564830553e-07, + "loss": 0.0665, + "step": 8078 + }, + { + "epoch": 2.6, + "learning_rate": 9.206781316868741e-07, + "loss": 0.0747, + "step": 8079 + }, + { + "epoch": 2.6, + "learning_rate": 9.19222802440296e-07, + "loss": 0.0709, + "step": 8080 + }, + { + "epoch": 2.6, + "learning_rate": 9.177685689189286e-07, + "loss": 0.0722, + "step": 8081 + }, + { + "epoch": 2.6, + "learning_rate": 9.163154312982459e-07, + "loss": 0.0655, + "step": 8082 + }, + { + "epoch": 2.6, + "learning_rate": 9.148633897535864e-07, + "loss": 0.0727, + "step": 8083 + }, + { + "epoch": 2.6, + "learning_rate": 9.134124444601655e-07, + "loss": 0.0742, + "step": 8084 + }, + { + "epoch": 2.6, + "learning_rate": 9.119625955930566e-07, + "loss": 0.0698, + "step": 8085 + }, + { + "epoch": 2.6, + "learning_rate": 9.105138433272021e-07, + "loss": 0.0723, + "step": 8086 + }, + { + "epoch": 2.6, + "learning_rate": 9.090661878374196e-07, + "loss": 0.0662, + "step": 8087 + }, + { + "epoch": 2.6, + "learning_rate": 9.076196292983874e-07, + "loss": 0.0673, + "step": 8088 + }, + { + "epoch": 2.6, + "learning_rate": 9.061741678846514e-07, + "loss": 0.0731, + "step": 8089 + }, + { + "epoch": 2.6, + "learning_rate": 9.047298037706331e-07, + "loss": 0.0706, + "step": 8090 + }, + { + "epoch": 2.6, + "learning_rate": 9.032865371306077e-07, + "loss": 0.0698, + "step": 8091 + }, + { + "epoch": 2.6, + "learning_rate": 9.018443681387312e-07, + "loss": 0.0726, + "step": 8092 + }, + { + "epoch": 2.6, + "learning_rate": 9.004032969690202e-07, + "loss": 0.0703, + "step": 8093 + }, + { + "epoch": 2.6, + "learning_rate": 8.989633237953621e-07, + "loss": 0.0793, + "step": 8094 + }, + { + "epoch": 2.6, + "learning_rate": 8.975244487915057e-07, + "loss": 0.0657, + "step": 8095 + }, + { + "epoch": 2.6, + "learning_rate": 8.960866721310768e-07, + "loss": 0.0657, + "step": 8096 + }, + { + "epoch": 2.6, + "learning_rate": 8.94649993987563e-07, + "loss": 0.0722, + "step": 8097 + }, + { + "epoch": 2.61, + "learning_rate": 8.932144145343169e-07, + "loss": 0.0719, + "step": 8098 + }, + { + "epoch": 2.61, + "learning_rate": 8.917799339445643e-07, + "loss": 0.0664, + "step": 8099 + }, + { + "epoch": 2.61, + "learning_rate": 8.903465523913957e-07, + "loss": 0.0684, + "step": 8100 + }, + { + "epoch": 2.61, + "learning_rate": 8.889142700477649e-07, + "loss": 0.0739, + "step": 8101 + }, + { + "epoch": 2.61, + "learning_rate": 8.874830870865037e-07, + "loss": 0.0685, + "step": 8102 + }, + { + "epoch": 2.61, + "learning_rate": 8.860530036803006e-07, + "loss": 0.066, + "step": 8103 + }, + { + "epoch": 2.61, + "learning_rate": 8.846240200017165e-07, + "loss": 0.0662, + "step": 8104 + }, + { + "epoch": 2.61, + "learning_rate": 8.831961362231789e-07, + "loss": 0.0698, + "step": 8105 + }, + { + "epoch": 2.61, + "learning_rate": 8.817693525169802e-07, + "loss": 0.0684, + "step": 8106 + }, + { + "epoch": 2.61, + "learning_rate": 8.803436690552858e-07, + "loss": 0.0699, + "step": 8107 + }, + { + "epoch": 2.61, + "learning_rate": 8.789190860101226e-07, + "loss": 0.0612, + "step": 8108 + }, + { + "epoch": 2.61, + "learning_rate": 8.774956035533877e-07, + "loss": 0.0698, + "step": 8109 + }, + { + "epoch": 2.61, + "learning_rate": 8.760732218568435e-07, + "loss": 0.0655, + "step": 8110 + }, + { + "epoch": 2.61, + "learning_rate": 8.746519410921217e-07, + "loss": 0.0733, + "step": 8111 + }, + { + "epoch": 2.61, + "learning_rate": 8.732317614307173e-07, + "loss": 0.0689, + "step": 8112 + }, + { + "epoch": 2.61, + "learning_rate": 8.718126830439999e-07, + "loss": 0.071, + "step": 8113 + }, + { + "epoch": 2.61, + "learning_rate": 8.703947061031981e-07, + "loss": 0.064, + "step": 8114 + }, + { + "epoch": 2.61, + "learning_rate": 8.689778307794128e-07, + "loss": 0.0615, + "step": 8115 + }, + { + "epoch": 2.61, + "learning_rate": 8.675620572436094e-07, + "loss": 0.0691, + "step": 8116 + }, + { + "epoch": 2.61, + "learning_rate": 8.661473856666214e-07, + "loss": 0.0685, + "step": 8117 + }, + { + "epoch": 2.61, + "learning_rate": 8.647338162191465e-07, + "loss": 0.0704, + "step": 8118 + }, + { + "epoch": 2.61, + "learning_rate": 8.633213490717573e-07, + "loss": 0.0699, + "step": 8119 + }, + { + "epoch": 2.61, + "learning_rate": 8.619099843948842e-07, + "loss": 0.0715, + "step": 8120 + }, + { + "epoch": 2.61, + "learning_rate": 8.604997223588296e-07, + "loss": 0.0647, + "step": 8121 + }, + { + "epoch": 2.61, + "learning_rate": 8.590905631337599e-07, + "loss": 0.0676, + "step": 8122 + }, + { + "epoch": 2.61, + "learning_rate": 8.576825068897154e-07, + "loss": 0.0704, + "step": 8123 + }, + { + "epoch": 2.61, + "learning_rate": 8.562755537965917e-07, + "loss": 0.0679, + "step": 8124 + }, + { + "epoch": 2.61, + "learning_rate": 8.548697040241627e-07, + "loss": 0.0685, + "step": 8125 + }, + { + "epoch": 2.61, + "learning_rate": 8.534649577420617e-07, + "loss": 0.0724, + "step": 8126 + }, + { + "epoch": 2.61, + "learning_rate": 8.520613151197899e-07, + "loss": 0.0638, + "step": 8127 + }, + { + "epoch": 2.61, + "learning_rate": 8.506587763267204e-07, + "loss": 0.0651, + "step": 8128 + }, + { + "epoch": 2.62, + "learning_rate": 8.492573415320903e-07, + "loss": 0.0634, + "step": 8129 + }, + { + "epoch": 2.62, + "learning_rate": 8.478570109049967e-07, + "loss": 0.0729, + "step": 8130 + }, + { + "epoch": 2.62, + "learning_rate": 8.46457784614414e-07, + "loss": 0.0678, + "step": 8131 + }, + { + "epoch": 2.62, + "learning_rate": 8.450596628291785e-07, + "loss": 0.0664, + "step": 8132 + }, + { + "epoch": 2.62, + "learning_rate": 8.436626457179909e-07, + "loss": 0.0635, + "step": 8133 + }, + { + "epoch": 2.62, + "learning_rate": 8.42266733449425e-07, + "loss": 0.0722, + "step": 8134 + }, + { + "epoch": 2.62, + "learning_rate": 8.408719261919163e-07, + "loss": 0.0788, + "step": 8135 + }, + { + "epoch": 2.62, + "learning_rate": 8.394782241137666e-07, + "loss": 0.0705, + "step": 8136 + }, + { + "epoch": 2.62, + "learning_rate": 8.380856273831472e-07, + "loss": 0.0708, + "step": 8137 + }, + { + "epoch": 2.62, + "learning_rate": 8.366941361680947e-07, + "loss": 0.0721, + "step": 8138 + }, + { + "epoch": 2.62, + "learning_rate": 8.353037506365102e-07, + "loss": 0.0678, + "step": 8139 + }, + { + "epoch": 2.62, + "learning_rate": 8.339144709561675e-07, + "loss": 0.0702, + "step": 8140 + }, + { + "epoch": 2.62, + "learning_rate": 8.325262972947013e-07, + "loss": 0.0705, + "step": 8141 + }, + { + "epoch": 2.62, + "learning_rate": 8.311392298196131e-07, + "loss": 0.0686, + "step": 8142 + }, + { + "epoch": 2.62, + "learning_rate": 8.297532686982734e-07, + "loss": 0.0638, + "step": 8143 + }, + { + "epoch": 2.62, + "learning_rate": 8.283684140979187e-07, + "loss": 0.0717, + "step": 8144 + }, + { + "epoch": 2.62, + "learning_rate": 8.269846661856496e-07, + "loss": 0.0659, + "step": 8145 + }, + { + "epoch": 2.62, + "learning_rate": 8.256020251284381e-07, + "loss": 0.0707, + "step": 8146 + }, + { + "epoch": 2.62, + "learning_rate": 8.242204910931162e-07, + "loss": 0.0733, + "step": 8147 + }, + { + "epoch": 2.62, + "learning_rate": 8.228400642463874e-07, + "loss": 0.0631, + "step": 8148 + }, + { + "epoch": 2.62, + "learning_rate": 8.214607447548218e-07, + "loss": 0.0697, + "step": 8149 + }, + { + "epoch": 2.62, + "learning_rate": 8.200825327848516e-07, + "loss": 0.0674, + "step": 8150 + }, + { + "epoch": 2.62, + "learning_rate": 8.187054285027751e-07, + "loss": 0.0738, + "step": 8151 + }, + { + "epoch": 2.62, + "learning_rate": 8.173294320747649e-07, + "loss": 0.0743, + "step": 8152 + }, + { + "epoch": 2.62, + "learning_rate": 8.159545436668514e-07, + "loss": 0.0663, + "step": 8153 + }, + { + "epoch": 2.62, + "learning_rate": 8.145807634449343e-07, + "loss": 0.07, + "step": 8154 + }, + { + "epoch": 2.62, + "learning_rate": 8.132080915747842e-07, + "loss": 0.0638, + "step": 8155 + }, + { + "epoch": 2.62, + "learning_rate": 8.118365282220287e-07, + "loss": 0.0724, + "step": 8156 + }, + { + "epoch": 2.62, + "learning_rate": 8.104660735521652e-07, + "loss": 0.0727, + "step": 8157 + }, + { + "epoch": 2.62, + "learning_rate": 8.090967277305628e-07, + "loss": 0.0683, + "step": 8158 + }, + { + "epoch": 2.62, + "learning_rate": 8.077284909224514e-07, + "loss": 0.072, + "step": 8159 + }, + { + "epoch": 2.62, + "learning_rate": 8.063613632929268e-07, + "loss": 0.0673, + "step": 8160 + }, + { + "epoch": 2.63, + "learning_rate": 8.049953450069547e-07, + "loss": 0.0662, + "step": 8161 + }, + { + "epoch": 2.63, + "learning_rate": 8.036304362293646e-07, + "loss": 0.07, + "step": 8162 + }, + { + "epoch": 2.63, + "learning_rate": 8.022666371248489e-07, + "loss": 0.0659, + "step": 8163 + }, + { + "epoch": 2.63, + "learning_rate": 8.009039478579717e-07, + "loss": 0.0707, + "step": 8164 + }, + { + "epoch": 2.63, + "learning_rate": 7.995423685931624e-07, + "loss": 0.0697, + "step": 8165 + }, + { + "epoch": 2.63, + "learning_rate": 7.98181899494711e-07, + "loss": 0.0657, + "step": 8166 + }, + { + "epoch": 2.63, + "learning_rate": 7.968225407267815e-07, + "loss": 0.0715, + "step": 8167 + }, + { + "epoch": 2.63, + "learning_rate": 7.954642924533995e-07, + "loss": 0.0687, + "step": 8168 + }, + { + "epoch": 2.63, + "learning_rate": 7.941071548384527e-07, + "loss": 0.071, + "step": 8169 + }, + { + "epoch": 2.63, + "learning_rate": 7.927511280457034e-07, + "loss": 0.0723, + "step": 8170 + }, + { + "epoch": 2.63, + "learning_rate": 7.913962122387753e-07, + "loss": 0.0683, + "step": 8171 + }, + { + "epoch": 2.63, + "learning_rate": 7.900424075811553e-07, + "loss": 0.0734, + "step": 8172 + }, + { + "epoch": 2.63, + "learning_rate": 7.886897142362027e-07, + "loss": 0.0699, + "step": 8173 + }, + { + "epoch": 2.63, + "learning_rate": 7.873381323671381e-07, + "loss": 0.0693, + "step": 8174 + }, + { + "epoch": 2.63, + "learning_rate": 7.859876621370477e-07, + "loss": 0.066, + "step": 8175 + }, + { + "epoch": 2.63, + "learning_rate": 7.846383037088867e-07, + "loss": 0.069, + "step": 8176 + }, + { + "epoch": 2.63, + "learning_rate": 7.832900572454749e-07, + "loss": 0.0712, + "step": 8177 + }, + { + "epoch": 2.63, + "learning_rate": 7.819429229094933e-07, + "loss": 0.0697, + "step": 8178 + }, + { + "epoch": 2.63, + "learning_rate": 7.805969008634984e-07, + "loss": 0.0693, + "step": 8179 + }, + { + "epoch": 2.63, + "learning_rate": 7.792519912699037e-07, + "loss": 0.0787, + "step": 8180 + }, + { + "epoch": 2.63, + "learning_rate": 7.779081942909927e-07, + "loss": 0.0658, + "step": 8181 + }, + { + "epoch": 2.63, + "learning_rate": 7.765655100889135e-07, + "loss": 0.0712, + "step": 8182 + }, + { + "epoch": 2.63, + "learning_rate": 7.752239388256811e-07, + "loss": 0.0636, + "step": 8183 + }, + { + "epoch": 2.63, + "learning_rate": 7.738834806631712e-07, + "loss": 0.0699, + "step": 8184 + }, + { + "epoch": 2.63, + "learning_rate": 7.725441357631336e-07, + "loss": 0.0707, + "step": 8185 + }, + { + "epoch": 2.63, + "learning_rate": 7.712059042871788e-07, + "loss": 0.0719, + "step": 8186 + }, + { + "epoch": 2.63, + "learning_rate": 7.698687863967802e-07, + "loss": 0.0741, + "step": 8187 + }, + { + "epoch": 2.63, + "learning_rate": 7.685327822532862e-07, + "loss": 0.0754, + "step": 8188 + }, + { + "epoch": 2.63, + "learning_rate": 7.671978920178979e-07, + "loss": 0.0679, + "step": 8189 + }, + { + "epoch": 2.63, + "learning_rate": 7.658641158516933e-07, + "loss": 0.073, + "step": 8190 + }, + { + "epoch": 2.63, + "learning_rate": 7.645314539156101e-07, + "loss": 0.0718, + "step": 8191 + }, + { + "epoch": 2.64, + "learning_rate": 7.631999063704532e-07, + "loss": 0.0697, + "step": 8192 + }, + { + "epoch": 2.64, + "learning_rate": 7.618694733768906e-07, + "loss": 0.0682, + "step": 8193 + }, + { + "epoch": 2.64, + "learning_rate": 7.605401550954638e-07, + "loss": 0.0673, + "step": 8194 + }, + { + "epoch": 2.64, + "learning_rate": 7.59211951686567e-07, + "loss": 0.069, + "step": 8195 + }, + { + "epoch": 2.64, + "learning_rate": 7.578848633104718e-07, + "loss": 0.0747, + "step": 8196 + }, + { + "epoch": 2.64, + "learning_rate": 7.56558890127308e-07, + "loss": 0.0684, + "step": 8197 + }, + { + "epoch": 2.64, + "learning_rate": 7.552340322970753e-07, + "loss": 0.0728, + "step": 8198 + }, + { + "epoch": 2.64, + "learning_rate": 7.539102899796324e-07, + "loss": 0.0703, + "step": 8199 + }, + { + "epoch": 2.64, + "learning_rate": 7.52587663334714e-07, + "loss": 0.0666, + "step": 8200 + }, + { + "epoch": 2.64, + "learning_rate": 7.512661525219067e-07, + "loss": 0.0658, + "step": 8201 + }, + { + "epoch": 2.64, + "learning_rate": 7.499457577006753e-07, + "loss": 0.0676, + "step": 8202 + }, + { + "epoch": 2.64, + "learning_rate": 7.48626479030341e-07, + "loss": 0.0705, + "step": 8203 + }, + { + "epoch": 2.64, + "learning_rate": 7.473083166700946e-07, + "loss": 0.0702, + "step": 8204 + }, + { + "epoch": 2.64, + "learning_rate": 7.459912707789929e-07, + "loss": 0.0677, + "step": 8205 + }, + { + "epoch": 2.64, + "learning_rate": 7.446753415159536e-07, + "loss": 0.0723, + "step": 8206 + }, + { + "epoch": 2.64, + "learning_rate": 7.433605290397649e-07, + "loss": 0.0716, + "step": 8207 + }, + { + "epoch": 2.64, + "learning_rate": 7.420468335090758e-07, + "loss": 0.0713, + "step": 8208 + }, + { + "epoch": 2.64, + "learning_rate": 7.407342550824026e-07, + "loss": 0.0661, + "step": 8209 + }, + { + "epoch": 2.64, + "learning_rate": 7.394227939181242e-07, + "loss": 0.0707, + "step": 8210 + }, + { + "epoch": 2.64, + "learning_rate": 7.381124501744929e-07, + "loss": 0.072, + "step": 8211 + }, + { + "epoch": 2.64, + "learning_rate": 7.368032240096157e-07, + "loss": 0.069, + "step": 8212 + }, + { + "epoch": 2.64, + "learning_rate": 7.354951155814716e-07, + "loss": 0.0734, + "step": 8213 + }, + { + "epoch": 2.64, + "learning_rate": 7.341881250479022e-07, + "loss": 0.0691, + "step": 8214 + }, + { + "epoch": 2.64, + "learning_rate": 7.328822525666135e-07, + "loss": 0.0732, + "step": 8215 + }, + { + "epoch": 2.64, + "learning_rate": 7.315774982951762e-07, + "loss": 0.0735, + "step": 8216 + }, + { + "epoch": 2.64, + "learning_rate": 7.302738623910322e-07, + "loss": 0.0698, + "step": 8217 + }, + { + "epoch": 2.64, + "learning_rate": 7.289713450114811e-07, + "loss": 0.069, + "step": 8218 + }, + { + "epoch": 2.64, + "learning_rate": 7.276699463136872e-07, + "loss": 0.0707, + "step": 8219 + }, + { + "epoch": 2.64, + "learning_rate": 7.263696664546904e-07, + "loss": 0.0679, + "step": 8220 + }, + { + "epoch": 2.64, + "learning_rate": 7.250705055913809e-07, + "loss": 0.0698, + "step": 8221 + }, + { + "epoch": 2.64, + "learning_rate": 7.237724638805221e-07, + "loss": 0.0679, + "step": 8222 + }, + { + "epoch": 2.65, + "learning_rate": 7.224755414787444e-07, + "loss": 0.0651, + "step": 8223 + }, + { + "epoch": 2.65, + "learning_rate": 7.211797385425379e-07, + "loss": 0.0728, + "step": 8224 + }, + { + "epoch": 2.65, + "learning_rate": 7.198850552282577e-07, + "loss": 0.0675, + "step": 8225 + }, + { + "epoch": 2.65, + "learning_rate": 7.185914916921322e-07, + "loss": 0.0791, + "step": 8226 + }, + { + "epoch": 2.65, + "learning_rate": 7.172990480902431e-07, + "loss": 0.0631, + "step": 8227 + }, + { + "epoch": 2.65, + "learning_rate": 7.160077245785402e-07, + "loss": 0.0727, + "step": 8228 + }, + { + "epoch": 2.65, + "learning_rate": 7.147175213128454e-07, + "loss": 0.0662, + "step": 8229 + }, + { + "epoch": 2.65, + "learning_rate": 7.134284384488377e-07, + "loss": 0.067, + "step": 8230 + }, + { + "epoch": 2.65, + "learning_rate": 7.121404761420614e-07, + "loss": 0.0739, + "step": 8231 + }, + { + "epoch": 2.65, + "learning_rate": 7.108536345479323e-07, + "loss": 0.0746, + "step": 8232 + }, + { + "epoch": 2.65, + "learning_rate": 7.095679138217237e-07, + "loss": 0.0688, + "step": 8233 + }, + { + "epoch": 2.65, + "learning_rate": 7.08283314118573e-07, + "loss": 0.0679, + "step": 8234 + }, + { + "epoch": 2.65, + "learning_rate": 7.069998355934904e-07, + "loss": 0.0692, + "step": 8235 + }, + { + "epoch": 2.65, + "learning_rate": 7.057174784013432e-07, + "loss": 0.0671, + "step": 8236 + }, + { + "epoch": 2.65, + "learning_rate": 7.044362426968643e-07, + "loss": 0.0677, + "step": 8237 + }, + { + "epoch": 2.65, + "learning_rate": 7.031561286346589e-07, + "loss": 0.071, + "step": 8238 + }, + { + "epoch": 2.65, + "learning_rate": 7.018771363691879e-07, + "loss": 0.0663, + "step": 8239 + }, + { + "epoch": 2.65, + "learning_rate": 7.005992660547767e-07, + "loss": 0.0679, + "step": 8240 + }, + { + "epoch": 2.65, + "learning_rate": 6.99322517845622e-07, + "loss": 0.0703, + "step": 8241 + }, + { + "epoch": 2.65, + "learning_rate": 6.980468918957827e-07, + "loss": 0.0643, + "step": 8242 + }, + { + "epoch": 2.65, + "learning_rate": 6.967723883591771e-07, + "loss": 0.0703, + "step": 8243 + }, + { + "epoch": 2.65, + "learning_rate": 6.954990073895962e-07, + "loss": 0.0699, + "step": 8244 + }, + { + "epoch": 2.65, + "learning_rate": 6.942267491406907e-07, + "loss": 0.0657, + "step": 8245 + }, + { + "epoch": 2.65, + "learning_rate": 6.929556137659766e-07, + "loss": 0.0749, + "step": 8246 + }, + { + "epoch": 2.65, + "learning_rate": 6.916856014188334e-07, + "loss": 0.0678, + "step": 8247 + }, + { + "epoch": 2.65, + "learning_rate": 6.904167122525063e-07, + "loss": 0.0675, + "step": 8248 + }, + { + "epoch": 2.65, + "learning_rate": 6.89148946420104e-07, + "loss": 0.0674, + "step": 8249 + }, + { + "epoch": 2.65, + "learning_rate": 6.878823040746041e-07, + "loss": 0.0677, + "step": 8250 + }, + { + "epoch": 2.65, + "learning_rate": 6.866167853688421e-07, + "loss": 0.0662, + "step": 8251 + }, + { + "epoch": 2.65, + "learning_rate": 6.853523904555226e-07, + "loss": 0.0703, + "step": 8252 + }, + { + "epoch": 2.65, + "learning_rate": 6.840891194872112e-07, + "loss": 0.072, + "step": 8253 + }, + { + "epoch": 2.66, + "learning_rate": 6.828269726163405e-07, + "loss": 0.0657, + "step": 8254 + }, + { + "epoch": 2.66, + "learning_rate": 6.815659499952043e-07, + "loss": 0.0646, + "step": 8255 + }, + { + "epoch": 2.66, + "learning_rate": 6.803060517759674e-07, + "loss": 0.0677, + "step": 8256 + }, + { + "epoch": 2.66, + "learning_rate": 6.790472781106517e-07, + "loss": 0.0728, + "step": 8257 + }, + { + "epoch": 2.66, + "learning_rate": 6.777896291511443e-07, + "loss": 0.0713, + "step": 8258 + }, + { + "epoch": 2.66, + "learning_rate": 6.765331050492041e-07, + "loss": 0.071, + "step": 8259 + }, + { + "epoch": 2.66, + "learning_rate": 6.752777059564431e-07, + "loss": 0.0675, + "step": 8260 + }, + { + "epoch": 2.66, + "learning_rate": 6.740234320243444e-07, + "loss": 0.0699, + "step": 8261 + }, + { + "epoch": 2.66, + "learning_rate": 6.72770283404256e-07, + "loss": 0.0645, + "step": 8262 + }, + { + "epoch": 2.66, + "learning_rate": 6.715182602473869e-07, + "loss": 0.0746, + "step": 8263 + }, + { + "epoch": 2.66, + "learning_rate": 6.702673627048107e-07, + "loss": 0.0701, + "step": 8264 + }, + { + "epoch": 2.66, + "learning_rate": 6.690175909274699e-07, + "loss": 0.0727, + "step": 8265 + }, + { + "epoch": 2.66, + "learning_rate": 6.677689450661607e-07, + "loss": 0.072, + "step": 8266 + }, + { + "epoch": 2.66, + "learning_rate": 6.665214252715557e-07, + "loss": 0.068, + "step": 8267 + }, + { + "epoch": 2.66, + "learning_rate": 6.652750316941836e-07, + "loss": 0.0661, + "step": 8268 + }, + { + "epoch": 2.66, + "learning_rate": 6.640297644844395e-07, + "loss": 0.0751, + "step": 8269 + }, + { + "epoch": 2.66, + "learning_rate": 6.627856237925812e-07, + "loss": 0.0665, + "step": 8270 + }, + { + "epoch": 2.66, + "learning_rate": 6.615426097687361e-07, + "loss": 0.0658, + "step": 8271 + }, + { + "epoch": 2.66, + "learning_rate": 6.603007225628888e-07, + "loss": 0.0639, + "step": 8272 + }, + { + "epoch": 2.66, + "learning_rate": 6.590599623248917e-07, + "loss": 0.0688, + "step": 8273 + }, + { + "epoch": 2.66, + "learning_rate": 6.578203292044594e-07, + "loss": 0.0716, + "step": 8274 + }, + { + "epoch": 2.66, + "learning_rate": 6.565818233511712e-07, + "loss": 0.0689, + "step": 8275 + }, + { + "epoch": 2.66, + "learning_rate": 6.553444449144697e-07, + "loss": 0.069, + "step": 8276 + }, + { + "epoch": 2.66, + "learning_rate": 6.541081940436644e-07, + "loss": 0.0689, + "step": 8277 + }, + { + "epoch": 2.66, + "learning_rate": 6.528730708879261e-07, + "loss": 0.073, + "step": 8278 + }, + { + "epoch": 2.66, + "learning_rate": 6.516390755962887e-07, + "loss": 0.0752, + "step": 8279 + }, + { + "epoch": 2.66, + "learning_rate": 6.504062083176533e-07, + "loss": 0.0746, + "step": 8280 + }, + { + "epoch": 2.66, + "learning_rate": 6.491744692007807e-07, + "loss": 0.0666, + "step": 8281 + }, + { + "epoch": 2.66, + "learning_rate": 6.479438583942976e-07, + "loss": 0.0645, + "step": 8282 + }, + { + "epoch": 2.66, + "learning_rate": 6.467143760466976e-07, + "loss": 0.0698, + "step": 8283 + }, + { + "epoch": 2.66, + "learning_rate": 6.454860223063331e-07, + "loss": 0.0736, + "step": 8284 + }, + { + "epoch": 2.67, + "learning_rate": 6.442587973214231e-07, + "loss": 0.073, + "step": 8285 + }, + { + "epoch": 2.67, + "learning_rate": 6.430327012400506e-07, + "loss": 0.0687, + "step": 8286 + }, + { + "epoch": 2.67, + "learning_rate": 6.418077342101581e-07, + "loss": 0.0737, + "step": 8287 + }, + { + "epoch": 2.67, + "learning_rate": 6.405838963795597e-07, + "loss": 0.0675, + "step": 8288 + }, + { + "epoch": 2.67, + "learning_rate": 6.393611878959272e-07, + "loss": 0.0689, + "step": 8289 + }, + { + "epoch": 2.67, + "learning_rate": 6.38139608906797e-07, + "loss": 0.0661, + "step": 8290 + }, + { + "epoch": 2.67, + "learning_rate": 6.369191595595714e-07, + "loss": 0.0667, + "step": 8291 + }, + { + "epoch": 2.67, + "learning_rate": 6.356998400015147e-07, + "loss": 0.0727, + "step": 8292 + }, + { + "epoch": 2.67, + "learning_rate": 6.344816503797524e-07, + "loss": 0.0688, + "step": 8293 + }, + { + "epoch": 2.67, + "learning_rate": 6.332645908412805e-07, + "loss": 0.0678, + "step": 8294 + }, + { + "epoch": 2.67, + "learning_rate": 6.320486615329535e-07, + "loss": 0.071, + "step": 8295 + }, + { + "epoch": 2.67, + "learning_rate": 6.308338626014888e-07, + "loss": 0.0784, + "step": 8296 + }, + { + "epoch": 2.67, + "learning_rate": 6.296201941934699e-07, + "loss": 0.0692, + "step": 8297 + }, + { + "epoch": 2.67, + "learning_rate": 6.284076564553465e-07, + "loss": 0.0711, + "step": 8298 + }, + { + "epoch": 2.67, + "learning_rate": 6.271962495334228e-07, + "loss": 0.0749, + "step": 8299 + }, + { + "epoch": 2.67, + "learning_rate": 6.259859735738772e-07, + "loss": 0.0697, + "step": 8300 + }, + { + "epoch": 2.67, + "learning_rate": 6.24776828722744e-07, + "loss": 0.075, + "step": 8301 + }, + { + "epoch": 2.67, + "learning_rate": 6.235688151259234e-07, + "loss": 0.0724, + "step": 8302 + }, + { + "epoch": 2.67, + "learning_rate": 6.22361932929182e-07, + "loss": 0.0687, + "step": 8303 + }, + { + "epoch": 2.67, + "learning_rate": 6.211561822781476e-07, + "loss": 0.0808, + "step": 8304 + }, + { + "epoch": 2.67, + "learning_rate": 6.199515633183073e-07, + "loss": 0.0716, + "step": 8305 + }, + { + "epoch": 2.67, + "learning_rate": 6.187480761950182e-07, + "loss": 0.0685, + "step": 8306 + }, + { + "epoch": 2.67, + "learning_rate": 6.175457210534986e-07, + "loss": 0.0696, + "step": 8307 + }, + { + "epoch": 2.67, + "learning_rate": 6.163444980388267e-07, + "loss": 0.0688, + "step": 8308 + }, + { + "epoch": 2.67, + "learning_rate": 6.151444072959522e-07, + "loss": 0.067, + "step": 8309 + }, + { + "epoch": 2.67, + "learning_rate": 6.139454489696795e-07, + "loss": 0.0639, + "step": 8310 + }, + { + "epoch": 2.67, + "learning_rate": 6.127476232046814e-07, + "loss": 0.0682, + "step": 8311 + }, + { + "epoch": 2.67, + "learning_rate": 6.115509301454925e-07, + "loss": 0.073, + "step": 8312 + }, + { + "epoch": 2.67, + "learning_rate": 6.103553699365095e-07, + "loss": 0.0701, + "step": 8313 + }, + { + "epoch": 2.67, + "learning_rate": 6.091609427219947e-07, + "loss": 0.0672, + "step": 8314 + }, + { + "epoch": 2.67, + "learning_rate": 6.079676486460728e-07, + "loss": 0.0674, + "step": 8315 + }, + { + "epoch": 2.68, + "learning_rate": 6.067754878527332e-07, + "loss": 0.0687, + "step": 8316 + }, + { + "epoch": 2.68, + "learning_rate": 6.055844604858252e-07, + "loss": 0.0667, + "step": 8317 + }, + { + "epoch": 2.68, + "learning_rate": 6.043945666890638e-07, + "loss": 0.0726, + "step": 8318 + }, + { + "epoch": 2.68, + "learning_rate": 6.032058066060276e-07, + "loss": 0.0651, + "step": 8319 + }, + { + "epoch": 2.68, + "learning_rate": 6.020181803801539e-07, + "loss": 0.0733, + "step": 8320 + }, + { + "epoch": 2.68, + "learning_rate": 6.008316881547515e-07, + "loss": 0.0736, + "step": 8321 + }, + { + "epoch": 2.68, + "learning_rate": 5.996463300729849e-07, + "loss": 0.069, + "step": 8322 + }, + { + "epoch": 2.68, + "learning_rate": 5.984621062778829e-07, + "loss": 0.0697, + "step": 8323 + }, + { + "epoch": 2.68, + "learning_rate": 5.972790169123454e-07, + "loss": 0.0783, + "step": 8324 + }, + { + "epoch": 2.68, + "learning_rate": 5.96097062119122e-07, + "loss": 0.0685, + "step": 8325 + }, + { + "epoch": 2.68, + "learning_rate": 5.949162420408338e-07, + "loss": 0.0753, + "step": 8326 + }, + { + "epoch": 2.68, + "learning_rate": 5.937365568199671e-07, + "loss": 0.0678, + "step": 8327 + }, + { + "epoch": 2.68, + "learning_rate": 5.925580065988645e-07, + "loss": 0.0693, + "step": 8328 + }, + { + "epoch": 2.68, + "learning_rate": 5.913805915197357e-07, + "loss": 0.0712, + "step": 8329 + }, + { + "epoch": 2.68, + "learning_rate": 5.902043117246547e-07, + "loss": 0.0716, + "step": 8330 + }, + { + "epoch": 2.68, + "learning_rate": 5.890291673555526e-07, + "loss": 0.0675, + "step": 8331 + }, + { + "epoch": 2.68, + "learning_rate": 5.87855158554228e-07, + "loss": 0.0647, + "step": 8332 + }, + { + "epoch": 2.68, + "learning_rate": 5.866822854623456e-07, + "loss": 0.0708, + "step": 8333 + }, + { + "epoch": 2.68, + "learning_rate": 5.855105482214252e-07, + "loss": 0.0749, + "step": 8334 + }, + { + "epoch": 2.68, + "learning_rate": 5.843399469728539e-07, + "loss": 0.0626, + "step": 8335 + }, + { + "epoch": 2.68, + "learning_rate": 5.831704818578842e-07, + "loss": 0.0708, + "step": 8336 + }, + { + "epoch": 2.68, + "learning_rate": 5.82002153017629e-07, + "loss": 0.0814, + "step": 8337 + }, + { + "epoch": 2.68, + "learning_rate": 5.808349605930586e-07, + "loss": 0.0708, + "step": 8338 + }, + { + "epoch": 2.68, + "learning_rate": 5.79668904725017e-07, + "loss": 0.0655, + "step": 8339 + }, + { + "epoch": 2.68, + "learning_rate": 5.785039855542018e-07, + "loss": 0.0657, + "step": 8340 + }, + { + "epoch": 2.68, + "learning_rate": 5.773402032211783e-07, + "loss": 0.069, + "step": 8341 + }, + { + "epoch": 2.68, + "learning_rate": 5.761775578663742e-07, + "loss": 0.0641, + "step": 8342 + }, + { + "epoch": 2.68, + "learning_rate": 5.750160496300805e-07, + "loss": 0.068, + "step": 8343 + }, + { + "epoch": 2.68, + "learning_rate": 5.738556786524452e-07, + "loss": 0.0663, + "step": 8344 + }, + { + "epoch": 2.68, + "learning_rate": 5.726964450734874e-07, + "loss": 0.0678, + "step": 8345 + }, + { + "epoch": 2.68, + "learning_rate": 5.715383490330839e-07, + "loss": 0.0747, + "step": 8346 + }, + { + "epoch": 2.69, + "learning_rate": 5.703813906709743e-07, + "loss": 0.0709, + "step": 8347 + }, + { + "epoch": 2.69, + "learning_rate": 5.692255701267657e-07, + "loss": 0.0719, + "step": 8348 + }, + { + "epoch": 2.69, + "learning_rate": 5.68070887539921e-07, + "loss": 0.0684, + "step": 8349 + }, + { + "epoch": 2.69, + "learning_rate": 5.66917343049771e-07, + "loss": 0.0743, + "step": 8350 + }, + { + "epoch": 2.69, + "learning_rate": 5.657649367955065e-07, + "loss": 0.0656, + "step": 8351 + }, + { + "epoch": 2.69, + "learning_rate": 5.646136689161808e-07, + "loss": 0.0666, + "step": 8352 + }, + { + "epoch": 2.69, + "learning_rate": 5.634635395507116e-07, + "loss": 0.0643, + "step": 8353 + }, + { + "epoch": 2.69, + "learning_rate": 5.62314548837879e-07, + "loss": 0.0633, + "step": 8354 + }, + { + "epoch": 2.69, + "learning_rate": 5.611666969163243e-07, + "loss": 0.0731, + "step": 8355 + }, + { + "epoch": 2.69, + "learning_rate": 5.600199839245535e-07, + "loss": 0.0713, + "step": 8356 + }, + { + "epoch": 2.69, + "learning_rate": 5.588744100009325e-07, + "loss": 0.0729, + "step": 8357 + }, + { + "epoch": 2.69, + "learning_rate": 5.577299752836917e-07, + "loss": 0.0714, + "step": 8358 + }, + { + "epoch": 2.69, + "learning_rate": 5.56586679910921e-07, + "loss": 0.0638, + "step": 8359 + }, + { + "epoch": 2.69, + "learning_rate": 5.554445240205797e-07, + "loss": 0.0708, + "step": 8360 + }, + { + "epoch": 2.69, + "learning_rate": 5.543035077504832e-07, + "loss": 0.072, + "step": 8361 + }, + { + "epoch": 2.69, + "learning_rate": 5.531636312383115e-07, + "loss": 0.071, + "step": 8362 + }, + { + "epoch": 2.69, + "learning_rate": 5.520248946216056e-07, + "loss": 0.0693, + "step": 8363 + }, + { + "epoch": 2.69, + "learning_rate": 5.508872980377711e-07, + "loss": 0.073, + "step": 8364 + }, + { + "epoch": 2.69, + "learning_rate": 5.49750841624076e-07, + "loss": 0.0737, + "step": 8365 + }, + { + "epoch": 2.69, + "learning_rate": 5.486155255176495e-07, + "loss": 0.0638, + "step": 8366 + }, + { + "epoch": 2.69, + "learning_rate": 5.474813498554843e-07, + "loss": 0.0758, + "step": 8367 + }, + { + "epoch": 2.69, + "learning_rate": 5.463483147744319e-07, + "loss": 0.0742, + "step": 8368 + }, + { + "epoch": 2.69, + "learning_rate": 5.452164204112154e-07, + "loss": 0.0647, + "step": 8369 + }, + { + "epoch": 2.69, + "learning_rate": 5.440856669024063e-07, + "loss": 0.066, + "step": 8370 + }, + { + "epoch": 2.69, + "learning_rate": 5.429560543844514e-07, + "loss": 0.0712, + "step": 8371 + }, + { + "epoch": 2.69, + "learning_rate": 5.418275829936537e-07, + "loss": 0.0697, + "step": 8372 + }, + { + "epoch": 2.69, + "learning_rate": 5.407002528661787e-07, + "loss": 0.0604, + "step": 8373 + }, + { + "epoch": 2.69, + "learning_rate": 5.395740641380532e-07, + "loss": 0.0631, + "step": 8374 + }, + { + "epoch": 2.69, + "learning_rate": 5.384490169451717e-07, + "loss": 0.0621, + "step": 8375 + }, + { + "epoch": 2.69, + "learning_rate": 5.373251114232824e-07, + "loss": 0.0657, + "step": 8376 + }, + { + "epoch": 2.69, + "learning_rate": 5.362023477080048e-07, + "loss": 0.0681, + "step": 8377 + }, + { + "epoch": 2.7, + "learning_rate": 5.350807259348145e-07, + "loss": 0.0704, + "step": 8378 + }, + { + "epoch": 2.7, + "learning_rate": 5.339602462390525e-07, + "loss": 0.0726, + "step": 8379 + }, + { + "epoch": 2.7, + "learning_rate": 5.328409087559161e-07, + "loss": 0.069, + "step": 8380 + }, + { + "epoch": 2.7, + "learning_rate": 5.317227136204761e-07, + "loss": 0.0686, + "step": 8381 + }, + { + "epoch": 2.7, + "learning_rate": 5.306056609676546e-07, + "loss": 0.071, + "step": 8382 + }, + { + "epoch": 2.7, + "learning_rate": 5.294897509322405e-07, + "loss": 0.0673, + "step": 8383 + }, + { + "epoch": 2.7, + "learning_rate": 5.283749836488839e-07, + "loss": 0.065, + "step": 8384 + }, + { + "epoch": 2.7, + "learning_rate": 5.272613592520969e-07, + "loss": 0.0698, + "step": 8385 + }, + { + "epoch": 2.7, + "learning_rate": 5.261488778762569e-07, + "loss": 0.0722, + "step": 8386 + }, + { + "epoch": 2.7, + "learning_rate": 5.250375396555985e-07, + "loss": 0.0657, + "step": 8387 + }, + { + "epoch": 2.7, + "learning_rate": 5.239273447242199e-07, + "loss": 0.0646, + "step": 8388 + }, + { + "epoch": 2.7, + "learning_rate": 5.228182932160841e-07, + "loss": 0.0704, + "step": 8389 + }, + { + "epoch": 2.7, + "learning_rate": 5.217103852650129e-07, + "loss": 0.0727, + "step": 8390 + }, + { + "epoch": 2.7, + "learning_rate": 5.206036210046883e-07, + "loss": 0.0661, + "step": 8391 + }, + { + "epoch": 2.7, + "learning_rate": 5.194980005686623e-07, + "loss": 0.0709, + "step": 8392 + }, + { + "epoch": 2.7, + "learning_rate": 5.183935240903415e-07, + "loss": 0.0739, + "step": 8393 + }, + { + "epoch": 2.7, + "learning_rate": 5.172901917029971e-07, + "loss": 0.0679, + "step": 8394 + }, + { + "epoch": 2.7, + "learning_rate": 5.161880035397604e-07, + "loss": 0.0696, + "step": 8395 + }, + { + "epoch": 2.7, + "learning_rate": 5.150869597336272e-07, + "loss": 0.0723, + "step": 8396 + }, + { + "epoch": 2.7, + "learning_rate": 5.139870604174535e-07, + "loss": 0.0647, + "step": 8397 + }, + { + "epoch": 2.7, + "learning_rate": 5.128883057239587e-07, + "loss": 0.0663, + "step": 8398 + }, + { + "epoch": 2.7, + "learning_rate": 5.117906957857233e-07, + "loss": 0.0671, + "step": 8399 + }, + { + "epoch": 2.7, + "learning_rate": 5.10694230735188e-07, + "loss": 0.0724, + "step": 8400 + }, + { + "epoch": 2.7, + "learning_rate": 5.095989107046606e-07, + "loss": 0.0761, + "step": 8401 + }, + { + "epoch": 2.7, + "learning_rate": 5.08504735826304e-07, + "loss": 0.0764, + "step": 8402 + }, + { + "epoch": 2.7, + "learning_rate": 5.074117062321438e-07, + "loss": 0.0684, + "step": 8403 + }, + { + "epoch": 2.7, + "learning_rate": 5.063198220540744e-07, + "loss": 0.0679, + "step": 8404 + }, + { + "epoch": 2.7, + "learning_rate": 5.052290834238461e-07, + "loss": 0.063, + "step": 8405 + }, + { + "epoch": 2.7, + "learning_rate": 5.04139490473069e-07, + "loss": 0.0671, + "step": 8406 + }, + { + "epoch": 2.7, + "learning_rate": 5.030510433332215e-07, + "loss": 0.0741, + "step": 8407 + }, + { + "epoch": 2.7, + "learning_rate": 5.019637421356405e-07, + "loss": 0.073, + "step": 8408 + }, + { + "epoch": 2.71, + "learning_rate": 5.00877587011519e-07, + "loss": 0.0711, + "step": 8409 + }, + { + "epoch": 2.71, + "learning_rate": 4.997925780919232e-07, + "loss": 0.0751, + "step": 8410 + }, + { + "epoch": 2.71, + "learning_rate": 4.98708715507773e-07, + "loss": 0.0685, + "step": 8411 + }, + { + "epoch": 2.71, + "learning_rate": 4.976259993898503e-07, + "loss": 0.0616, + "step": 8412 + }, + { + "epoch": 2.71, + "learning_rate": 4.965444298688015e-07, + "loss": 0.0703, + "step": 8413 + }, + { + "epoch": 2.71, + "learning_rate": 4.95464007075136e-07, + "loss": 0.0768, + "step": 8414 + }, + { + "epoch": 2.71, + "learning_rate": 4.943847311392158e-07, + "loss": 0.0668, + "step": 8415 + }, + { + "epoch": 2.71, + "learning_rate": 4.933066021912758e-07, + "loss": 0.0681, + "step": 8416 + }, + { + "epoch": 2.71, + "learning_rate": 4.922296203614075e-07, + "loss": 0.071, + "step": 8417 + }, + { + "epoch": 2.71, + "learning_rate": 4.911537857795612e-07, + "loss": 0.0751, + "step": 8418 + }, + { + "epoch": 2.71, + "learning_rate": 4.900790985755544e-07, + "loss": 0.0705, + "step": 8419 + }, + { + "epoch": 2.71, + "learning_rate": 4.890055588790632e-07, + "loss": 0.074, + "step": 8420 + }, + { + "epoch": 2.71, + "learning_rate": 4.879331668196241e-07, + "loss": 0.0651, + "step": 8421 + }, + { + "epoch": 2.71, + "learning_rate": 4.86861922526638e-07, + "loss": 0.0689, + "step": 8422 + }, + { + "epoch": 2.71, + "learning_rate": 4.857918261293649e-07, + "loss": 0.0744, + "step": 8423 + }, + { + "epoch": 2.71, + "learning_rate": 4.847228777569258e-07, + "loss": 0.068, + "step": 8424 + }, + { + "epoch": 2.71, + "learning_rate": 4.836550775383076e-07, + "loss": 0.0678, + "step": 8425 + }, + { + "epoch": 2.71, + "learning_rate": 4.825884256023538e-07, + "loss": 0.0757, + "step": 8426 + }, + { + "epoch": 2.71, + "learning_rate": 4.815229220777717e-07, + "loss": 0.0659, + "step": 8427 + }, + { + "epoch": 2.71, + "learning_rate": 4.804585670931294e-07, + "loss": 0.0747, + "step": 8428 + }, + { + "epoch": 2.71, + "learning_rate": 4.793953607768553e-07, + "loss": 0.067, + "step": 8429 + }, + { + "epoch": 2.71, + "learning_rate": 4.783333032572413e-07, + "loss": 0.0674, + "step": 8430 + }, + { + "epoch": 2.71, + "learning_rate": 4.772723946624414e-07, + "loss": 0.0779, + "step": 8431 + }, + { + "epoch": 2.71, + "learning_rate": 4.7621263512046654e-07, + "loss": 0.07, + "step": 8432 + }, + { + "epoch": 2.71, + "learning_rate": 4.7515402475919235e-07, + "loss": 0.0722, + "step": 8433 + }, + { + "epoch": 2.71, + "learning_rate": 4.740965637063588e-07, + "loss": 0.0664, + "step": 8434 + }, + { + "epoch": 2.71, + "learning_rate": 4.730402520895594e-07, + "loss": 0.071, + "step": 8435 + }, + { + "epoch": 2.71, + "learning_rate": 4.7198509003625434e-07, + "loss": 0.0693, + "step": 8436 + }, + { + "epoch": 2.71, + "learning_rate": 4.7093107767376414e-07, + "loss": 0.0677, + "step": 8437 + }, + { + "epoch": 2.71, + "learning_rate": 4.698782151292713e-07, + "loss": 0.0684, + "step": 8438 + }, + { + "epoch": 2.71, + "learning_rate": 4.6882650252981663e-07, + "loss": 0.0664, + "step": 8439 + }, + { + "epoch": 2.72, + "learning_rate": 4.6777594000230855e-07, + "loss": 0.0752, + "step": 8440 + }, + { + "epoch": 2.72, + "learning_rate": 4.6672652767350777e-07, + "loss": 0.07, + "step": 8441 + }, + { + "epoch": 2.72, + "learning_rate": 4.6567826567004094e-07, + "loss": 0.0691, + "step": 8442 + }, + { + "epoch": 2.72, + "learning_rate": 4.64631154118399e-07, + "loss": 0.0687, + "step": 8443 + }, + { + "epoch": 2.72, + "learning_rate": 4.635851931449287e-07, + "loss": 0.0659, + "step": 8444 + }, + { + "epoch": 2.72, + "learning_rate": 4.6254038287584013e-07, + "loss": 0.0658, + "step": 8445 + }, + { + "epoch": 2.72, + "learning_rate": 4.6149672343720587e-07, + "loss": 0.063, + "step": 8446 + }, + { + "epoch": 2.72, + "learning_rate": 4.6045421495495845e-07, + "loss": 0.0675, + "step": 8447 + }, + { + "epoch": 2.72, + "learning_rate": 4.5941285755488953e-07, + "loss": 0.0742, + "step": 8448 + }, + { + "epoch": 2.72, + "learning_rate": 4.5837265136265517e-07, + "loss": 0.0645, + "step": 8449 + }, + { + "epoch": 2.72, + "learning_rate": 4.573335965037706e-07, + "loss": 0.0639, + "step": 8450 + }, + { + "epoch": 2.72, + "learning_rate": 4.562956931036111e-07, + "loss": 0.0693, + "step": 8451 + }, + { + "epoch": 2.72, + "learning_rate": 4.5525894128741753e-07, + "loss": 0.0668, + "step": 8452 + }, + { + "epoch": 2.72, + "learning_rate": 4.5422334118028657e-07, + "loss": 0.0659, + "step": 8453 + }, + { + "epoch": 2.72, + "learning_rate": 4.531888929071804e-07, + "loss": 0.0643, + "step": 8454 + }, + { + "epoch": 2.72, + "learning_rate": 4.52155596592917e-07, + "loss": 0.0652, + "step": 8455 + }, + { + "epoch": 2.72, + "learning_rate": 4.511234523621799e-07, + "loss": 0.071, + "step": 8456 + }, + { + "epoch": 2.72, + "learning_rate": 4.500924603395107e-07, + "loss": 0.0746, + "step": 8457 + }, + { + "epoch": 2.72, + "learning_rate": 4.490626206493154e-07, + "loss": 0.062, + "step": 8458 + }, + { + "epoch": 2.72, + "learning_rate": 4.4803393341585786e-07, + "loss": 0.0686, + "step": 8459 + }, + { + "epoch": 2.72, + "learning_rate": 4.4700639876326333e-07, + "loss": 0.0751, + "step": 8460 + }, + { + "epoch": 2.72, + "learning_rate": 4.459800168155204e-07, + "loss": 0.0677, + "step": 8461 + }, + { + "epoch": 2.72, + "learning_rate": 4.449547876964722e-07, + "loss": 0.0711, + "step": 8462 + }, + { + "epoch": 2.72, + "learning_rate": 4.4393071152983305e-07, + "loss": 0.0676, + "step": 8463 + }, + { + "epoch": 2.72, + "learning_rate": 4.429077884391686e-07, + "loss": 0.0661, + "step": 8464 + }, + { + "epoch": 2.72, + "learning_rate": 4.4188601854791126e-07, + "loss": 0.068, + "step": 8465 + }, + { + "epoch": 2.72, + "learning_rate": 4.4086540197935013e-07, + "loss": 0.0742, + "step": 8466 + }, + { + "epoch": 2.72, + "learning_rate": 4.398459388566378e-07, + "loss": 0.07, + "step": 8467 + }, + { + "epoch": 2.72, + "learning_rate": 4.38827629302786e-07, + "loss": 0.0725, + "step": 8468 + }, + { + "epoch": 2.72, + "learning_rate": 4.3781047344067074e-07, + "loss": 0.0715, + "step": 8469 + }, + { + "epoch": 2.72, + "learning_rate": 4.36794471393025e-07, + "loss": 0.0726, + "step": 8470 + }, + { + "epoch": 2.73, + "learning_rate": 4.3577962328244516e-07, + "loss": 0.0686, + "step": 8471 + }, + { + "epoch": 2.73, + "learning_rate": 4.3476592923138327e-07, + "loss": 0.0637, + "step": 8472 + }, + { + "epoch": 2.73, + "learning_rate": 4.3375338936216147e-07, + "loss": 0.0652, + "step": 8473 + }, + { + "epoch": 2.73, + "learning_rate": 4.327420037969532e-07, + "loss": 0.0615, + "step": 8474 + }, + { + "epoch": 2.73, + "learning_rate": 4.317317726577974e-07, + "loss": 0.0687, + "step": 8475 + }, + { + "epoch": 2.73, + "learning_rate": 4.3072269606659333e-07, + "loss": 0.0636, + "step": 8476 + }, + { + "epoch": 2.73, + "learning_rate": 4.297147741451013e-07, + "loss": 0.0738, + "step": 8477 + }, + { + "epoch": 2.73, + "learning_rate": 4.2870800701493854e-07, + "loss": 0.0656, + "step": 8478 + }, + { + "epoch": 2.73, + "learning_rate": 4.2770239479759e-07, + "loss": 0.0687, + "step": 8479 + }, + { + "epoch": 2.73, + "learning_rate": 4.2669793761439313e-07, + "loss": 0.0649, + "step": 8480 + }, + { + "epoch": 2.73, + "learning_rate": 4.2569463558655213e-07, + "loss": 0.0643, + "step": 8481 + }, + { + "epoch": 2.73, + "learning_rate": 4.2469248883513e-07, + "loss": 0.0683, + "step": 8482 + }, + { + "epoch": 2.73, + "learning_rate": 4.23691497481048e-07, + "loss": 0.0689, + "step": 8483 + }, + { + "epoch": 2.73, + "learning_rate": 4.226916616450916e-07, + "loss": 0.0751, + "step": 8484 + }, + { + "epoch": 2.73, + "learning_rate": 4.216929814479065e-07, + "loss": 0.074, + "step": 8485 + }, + { + "epoch": 2.73, + "learning_rate": 4.2069545700999524e-07, + "loss": 0.072, + "step": 8486 + }, + { + "epoch": 2.73, + "learning_rate": 4.196990884517249e-07, + "loss": 0.0648, + "step": 8487 + }, + { + "epoch": 2.73, + "learning_rate": 4.187038758933204e-07, + "loss": 0.0659, + "step": 8488 + }, + { + "epoch": 2.73, + "learning_rate": 4.177098194548679e-07, + "loss": 0.0668, + "step": 8489 + }, + { + "epoch": 2.73, + "learning_rate": 4.1671691925631697e-07, + "loss": 0.0665, + "step": 8490 + }, + { + "epoch": 2.73, + "learning_rate": 4.1572517541747294e-07, + "loss": 0.0658, + "step": 8491 + }, + { + "epoch": 2.73, + "learning_rate": 4.147345880580056e-07, + "loss": 0.0732, + "step": 8492 + }, + { + "epoch": 2.73, + "learning_rate": 4.1374515729744157e-07, + "loss": 0.0684, + "step": 8493 + }, + { + "epoch": 2.73, + "learning_rate": 4.127568832551709e-07, + "loss": 0.0783, + "step": 8494 + }, + { + "epoch": 2.73, + "learning_rate": 4.117697660504416e-07, + "loss": 0.0713, + "step": 8495 + }, + { + "epoch": 2.73, + "learning_rate": 4.107838058023661e-07, + "loss": 0.0749, + "step": 8496 + }, + { + "epoch": 2.73, + "learning_rate": 4.0979900262991147e-07, + "loss": 0.0707, + "step": 8497 + }, + { + "epoch": 2.73, + "learning_rate": 4.088153566519115e-07, + "loss": 0.0766, + "step": 8498 + }, + { + "epoch": 2.73, + "learning_rate": 4.078328679870547e-07, + "loss": 0.0675, + "step": 8499 + }, + { + "epoch": 2.73, + "learning_rate": 4.0685153675389276e-07, + "loss": 0.0698, + "step": 8500 + }, + { + "epoch": 2.73, + "learning_rate": 4.058713630708355e-07, + "loss": 0.0667, + "step": 8501 + }, + { + "epoch": 2.73, + "learning_rate": 4.048923470561594e-07, + "loss": 0.0734, + "step": 8502 + }, + { + "epoch": 2.74, + "learning_rate": 4.0391448882799334e-07, + "loss": 0.0681, + "step": 8503 + }, + { + "epoch": 2.74, + "learning_rate": 4.029377885043295e-07, + "loss": 0.0712, + "step": 8504 + }, + { + "epoch": 2.74, + "learning_rate": 4.019622462030248e-07, + "loss": 0.0686, + "step": 8505 + }, + { + "epoch": 2.74, + "learning_rate": 4.009878620417873e-07, + "loss": 0.0655, + "step": 8506 + }, + { + "epoch": 2.74, + "learning_rate": 4.000146361381918e-07, + "loss": 0.0672, + "step": 8507 + }, + { + "epoch": 2.74, + "learning_rate": 3.9904256860967436e-07, + "loss": 0.0702, + "step": 8508 + }, + { + "epoch": 2.74, + "learning_rate": 3.980716595735257e-07, + "loss": 0.0722, + "step": 8509 + }, + { + "epoch": 2.74, + "learning_rate": 3.971019091468997e-07, + "loss": 0.0735, + "step": 8510 + }, + { + "epoch": 2.74, + "learning_rate": 3.961333174468152e-07, + "loss": 0.067, + "step": 8511 + }, + { + "epoch": 2.74, + "learning_rate": 3.9516588459014184e-07, + "loss": 0.0637, + "step": 8512 + }, + { + "epoch": 2.74, + "learning_rate": 3.9419961069361414e-07, + "loss": 0.073, + "step": 8513 + }, + { + "epoch": 2.74, + "learning_rate": 3.932344958738299e-07, + "loss": 0.0689, + "step": 8514 + }, + { + "epoch": 2.74, + "learning_rate": 3.922705402472426e-07, + "loss": 0.0687, + "step": 8515 + }, + { + "epoch": 2.74, + "learning_rate": 3.913077439301649e-07, + "loss": 0.0682, + "step": 8516 + }, + { + "epoch": 2.74, + "learning_rate": 3.9034610703877596e-07, + "loss": 0.073, + "step": 8517 + }, + { + "epoch": 2.74, + "learning_rate": 3.8938562968911074e-07, + "loss": 0.0658, + "step": 8518 + }, + { + "epoch": 2.74, + "learning_rate": 3.8842631199705884e-07, + "loss": 0.0716, + "step": 8519 + }, + { + "epoch": 2.74, + "learning_rate": 3.87468154078382e-07, + "loss": 0.074, + "step": 8520 + }, + { + "epoch": 2.74, + "learning_rate": 3.865111560486934e-07, + "loss": 0.0699, + "step": 8521 + }, + { + "epoch": 2.74, + "learning_rate": 3.855553180234661e-07, + "loss": 0.0691, + "step": 8522 + }, + { + "epoch": 2.74, + "learning_rate": 3.846006401180402e-07, + "loss": 0.0674, + "step": 8523 + }, + { + "epoch": 2.74, + "learning_rate": 3.8364712244760907e-07, + "loss": 0.0651, + "step": 8524 + }, + { + "epoch": 2.74, + "learning_rate": 3.826947651272261e-07, + "loss": 0.0689, + "step": 8525 + }, + { + "epoch": 2.74, + "learning_rate": 3.817435682718096e-07, + "loss": 0.0705, + "step": 8526 + }, + { + "epoch": 2.74, + "learning_rate": 3.807935319961342e-07, + "loss": 0.0658, + "step": 8527 + }, + { + "epoch": 2.74, + "learning_rate": 3.798446564148339e-07, + "loss": 0.0763, + "step": 8528 + }, + { + "epoch": 2.74, + "learning_rate": 3.788969416424071e-07, + "loss": 0.0754, + "step": 8529 + }, + { + "epoch": 2.74, + "learning_rate": 3.779503877932067e-07, + "loss": 0.0719, + "step": 8530 + }, + { + "epoch": 2.74, + "learning_rate": 3.7700499498144803e-07, + "loss": 0.0682, + "step": 8531 + }, + { + "epoch": 2.74, + "learning_rate": 3.7606076332120766e-07, + "loss": 0.0665, + "step": 8532 + }, + { + "epoch": 2.74, + "learning_rate": 3.7511769292642e-07, + "loss": 0.0709, + "step": 8533 + }, + { + "epoch": 2.75, + "learning_rate": 3.741757839108773e-07, + "loss": 0.0701, + "step": 8534 + }, + { + "epoch": 2.75, + "learning_rate": 3.732350363882387e-07, + "loss": 0.0806, + "step": 8535 + }, + { + "epoch": 2.75, + "learning_rate": 3.7229545047201555e-07, + "loss": 0.0689, + "step": 8536 + }, + { + "epoch": 2.75, + "learning_rate": 3.71357026275585e-07, + "loss": 0.0728, + "step": 8537 + }, + { + "epoch": 2.75, + "learning_rate": 3.704197639121787e-07, + "loss": 0.0694, + "step": 8538 + }, + { + "epoch": 2.75, + "learning_rate": 3.6948366349489175e-07, + "loss": 0.0756, + "step": 8539 + }, + { + "epoch": 2.75, + "learning_rate": 3.6854872513667706e-07, + "loss": 0.0762, + "step": 8540 + }, + { + "epoch": 2.75, + "learning_rate": 3.676149489503511e-07, + "loss": 0.0692, + "step": 8541 + }, + { + "epoch": 2.75, + "learning_rate": 3.6668233504858486e-07, + "loss": 0.0686, + "step": 8542 + }, + { + "epoch": 2.75, + "learning_rate": 3.6575088354391054e-07, + "loss": 0.0695, + "step": 8543 + }, + { + "epoch": 2.75, + "learning_rate": 3.648205945487249e-07, + "loss": 0.0684, + "step": 8544 + }, + { + "epoch": 2.75, + "learning_rate": 3.638914681752759e-07, + "loss": 0.0751, + "step": 8545 + }, + { + "epoch": 2.75, + "learning_rate": 3.6296350453567943e-07, + "loss": 0.0702, + "step": 8546 + }, + { + "epoch": 2.75, + "learning_rate": 3.6203670374190704e-07, + "loss": 0.0694, + "step": 8547 + }, + { + "epoch": 2.75, + "learning_rate": 3.611110659057893e-07, + "loss": 0.0773, + "step": 8548 + }, + { + "epoch": 2.75, + "learning_rate": 3.6018659113901676e-07, + "loss": 0.0683, + "step": 8549 + }, + { + "epoch": 2.75, + "learning_rate": 3.592632795531437e-07, + "loss": 0.0696, + "step": 8550 + }, + { + "epoch": 2.75, + "learning_rate": 3.5834113125957747e-07, + "loss": 0.0762, + "step": 8551 + }, + { + "epoch": 2.75, + "learning_rate": 3.574201463695903e-07, + "loss": 0.0687, + "step": 8552 + }, + { + "epoch": 2.75, + "learning_rate": 3.5650032499431107e-07, + "loss": 0.0752, + "step": 8553 + }, + { + "epoch": 2.75, + "learning_rate": 3.55581667244731e-07, + "loss": 0.069, + "step": 8554 + }, + { + "epoch": 2.75, + "learning_rate": 3.546641732316969e-07, + "loss": 0.0676, + "step": 8555 + }, + { + "epoch": 2.75, + "learning_rate": 3.5374784306591913e-07, + "loss": 0.0735, + "step": 8556 + }, + { + "epoch": 2.75, + "learning_rate": 3.5283267685796707e-07, + "loss": 0.066, + "step": 8557 + }, + { + "epoch": 2.75, + "learning_rate": 3.5191867471826567e-07, + "loss": 0.065, + "step": 8558 + }, + { + "epoch": 2.75, + "learning_rate": 3.510058367571045e-07, + "loss": 0.0688, + "step": 8559 + }, + { + "epoch": 2.75, + "learning_rate": 3.5009416308462886e-07, + "loss": 0.0737, + "step": 8560 + }, + { + "epoch": 2.75, + "learning_rate": 3.491836538108462e-07, + "loss": 0.0707, + "step": 8561 + }, + { + "epoch": 2.75, + "learning_rate": 3.482743090456231e-07, + "loss": 0.0706, + "step": 8562 + }, + { + "epoch": 2.75, + "learning_rate": 3.4736612889868407e-07, + "loss": 0.0688, + "step": 8563 + }, + { + "epoch": 2.75, + "learning_rate": 3.4645911347961357e-07, + "loss": 0.0669, + "step": 8564 + }, + { + "epoch": 2.76, + "learning_rate": 3.455532628978575e-07, + "loss": 0.0735, + "step": 8565 + }, + { + "epoch": 2.76, + "learning_rate": 3.4464857726271614e-07, + "loss": 0.0692, + "step": 8566 + }, + { + "epoch": 2.76, + "learning_rate": 3.4374505668335776e-07, + "loss": 0.0711, + "step": 8567 + }, + { + "epoch": 2.76, + "learning_rate": 3.428427012688007e-07, + "loss": 0.0729, + "step": 8568 + }, + { + "epoch": 2.76, + "learning_rate": 3.4194151112793006e-07, + "loss": 0.0675, + "step": 8569 + }, + { + "epoch": 2.76, + "learning_rate": 3.4104148636948554e-07, + "loss": 0.065, + "step": 8570 + }, + { + "epoch": 2.76, + "learning_rate": 3.4014262710206803e-07, + "loss": 0.0728, + "step": 8571 + }, + { + "epoch": 2.76, + "learning_rate": 3.3924493343413743e-07, + "loss": 0.0684, + "step": 8572 + }, + { + "epoch": 2.76, + "learning_rate": 3.3834840547401493e-07, + "loss": 0.0683, + "step": 8573 + }, + { + "epoch": 2.76, + "learning_rate": 3.374530433298784e-07, + "loss": 0.07, + "step": 8574 + }, + { + "epoch": 2.76, + "learning_rate": 3.365588471097658e-07, + "loss": 0.0661, + "step": 8575 + }, + { + "epoch": 2.76, + "learning_rate": 3.356658169215743e-07, + "loss": 0.0649, + "step": 8576 + }, + { + "epoch": 2.76, + "learning_rate": 3.34773952873062e-07, + "loss": 0.0652, + "step": 8577 + }, + { + "epoch": 2.76, + "learning_rate": 3.338832550718429e-07, + "loss": 0.0688, + "step": 8578 + }, + { + "epoch": 2.76, + "learning_rate": 3.329937236253944e-07, + "loss": 0.0694, + "step": 8579 + }, + { + "epoch": 2.76, + "learning_rate": 3.3210535864105166e-07, + "loss": 0.0643, + "step": 8580 + }, + { + "epoch": 2.76, + "learning_rate": 3.312181602260056e-07, + "loss": 0.0746, + "step": 8581 + }, + { + "epoch": 2.76, + "learning_rate": 3.303321284873129e-07, + "loss": 0.0682, + "step": 8582 + }, + { + "epoch": 2.76, + "learning_rate": 3.294472635318846e-07, + "loss": 0.0673, + "step": 8583 + }, + { + "epoch": 2.76, + "learning_rate": 3.2856356546648983e-07, + "loss": 0.078, + "step": 8584 + }, + { + "epoch": 2.76, + "learning_rate": 3.276810343977621e-07, + "loss": 0.0708, + "step": 8585 + }, + { + "epoch": 2.76, + "learning_rate": 3.2679967043219185e-07, + "loss": 0.0734, + "step": 8586 + }, + { + "epoch": 2.76, + "learning_rate": 3.259194736761251e-07, + "loss": 0.0678, + "step": 8587 + }, + { + "epoch": 2.76, + "learning_rate": 3.250404442357735e-07, + "loss": 0.0722, + "step": 8588 + }, + { + "epoch": 2.76, + "learning_rate": 3.2416258221720345e-07, + "loss": 0.0708, + "step": 8589 + }, + { + "epoch": 2.76, + "learning_rate": 3.2328588772633895e-07, + "loss": 0.068, + "step": 8590 + }, + { + "epoch": 2.76, + "learning_rate": 3.224103608689699e-07, + "loss": 0.0703, + "step": 8591 + }, + { + "epoch": 2.76, + "learning_rate": 3.215360017507385e-07, + "loss": 0.0683, + "step": 8592 + }, + { + "epoch": 2.76, + "learning_rate": 3.206628104771481e-07, + "loss": 0.07, + "step": 8593 + }, + { + "epoch": 2.76, + "learning_rate": 3.197907871535633e-07, + "loss": 0.0678, + "step": 8594 + }, + { + "epoch": 2.76, + "learning_rate": 3.189199318852065e-07, + "loss": 0.0719, + "step": 8595 + }, + { + "epoch": 2.77, + "learning_rate": 3.180502447771572e-07, + "loss": 0.0703, + "step": 8596 + }, + { + "epoch": 2.77, + "learning_rate": 3.171817259343568e-07, + "loss": 0.0768, + "step": 8597 + }, + { + "epoch": 2.77, + "learning_rate": 3.1631437546160383e-07, + "loss": 0.0677, + "step": 8598 + }, + { + "epoch": 2.77, + "learning_rate": 3.154481934635556e-07, + "loss": 0.0734, + "step": 8599 + }, + { + "epoch": 2.77, + "learning_rate": 3.1458318004473075e-07, + "loss": 0.0685, + "step": 8600 + }, + { + "epoch": 2.77, + "learning_rate": 3.1371933530950584e-07, + "loss": 0.0743, + "step": 8601 + }, + { + "epoch": 2.77, + "learning_rate": 3.128566593621152e-07, + "loss": 0.0684, + "step": 8602 + }, + { + "epoch": 2.77, + "learning_rate": 3.119951523066522e-07, + "loss": 0.0745, + "step": 8603 + }, + { + "epoch": 2.77, + "learning_rate": 3.1113481424707157e-07, + "loss": 0.0751, + "step": 8604 + }, + { + "epoch": 2.77, + "learning_rate": 3.1027564528718247e-07, + "loss": 0.0719, + "step": 8605 + }, + { + "epoch": 2.77, + "learning_rate": 3.094176455306597e-07, + "loss": 0.0666, + "step": 8606 + }, + { + "epoch": 2.77, + "learning_rate": 3.085608150810315e-07, + "loss": 0.0691, + "step": 8607 + }, + { + "epoch": 2.77, + "learning_rate": 3.0770515404168645e-07, + "loss": 0.068, + "step": 8608 + }, + { + "epoch": 2.77, + "learning_rate": 3.068506625158729e-07, + "loss": 0.0669, + "step": 8609 + }, + { + "epoch": 2.77, + "learning_rate": 3.059973406066963e-07, + "loss": 0.0679, + "step": 8610 + }, + { + "epoch": 2.77, + "learning_rate": 3.051451884171219e-07, + "loss": 0.0689, + "step": 8611 + }, + { + "epoch": 2.77, + "learning_rate": 3.042942060499765e-07, + "loss": 0.0692, + "step": 8612 + }, + { + "epoch": 2.77, + "learning_rate": 3.034443936079412e-07, + "loss": 0.0693, + "step": 8613 + }, + { + "epoch": 2.77, + "learning_rate": 3.025957511935573e-07, + "loss": 0.0708, + "step": 8614 + }, + { + "epoch": 2.77, + "learning_rate": 3.017482789092285e-07, + "loss": 0.0804, + "step": 8615 + }, + { + "epoch": 2.77, + "learning_rate": 3.009019768572119e-07, + "loss": 0.0707, + "step": 8616 + }, + { + "epoch": 2.77, + "learning_rate": 3.0005684513962464e-07, + "loss": 0.0731, + "step": 8617 + }, + { + "epoch": 2.77, + "learning_rate": 2.992128838584485e-07, + "loss": 0.0728, + "step": 8618 + }, + { + "epoch": 2.77, + "learning_rate": 2.9837009311551535e-07, + "loss": 0.063, + "step": 8619 + }, + { + "epoch": 2.77, + "learning_rate": 2.9752847301252165e-07, + "loss": 0.0666, + "step": 8620 + }, + { + "epoch": 2.77, + "learning_rate": 2.9668802365102054e-07, + "loss": 0.0669, + "step": 8621 + }, + { + "epoch": 2.77, + "learning_rate": 2.9584874513242544e-07, + "loss": 0.0698, + "step": 8622 + }, + { + "epoch": 2.77, + "learning_rate": 2.950106375580053e-07, + "loss": 0.0777, + "step": 8623 + }, + { + "epoch": 2.77, + "learning_rate": 2.941737010288903e-07, + "loss": 0.0647, + "step": 8624 + }, + { + "epoch": 2.77, + "learning_rate": 2.933379356460686e-07, + "loss": 0.0717, + "step": 8625 + }, + { + "epoch": 2.77, + "learning_rate": 2.925033415103873e-07, + "loss": 0.0674, + "step": 8626 + }, + { + "epoch": 2.78, + "learning_rate": 2.9166991872255355e-07, + "loss": 0.0769, + "step": 8627 + }, + { + "epoch": 2.78, + "learning_rate": 2.9083766738313035e-07, + "loss": 0.0735, + "step": 8628 + }, + { + "epoch": 2.78, + "learning_rate": 2.900065875925406e-07, + "loss": 0.0677, + "step": 8629 + }, + { + "epoch": 2.78, + "learning_rate": 2.891766794510664e-07, + "loss": 0.0751, + "step": 8630 + }, + { + "epoch": 2.78, + "learning_rate": 2.8834794305884874e-07, + "loss": 0.0659, + "step": 8631 + }, + { + "epoch": 2.78, + "learning_rate": 2.875203785158831e-07, + "loss": 0.0746, + "step": 8632 + }, + { + "epoch": 2.78, + "learning_rate": 2.866939859220308e-07, + "loss": 0.0658, + "step": 8633 + }, + { + "epoch": 2.78, + "learning_rate": 2.8586876537700645e-07, + "loss": 0.0701, + "step": 8634 + }, + { + "epoch": 2.78, + "learning_rate": 2.850447169803849e-07, + "loss": 0.0696, + "step": 8635 + }, + { + "epoch": 2.78, + "learning_rate": 2.842218408315989e-07, + "loss": 0.0743, + "step": 8636 + }, + { + "epoch": 2.78, + "learning_rate": 2.8340013702993995e-07, + "loss": 0.0696, + "step": 8637 + }, + { + "epoch": 2.78, + "learning_rate": 2.825796056745589e-07, + "loss": 0.0676, + "step": 8638 + }, + { + "epoch": 2.78, + "learning_rate": 2.8176024686446425e-07, + "loss": 0.0668, + "step": 8639 + }, + { + "epoch": 2.78, + "learning_rate": 2.809420606985236e-07, + "loss": 0.0678, + "step": 8640 + }, + { + "epoch": 2.78, + "learning_rate": 2.801250472754635e-07, + "loss": 0.0678, + "step": 8641 + }, + { + "epoch": 2.78, + "learning_rate": 2.7930920669386743e-07, + "loss": 0.0752, + "step": 8642 + }, + { + "epoch": 2.78, + "learning_rate": 2.784945390521765e-07, + "loss": 0.07, + "step": 8643 + }, + { + "epoch": 2.78, + "learning_rate": 2.776810444486944e-07, + "loss": 0.0647, + "step": 8644 + }, + { + "epoch": 2.78, + "learning_rate": 2.768687229815803e-07, + "loss": 0.0725, + "step": 8645 + }, + { + "epoch": 2.78, + "learning_rate": 2.7605757474885255e-07, + "loss": 0.0722, + "step": 8646 + }, + { + "epoch": 2.78, + "learning_rate": 2.752475998483872e-07, + "loss": 0.0682, + "step": 8647 + }, + { + "epoch": 2.78, + "learning_rate": 2.744387983779195e-07, + "loss": 0.0825, + "step": 8648 + }, + { + "epoch": 2.78, + "learning_rate": 2.736311704350425e-07, + "loss": 0.0633, + "step": 8649 + }, + { + "epoch": 2.78, + "learning_rate": 2.7282471611720817e-07, + "loss": 0.0676, + "step": 8650 + }, + { + "epoch": 2.78, + "learning_rate": 2.720194355217276e-07, + "loss": 0.0686, + "step": 8651 + }, + { + "epoch": 2.78, + "learning_rate": 2.7121532874576863e-07, + "loss": 0.0671, + "step": 8652 + }, + { + "epoch": 2.78, + "learning_rate": 2.7041239588635693e-07, + "loss": 0.0727, + "step": 8653 + }, + { + "epoch": 2.78, + "learning_rate": 2.6961063704038057e-07, + "loss": 0.063, + "step": 8654 + }, + { + "epoch": 2.78, + "learning_rate": 2.6881005230458115e-07, + "loss": 0.0665, + "step": 8655 + }, + { + "epoch": 2.78, + "learning_rate": 2.6801064177556123e-07, + "loss": 0.075, + "step": 8656 + }, + { + "epoch": 2.78, + "learning_rate": 2.6721240554978046e-07, + "loss": 0.0698, + "step": 8657 + }, + { + "epoch": 2.79, + "learning_rate": 2.664153437235584e-07, + "loss": 0.0651, + "step": 8658 + }, + { + "epoch": 2.79, + "learning_rate": 2.656194563930714e-07, + "loss": 0.0657, + "step": 8659 + }, + { + "epoch": 2.79, + "learning_rate": 2.6482474365435604e-07, + "loss": 0.0694, + "step": 8660 + }, + { + "epoch": 2.79, + "learning_rate": 2.6403120560330233e-07, + "loss": 0.0654, + "step": 8661 + }, + { + "epoch": 2.79, + "learning_rate": 2.6323884233566576e-07, + "loss": 0.0652, + "step": 8662 + }, + { + "epoch": 2.79, + "learning_rate": 2.624476539470544e-07, + "loss": 0.0663, + "step": 8663 + }, + { + "epoch": 2.79, + "learning_rate": 2.616576405329352e-07, + "loss": 0.0691, + "step": 8664 + }, + { + "epoch": 2.79, + "learning_rate": 2.6086880218863633e-07, + "loss": 0.0668, + "step": 8665 + }, + { + "epoch": 2.79, + "learning_rate": 2.600811390093427e-07, + "loss": 0.0685, + "step": 8666 + }, + { + "epoch": 2.79, + "learning_rate": 2.592946510900962e-07, + "loss": 0.0663, + "step": 8667 + }, + { + "epoch": 2.79, + "learning_rate": 2.5850933852579865e-07, + "loss": 0.0689, + "step": 8668 + }, + { + "epoch": 2.79, + "learning_rate": 2.5772520141120747e-07, + "loss": 0.06, + "step": 8669 + }, + { + "epoch": 2.79, + "learning_rate": 2.5694223984094044e-07, + "loss": 0.07, + "step": 8670 + }, + { + "epoch": 2.79, + "learning_rate": 2.561604539094742e-07, + "loss": 0.0772, + "step": 8671 + }, + { + "epoch": 2.79, + "learning_rate": 2.553798437111421e-07, + "loss": 0.0692, + "step": 8672 + }, + { + "epoch": 2.79, + "learning_rate": 2.546004093401355e-07, + "loss": 0.0688, + "step": 8673 + }, + { + "epoch": 2.79, + "learning_rate": 2.538221508905048e-07, + "loss": 0.0669, + "step": 8674 + }, + { + "epoch": 2.79, + "learning_rate": 2.5304506845615695e-07, + "loss": 0.0729, + "step": 8675 + }, + { + "epoch": 2.79, + "learning_rate": 2.5226916213085706e-07, + "loss": 0.0634, + "step": 8676 + }, + { + "epoch": 2.79, + "learning_rate": 2.5149443200823244e-07, + "loss": 0.0724, + "step": 8677 + }, + { + "epoch": 2.79, + "learning_rate": 2.507208781817638e-07, + "loss": 0.0704, + "step": 8678 + }, + { + "epoch": 2.79, + "learning_rate": 2.4994850074479104e-07, + "loss": 0.0737, + "step": 8679 + }, + { + "epoch": 2.79, + "learning_rate": 2.49177299790514e-07, + "loss": 0.0703, + "step": 8680 + }, + { + "epoch": 2.79, + "learning_rate": 2.4840727541198597e-07, + "loss": 0.0707, + "step": 8681 + }, + { + "epoch": 2.79, + "learning_rate": 2.4763842770212374e-07, + "loss": 0.0691, + "step": 8682 + }, + { + "epoch": 2.79, + "learning_rate": 2.4687075675369985e-07, + "loss": 0.0644, + "step": 8683 + }, + { + "epoch": 2.79, + "learning_rate": 2.461042626593435e-07, + "loss": 0.0704, + "step": 8684 + }, + { + "epoch": 2.79, + "learning_rate": 2.4533894551154404e-07, + "loss": 0.0665, + "step": 8685 + }, + { + "epoch": 2.79, + "learning_rate": 2.4457480540264864e-07, + "loss": 0.077, + "step": 8686 + }, + { + "epoch": 2.79, + "learning_rate": 2.438118424248592e-07, + "loss": 0.0725, + "step": 8687 + }, + { + "epoch": 2.79, + "learning_rate": 2.4305005667023985e-07, + "loss": 0.0705, + "step": 8688 + }, + { + "epoch": 2.8, + "learning_rate": 2.4228944823071144e-07, + "loss": 0.0637, + "step": 8689 + }, + { + "epoch": 2.8, + "learning_rate": 2.4153001719805057e-07, + "loss": 0.0693, + "step": 8690 + }, + { + "epoch": 2.8, + "learning_rate": 2.40771763663894e-07, + "loss": 0.0699, + "step": 8691 + }, + { + "epoch": 2.8, + "learning_rate": 2.4001468771973623e-07, + "loss": 0.0655, + "step": 8692 + }, + { + "epoch": 2.8, + "learning_rate": 2.392587894569309e-07, + "loss": 0.0644, + "step": 8693 + }, + { + "epoch": 2.8, + "learning_rate": 2.38504068966684e-07, + "loss": 0.0726, + "step": 8694 + }, + { + "epoch": 2.8, + "learning_rate": 2.3775052634006702e-07, + "loss": 0.0734, + "step": 8695 + }, + { + "epoch": 2.8, + "learning_rate": 2.3699816166800284e-07, + "loss": 0.067, + "step": 8696 + }, + { + "epoch": 2.8, + "learning_rate": 2.3624697504127547e-07, + "loss": 0.0694, + "step": 8697 + }, + { + "epoch": 2.8, + "learning_rate": 2.354969665505291e-07, + "loss": 0.0705, + "step": 8698 + }, + { + "epoch": 2.8, + "learning_rate": 2.3474813628626026e-07, + "loss": 0.0682, + "step": 8699 + }, + { + "epoch": 2.8, + "learning_rate": 2.340004843388255e-07, + "loss": 0.0689, + "step": 8700 + }, + { + "epoch": 2.8, + "learning_rate": 2.3325401079844157e-07, + "loss": 0.0646, + "step": 8701 + }, + { + "epoch": 2.8, + "learning_rate": 2.3250871575518086e-07, + "loss": 0.0659, + "step": 8702 + }, + { + "epoch": 2.8, + "learning_rate": 2.3176459929897143e-07, + "loss": 0.0642, + "step": 8703 + }, + { + "epoch": 2.8, + "learning_rate": 2.3102166151960482e-07, + "loss": 0.0682, + "step": 8704 + }, + { + "epoch": 2.8, + "learning_rate": 2.3027990250672593e-07, + "loss": 0.0733, + "step": 8705 + }, + { + "epoch": 2.8, + "learning_rate": 2.2953932234983766e-07, + "loss": 0.0741, + "step": 8706 + }, + { + "epoch": 2.8, + "learning_rate": 2.2879992113830297e-07, + "loss": 0.0706, + "step": 8707 + }, + { + "epoch": 2.8, + "learning_rate": 2.2806169896134045e-07, + "loss": 0.067, + "step": 8708 + }, + { + "epoch": 2.8, + "learning_rate": 2.2732465590802554e-07, + "loss": 0.0689, + "step": 8709 + }, + { + "epoch": 2.8, + "learning_rate": 2.265887920672949e-07, + "loss": 0.0654, + "step": 8710 + }, + { + "epoch": 2.8, + "learning_rate": 2.258541075279419e-07, + "loss": 0.0703, + "step": 8711 + }, + { + "epoch": 2.8, + "learning_rate": 2.2512060237861455e-07, + "loss": 0.067, + "step": 8712 + }, + { + "epoch": 2.8, + "learning_rate": 2.2438827670782203e-07, + "loss": 0.0679, + "step": 8713 + }, + { + "epoch": 2.8, + "learning_rate": 2.2365713060392925e-07, + "loss": 0.0754, + "step": 8714 + }, + { + "epoch": 2.8, + "learning_rate": 2.229271641551578e-07, + "loss": 0.0707, + "step": 8715 + }, + { + "epoch": 2.8, + "learning_rate": 2.2219837744959284e-07, + "loss": 0.0638, + "step": 8716 + }, + { + "epoch": 2.8, + "learning_rate": 2.2147077057516952e-07, + "loss": 0.0748, + "step": 8717 + }, + { + "epoch": 2.8, + "learning_rate": 2.2074434361968322e-07, + "loss": 0.0687, + "step": 8718 + }, + { + "epoch": 2.8, + "learning_rate": 2.2001909667079158e-07, + "loss": 0.0721, + "step": 8719 + }, + { + "epoch": 2.81, + "learning_rate": 2.192950298160035e-07, + "loss": 0.0659, + "step": 8720 + }, + { + "epoch": 2.81, + "learning_rate": 2.1857214314268571e-07, + "loss": 0.0674, + "step": 8721 + }, + { + "epoch": 2.81, + "learning_rate": 2.178504367380696e-07, + "loss": 0.068, + "step": 8722 + }, + { + "epoch": 2.81, + "learning_rate": 2.1712991068923662e-07, + "loss": 0.0737, + "step": 8723 + }, + { + "epoch": 2.81, + "learning_rate": 2.1641056508312718e-07, + "loss": 0.0739, + "step": 8724 + }, + { + "epoch": 2.81, + "learning_rate": 2.1569240000654523e-07, + "loss": 0.0679, + "step": 8725 + }, + { + "epoch": 2.81, + "learning_rate": 2.1497541554614255e-07, + "loss": 0.0735, + "step": 8726 + }, + { + "epoch": 2.81, + "learning_rate": 2.1425961178843545e-07, + "loss": 0.0685, + "step": 8727 + }, + { + "epoch": 2.81, + "learning_rate": 2.1354498881979714e-07, + "loss": 0.0692, + "step": 8728 + }, + { + "epoch": 2.81, + "learning_rate": 2.1283154672645522e-07, + "loss": 0.0631, + "step": 8729 + }, + { + "epoch": 2.81, + "learning_rate": 2.1211928559449756e-07, + "loss": 0.0723, + "step": 8730 + }, + { + "epoch": 2.81, + "learning_rate": 2.1140820550986874e-07, + "loss": 0.0647, + "step": 8731 + }, + { + "epoch": 2.81, + "learning_rate": 2.106983065583701e-07, + "loss": 0.0681, + "step": 8732 + }, + { + "epoch": 2.81, + "learning_rate": 2.09989588825662e-07, + "loss": 0.0693, + "step": 8733 + }, + { + "epoch": 2.81, + "learning_rate": 2.0928205239726053e-07, + "loss": 0.0657, + "step": 8734 + }, + { + "epoch": 2.81, + "learning_rate": 2.0857569735853956e-07, + "loss": 0.0753, + "step": 8735 + }, + { + "epoch": 2.81, + "learning_rate": 2.0787052379473204e-07, + "loss": 0.0677, + "step": 8736 + }, + { + "epoch": 2.81, + "learning_rate": 2.071665317909266e-07, + "loss": 0.0639, + "step": 8737 + }, + { + "epoch": 2.81, + "learning_rate": 2.0646372143207083e-07, + "loss": 0.0625, + "step": 8738 + }, + { + "epoch": 2.81, + "learning_rate": 2.0576209280296688e-07, + "loss": 0.0686, + "step": 8739 + }, + { + "epoch": 2.81, + "learning_rate": 2.0506164598827815e-07, + "loss": 0.071, + "step": 8740 + }, + { + "epoch": 2.81, + "learning_rate": 2.043623810725215e-07, + "loss": 0.0698, + "step": 8741 + }, + { + "epoch": 2.81, + "learning_rate": 2.0366429814007605e-07, + "loss": 0.0733, + "step": 8742 + }, + { + "epoch": 2.81, + "learning_rate": 2.0296739727517335e-07, + "loss": 0.0651, + "step": 8743 + }, + { + "epoch": 2.81, + "learning_rate": 2.0227167856190612e-07, + "loss": 0.0706, + "step": 8744 + }, + { + "epoch": 2.81, + "learning_rate": 2.0157714208422053e-07, + "loss": 0.0672, + "step": 8745 + }, + { + "epoch": 2.81, + "learning_rate": 2.0088378792592288e-07, + "loss": 0.0647, + "step": 8746 + }, + { + "epoch": 2.81, + "learning_rate": 2.0019161617067738e-07, + "loss": 0.0692, + "step": 8747 + }, + { + "epoch": 2.81, + "learning_rate": 1.995006269020039e-07, + "loss": 0.0681, + "step": 8748 + }, + { + "epoch": 2.81, + "learning_rate": 1.9881082020327901e-07, + "loss": 0.0656, + "step": 8749 + }, + { + "epoch": 2.81, + "learning_rate": 1.9812219615774064e-07, + "loss": 0.0646, + "step": 8750 + }, + { + "epoch": 2.82, + "learning_rate": 1.9743475484847785e-07, + "loss": 0.0681, + "step": 8751 + }, + { + "epoch": 2.82, + "learning_rate": 1.9674849635844206e-07, + "loss": 0.0729, + "step": 8752 + }, + { + "epoch": 2.82, + "learning_rate": 1.9606342077043815e-07, + "loss": 0.0671, + "step": 8753 + }, + { + "epoch": 2.82, + "learning_rate": 1.9537952816713334e-07, + "loss": 0.0716, + "step": 8754 + }, + { + "epoch": 2.82, + "learning_rate": 1.9469681863104716e-07, + "loss": 0.0665, + "step": 8755 + }, + { + "epoch": 2.82, + "learning_rate": 1.9401529224455817e-07, + "loss": 0.0656, + "step": 8756 + }, + { + "epoch": 2.82, + "learning_rate": 1.933349490899028e-07, + "loss": 0.0682, + "step": 8757 + }, + { + "epoch": 2.82, + "learning_rate": 1.926557892491754e-07, + "loss": 0.0654, + "step": 8758 + }, + { + "epoch": 2.82, + "learning_rate": 1.919778128043226e-07, + "loss": 0.071, + "step": 8759 + }, + { + "epoch": 2.82, + "learning_rate": 1.9130101983715455e-07, + "loss": 0.065, + "step": 8760 + }, + { + "epoch": 2.82, + "learning_rate": 1.9062541042933703e-07, + "loss": 0.0673, + "step": 8761 + }, + { + "epoch": 2.82, + "learning_rate": 1.8995098466238816e-07, + "loss": 0.0721, + "step": 8762 + }, + { + "epoch": 2.82, + "learning_rate": 1.892777426176906e-07, + "loss": 0.0631, + "step": 8763 + }, + { + "epoch": 2.82, + "learning_rate": 1.8860568437648053e-07, + "loss": 0.0665, + "step": 8764 + }, + { + "epoch": 2.82, + "learning_rate": 1.879348100198486e-07, + "loss": 0.0701, + "step": 8765 + }, + { + "epoch": 2.82, + "learning_rate": 1.8726511962874783e-07, + "loss": 0.0744, + "step": 8766 + }, + { + "epoch": 2.82, + "learning_rate": 1.865966132839847e-07, + "loss": 0.0707, + "step": 8767 + }, + { + "epoch": 2.82, + "learning_rate": 1.859292910662236e-07, + "loss": 0.0689, + "step": 8768 + }, + { + "epoch": 2.82, + "learning_rate": 1.8526315305598785e-07, + "loss": 0.0707, + "step": 8769 + }, + { + "epoch": 2.82, + "learning_rate": 1.8459819933365541e-07, + "loss": 0.0772, + "step": 8770 + }, + { + "epoch": 2.82, + "learning_rate": 1.839344299794632e-07, + "loss": 0.0664, + "step": 8771 + }, + { + "epoch": 2.82, + "learning_rate": 1.8327184507350493e-07, + "loss": 0.0686, + "step": 8772 + }, + { + "epoch": 2.82, + "learning_rate": 1.8261044469572998e-07, + "loss": 0.066, + "step": 8773 + }, + { + "epoch": 2.82, + "learning_rate": 1.8195022892594449e-07, + "loss": 0.0702, + "step": 8774 + }, + { + "epoch": 2.82, + "learning_rate": 1.8129119784381478e-07, + "loss": 0.0684, + "step": 8775 + }, + { + "epoch": 2.82, + "learning_rate": 1.8063335152886273e-07, + "loss": 0.0672, + "step": 8776 + }, + { + "epoch": 2.82, + "learning_rate": 1.7997669006046604e-07, + "loss": 0.0694, + "step": 8777 + }, + { + "epoch": 2.82, + "learning_rate": 1.7932121351785903e-07, + "loss": 0.0627, + "step": 8778 + }, + { + "epoch": 2.82, + "learning_rate": 1.7866692198013623e-07, + "loss": 0.0662, + "step": 8779 + }, + { + "epoch": 2.82, + "learning_rate": 1.7801381552624565e-07, + "loss": 0.0692, + "step": 8780 + }, + { + "epoch": 2.82, + "learning_rate": 1.7736189423499638e-07, + "loss": 0.0665, + "step": 8781 + }, + { + "epoch": 2.83, + "learning_rate": 1.7671115818504892e-07, + "loss": 0.0661, + "step": 8782 + }, + { + "epoch": 2.83, + "learning_rate": 1.7606160745492595e-07, + "loss": 0.066, + "step": 8783 + }, + { + "epoch": 2.83, + "learning_rate": 1.7541324212300482e-07, + "loss": 0.0713, + "step": 8784 + }, + { + "epoch": 2.83, + "learning_rate": 1.7476606226751958e-07, + "loss": 0.0667, + "step": 8785 + }, + { + "epoch": 2.83, + "learning_rate": 1.7412006796656e-07, + "loss": 0.0688, + "step": 8786 + }, + { + "epoch": 2.83, + "learning_rate": 1.7347525929807817e-07, + "loss": 0.0689, + "step": 8787 + }, + { + "epoch": 2.83, + "learning_rate": 1.728316363398763e-07, + "loss": 0.0699, + "step": 8788 + }, + { + "epoch": 2.83, + "learning_rate": 1.7218919916961786e-07, + "loss": 0.0696, + "step": 8789 + }, + { + "epoch": 2.83, + "learning_rate": 1.715479478648241e-07, + "loss": 0.0686, + "step": 8790 + }, + { + "epoch": 2.83, + "learning_rate": 1.709078825028676e-07, + "loss": 0.0613, + "step": 8791 + }, + { + "epoch": 2.83, + "learning_rate": 1.7026900316098217e-07, + "loss": 0.0711, + "step": 8792 + }, + { + "epoch": 2.83, + "learning_rate": 1.6963130991625942e-07, + "loss": 0.0744, + "step": 8793 + }, + { + "epoch": 2.83, + "learning_rate": 1.6899480284564562e-07, + "loss": 0.0655, + "step": 8794 + }, + { + "epoch": 2.83, + "learning_rate": 1.6835948202594156e-07, + "loss": 0.0688, + "step": 8795 + }, + { + "epoch": 2.83, + "learning_rate": 1.6772534753381254e-07, + "loss": 0.0668, + "step": 8796 + }, + { + "epoch": 2.83, + "learning_rate": 1.6709239944577405e-07, + "loss": 0.0704, + "step": 8797 + }, + { + "epoch": 2.83, + "learning_rate": 1.664606378381972e-07, + "loss": 0.0764, + "step": 8798 + }, + { + "epoch": 2.83, + "learning_rate": 1.658300627873166e-07, + "loss": 0.0664, + "step": 8799 + }, + { + "epoch": 2.83, + "learning_rate": 1.6520067436922027e-07, + "loss": 0.0653, + "step": 8800 + }, + { + "epoch": 2.83, + "learning_rate": 1.6457247265984967e-07, + "loss": 0.0652, + "step": 8801 + }, + { + "epoch": 2.83, + "learning_rate": 1.6394545773500968e-07, + "loss": 0.0669, + "step": 8802 + }, + { + "epoch": 2.83, + "learning_rate": 1.6331962967035764e-07, + "loss": 0.0747, + "step": 8803 + }, + { + "epoch": 2.83, + "learning_rate": 1.6269498854140752e-07, + "loss": 0.0633, + "step": 8804 + }, + { + "epoch": 2.83, + "learning_rate": 1.620715344235324e-07, + "loss": 0.0702, + "step": 8805 + }, + { + "epoch": 2.83, + "learning_rate": 1.6144926739195989e-07, + "loss": 0.0662, + "step": 8806 + }, + { + "epoch": 2.83, + "learning_rate": 1.6082818752177654e-07, + "loss": 0.0721, + "step": 8807 + }, + { + "epoch": 2.83, + "learning_rate": 1.6020829488792355e-07, + "loss": 0.0777, + "step": 8808 + }, + { + "epoch": 2.83, + "learning_rate": 1.595895895652e-07, + "loss": 0.0832, + "step": 8809 + }, + { + "epoch": 2.83, + "learning_rate": 1.5897207162826167e-07, + "loss": 0.0718, + "step": 8810 + }, + { + "epoch": 2.83, + "learning_rate": 1.5835574115162123e-07, + "loss": 0.0691, + "step": 8811 + }, + { + "epoch": 2.83, + "learning_rate": 1.5774059820964693e-07, + "loss": 0.0635, + "step": 8812 + }, + { + "epoch": 2.84, + "learning_rate": 1.5712664287656388e-07, + "loss": 0.0675, + "step": 8813 + }, + { + "epoch": 2.84, + "learning_rate": 1.5651387522645721e-07, + "loss": 0.0712, + "step": 8814 + }, + { + "epoch": 2.84, + "learning_rate": 1.559022953332634e-07, + "loss": 0.0689, + "step": 8815 + }, + { + "epoch": 2.84, + "learning_rate": 1.5529190327078003e-07, + "loss": 0.0639, + "step": 8816 + }, + { + "epoch": 2.84, + "learning_rate": 1.5468269911265933e-07, + "loss": 0.0696, + "step": 8817 + }, + { + "epoch": 2.84, + "learning_rate": 1.5407468293240912e-07, + "loss": 0.069, + "step": 8818 + }, + { + "epoch": 2.84, + "learning_rate": 1.5346785480339633e-07, + "loss": 0.0673, + "step": 8819 + }, + { + "epoch": 2.84, + "learning_rate": 1.5286221479884343e-07, + "loss": 0.0663, + "step": 8820 + }, + { + "epoch": 2.84, + "learning_rate": 1.522577629918298e-07, + "loss": 0.0683, + "step": 8821 + }, + { + "epoch": 2.84, + "learning_rate": 1.5165449945529043e-07, + "loss": 0.0701, + "step": 8822 + }, + { + "epoch": 2.84, + "learning_rate": 1.5105242426201815e-07, + "loss": 0.0659, + "step": 8823 + }, + { + "epoch": 2.84, + "learning_rate": 1.5045153748466046e-07, + "loss": 0.0654, + "step": 8824 + }, + { + "epoch": 2.84, + "learning_rate": 1.4985183919572598e-07, + "loss": 0.0685, + "step": 8825 + }, + { + "epoch": 2.84, + "learning_rate": 1.4925332946757465e-07, + "loss": 0.0654, + "step": 8826 + }, + { + "epoch": 2.84, + "learning_rate": 1.4865600837242533e-07, + "loss": 0.0688, + "step": 8827 + }, + { + "epoch": 2.84, + "learning_rate": 1.4805987598235261e-07, + "loss": 0.0714, + "step": 8828 + }, + { + "epoch": 2.84, + "learning_rate": 1.4746493236929117e-07, + "loss": 0.0689, + "step": 8829 + }, + { + "epoch": 2.84, + "learning_rate": 1.4687117760502579e-07, + "loss": 0.0692, + "step": 8830 + }, + { + "epoch": 2.84, + "learning_rate": 1.462786117612036e-07, + "loss": 0.0665, + "step": 8831 + }, + { + "epoch": 2.84, + "learning_rate": 1.4568723490932746e-07, + "loss": 0.07, + "step": 8832 + }, + { + "epoch": 2.84, + "learning_rate": 1.4509704712075247e-07, + "loss": 0.0688, + "step": 8833 + }, + { + "epoch": 2.84, + "learning_rate": 1.4450804846669387e-07, + "loss": 0.0674, + "step": 8834 + }, + { + "epoch": 2.84, + "learning_rate": 1.4392023901822481e-07, + "loss": 0.0702, + "step": 8835 + }, + { + "epoch": 2.84, + "learning_rate": 1.4333361884626974e-07, + "loss": 0.0652, + "step": 8836 + }, + { + "epoch": 2.84, + "learning_rate": 1.4274818802161527e-07, + "loss": 0.0618, + "step": 8837 + }, + { + "epoch": 2.84, + "learning_rate": 1.4216394661490163e-07, + "loss": 0.068, + "step": 8838 + }, + { + "epoch": 2.84, + "learning_rate": 1.415808946966246e-07, + "loss": 0.0667, + "step": 8839 + }, + { + "epoch": 2.84, + "learning_rate": 1.4099903233713909e-07, + "loss": 0.0676, + "step": 8840 + }, + { + "epoch": 2.84, + "learning_rate": 1.404183596066533e-07, + "loss": 0.0747, + "step": 8841 + }, + { + "epoch": 2.84, + "learning_rate": 1.3983887657523676e-07, + "loss": 0.0704, + "step": 8842 + }, + { + "epoch": 2.84, + "learning_rate": 1.3926058331280912e-07, + "loss": 0.0612, + "step": 8843 + }, + { + "epoch": 2.85, + "learning_rate": 1.386834798891512e-07, + "loss": 0.0673, + "step": 8844 + }, + { + "epoch": 2.85, + "learning_rate": 1.3810756637389844e-07, + "loss": 0.0672, + "step": 8845 + }, + { + "epoch": 2.85, + "learning_rate": 1.37532842836543e-07, + "loss": 0.07, + "step": 8846 + }, + { + "epoch": 2.85, + "learning_rate": 1.3695930934643387e-07, + "loss": 0.0676, + "step": 8847 + }, + { + "epoch": 2.85, + "learning_rate": 1.3638696597277678e-07, + "loss": 0.0757, + "step": 8848 + }, + { + "epoch": 2.85, + "learning_rate": 1.3581581278463095e-07, + "loss": 0.0796, + "step": 8849 + }, + { + "epoch": 2.85, + "learning_rate": 1.3524584985091572e-07, + "loss": 0.0715, + "step": 8850 + }, + { + "epoch": 2.85, + "learning_rate": 1.3467707724040491e-07, + "loss": 0.0707, + "step": 8851 + }, + { + "epoch": 2.85, + "learning_rate": 1.3410949502172808e-07, + "loss": 0.0712, + "step": 8852 + }, + { + "epoch": 2.85, + "learning_rate": 1.3354310326337383e-07, + "loss": 0.0779, + "step": 8853 + }, + { + "epoch": 2.85, + "learning_rate": 1.329779020336841e-07, + "loss": 0.0726, + "step": 8854 + }, + { + "epoch": 2.85, + "learning_rate": 1.3241389140086103e-07, + "loss": 0.0755, + "step": 8855 + }, + { + "epoch": 2.85, + "learning_rate": 1.3185107143295684e-07, + "loss": 0.0748, + "step": 8856 + }, + { + "epoch": 2.85, + "learning_rate": 1.3128944219788497e-07, + "loss": 0.0667, + "step": 8857 + }, + { + "epoch": 2.85, + "learning_rate": 1.3072900376341459e-07, + "loss": 0.0719, + "step": 8858 + }, + { + "epoch": 2.85, + "learning_rate": 1.3016975619717042e-07, + "loss": 0.0689, + "step": 8859 + }, + { + "epoch": 2.85, + "learning_rate": 1.296116995666341e-07, + "loss": 0.0731, + "step": 8860 + }, + { + "epoch": 2.85, + "learning_rate": 1.2905483393914287e-07, + "loss": 0.0644, + "step": 8861 + }, + { + "epoch": 2.85, + "learning_rate": 1.2849915938189072e-07, + "loss": 0.0748, + "step": 8862 + }, + { + "epoch": 2.85, + "learning_rate": 1.2794467596192628e-07, + "loss": 0.0739, + "step": 8863 + }, + { + "epoch": 2.85, + "learning_rate": 1.273913837461571e-07, + "loss": 0.0668, + "step": 8864 + }, + { + "epoch": 2.85, + "learning_rate": 1.268392828013454e-07, + "loss": 0.0695, + "step": 8865 + }, + { + "epoch": 2.85, + "learning_rate": 1.2628837319411002e-07, + "loss": 0.0642, + "step": 8866 + }, + { + "epoch": 2.85, + "learning_rate": 1.2573865499092675e-07, + "loss": 0.0659, + "step": 8867 + }, + { + "epoch": 2.85, + "learning_rate": 1.2519012825812804e-07, + "loss": 0.0653, + "step": 8868 + }, + { + "epoch": 2.85, + "learning_rate": 1.2464279306189765e-07, + "loss": 0.0724, + "step": 8869 + }, + { + "epoch": 2.85, + "learning_rate": 1.2409664946828158e-07, + "loss": 0.0661, + "step": 8870 + }, + { + "epoch": 2.85, + "learning_rate": 1.2355169754318053e-07, + "loss": 0.0689, + "step": 8871 + }, + { + "epoch": 2.85, + "learning_rate": 1.230079373523496e-07, + "loss": 0.0682, + "step": 8872 + }, + { + "epoch": 2.85, + "learning_rate": 1.2246536896140192e-07, + "loss": 0.0681, + "step": 8873 + }, + { + "epoch": 2.85, + "learning_rate": 1.219239924358062e-07, + "loss": 0.067, + "step": 8874 + }, + { + "epoch": 2.85, + "learning_rate": 1.2138380784088578e-07, + "loss": 0.0703, + "step": 8875 + }, + { + "epoch": 2.86, + "learning_rate": 1.20844815241824e-07, + "loss": 0.0689, + "step": 8876 + }, + { + "epoch": 2.86, + "learning_rate": 1.2030701470365557e-07, + "loss": 0.0733, + "step": 8877 + }, + { + "epoch": 2.86, + "learning_rate": 1.1977040629127524e-07, + "loss": 0.0625, + "step": 8878 + }, + { + "epoch": 2.86, + "learning_rate": 1.1923499006943228e-07, + "loss": 0.0682, + "step": 8879 + }, + { + "epoch": 2.86, + "learning_rate": 1.1870076610273285e-07, + "loss": 0.0672, + "step": 8880 + }, + { + "epoch": 2.86, + "learning_rate": 1.1816773445563646e-07, + "loss": 0.0636, + "step": 8881 + }, + { + "epoch": 2.86, + "learning_rate": 1.1763589519246388e-07, + "loss": 0.0688, + "step": 8882 + }, + { + "epoch": 2.86, + "learning_rate": 1.1710524837738713e-07, + "loss": 0.0732, + "step": 8883 + }, + { + "epoch": 2.86, + "learning_rate": 1.1657579407443608e-07, + "loss": 0.0734, + "step": 8884 + }, + { + "epoch": 2.86, + "learning_rate": 1.1604753234749855e-07, + "loss": 0.0652, + "step": 8885 + }, + { + "epoch": 2.86, + "learning_rate": 1.1552046326031685e-07, + "loss": 0.0712, + "step": 8886 + }, + { + "epoch": 2.86, + "learning_rate": 1.1499458687648791e-07, + "loss": 0.0758, + "step": 8887 + }, + { + "epoch": 2.86, + "learning_rate": 1.1446990325946649e-07, + "loss": 0.07, + "step": 8888 + }, + { + "epoch": 2.86, + "learning_rate": 1.1394641247256421e-07, + "loss": 0.0719, + "step": 8889 + }, + { + "epoch": 2.86, + "learning_rate": 1.1342411457894609e-07, + "loss": 0.069, + "step": 8890 + }, + { + "epoch": 2.86, + "learning_rate": 1.1290300964163613e-07, + "loss": 0.0638, + "step": 8891 + }, + { + "epoch": 2.86, + "learning_rate": 1.1238309772351297e-07, + "loss": 0.0689, + "step": 8892 + }, + { + "epoch": 2.86, + "learning_rate": 1.1186437888730972e-07, + "loss": 0.0605, + "step": 8893 + }, + { + "epoch": 2.86, + "learning_rate": 1.1134685319562077e-07, + "loss": 0.0695, + "step": 8894 + }, + { + "epoch": 2.86, + "learning_rate": 1.1083052071089063e-07, + "loss": 0.0633, + "step": 8895 + }, + { + "epoch": 2.86, + "learning_rate": 1.1031538149542054e-07, + "loss": 0.0701, + "step": 8896 + }, + { + "epoch": 2.86, + "learning_rate": 1.0980143561137191e-07, + "loss": 0.0687, + "step": 8897 + }, + { + "epoch": 2.86, + "learning_rate": 1.0928868312075958e-07, + "loss": 0.0675, + "step": 8898 + }, + { + "epoch": 2.86, + "learning_rate": 1.0877712408545294e-07, + "loss": 0.0701, + "step": 8899 + }, + { + "epoch": 2.86, + "learning_rate": 1.082667585671815e-07, + "loss": 0.0694, + "step": 8900 + }, + { + "epoch": 2.86, + "learning_rate": 1.0775758662752377e-07, + "loss": 0.0714, + "step": 8901 + }, + { + "epoch": 2.86, + "learning_rate": 1.0724960832792285e-07, + "loss": 0.0763, + "step": 8902 + }, + { + "epoch": 2.86, + "learning_rate": 1.0674282372967192e-07, + "loss": 0.0604, + "step": 8903 + }, + { + "epoch": 2.86, + "learning_rate": 1.0623723289392097e-07, + "loss": 0.0703, + "step": 8904 + }, + { + "epoch": 2.86, + "learning_rate": 1.0573283588167782e-07, + "loss": 0.0679, + "step": 8905 + }, + { + "epoch": 2.86, + "learning_rate": 1.0522963275380494e-07, + "loss": 0.0665, + "step": 8906 + }, + { + "epoch": 2.87, + "learning_rate": 1.047276235710204e-07, + "loss": 0.0669, + "step": 8907 + }, + { + "epoch": 2.87, + "learning_rate": 1.0422680839390021e-07, + "loss": 0.0758, + "step": 8908 + }, + { + "epoch": 2.87, + "learning_rate": 1.0372718728287379e-07, + "loss": 0.0703, + "step": 8909 + }, + { + "epoch": 2.87, + "learning_rate": 1.0322876029822738e-07, + "loss": 0.0655, + "step": 8910 + }, + { + "epoch": 2.87, + "learning_rate": 1.0273152750010285e-07, + "loss": 0.0615, + "step": 8911 + }, + { + "epoch": 2.87, + "learning_rate": 1.0223548894849999e-07, + "loss": 0.0675, + "step": 8912 + }, + { + "epoch": 2.87, + "learning_rate": 1.0174064470327206e-07, + "loss": 0.0622, + "step": 8913 + }, + { + "epoch": 2.87, + "learning_rate": 1.0124699482412903e-07, + "loss": 0.0638, + "step": 8914 + }, + { + "epoch": 2.87, + "learning_rate": 1.0075453937063662e-07, + "loss": 0.0694, + "step": 8915 + }, + { + "epoch": 2.87, + "learning_rate": 1.0026327840221728e-07, + "loss": 0.0772, + "step": 8916 + }, + { + "epoch": 2.87, + "learning_rate": 9.977321197814694e-08, + "loss": 0.0663, + "step": 8917 + }, + { + "epoch": 2.87, + "learning_rate": 9.928434015756161e-08, + "loss": 0.0661, + "step": 8918 + }, + { + "epoch": 2.87, + "learning_rate": 9.879666299944857e-08, + "loss": 0.0699, + "step": 8919 + }, + { + "epoch": 2.87, + "learning_rate": 9.831018056265518e-08, + "loss": 0.0745, + "step": 8920 + }, + { + "epoch": 2.87, + "learning_rate": 9.782489290588004e-08, + "loss": 0.0657, + "step": 8921 + }, + { + "epoch": 2.87, + "learning_rate": 9.734080008768076e-08, + "loss": 0.0637, + "step": 8922 + }, + { + "epoch": 2.87, + "learning_rate": 9.685790216647173e-08, + "loss": 0.0665, + "step": 8923 + }, + { + "epoch": 2.87, + "learning_rate": 9.637619920051856e-08, + "loss": 0.0698, + "step": 8924 + }, + { + "epoch": 2.87, + "learning_rate": 9.589569124794918e-08, + "loss": 0.0685, + "step": 8925 + }, + { + "epoch": 2.87, + "learning_rate": 9.541637836674056e-08, + "loss": 0.0646, + "step": 8926 + }, + { + "epoch": 2.87, + "learning_rate": 9.493826061472977e-08, + "loss": 0.0647, + "step": 8927 + }, + { + "epoch": 2.87, + "learning_rate": 9.446133804960733e-08, + "loss": 0.0677, + "step": 8928 + }, + { + "epoch": 2.87, + "learning_rate": 9.398561072892387e-08, + "loss": 0.0635, + "step": 8929 + }, + { + "epoch": 2.87, + "learning_rate": 9.351107871008014e-08, + "loss": 0.0699, + "step": 8930 + }, + { + "epoch": 2.87, + "learning_rate": 9.303774205033589e-08, + "loss": 0.0697, + "step": 8931 + }, + { + "epoch": 2.87, + "learning_rate": 9.256560080680543e-08, + "loss": 0.071, + "step": 8932 + }, + { + "epoch": 2.87, + "learning_rate": 9.209465503646098e-08, + "loss": 0.0778, + "step": 8933 + }, + { + "epoch": 2.87, + "learning_rate": 9.162490479612596e-08, + "loss": 0.0754, + "step": 8934 + }, + { + "epoch": 2.87, + "learning_rate": 9.115635014248502e-08, + "loss": 0.0722, + "step": 8935 + }, + { + "epoch": 2.87, + "learning_rate": 9.068899113207519e-08, + "loss": 0.0726, + "step": 8936 + }, + { + "epoch": 2.87, + "learning_rate": 9.022282782128911e-08, + "loss": 0.0685, + "step": 8937 + }, + { + "epoch": 2.88, + "learning_rate": 8.975786026637734e-08, + "loss": 0.0709, + "step": 8938 + }, + { + "epoch": 2.88, + "learning_rate": 8.929408852344501e-08, + "loss": 0.0758, + "step": 8939 + }, + { + "epoch": 2.88, + "learning_rate": 8.883151264845069e-08, + "loss": 0.0687, + "step": 8940 + }, + { + "epoch": 2.88, + "learning_rate": 8.837013269721306e-08, + "loss": 0.073, + "step": 8941 + }, + { + "epoch": 2.88, + "learning_rate": 8.790994872540315e-08, + "loss": 0.0711, + "step": 8942 + }, + { + "epoch": 2.88, + "learning_rate": 8.745096078854765e-08, + "loss": 0.0667, + "step": 8943 + }, + { + "epoch": 2.88, + "learning_rate": 8.699316894203225e-08, + "loss": 0.0648, + "step": 8944 + }, + { + "epoch": 2.88, + "learning_rate": 8.653657324109499e-08, + "loss": 0.0609, + "step": 8945 + }, + { + "epoch": 2.88, + "learning_rate": 8.608117374083069e-08, + "loss": 0.0674, + "step": 8946 + }, + { + "epoch": 2.88, + "learning_rate": 8.562697049618874e-08, + "loss": 0.0705, + "step": 8947 + }, + { + "epoch": 2.88, + "learning_rate": 8.517396356197749e-08, + "loss": 0.0657, + "step": 8948 + }, + { + "epoch": 2.88, + "learning_rate": 8.472215299285658e-08, + "loss": 0.0647, + "step": 8949 + }, + { + "epoch": 2.88, + "learning_rate": 8.42715388433446e-08, + "loss": 0.0616, + "step": 8950 + }, + { + "epoch": 2.88, + "learning_rate": 8.382212116781475e-08, + "loss": 0.0648, + "step": 8951 + }, + { + "epoch": 2.88, + "learning_rate": 8.337390002049473e-08, + "loss": 0.0689, + "step": 8952 + }, + { + "epoch": 2.88, + "learning_rate": 8.292687545546907e-08, + "loss": 0.0701, + "step": 8953 + }, + { + "epoch": 2.88, + "learning_rate": 8.248104752667796e-08, + "loss": 0.0643, + "step": 8954 + }, + { + "epoch": 2.88, + "learning_rate": 8.203641628791614e-08, + "loss": 0.0753, + "step": 8955 + }, + { + "epoch": 2.88, + "learning_rate": 8.159298179283515e-08, + "loss": 0.0718, + "step": 8956 + }, + { + "epoch": 2.88, + "learning_rate": 8.115074409494328e-08, + "loss": 0.0675, + "step": 8957 + }, + { + "epoch": 2.88, + "learning_rate": 8.070970324760009e-08, + "loss": 0.0714, + "step": 8958 + }, + { + "epoch": 2.88, + "learning_rate": 8.02698593040252e-08, + "loss": 0.0673, + "step": 8959 + }, + { + "epoch": 2.88, + "learning_rate": 7.983121231729174e-08, + "loss": 0.0704, + "step": 8960 + }, + { + "epoch": 2.88, + "learning_rate": 7.939376234032847e-08, + "loss": 0.0707, + "step": 8961 + }, + { + "epoch": 2.88, + "learning_rate": 7.895750942591984e-08, + "loss": 0.0666, + "step": 8962 + }, + { + "epoch": 2.88, + "learning_rate": 7.852245362670707e-08, + "loss": 0.0714, + "step": 8963 + }, + { + "epoch": 2.88, + "learning_rate": 7.808859499518374e-08, + "loss": 0.0717, + "step": 8964 + }, + { + "epoch": 2.88, + "learning_rate": 7.765593358370349e-08, + "loss": 0.0643, + "step": 8965 + }, + { + "epoch": 2.88, + "learning_rate": 7.722446944447126e-08, + "loss": 0.0653, + "step": 8966 + }, + { + "epoch": 2.88, + "learning_rate": 7.679420262954984e-08, + "loss": 0.065, + "step": 8967 + }, + { + "epoch": 2.88, + "learning_rate": 7.636513319085659e-08, + "loss": 0.0627, + "step": 8968 + }, + { + "epoch": 2.89, + "learning_rate": 7.593726118016676e-08, + "loss": 0.0681, + "step": 8969 + }, + { + "epoch": 2.89, + "learning_rate": 7.551058664910683e-08, + "loss": 0.0705, + "step": 8970 + }, + { + "epoch": 2.89, + "learning_rate": 7.508510964916338e-08, + "loss": 0.0664, + "step": 8971 + }, + { + "epoch": 2.89, + "learning_rate": 7.466083023167536e-08, + "loss": 0.0673, + "step": 8972 + }, + { + "epoch": 2.89, + "learning_rate": 7.423774844783626e-08, + "loss": 0.0658, + "step": 8973 + }, + { + "epoch": 2.89, + "learning_rate": 7.38158643486997e-08, + "loss": 0.0708, + "step": 8974 + }, + { + "epoch": 2.89, + "learning_rate": 7.339517798517048e-08, + "loss": 0.0648, + "step": 8975 + }, + { + "epoch": 2.89, + "learning_rate": 7.297568940801136e-08, + "loss": 0.0668, + "step": 8976 + }, + { + "epoch": 2.89, + "learning_rate": 7.25573986678385e-08, + "loss": 0.0695, + "step": 8977 + }, + { + "epoch": 2.89, + "learning_rate": 7.214030581512599e-08, + "loss": 0.0659, + "step": 8978 + }, + { + "epoch": 2.89, + "learning_rate": 7.172441090020022e-08, + "loss": 0.0733, + "step": 8979 + }, + { + "epoch": 2.89, + "learning_rate": 7.13097139732466e-08, + "loss": 0.0679, + "step": 8980 + }, + { + "epoch": 2.89, + "learning_rate": 7.0896215084304e-08, + "loss": 0.0716, + "step": 8981 + }, + { + "epoch": 2.89, + "learning_rate": 7.048391428326585e-08, + "loss": 0.0663, + "step": 8982 + }, + { + "epoch": 2.89, + "learning_rate": 7.007281161988233e-08, + "loss": 0.0678, + "step": 8983 + }, + { + "epoch": 2.89, + "learning_rate": 6.966290714375934e-08, + "loss": 0.0647, + "step": 8984 + }, + { + "epoch": 2.89, + "learning_rate": 6.925420090435842e-08, + "loss": 0.0666, + "step": 8985 + }, + { + "epoch": 2.89, + "learning_rate": 6.884669295099456e-08, + "loss": 0.0721, + "step": 8986 + }, + { + "epoch": 2.89, + "learning_rate": 6.844038333283953e-08, + "loss": 0.0683, + "step": 8987 + }, + { + "epoch": 2.89, + "learning_rate": 6.80352720989208e-08, + "loss": 0.0649, + "step": 8988 + }, + { + "epoch": 2.89, + "learning_rate": 6.763135929812036e-08, + "loss": 0.0665, + "step": 8989 + }, + { + "epoch": 2.89, + "learning_rate": 6.722864497917703e-08, + "loss": 0.0706, + "step": 8990 + }, + { + "epoch": 2.89, + "learning_rate": 6.682712919068301e-08, + "loss": 0.0668, + "step": 8991 + }, + { + "epoch": 2.89, + "learning_rate": 6.642681198108735e-08, + "loss": 0.0601, + "step": 8992 + }, + { + "epoch": 2.89, + "learning_rate": 6.602769339869475e-08, + "loss": 0.0666, + "step": 8993 + }, + { + "epoch": 2.89, + "learning_rate": 6.562977349166222e-08, + "loss": 0.0681, + "step": 8994 + }, + { + "epoch": 2.89, + "learning_rate": 6.523305230800691e-08, + "loss": 0.0667, + "step": 8995 + }, + { + "epoch": 2.89, + "learning_rate": 6.483752989559722e-08, + "loss": 0.064, + "step": 8996 + }, + { + "epoch": 2.89, + "learning_rate": 6.44432063021605e-08, + "loss": 0.0779, + "step": 8997 + }, + { + "epoch": 2.89, + "learning_rate": 6.405008157527537e-08, + "loss": 0.0634, + "step": 8998 + }, + { + "epoch": 2.89, + "learning_rate": 6.365815576237944e-08, + "loss": 0.067, + "step": 8999 + }, + { + "epoch": 2.9, + "learning_rate": 6.326742891076376e-08, + "loss": 0.0755, + "step": 9000 + }, + { + "epoch": 2.9, + "learning_rate": 6.287790106757396e-08, + "loss": 0.0689, + "step": 9001 + }, + { + "epoch": 2.9, + "learning_rate": 6.248957227981467e-08, + "loss": 0.0671, + "step": 9002 + }, + { + "epoch": 2.9, + "learning_rate": 6.210244259433951e-08, + "loss": 0.0665, + "step": 9003 + }, + { + "epoch": 2.9, + "learning_rate": 6.171651205786556e-08, + "loss": 0.0693, + "step": 9004 + }, + { + "epoch": 2.9, + "learning_rate": 6.133178071695778e-08, + "loss": 0.0654, + "step": 9005 + }, + { + "epoch": 2.9, + "learning_rate": 6.094824861804017e-08, + "loss": 0.0722, + "step": 9006 + }, + { + "epoch": 2.9, + "learning_rate": 6.056591580739235e-08, + "loss": 0.0712, + "step": 9007 + }, + { + "epoch": 2.9, + "learning_rate": 6.018478233114634e-08, + "loss": 0.0651, + "step": 9008 + }, + { + "epoch": 2.9, + "learning_rate": 5.980484823529309e-08, + "loss": 0.0656, + "step": 9009 + }, + { + "epoch": 2.9, + "learning_rate": 5.942611356567707e-08, + "loss": 0.0658, + "step": 9010 + }, + { + "epoch": 2.9, + "learning_rate": 5.9048578367996157e-08, + "loss": 0.0667, + "step": 9011 + }, + { + "epoch": 2.9, + "learning_rate": 5.8672242687807246e-08, + "loss": 0.0671, + "step": 9012 + }, + { + "epoch": 2.9, + "learning_rate": 5.829710657051957e-08, + "loss": 0.0719, + "step": 9013 + }, + { + "epoch": 2.9, + "learning_rate": 5.792317006139914e-08, + "loss": 0.0721, + "step": 9014 + }, + { + "epoch": 2.9, + "learning_rate": 5.7550433205566523e-08, + "loss": 0.0696, + "step": 9015 + }, + { + "epoch": 2.9, + "learning_rate": 5.717889604799798e-08, + "loss": 0.0701, + "step": 9016 + }, + { + "epoch": 2.9, + "learning_rate": 5.6808558633524304e-08, + "loss": 0.0666, + "step": 9017 + }, + { + "epoch": 2.9, + "learning_rate": 5.6439421006833086e-08, + "loss": 0.0677, + "step": 9018 + }, + { + "epoch": 2.9, + "learning_rate": 5.607148321246425e-08, + "loss": 0.0691, + "step": 9019 + }, + { + "epoch": 2.9, + "learning_rate": 5.5704745294815624e-08, + "loss": 0.07, + "step": 9020 + }, + { + "epoch": 2.9, + "learning_rate": 5.533920729814068e-08, + "loss": 0.0641, + "step": 9021 + }, + { + "epoch": 2.9, + "learning_rate": 5.4974869266544165e-08, + "loss": 0.0684, + "step": 9022 + }, + { + "epoch": 2.9, + "learning_rate": 5.461173124399088e-08, + "loss": 0.073, + "step": 9023 + }, + { + "epoch": 2.9, + "learning_rate": 5.424979327429691e-08, + "loss": 0.0683, + "step": 9024 + }, + { + "epoch": 2.9, + "learning_rate": 5.3889055401137315e-08, + "loss": 0.069, + "step": 9025 + }, + { + "epoch": 2.9, + "learning_rate": 5.352951766803727e-08, + "loss": 0.0719, + "step": 9026 + }, + { + "epoch": 2.9, + "learning_rate": 5.317118011838318e-08, + "loss": 0.0701, + "step": 9027 + }, + { + "epoch": 2.9, + "learning_rate": 5.281404279541269e-08, + "loss": 0.0706, + "step": 9028 + }, + { + "epoch": 2.9, + "learning_rate": 5.245810574221799e-08, + "loss": 0.0695, + "step": 9029 + }, + { + "epoch": 2.9, + "learning_rate": 5.210336900175028e-08, + "loss": 0.0744, + "step": 9030 + }, + { + "epoch": 2.91, + "learning_rate": 5.174983261681088e-08, + "loss": 0.0652, + "step": 9031 + }, + { + "epoch": 2.91, + "learning_rate": 5.1397496630061216e-08, + "loss": 0.0647, + "step": 9032 + }, + { + "epoch": 2.91, + "learning_rate": 5.104636108401506e-08, + "loss": 0.0695, + "step": 9033 + }, + { + "epoch": 2.91, + "learning_rate": 5.0696426021041856e-08, + "loss": 0.0722, + "step": 9034 + }, + { + "epoch": 2.91, + "learning_rate": 5.0347691483365604e-08, + "loss": 0.0708, + "step": 9035 + }, + { + "epoch": 2.91, + "learning_rate": 5.000015751306819e-08, + "loss": 0.064, + "step": 9036 + }, + { + "epoch": 2.91, + "learning_rate": 4.965382415208164e-08, + "loss": 0.0692, + "step": 9037 + }, + { + "epoch": 2.91, + "learning_rate": 4.9308691442196964e-08, + "loss": 0.0669, + "step": 9038 + }, + { + "epoch": 2.91, + "learning_rate": 4.896475942506085e-08, + "loss": 0.0684, + "step": 9039 + }, + { + "epoch": 2.91, + "learning_rate": 4.8622028142172316e-08, + "loss": 0.0666, + "step": 9040 + }, + { + "epoch": 2.91, + "learning_rate": 4.828049763488718e-08, + "loss": 0.0675, + "step": 9041 + }, + { + "epoch": 2.91, + "learning_rate": 4.7940167944415804e-08, + "loss": 0.0682, + "step": 9042 + }, + { + "epoch": 2.91, + "learning_rate": 4.760103911182423e-08, + "loss": 0.0706, + "step": 9043 + }, + { + "epoch": 2.91, + "learning_rate": 4.726311117803084e-08, + "loss": 0.0613, + "step": 9044 + }, + { + "epoch": 2.91, + "learning_rate": 4.692638418381523e-08, + "loss": 0.0664, + "step": 9045 + }, + { + "epoch": 2.91, + "learning_rate": 4.6590858169806016e-08, + "loss": 0.0739, + "step": 9046 + }, + { + "epoch": 2.91, + "learning_rate": 4.6256533176488594e-08, + "loss": 0.0726, + "step": 9047 + }, + { + "epoch": 2.91, + "learning_rate": 4.592340924420624e-08, + "loss": 0.0641, + "step": 9048 + }, + { + "epoch": 2.91, + "learning_rate": 4.5591486413154584e-08, + "loss": 0.0701, + "step": 9049 + }, + { + "epoch": 2.91, + "learning_rate": 4.52607647233827e-08, + "loss": 0.0752, + "step": 9050 + }, + { + "epoch": 2.91, + "learning_rate": 4.493124421479977e-08, + "loss": 0.0688, + "step": 9051 + }, + { + "epoch": 2.91, + "learning_rate": 4.460292492716512e-08, + "loss": 0.0684, + "step": 9052 + }, + { + "epoch": 2.91, + "learning_rate": 4.4275806900094807e-08, + "loss": 0.0643, + "step": 9053 + }, + { + "epoch": 2.91, + "learning_rate": 4.3949890173061726e-08, + "loss": 0.0638, + "step": 9054 + }, + { + "epoch": 2.91, + "learning_rate": 4.36251747853933e-08, + "loss": 0.0821, + "step": 9055 + }, + { + "epoch": 2.91, + "learning_rate": 4.3301660776268186e-08, + "loss": 0.0661, + "step": 9056 + }, + { + "epoch": 2.91, + "learning_rate": 4.2979348184725156e-08, + "loss": 0.0687, + "step": 9057 + }, + { + "epoch": 2.91, + "learning_rate": 4.2658237049655325e-08, + "loss": 0.073, + "step": 9058 + }, + { + "epoch": 2.91, + "learning_rate": 4.233832740980437e-08, + "loss": 0.0623, + "step": 9059 + }, + { + "epoch": 2.91, + "learning_rate": 4.201961930377474e-08, + "loss": 0.0726, + "step": 9060 + }, + { + "epoch": 2.91, + "learning_rate": 4.170211277002345e-08, + "loss": 0.0744, + "step": 9061 + }, + { + "epoch": 2.92, + "learning_rate": 4.1385807846862084e-08, + "loss": 0.0627, + "step": 9062 + }, + { + "epoch": 2.92, + "learning_rate": 4.1070704572456764e-08, + "loss": 0.0731, + "step": 9063 + }, + { + "epoch": 2.92, + "learning_rate": 4.0756802984829315e-08, + "loss": 0.0662, + "step": 9064 + }, + { + "epoch": 2.92, + "learning_rate": 4.044410312185609e-08, + "loss": 0.071, + "step": 9065 + }, + { + "epoch": 2.92, + "learning_rate": 4.013260502127026e-08, + "loss": 0.0667, + "step": 9066 + }, + { + "epoch": 2.92, + "learning_rate": 3.982230872065729e-08, + "loss": 0.0639, + "step": 9067 + }, + { + "epoch": 2.92, + "learning_rate": 3.951321425745946e-08, + "loss": 0.0688, + "step": 9068 + }, + { + "epoch": 2.92, + "learning_rate": 3.9205321668972506e-08, + "loss": 0.0687, + "step": 9069 + }, + { + "epoch": 2.92, + "learning_rate": 3.889863099234892e-08, + "loss": 0.0703, + "step": 9070 + }, + { + "epoch": 2.92, + "learning_rate": 3.859314226459465e-08, + "loss": 0.0734, + "step": 9071 + }, + { + "epoch": 2.92, + "learning_rate": 3.828885552257244e-08, + "loss": 0.0696, + "step": 9072 + }, + { + "epoch": 2.92, + "learning_rate": 3.7985770802997365e-08, + "loss": 0.0655, + "step": 9073 + }, + { + "epoch": 2.92, + "learning_rate": 3.768388814244128e-08, + "loss": 0.0706, + "step": 9074 + }, + { + "epoch": 2.92, + "learning_rate": 3.738320757733283e-08, + "loss": 0.071, + "step": 9075 + }, + { + "epoch": 2.92, + "learning_rate": 3.708372914394964e-08, + "loss": 0.0709, + "step": 9076 + }, + { + "epoch": 2.92, + "learning_rate": 3.6785452878429494e-08, + "loss": 0.0775, + "step": 9077 + }, + { + "epoch": 2.92, + "learning_rate": 3.648837881676581e-08, + "loss": 0.0755, + "step": 9078 + }, + { + "epoch": 2.92, + "learning_rate": 3.6192506994802146e-08, + "loss": 0.0678, + "step": 9079 + }, + { + "epoch": 2.92, + "learning_rate": 3.5897837448239935e-08, + "loss": 0.0661, + "step": 9080 + }, + { + "epoch": 2.92, + "learning_rate": 3.560437021263741e-08, + "loss": 0.0755, + "step": 9081 + }, + { + "epoch": 2.92, + "learning_rate": 3.53121053234029e-08, + "loss": 0.0698, + "step": 9082 + }, + { + "epoch": 2.92, + "learning_rate": 3.5021042815804874e-08, + "loss": 0.0679, + "step": 9083 + }, + { + "epoch": 2.92, + "learning_rate": 3.47311827249619e-08, + "loss": 0.0701, + "step": 9084 + }, + { + "epoch": 2.92, + "learning_rate": 3.4442525085850446e-08, + "loss": 0.071, + "step": 9085 + }, + { + "epoch": 2.92, + "learning_rate": 3.4155069933301535e-08, + "loss": 0.0682, + "step": 9086 + }, + { + "epoch": 2.92, + "learning_rate": 3.386881730200076e-08, + "loss": 0.0704, + "step": 9087 + }, + { + "epoch": 2.92, + "learning_rate": 3.358376722648826e-08, + "loss": 0.0704, + "step": 9088 + }, + { + "epoch": 2.92, + "learning_rate": 3.329991974115987e-08, + "loss": 0.0707, + "step": 9089 + }, + { + "epoch": 2.92, + "learning_rate": 3.301727488026485e-08, + "loss": 0.0683, + "step": 9090 + }, + { + "epoch": 2.92, + "learning_rate": 3.273583267790925e-08, + "loss": 0.066, + "step": 9091 + }, + { + "epoch": 2.92, + "learning_rate": 3.2455593168052576e-08, + "loss": 0.0706, + "step": 9092 + }, + { + "epoch": 2.93, + "learning_rate": 3.217655638451112e-08, + "loss": 0.0672, + "step": 9093 + }, + { + "epoch": 2.93, + "learning_rate": 3.1898722360952374e-08, + "loss": 0.0671, + "step": 9094 + }, + { + "epoch": 2.93, + "learning_rate": 3.162209113090286e-08, + "loss": 0.0732, + "step": 9095 + }, + { + "epoch": 2.93, + "learning_rate": 3.134666272774034e-08, + "loss": 0.0653, + "step": 9096 + }, + { + "epoch": 2.93, + "learning_rate": 3.107243718470043e-08, + "loss": 0.0667, + "step": 9097 + }, + { + "epoch": 2.93, + "learning_rate": 3.079941453487223e-08, + "loss": 0.0695, + "step": 9098 + }, + { + "epoch": 2.93, + "learning_rate": 3.052759481119938e-08, + "loss": 0.0743, + "step": 9099 + }, + { + "epoch": 2.93, + "learning_rate": 3.02569780464812e-08, + "loss": 0.0687, + "step": 9100 + }, + { + "epoch": 2.93, + "learning_rate": 2.998756427337157e-08, + "loss": 0.0708, + "step": 9101 + }, + { + "epoch": 2.93, + "learning_rate": 2.9719353524378936e-08, + "loss": 0.0719, + "step": 9102 + }, + { + "epoch": 2.93, + "learning_rate": 2.9452345831866292e-08, + "loss": 0.0625, + "step": 9103 + }, + { + "epoch": 2.93, + "learning_rate": 2.9186541228052313e-08, + "loss": 0.0642, + "step": 9104 + }, + { + "epoch": 2.93, + "learning_rate": 2.892193974501023e-08, + "loss": 0.0702, + "step": 9105 + }, + { + "epoch": 2.93, + "learning_rate": 2.865854141466784e-08, + "loss": 0.0693, + "step": 9106 + }, + { + "epoch": 2.93, + "learning_rate": 2.8396346268807497e-08, + "loss": 0.0667, + "step": 9107 + }, + { + "epoch": 2.93, + "learning_rate": 2.8135354339067223e-08, + "loss": 0.074, + "step": 9108 + }, + { + "epoch": 2.93, + "learning_rate": 2.7875565656939608e-08, + "loss": 0.0712, + "step": 9109 + }, + { + "epoch": 2.93, + "learning_rate": 2.7616980253771798e-08, + "loss": 0.07, + "step": 9110 + }, + { + "epoch": 2.93, + "learning_rate": 2.73595981607655e-08, + "loss": 0.0679, + "step": 9111 + }, + { + "epoch": 2.93, + "learning_rate": 2.7103419408978092e-08, + "loss": 0.0728, + "step": 9112 + }, + { + "epoch": 2.93, + "learning_rate": 2.684844402932041e-08, + "loss": 0.0722, + "step": 9113 + }, + { + "epoch": 2.93, + "learning_rate": 2.6594672052560054e-08, + "loss": 0.0697, + "step": 9114 + }, + { + "epoch": 2.93, + "learning_rate": 2.6342103509315874e-08, + "loss": 0.0692, + "step": 9115 + }, + { + "epoch": 2.93, + "learning_rate": 2.609073843006682e-08, + "loss": 0.0711, + "step": 9116 + }, + { + "epoch": 2.93, + "learning_rate": 2.584057684514085e-08, + "loss": 0.0675, + "step": 9117 + }, + { + "epoch": 2.93, + "learning_rate": 2.5591618784726047e-08, + "loss": 0.0704, + "step": 9118 + }, + { + "epoch": 2.93, + "learning_rate": 2.5343864278861707e-08, + "loss": 0.066, + "step": 9119 + }, + { + "epoch": 2.93, + "learning_rate": 2.509731335744281e-08, + "loss": 0.0626, + "step": 9120 + }, + { + "epoch": 2.93, + "learning_rate": 2.4851966050218888e-08, + "loss": 0.0671, + "step": 9121 + }, + { + "epoch": 2.93, + "learning_rate": 2.4607822386795154e-08, + "loss": 0.0671, + "step": 9122 + }, + { + "epoch": 2.93, + "learning_rate": 2.4364882396630264e-08, + "loss": 0.0798, + "step": 9123 + }, + { + "epoch": 2.94, + "learning_rate": 2.4123146109039654e-08, + "loss": 0.0738, + "step": 9124 + }, + { + "epoch": 2.94, + "learning_rate": 2.3882613553191102e-08, + "loss": 0.0708, + "step": 9125 + }, + { + "epoch": 2.94, + "learning_rate": 2.3643284758109175e-08, + "loss": 0.0646, + "step": 9126 + }, + { + "epoch": 2.94, + "learning_rate": 2.3405159752672992e-08, + "loss": 0.0699, + "step": 9127 + }, + { + "epoch": 2.94, + "learning_rate": 2.31682385656129e-08, + "loss": 0.0696, + "step": 9128 + }, + { + "epoch": 2.94, + "learning_rate": 2.2932521225519366e-08, + "loss": 0.0686, + "step": 9129 + }, + { + "epoch": 2.94, + "learning_rate": 2.2698007760834085e-08, + "loss": 0.0652, + "step": 9130 + }, + { + "epoch": 2.94, + "learning_rate": 2.246469819985442e-08, + "loss": 0.0742, + "step": 9131 + }, + { + "epoch": 2.94, + "learning_rate": 2.22325925707334e-08, + "loss": 0.0677, + "step": 9132 + }, + { + "epoch": 2.94, + "learning_rate": 2.2001690901476415e-08, + "loss": 0.0668, + "step": 9133 + }, + { + "epoch": 2.94, + "learning_rate": 2.177199321994672e-08, + "loss": 0.0671, + "step": 9134 + }, + { + "epoch": 2.94, + "learning_rate": 2.1543499553858817e-08, + "loss": 0.0716, + "step": 9135 + }, + { + "epoch": 2.94, + "learning_rate": 2.131620993078509e-08, + "loss": 0.0797, + "step": 9136 + }, + { + "epoch": 2.94, + "learning_rate": 2.1090124378150278e-08, + "loss": 0.0756, + "step": 9137 + }, + { + "epoch": 2.94, + "learning_rate": 2.0865242923235885e-08, + "loss": 0.064, + "step": 9138 + }, + { + "epoch": 2.94, + "learning_rate": 2.064156559317576e-08, + "loss": 0.066, + "step": 9139 + }, + { + "epoch": 2.94, + "learning_rate": 2.041909241496165e-08, + "loss": 0.0714, + "step": 9140 + }, + { + "epoch": 2.94, + "learning_rate": 2.0197823415436524e-08, + "loss": 0.0574, + "step": 9141 + }, + { + "epoch": 2.94, + "learning_rate": 1.9977758621299025e-08, + "loss": 0.069, + "step": 9142 + }, + { + "epoch": 2.94, + "learning_rate": 1.975889805910569e-08, + "loss": 0.0712, + "step": 9143 + }, + { + "epoch": 2.94, + "learning_rate": 1.9541241755263174e-08, + "loss": 0.0663, + "step": 9144 + }, + { + "epoch": 2.94, + "learning_rate": 1.932478973603491e-08, + "loss": 0.07, + "step": 9145 + }, + { + "epoch": 2.94, + "learning_rate": 1.9109542027540006e-08, + "loss": 0.0746, + "step": 9146 + }, + { + "epoch": 2.94, + "learning_rate": 1.889549865574991e-08, + "loss": 0.0757, + "step": 9147 + }, + { + "epoch": 2.94, + "learning_rate": 1.8682659646491743e-08, + "loss": 0.0651, + "step": 9148 + }, + { + "epoch": 2.94, + "learning_rate": 1.8471025025449395e-08, + "loss": 0.064, + "step": 9149 + }, + { + "epoch": 2.94, + "learning_rate": 1.8260594818159116e-08, + "loss": 0.0724, + "step": 9150 + }, + { + "epoch": 2.94, + "learning_rate": 1.805136905001059e-08, + "loss": 0.0618, + "step": 9151 + }, + { + "epoch": 2.94, + "learning_rate": 1.7843347746251403e-08, + "loss": 0.0725, + "step": 9152 + }, + { + "epoch": 2.94, + "learning_rate": 1.7636530931982586e-08, + "loss": 0.0717, + "step": 9153 + }, + { + "epoch": 2.94, + "learning_rate": 1.7430918632157513e-08, + "loss": 0.0699, + "step": 9154 + }, + { + "epoch": 2.95, + "learning_rate": 1.7226510871588553e-08, + "loss": 0.0804, + "step": 9155 + }, + { + "epoch": 2.95, + "learning_rate": 1.7023307674940424e-08, + "loss": 0.0712, + "step": 9156 + }, + { + "epoch": 2.95, + "learning_rate": 1.6821309066730183e-08, + "loss": 0.0638, + "step": 9157 + }, + { + "epoch": 2.95, + "learning_rate": 1.6620515071333887e-08, + "loss": 0.0705, + "step": 9158 + }, + { + "epoch": 2.95, + "learning_rate": 1.642092571297993e-08, + "loss": 0.0694, + "step": 9159 + }, + { + "epoch": 2.95, + "learning_rate": 1.622254101575016e-08, + "loss": 0.0697, + "step": 9160 + }, + { + "epoch": 2.95, + "learning_rate": 1.6025361003583206e-08, + "loss": 0.0675, + "step": 9161 + }, + { + "epoch": 2.95, + "learning_rate": 1.582938570027337e-08, + "loss": 0.0715, + "step": 9162 + }, + { + "epoch": 2.95, + "learning_rate": 1.563461512946618e-08, + "loss": 0.0702, + "step": 9163 + }, + { + "epoch": 2.95, + "learning_rate": 1.544104931466284e-08, + "loss": 0.0665, + "step": 9164 + }, + { + "epoch": 2.95, + "learning_rate": 1.5248688279222435e-08, + "loss": 0.0708, + "step": 9165 + }, + { + "epoch": 2.95, + "learning_rate": 1.5057532046353075e-08, + "loss": 0.063, + "step": 9166 + }, + { + "epoch": 2.95, + "learning_rate": 1.486758063912186e-08, + "loss": 0.0658, + "step": 9167 + }, + { + "epoch": 2.95, + "learning_rate": 1.4678834080450455e-08, + "loss": 0.075, + "step": 9168 + }, + { + "epoch": 2.95, + "learning_rate": 1.4491292393110645e-08, + "loss": 0.0673, + "step": 9169 + }, + { + "epoch": 2.95, + "learning_rate": 1.4304955599735437e-08, + "loss": 0.0699, + "step": 9170 + }, + { + "epoch": 2.95, + "learning_rate": 1.4119823722806847e-08, + "loss": 0.0706, + "step": 9171 + }, + { + "epoch": 2.95, + "learning_rate": 1.3935896784663671e-08, + "loss": 0.0675, + "step": 9172 + }, + { + "epoch": 2.95, + "learning_rate": 1.375317480750038e-08, + "loss": 0.0702, + "step": 9173 + }, + { + "epoch": 2.95, + "learning_rate": 1.3571657813364892e-08, + "loss": 0.0711, + "step": 9174 + }, + { + "epoch": 2.95, + "learning_rate": 1.3391345824158574e-08, + "loss": 0.061, + "step": 9175 + }, + { + "epoch": 2.95, + "learning_rate": 1.3212238861639581e-08, + "loss": 0.0667, + "step": 9176 + }, + { + "epoch": 2.95, + "learning_rate": 1.3034336947420623e-08, + "loss": 0.0679, + "step": 9177 + }, + { + "epoch": 2.95, + "learning_rate": 1.285764010296564e-08, + "loss": 0.0721, + "step": 9178 + }, + { + "epoch": 2.95, + "learning_rate": 1.2682148349598689e-08, + "loss": 0.0651, + "step": 9179 + }, + { + "epoch": 2.95, + "learning_rate": 1.250786170849283e-08, + "loss": 0.0671, + "step": 9180 + }, + { + "epoch": 2.95, + "learning_rate": 1.233478020067902e-08, + "loss": 0.0716, + "step": 9181 + }, + { + "epoch": 2.95, + "learning_rate": 1.2162903847042772e-08, + "loss": 0.066, + "step": 9182 + }, + { + "epoch": 2.95, + "learning_rate": 1.1992232668323056e-08, + "loss": 0.0661, + "step": 9183 + }, + { + "epoch": 2.95, + "learning_rate": 1.1822766685112285e-08, + "loss": 0.0719, + "step": 9184 + }, + { + "epoch": 2.95, + "learning_rate": 1.165450591786077e-08, + "loss": 0.0679, + "step": 9185 + }, + { + "epoch": 2.96, + "learning_rate": 1.1487450386871157e-08, + "loss": 0.0736, + "step": 9186 + }, + { + "epoch": 2.96, + "learning_rate": 1.1321600112300657e-08, + "loss": 0.0662, + "step": 9187 + }, + { + "epoch": 2.96, + "learning_rate": 1.1156955114162149e-08, + "loss": 0.0662, + "step": 9188 + }, + { + "epoch": 2.96, + "learning_rate": 1.0993515412321964e-08, + "loss": 0.0744, + "step": 9189 + }, + { + "epoch": 2.96, + "learning_rate": 1.0831281026500995e-08, + "loss": 0.0696, + "step": 9190 + }, + { + "epoch": 2.96, + "learning_rate": 1.0670251976275803e-08, + "loss": 0.063, + "step": 9191 + }, + { + "epoch": 2.96, + "learning_rate": 1.0510428281076401e-08, + "loss": 0.0627, + "step": 9192 + }, + { + "epoch": 2.96, + "learning_rate": 1.0351809960188475e-08, + "loss": 0.0692, + "step": 9193 + }, + { + "epoch": 2.96, + "learning_rate": 1.0194397032750047e-08, + "loss": 0.0636, + "step": 9194 + }, + { + "epoch": 2.96, + "learning_rate": 1.0038189517757036e-08, + "loss": 0.0707, + "step": 9195 + }, + { + "epoch": 2.96, + "learning_rate": 9.883187434057694e-09, + "loss": 0.0688, + "step": 9196 + }, + { + "epoch": 2.96, + "learning_rate": 9.729390800354843e-09, + "loss": 0.0665, + "step": 9197 + }, + { + "epoch": 2.96, + "learning_rate": 9.576799635205858e-09, + "loss": 0.0648, + "step": 9198 + }, + { + "epoch": 2.96, + "learning_rate": 9.425413957023788e-09, + "loss": 0.0697, + "step": 9199 + }, + { + "epoch": 2.96, + "learning_rate": 9.275233784075132e-09, + "loss": 0.0702, + "step": 9200 + }, + { + "epoch": 2.96, + "learning_rate": 9.126259134480953e-09, + "loss": 0.0729, + "step": 9201 + }, + { + "epoch": 2.96, + "learning_rate": 8.97849002621798e-09, + "loss": 0.0673, + "step": 9202 + }, + { + "epoch": 2.96, + "learning_rate": 8.831926477115283e-09, + "loss": 0.0664, + "step": 9203 + }, + { + "epoch": 2.96, + "learning_rate": 8.686568504859827e-09, + "loss": 0.0724, + "step": 9204 + }, + { + "epoch": 2.96, + "learning_rate": 8.542416126989805e-09, + "loss": 0.0639, + "step": 9205 + }, + { + "epoch": 2.96, + "learning_rate": 8.399469360899082e-09, + "loss": 0.0728, + "step": 9206 + }, + { + "epoch": 2.96, + "learning_rate": 8.257728223837191e-09, + "loss": 0.0734, + "step": 9207 + }, + { + "epoch": 2.96, + "learning_rate": 8.11719273290601e-09, + "loss": 0.0655, + "step": 9208 + }, + { + "epoch": 2.96, + "learning_rate": 7.977862905064193e-09, + "loss": 0.0647, + "step": 9209 + }, + { + "epoch": 2.96, + "learning_rate": 7.83973875712385e-09, + "loss": 0.0648, + "step": 9210 + }, + { + "epoch": 2.96, + "learning_rate": 7.702820305751647e-09, + "loss": 0.0644, + "step": 9211 + }, + { + "epoch": 2.96, + "learning_rate": 7.567107567467702e-09, + "loss": 0.0694, + "step": 9212 + }, + { + "epoch": 2.96, + "learning_rate": 7.432600558647806e-09, + "loss": 0.0686, + "step": 9213 + }, + { + "epoch": 2.96, + "learning_rate": 7.299299295523421e-09, + "loss": 0.0662, + "step": 9214 + }, + { + "epoch": 2.96, + "learning_rate": 7.167203794178346e-09, + "loss": 0.0709, + "step": 9215 + }, + { + "epoch": 2.96, + "learning_rate": 7.036314070552053e-09, + "loss": 0.0695, + "step": 9216 + }, + { + "epoch": 2.96, + "learning_rate": 6.906630140437464e-09, + "loss": 0.067, + "step": 9217 + }, + { + "epoch": 2.97, + "learning_rate": 6.778152019484286e-09, + "loss": 0.0648, + "step": 9218 + }, + { + "epoch": 2.97, + "learning_rate": 6.650879723194559e-09, + "loss": 0.0756, + "step": 9219 + }, + { + "epoch": 2.97, + "learning_rate": 6.52481326692489e-09, + "loss": 0.0677, + "step": 9220 + }, + { + "epoch": 2.97, + "learning_rate": 6.399952665887555e-09, + "loss": 0.0701, + "step": 9221 + }, + { + "epoch": 2.97, + "learning_rate": 6.276297935149389e-09, + "loss": 0.0657, + "step": 9222 + }, + { + "epoch": 2.97, + "learning_rate": 6.153849089629571e-09, + "loss": 0.0619, + "step": 9223 + }, + { + "epoch": 2.97, + "learning_rate": 6.032606144104059e-09, + "loss": 0.072, + "step": 9224 + }, + { + "epoch": 2.97, + "learning_rate": 5.912569113203371e-09, + "loss": 0.0676, + "step": 9225 + }, + { + "epoch": 2.97, + "learning_rate": 5.793738011410366e-09, + "loss": 0.0725, + "step": 9226 + }, + { + "epoch": 2.97, + "learning_rate": 5.676112853064686e-09, + "loss": 0.0684, + "step": 9227 + }, + { + "epoch": 2.97, + "learning_rate": 5.5596936523583116e-09, + "loss": 0.0662, + "step": 9228 + }, + { + "epoch": 2.97, + "learning_rate": 5.4444804233411144e-09, + "loss": 0.0696, + "step": 9229 + }, + { + "epoch": 2.97, + "learning_rate": 5.330473179911977e-09, + "loss": 0.0648, + "step": 9230 + }, + { + "epoch": 2.97, + "learning_rate": 5.217671935831004e-09, + "loss": 0.0718, + "step": 9231 + }, + { + "epoch": 2.97, + "learning_rate": 5.106076704706198e-09, + "loss": 0.0647, + "step": 9232 + }, + { + "epoch": 2.97, + "learning_rate": 4.9956875000045646e-09, + "loss": 0.0701, + "step": 9233 + }, + { + "epoch": 2.97, + "learning_rate": 4.88650433504656e-09, + "loss": 0.0717, + "step": 9234 + }, + { + "epoch": 2.97, + "learning_rate": 4.778527223006091e-09, + "loss": 0.0665, + "step": 9235 + }, + { + "epoch": 2.97, + "learning_rate": 4.6717561769116235e-09, + "loss": 0.0668, + "step": 9236 + }, + { + "epoch": 2.97, + "learning_rate": 4.566191209647297e-09, + "loss": 0.068, + "step": 9237 + }, + { + "epoch": 2.97, + "learning_rate": 4.46183233395181e-09, + "loss": 0.0651, + "step": 9238 + }, + { + "epoch": 2.97, + "learning_rate": 4.358679562416202e-09, + "loss": 0.0674, + "step": 9239 + }, + { + "epoch": 2.97, + "learning_rate": 4.256732907487182e-09, + "loss": 0.0687, + "step": 9240 + }, + { + "epoch": 2.97, + "learning_rate": 4.1559923814671335e-09, + "loss": 0.0683, + "step": 9241 + }, + { + "epoch": 2.97, + "learning_rate": 4.0564579965118865e-09, + "loss": 0.0676, + "step": 9242 + }, + { + "epoch": 2.97, + "learning_rate": 3.9581297646296145e-09, + "loss": 0.0786, + "step": 9243 + }, + { + "epoch": 2.97, + "learning_rate": 3.8610076976874915e-09, + "loss": 0.066, + "step": 9244 + }, + { + "epoch": 2.97, + "learning_rate": 3.76509180740392e-09, + "loss": 0.0665, + "step": 9245 + }, + { + "epoch": 2.97, + "learning_rate": 3.6703821053529766e-09, + "loss": 0.0658, + "step": 9246 + }, + { + "epoch": 2.97, + "learning_rate": 3.576878602961076e-09, + "loss": 0.0613, + "step": 9247 + }, + { + "epoch": 2.97, + "learning_rate": 3.4845813115114147e-09, + "loss": 0.0711, + "step": 9248 + }, + { + "epoch": 2.98, + "learning_rate": 3.3934902421417504e-09, + "loss": 0.0678, + "step": 9249 + }, + { + "epoch": 2.98, + "learning_rate": 3.3036054058444013e-09, + "loss": 0.0701, + "step": 9250 + }, + { + "epoch": 2.98, + "learning_rate": 3.2149268134629154e-09, + "loss": 0.075, + "step": 9251 + }, + { + "epoch": 2.98, + "learning_rate": 3.127454475697622e-09, + "loss": 0.0727, + "step": 9252 + }, + { + "epoch": 2.98, + "learning_rate": 3.041188403105633e-09, + "loss": 0.0683, + "step": 9253 + }, + { + "epoch": 2.98, + "learning_rate": 2.9561286060952877e-09, + "loss": 0.0646, + "step": 9254 + }, + { + "epoch": 2.98, + "learning_rate": 2.8722750949283783e-09, + "loss": 0.076, + "step": 9255 + }, + { + "epoch": 2.98, + "learning_rate": 2.7896278797256983e-09, + "loss": 0.0677, + "step": 9256 + }, + { + "epoch": 2.98, + "learning_rate": 2.70818697045816e-09, + "loss": 0.0713, + "step": 9257 + }, + { + "epoch": 2.98, + "learning_rate": 2.6279523769534575e-09, + "loss": 0.0672, + "step": 9258 + }, + { + "epoch": 2.98, + "learning_rate": 2.5489241088927363e-09, + "loss": 0.063, + "step": 9259 + }, + { + "epoch": 2.98, + "learning_rate": 2.471102175812812e-09, + "loss": 0.0714, + "step": 9260 + }, + { + "epoch": 2.98, + "learning_rate": 2.39448658710173e-09, + "loss": 0.0683, + "step": 9261 + }, + { + "epoch": 2.98, + "learning_rate": 2.319077352006538e-09, + "loss": 0.074, + "step": 9262 + }, + { + "epoch": 2.98, + "learning_rate": 2.244874479625514e-09, + "loss": 0.0692, + "step": 9263 + }, + { + "epoch": 2.98, + "learning_rate": 2.1718779789126064e-09, + "loss": 0.069, + "step": 9264 + }, + { + "epoch": 2.98, + "learning_rate": 2.1000878586752147e-09, + "loss": 0.0723, + "step": 9265 + }, + { + "epoch": 2.98, + "learning_rate": 2.0295041275764093e-09, + "loss": 0.0701, + "step": 9266 + }, + { + "epoch": 2.98, + "learning_rate": 1.960126794133821e-09, + "loss": 0.0744, + "step": 9267 + }, + { + "epoch": 2.98, + "learning_rate": 1.8919558667174208e-09, + "loss": 0.0701, + "step": 9268 + }, + { + "epoch": 2.98, + "learning_rate": 1.824991353552852e-09, + "loss": 0.0712, + "step": 9269 + }, + { + "epoch": 2.98, + "learning_rate": 1.759233262721427e-09, + "loss": 0.0666, + "step": 9270 + }, + { + "epoch": 2.98, + "learning_rate": 1.6946816021579104e-09, + "loss": 0.0642, + "step": 9271 + }, + { + "epoch": 2.98, + "learning_rate": 1.6313363796505167e-09, + "loss": 0.0672, + "step": 9272 + }, + { + "epoch": 2.98, + "learning_rate": 1.569197602843131e-09, + "loss": 0.07, + "step": 9273 + }, + { + "epoch": 2.98, + "learning_rate": 1.5082652792342e-09, + "loss": 0.0756, + "step": 9274 + }, + { + "epoch": 2.98, + "learning_rate": 1.4485394161745103e-09, + "loss": 0.0636, + "step": 9275 + }, + { + "epoch": 2.98, + "learning_rate": 1.3900200208727399e-09, + "loss": 0.065, + "step": 9276 + }, + { + "epoch": 2.98, + "learning_rate": 1.332707100388797e-09, + "loss": 0.0699, + "step": 9277 + }, + { + "epoch": 2.98, + "learning_rate": 1.2766006616393712e-09, + "loss": 0.0701, + "step": 9278 + }, + { + "epoch": 2.98, + "learning_rate": 1.221700711393492e-09, + "loss": 0.0672, + "step": 9279 + }, + { + "epoch": 2.99, + "learning_rate": 1.1680072562758604e-09, + "loss": 0.0694, + "step": 9280 + }, + { + "epoch": 2.99, + "learning_rate": 1.115520302765738e-09, + "loss": 0.0667, + "step": 9281 + }, + { + "epoch": 2.99, + "learning_rate": 1.0642398571958369e-09, + "loss": 0.0678, + "step": 9282 + }, + { + "epoch": 2.99, + "learning_rate": 1.01416592575454e-09, + "loss": 0.0691, + "step": 9283 + }, + { + "epoch": 2.99, + "learning_rate": 9.65298514482571e-10, + "loss": 0.0669, + "step": 9284 + }, + { + "epoch": 2.99, + "learning_rate": 9.176376292785449e-10, + "loss": 0.072, + "step": 9285 + }, + { + "epoch": 2.99, + "learning_rate": 8.711832758934169e-10, + "loss": 0.0662, + "step": 9286 + }, + { + "epoch": 2.99, + "learning_rate": 8.259354599304825e-10, + "loss": 0.0674, + "step": 9287 + }, + { + "epoch": 2.99, + "learning_rate": 7.818941868509289e-10, + "loss": 0.0753, + "step": 9288 + }, + { + "epoch": 2.99, + "learning_rate": 7.390594619682834e-10, + "loss": 0.0669, + "step": 9289 + }, + { + "epoch": 2.99, + "learning_rate": 6.974312904517444e-10, + "loss": 0.0681, + "step": 9290 + }, + { + "epoch": 2.99, + "learning_rate": 6.570096773239609e-10, + "loss": 0.0654, + "step": 9291 + }, + { + "epoch": 2.99, + "learning_rate": 6.177946274632529e-10, + "loss": 0.0652, + "step": 9292 + }, + { + "epoch": 2.99, + "learning_rate": 5.797861456002806e-10, + "loss": 0.0736, + "step": 9293 + }, + { + "epoch": 2.99, + "learning_rate": 5.429842363224858e-10, + "loss": 0.0743, + "step": 9294 + }, + { + "epoch": 2.99, + "learning_rate": 5.0738890406965e-10, + "loss": 0.0735, + "step": 9295 + }, + { + "epoch": 2.99, + "learning_rate": 4.730001531361161e-10, + "loss": 0.0694, + "step": 9296 + }, + { + "epoch": 2.99, + "learning_rate": 4.39817987673008e-10, + "loss": 0.0648, + "step": 9297 + }, + { + "epoch": 2.99, + "learning_rate": 4.0784241168378977e-10, + "loss": 0.0686, + "step": 9298 + }, + { + "epoch": 2.99, + "learning_rate": 3.770734290264866e-10, + "loss": 0.0689, + "step": 9299 + }, + { + "epoch": 2.99, + "learning_rate": 3.4751104341479436e-10, + "loss": 0.0705, + "step": 9300 + }, + { + "epoch": 2.99, + "learning_rate": 3.1915525841363927e-10, + "loss": 0.0701, + "step": 9301 + }, + { + "epoch": 2.99, + "learning_rate": 2.9200607744583886e-10, + "loss": 0.0693, + "step": 9302 + }, + { + "epoch": 2.99, + "learning_rate": 2.6606350378877154e-10, + "loss": 0.0701, + "step": 9303 + }, + { + "epoch": 2.99, + "learning_rate": 2.413275405699356e-10, + "loss": 0.0669, + "step": 9304 + }, + { + "epoch": 2.99, + "learning_rate": 2.1779819077583086e-10, + "loss": 0.068, + "step": 9305 + }, + { + "epoch": 2.99, + "learning_rate": 1.954754572452977e-10, + "loss": 0.0704, + "step": 9306 + }, + { + "epoch": 2.99, + "learning_rate": 1.7435934267173714e-10, + "loss": 0.0713, + "step": 9307 + }, + { + "epoch": 2.99, + "learning_rate": 1.5444984960311104e-10, + "loss": 0.0739, + "step": 9308 + }, + { + "epoch": 2.99, + "learning_rate": 1.3574698044305224e-10, + "loss": 0.073, + "step": 9309 + }, + { + "epoch": 2.99, + "learning_rate": 1.1825073744642367e-10, + "loss": 0.0676, + "step": 9310 + }, + { + "epoch": 3.0, + "learning_rate": 1.0196112272486958e-10, + "loss": 0.0668, + "step": 9311 + }, + { + "epoch": 3.0, + "learning_rate": 8.68781382445949e-11, + "loss": 0.0706, + "step": 9312 + }, + { + "epoch": 3.0, + "learning_rate": 7.300178582414497e-11, + "loss": 0.0687, + "step": 9313 + }, + { + "epoch": 3.0, + "learning_rate": 6.033206713995654e-11, + "loss": 0.068, + "step": 9314 + }, + { + "epoch": 3.0, + "learning_rate": 4.886898371969651e-11, + "loss": 0.0689, + "step": 9315 + }, + { + "epoch": 3.0, + "learning_rate": 3.8612536947812975e-11, + "loss": 0.0653, + "step": 9316 + }, + { + "epoch": 3.0, + "learning_rate": 2.9562728058873944e-11, + "loss": 0.0714, + "step": 9317 + }, + { + "epoch": 3.0, + "learning_rate": 2.1719558147559328e-11, + "loss": 0.0645, + "step": 9318 + }, + { + "epoch": 3.0, + "learning_rate": 1.5083028159779136e-11, + "loss": 0.0789, + "step": 9319 + }, + { + "epoch": 3.0, + "learning_rate": 9.653138896004166e-12, + "loss": 0.0679, + "step": 9320 + }, + { + "epoch": 3.0, + "learning_rate": 5.429891011266009e-12, + "loss": 0.0693, + "step": 9321 + }, + { + "epoch": 3.0, + "learning_rate": 2.4132850151570298e-12, + "loss": 0.0719, + "step": 9322 + }, + { + "epoch": 3.0, + "learning_rate": 6.033212718303815e-13, + "loss": 0.0744, + "step": 9323 + }, + { + "epoch": 3.0, + "learning_rate": 0.0, + "loss": 0.0723, + "step": 9324 + }, + { + "epoch": 3.0, + "step": 9324, + "total_flos": 3.269131628386976e+18, + "train_loss": 0.051072394966772315, + "train_runtime": 56917.8528, + "train_samples_per_second": 20.975, + "train_steps_per_second": 0.164 + } + ], + "max_steps": 9324, + "num_train_epochs": 3, + "total_flos": 3.269131628386976e+18, + "trial_name": null, + "trial_params": null +}