{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 6762, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.852216748768474e-08, "loss": 1.1201, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.9704433497536947e-07, "loss": 1.1611, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.955665024630542e-07, "loss": 1.1011, "step": 3 }, { "epoch": 0.0, "learning_rate": 3.9408866995073894e-07, "loss": 1.1499, "step": 4 }, { "epoch": 0.0, "learning_rate": 4.926108374384237e-07, "loss": 1.1401, "step": 5 }, { "epoch": 0.0, "learning_rate": 5.911330049261084e-07, "loss": 1.1104, "step": 6 }, { "epoch": 0.0, "learning_rate": 6.896551724137931e-07, "loss": 1.0806, "step": 7 }, { "epoch": 0.0, "learning_rate": 7.881773399014779e-07, "loss": 1.1387, "step": 8 }, { "epoch": 0.0, "learning_rate": 8.866995073891626e-07, "loss": 1.1221, "step": 9 }, { "epoch": 0.0, "learning_rate": 9.852216748768474e-07, "loss": 1.0835, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.0837438423645322e-06, "loss": 1.0166, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.1822660098522167e-06, "loss": 1.0205, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.2807881773399017e-06, "loss": 1.0645, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.3793103448275862e-06, "loss": 1.0811, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.4778325123152712e-06, "loss": 1.0117, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.5763546798029558e-06, "loss": 1.0278, "step": 16 }, { "epoch": 0.0, "learning_rate": 1.6748768472906405e-06, "loss": 0.3706, "step": 17 }, { "epoch": 0.0, "learning_rate": 1.7733990147783253e-06, "loss": 0.9995, "step": 18 }, { "epoch": 0.0, "learning_rate": 1.8719211822660098e-06, "loss": 0.9595, "step": 19 }, { "epoch": 0.0, "learning_rate": 1.970443349753695e-06, "loss": 1.0039, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.0689655172413796e-06, "loss": 0.9785, "step": 21 }, { "epoch": 0.0, "learning_rate": 2.1674876847290643e-06, "loss": 0.9346, "step": 22 }, { "epoch": 0.0, "learning_rate": 2.266009852216749e-06, "loss": 0.8794, "step": 23 }, { "epoch": 0.0, "learning_rate": 2.3645320197044334e-06, "loss": 0.9219, "step": 24 }, { "epoch": 0.0, "learning_rate": 2.4630541871921186e-06, "loss": 0.9341, "step": 25 }, { "epoch": 0.0, "learning_rate": 2.5615763546798034e-06, "loss": 0.9331, "step": 26 }, { "epoch": 0.0, "learning_rate": 2.660098522167488e-06, "loss": 0.9409, "step": 27 }, { "epoch": 0.0, "learning_rate": 2.7586206896551725e-06, "loss": 0.9766, "step": 28 }, { "epoch": 0.0, "learning_rate": 2.8571428571428573e-06, "loss": 0.9434, "step": 29 }, { "epoch": 0.0, "learning_rate": 2.9556650246305424e-06, "loss": 0.9146, "step": 30 }, { "epoch": 0.0, "learning_rate": 3.054187192118227e-06, "loss": 0.8652, "step": 31 }, { "epoch": 0.0, "learning_rate": 3.1527093596059115e-06, "loss": 0.9497, "step": 32 }, { "epoch": 0.0, "learning_rate": 3.2512315270935963e-06, "loss": 0.8242, "step": 33 }, { "epoch": 0.01, "learning_rate": 3.349753694581281e-06, "loss": 0.8765, "step": 34 }, { "epoch": 0.01, "learning_rate": 3.448275862068966e-06, "loss": 0.9253, "step": 35 }, { "epoch": 0.01, "learning_rate": 3.5467980295566506e-06, "loss": 0.8809, "step": 36 }, { "epoch": 0.01, "learning_rate": 3.6453201970443354e-06, "loss": 0.9053, "step": 37 }, { "epoch": 0.01, "learning_rate": 3.7438423645320197e-06, "loss": 0.9141, "step": 38 }, { "epoch": 0.01, "learning_rate": 3.842364532019705e-06, "loss": 0.8716, "step": 39 }, { "epoch": 0.01, "learning_rate": 3.94088669950739e-06, "loss": 0.8281, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.039408866995074e-06, "loss": 0.8843, "step": 41 }, { "epoch": 0.01, "learning_rate": 4.137931034482759e-06, "loss": 0.916, "step": 42 }, { "epoch": 0.01, "learning_rate": 4.236453201970444e-06, "loss": 0.9263, "step": 43 }, { "epoch": 0.01, "learning_rate": 4.334975369458129e-06, "loss": 0.9087, "step": 44 }, { "epoch": 0.01, "learning_rate": 4.4334975369458135e-06, "loss": 0.8799, "step": 45 }, { "epoch": 0.01, "learning_rate": 4.532019704433498e-06, "loss": 0.8257, "step": 46 }, { "epoch": 0.01, "learning_rate": 4.630541871921182e-06, "loss": 0.8774, "step": 47 }, { "epoch": 0.01, "learning_rate": 4.729064039408867e-06, "loss": 0.8799, "step": 48 }, { "epoch": 0.01, "learning_rate": 4.8275862068965525e-06, "loss": 0.8516, "step": 49 }, { "epoch": 0.01, "learning_rate": 4.926108374384237e-06, "loss": 0.8325, "step": 50 }, { "epoch": 0.01, "learning_rate": 5.024630541871922e-06, "loss": 0.8599, "step": 51 }, { "epoch": 0.01, "learning_rate": 5.123152709359607e-06, "loss": 0.813, "step": 52 }, { "epoch": 0.01, "learning_rate": 5.2216748768472915e-06, "loss": 0.8672, "step": 53 }, { "epoch": 0.01, "learning_rate": 5.320197044334976e-06, "loss": 0.7798, "step": 54 }, { "epoch": 0.01, "learning_rate": 5.41871921182266e-06, "loss": 0.8457, "step": 55 }, { "epoch": 0.01, "learning_rate": 5.517241379310345e-06, "loss": 0.8921, "step": 56 }, { "epoch": 0.01, "learning_rate": 5.61576354679803e-06, "loss": 0.8906, "step": 57 }, { "epoch": 0.01, "learning_rate": 5.7142857142857145e-06, "loss": 0.8481, "step": 58 }, { "epoch": 0.01, "learning_rate": 5.812807881773399e-06, "loss": 0.8862, "step": 59 }, { "epoch": 0.01, "learning_rate": 5.911330049261085e-06, "loss": 0.3048, "step": 60 }, { "epoch": 0.01, "learning_rate": 6.00985221674877e-06, "loss": 0.8677, "step": 61 }, { "epoch": 0.01, "learning_rate": 6.108374384236454e-06, "loss": 0.8057, "step": 62 }, { "epoch": 0.01, "learning_rate": 6.206896551724138e-06, "loss": 0.8423, "step": 63 }, { "epoch": 0.01, "learning_rate": 6.305418719211823e-06, "loss": 0.8965, "step": 64 }, { "epoch": 0.01, "learning_rate": 6.403940886699508e-06, "loss": 0.8833, "step": 65 }, { "epoch": 0.01, "learning_rate": 6.502463054187193e-06, "loss": 0.2915, "step": 66 }, { "epoch": 0.01, "learning_rate": 6.600985221674877e-06, "loss": 0.8442, "step": 67 }, { "epoch": 0.01, "learning_rate": 6.699507389162562e-06, "loss": 0.8755, "step": 68 }, { "epoch": 0.01, "learning_rate": 6.798029556650246e-06, "loss": 0.9082, "step": 69 }, { "epoch": 0.01, "learning_rate": 6.896551724137932e-06, "loss": 0.8525, "step": 70 }, { "epoch": 0.01, "learning_rate": 6.995073891625616e-06, "loss": 0.8345, "step": 71 }, { "epoch": 0.01, "learning_rate": 7.093596059113301e-06, "loss": 0.8433, "step": 72 }, { "epoch": 0.01, "learning_rate": 7.192118226600986e-06, "loss": 0.8062, "step": 73 }, { "epoch": 0.01, "learning_rate": 7.290640394088671e-06, "loss": 0.8901, "step": 74 }, { "epoch": 0.01, "learning_rate": 7.3891625615763555e-06, "loss": 0.834, "step": 75 }, { "epoch": 0.01, "learning_rate": 7.487684729064039e-06, "loss": 0.8604, "step": 76 }, { "epoch": 0.01, "learning_rate": 7.586206896551724e-06, "loss": 0.8462, "step": 77 }, { "epoch": 0.01, "learning_rate": 7.68472906403941e-06, "loss": 0.8369, "step": 78 }, { "epoch": 0.01, "learning_rate": 7.783251231527095e-06, "loss": 0.7983, "step": 79 }, { "epoch": 0.01, "learning_rate": 7.88177339901478e-06, "loss": 0.8394, "step": 80 }, { "epoch": 0.01, "learning_rate": 7.980295566502464e-06, "loss": 0.8672, "step": 81 }, { "epoch": 0.01, "learning_rate": 8.078817733990149e-06, "loss": 0.8535, "step": 82 }, { "epoch": 0.01, "learning_rate": 8.177339901477834e-06, "loss": 0.8423, "step": 83 }, { "epoch": 0.01, "learning_rate": 8.275862068965518e-06, "loss": 0.8374, "step": 84 }, { "epoch": 0.01, "learning_rate": 8.374384236453203e-06, "loss": 0.7959, "step": 85 }, { "epoch": 0.01, "learning_rate": 8.472906403940888e-06, "loss": 0.9478, "step": 86 }, { "epoch": 0.01, "learning_rate": 8.571428571428571e-06, "loss": 0.8853, "step": 87 }, { "epoch": 0.01, "learning_rate": 8.669950738916257e-06, "loss": 0.2842, "step": 88 }, { "epoch": 0.01, "learning_rate": 8.768472906403942e-06, "loss": 0.8574, "step": 89 }, { "epoch": 0.01, "learning_rate": 8.866995073891627e-06, "loss": 0.7847, "step": 90 }, { "epoch": 0.01, "learning_rate": 8.965517241379312e-06, "loss": 0.8765, "step": 91 }, { "epoch": 0.01, "learning_rate": 9.064039408866996e-06, "loss": 0.8589, "step": 92 }, { "epoch": 0.01, "learning_rate": 9.162561576354681e-06, "loss": 0.8833, "step": 93 }, { "epoch": 0.01, "learning_rate": 9.261083743842364e-06, "loss": 0.8564, "step": 94 }, { "epoch": 0.01, "learning_rate": 9.359605911330049e-06, "loss": 0.8369, "step": 95 }, { "epoch": 0.01, "learning_rate": 9.458128078817734e-06, "loss": 0.8403, "step": 96 }, { "epoch": 0.01, "learning_rate": 9.55665024630542e-06, "loss": 0.8623, "step": 97 }, { "epoch": 0.01, "learning_rate": 9.655172413793105e-06, "loss": 0.8584, "step": 98 }, { "epoch": 0.01, "learning_rate": 9.75369458128079e-06, "loss": 0.8247, "step": 99 }, { "epoch": 0.01, "learning_rate": 9.852216748768475e-06, "loss": 0.855, "step": 100 }, { "epoch": 0.01, "learning_rate": 9.95073891625616e-06, "loss": 0.8438, "step": 101 }, { "epoch": 0.02, "learning_rate": 1.0049261083743844e-05, "loss": 0.812, "step": 102 }, { "epoch": 0.02, "learning_rate": 1.0147783251231529e-05, "loss": 0.8042, "step": 103 }, { "epoch": 0.02, "learning_rate": 1.0246305418719214e-05, "loss": 0.8945, "step": 104 }, { "epoch": 0.02, "learning_rate": 1.0344827586206898e-05, "loss": 0.8906, "step": 105 }, { "epoch": 0.02, "learning_rate": 1.0443349753694583e-05, "loss": 0.8232, "step": 106 }, { "epoch": 0.02, "learning_rate": 1.0541871921182268e-05, "loss": 0.8232, "step": 107 }, { "epoch": 0.02, "learning_rate": 1.0640394088669953e-05, "loss": 0.8374, "step": 108 }, { "epoch": 0.02, "learning_rate": 1.0738916256157637e-05, "loss": 0.854, "step": 109 }, { "epoch": 0.02, "learning_rate": 1.083743842364532e-05, "loss": 0.8496, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.0935960591133005e-05, "loss": 0.8608, "step": 111 }, { "epoch": 0.02, "learning_rate": 1.103448275862069e-05, "loss": 0.7869, "step": 112 }, { "epoch": 0.02, "learning_rate": 1.1133004926108375e-05, "loss": 0.7915, "step": 113 }, { "epoch": 0.02, "learning_rate": 1.123152709359606e-05, "loss": 0.8296, "step": 114 }, { "epoch": 0.02, "learning_rate": 1.1330049261083744e-05, "loss": 0.8325, "step": 115 }, { "epoch": 0.02, "learning_rate": 1.1428571428571429e-05, "loss": 0.8228, "step": 116 }, { "epoch": 0.02, "learning_rate": 1.1527093596059114e-05, "loss": 0.8545, "step": 117 }, { "epoch": 0.02, "learning_rate": 1.1625615763546799e-05, "loss": 0.9116, "step": 118 }, { "epoch": 0.02, "learning_rate": 1.1724137931034483e-05, "loss": 0.8667, "step": 119 }, { "epoch": 0.02, "learning_rate": 1.182266009852217e-05, "loss": 0.8608, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.1921182266009855e-05, "loss": 0.8335, "step": 121 }, { "epoch": 0.02, "learning_rate": 1.201970443349754e-05, "loss": 0.8306, "step": 122 }, { "epoch": 0.02, "learning_rate": 1.2118226600985224e-05, "loss": 0.7983, "step": 123 }, { "epoch": 0.02, "learning_rate": 1.2216748768472909e-05, "loss": 0.7915, "step": 124 }, { "epoch": 0.02, "learning_rate": 1.2315270935960592e-05, "loss": 0.8403, "step": 125 }, { "epoch": 0.02, "learning_rate": 1.2413793103448277e-05, "loss": 0.2775, "step": 126 }, { "epoch": 0.02, "learning_rate": 1.2512315270935961e-05, "loss": 0.8345, "step": 127 }, { "epoch": 0.02, "learning_rate": 1.2610837438423646e-05, "loss": 0.895, "step": 128 }, { "epoch": 0.02, "learning_rate": 1.2709359605911331e-05, "loss": 0.8975, "step": 129 }, { "epoch": 0.02, "learning_rate": 1.2807881773399016e-05, "loss": 0.8418, "step": 130 }, { "epoch": 0.02, "learning_rate": 1.29064039408867e-05, "loss": 0.8579, "step": 131 }, { "epoch": 0.02, "learning_rate": 1.3004926108374385e-05, "loss": 0.8652, "step": 132 }, { "epoch": 0.02, "learning_rate": 1.310344827586207e-05, "loss": 0.8301, "step": 133 }, { "epoch": 0.02, "learning_rate": 1.3201970443349755e-05, "loss": 0.8271, "step": 134 }, { "epoch": 0.02, "learning_rate": 1.330049261083744e-05, "loss": 0.8521, "step": 135 }, { "epoch": 0.02, "learning_rate": 1.3399014778325124e-05, "loss": 0.8066, "step": 136 }, { "epoch": 0.02, "learning_rate": 1.3497536945812807e-05, "loss": 0.8389, "step": 137 }, { "epoch": 0.02, "learning_rate": 1.3596059113300492e-05, "loss": 0.8823, "step": 138 }, { "epoch": 0.02, "learning_rate": 1.369458128078818e-05, "loss": 0.3254, "step": 139 }, { "epoch": 0.02, "learning_rate": 1.3793103448275863e-05, "loss": 0.8848, "step": 140 }, { "epoch": 0.02, "learning_rate": 1.3891625615763548e-05, "loss": 0.8013, "step": 141 }, { "epoch": 0.02, "learning_rate": 1.3990147783251233e-05, "loss": 0.8589, "step": 142 }, { "epoch": 0.02, "learning_rate": 1.4088669950738918e-05, "loss": 0.814, "step": 143 }, { "epoch": 0.02, "learning_rate": 1.4187192118226602e-05, "loss": 0.8579, "step": 144 }, { "epoch": 0.02, "learning_rate": 1.4285714285714287e-05, "loss": 0.8809, "step": 145 }, { "epoch": 0.02, "learning_rate": 1.4384236453201972e-05, "loss": 0.8716, "step": 146 }, { "epoch": 0.02, "learning_rate": 1.4482758620689657e-05, "loss": 0.874, "step": 147 }, { "epoch": 0.02, "learning_rate": 1.4581280788177341e-05, "loss": 0.8335, "step": 148 }, { "epoch": 0.02, "learning_rate": 1.4679802955665026e-05, "loss": 0.9067, "step": 149 }, { "epoch": 0.02, "learning_rate": 1.4778325123152711e-05, "loss": 0.8496, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.4876847290640396e-05, "loss": 0.8994, "step": 151 }, { "epoch": 0.02, "learning_rate": 1.4975369458128079e-05, "loss": 0.9229, "step": 152 }, { "epoch": 0.02, "learning_rate": 1.5073891625615764e-05, "loss": 0.8159, "step": 153 }, { "epoch": 0.02, "learning_rate": 1.5172413793103448e-05, "loss": 0.8315, "step": 154 }, { "epoch": 0.02, "learning_rate": 1.5270935960591133e-05, "loss": 0.8521, "step": 155 }, { "epoch": 0.02, "learning_rate": 1.536945812807882e-05, "loss": 0.8765, "step": 156 }, { "epoch": 0.02, "learning_rate": 1.5467980295566506e-05, "loss": 0.8843, "step": 157 }, { "epoch": 0.02, "learning_rate": 1.556650246305419e-05, "loss": 0.8828, "step": 158 }, { "epoch": 0.02, "learning_rate": 1.5665024630541875e-05, "loss": 0.855, "step": 159 }, { "epoch": 0.02, "learning_rate": 1.576354679802956e-05, "loss": 0.8345, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.586206896551724e-05, "loss": 0.8633, "step": 161 }, { "epoch": 0.02, "learning_rate": 1.5960591133004928e-05, "loss": 0.7974, "step": 162 }, { "epoch": 0.02, "learning_rate": 1.605911330049261e-05, "loss": 0.8433, "step": 163 }, { "epoch": 0.02, "learning_rate": 1.6157635467980298e-05, "loss": 0.877, "step": 164 }, { "epoch": 0.02, "learning_rate": 1.625615763546798e-05, "loss": 0.8423, "step": 165 }, { "epoch": 0.02, "learning_rate": 1.6354679802955667e-05, "loss": 0.8159, "step": 166 }, { "epoch": 0.02, "learning_rate": 1.645320197044335e-05, "loss": 0.855, "step": 167 }, { "epoch": 0.02, "learning_rate": 1.6551724137931037e-05, "loss": 0.8423, "step": 168 }, { "epoch": 0.02, "learning_rate": 1.665024630541872e-05, "loss": 0.9058, "step": 169 }, { "epoch": 0.03, "learning_rate": 1.6748768472906406e-05, "loss": 0.9023, "step": 170 }, { "epoch": 0.03, "learning_rate": 1.684729064039409e-05, "loss": 0.8828, "step": 171 }, { "epoch": 0.03, "learning_rate": 1.6945812807881776e-05, "loss": 0.8281, "step": 172 }, { "epoch": 0.03, "learning_rate": 1.704433497536946e-05, "loss": 0.8481, "step": 173 }, { "epoch": 0.03, "learning_rate": 1.7142857142857142e-05, "loss": 0.8569, "step": 174 }, { "epoch": 0.03, "learning_rate": 1.7241379310344828e-05, "loss": 0.9287, "step": 175 }, { "epoch": 0.03, "learning_rate": 1.7339901477832515e-05, "loss": 0.8335, "step": 176 }, { "epoch": 0.03, "learning_rate": 1.7438423645320198e-05, "loss": 0.8232, "step": 177 }, { "epoch": 0.03, "learning_rate": 1.7536945812807884e-05, "loss": 0.877, "step": 178 }, { "epoch": 0.03, "learning_rate": 1.7635467980295567e-05, "loss": 0.8188, "step": 179 }, { "epoch": 0.03, "learning_rate": 1.7733990147783254e-05, "loss": 0.8628, "step": 180 }, { "epoch": 0.03, "learning_rate": 1.7832512315270937e-05, "loss": 0.8096, "step": 181 }, { "epoch": 0.03, "learning_rate": 1.7931034482758623e-05, "loss": 0.8589, "step": 182 }, { "epoch": 0.03, "learning_rate": 1.8029556650246306e-05, "loss": 0.8477, "step": 183 }, { "epoch": 0.03, "learning_rate": 1.8128078817733993e-05, "loss": 0.8384, "step": 184 }, { "epoch": 0.03, "learning_rate": 1.8226600985221676e-05, "loss": 0.9077, "step": 185 }, { "epoch": 0.03, "learning_rate": 1.8325123152709362e-05, "loss": 0.8223, "step": 186 }, { "epoch": 0.03, "learning_rate": 1.8423645320197045e-05, "loss": 0.8306, "step": 187 }, { "epoch": 0.03, "learning_rate": 1.852216748768473e-05, "loss": 0.9048, "step": 188 }, { "epoch": 0.03, "learning_rate": 1.8620689655172415e-05, "loss": 0.8608, "step": 189 }, { "epoch": 0.03, "learning_rate": 1.8719211822660098e-05, "loss": 0.8672, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.8817733990147784e-05, "loss": 0.7964, "step": 191 }, { "epoch": 0.03, "learning_rate": 1.8916256157635468e-05, "loss": 0.8276, "step": 192 }, { "epoch": 0.03, "learning_rate": 1.9014778325123154e-05, "loss": 0.8428, "step": 193 }, { "epoch": 0.03, "learning_rate": 1.911330049261084e-05, "loss": 0.832, "step": 194 }, { "epoch": 0.03, "learning_rate": 1.9211822660098524e-05, "loss": 0.9175, "step": 195 }, { "epoch": 0.03, "learning_rate": 1.931034482758621e-05, "loss": 0.7827, "step": 196 }, { "epoch": 0.03, "learning_rate": 1.9408866995073893e-05, "loss": 0.8945, "step": 197 }, { "epoch": 0.03, "learning_rate": 1.950738916256158e-05, "loss": 0.9243, "step": 198 }, { "epoch": 0.03, "learning_rate": 1.9605911330049263e-05, "loss": 0.8804, "step": 199 }, { "epoch": 0.03, "learning_rate": 1.970443349753695e-05, "loss": 0.9077, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.9802955665024632e-05, "loss": 0.8647, "step": 201 }, { "epoch": 0.03, "learning_rate": 1.990147783251232e-05, "loss": 0.9092, "step": 202 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.9106, "step": 203 }, { "epoch": 0.03, "learning_rate": 1.9999998852917962e-05, "loss": 0.9341, "step": 204 }, { "epoch": 0.03, "learning_rate": 1.9999995411672108e-05, "loss": 0.894, "step": 205 }, { "epoch": 0.03, "learning_rate": 1.999998967626323e-05, "loss": 0.8604, "step": 206 }, { "epoch": 0.03, "learning_rate": 1.9999981646692643e-05, "loss": 0.8872, "step": 207 }, { "epoch": 0.03, "learning_rate": 1.9999971322962186e-05, "loss": 0.895, "step": 208 }, { "epoch": 0.03, "learning_rate": 1.999995870507423e-05, "loss": 0.8555, "step": 209 }, { "epoch": 0.03, "learning_rate": 1.9999943793031672e-05, "loss": 0.8506, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.999992658683793e-05, "loss": 0.8882, "step": 211 }, { "epoch": 0.03, "learning_rate": 1.9999907086496952e-05, "loss": 0.8418, "step": 212 }, { "epoch": 0.03, "learning_rate": 1.9999885292013213e-05, "loss": 0.8779, "step": 213 }, { "epoch": 0.03, "learning_rate": 1.999986120339171e-05, "loss": 0.8315, "step": 214 }, { "epoch": 0.03, "learning_rate": 1.999983482063797e-05, "loss": 0.7949, "step": 215 }, { "epoch": 0.03, "learning_rate": 1.9999806143758053e-05, "loss": 0.9224, "step": 216 }, { "epoch": 0.03, "learning_rate": 1.999977517275853e-05, "loss": 0.8643, "step": 217 }, { "epoch": 0.03, "learning_rate": 1.9999741907646506e-05, "loss": 0.8296, "step": 218 }, { "epoch": 0.03, "learning_rate": 1.9999706348429616e-05, "loss": 0.8276, "step": 219 }, { "epoch": 0.03, "learning_rate": 1.9999668495116016e-05, "loss": 0.7744, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.999962834771439e-05, "loss": 0.8706, "step": 221 }, { "epoch": 0.03, "learning_rate": 1.999958590623395e-05, "loss": 0.833, "step": 222 }, { "epoch": 0.03, "learning_rate": 1.9999541170684433e-05, "loss": 0.8477, "step": 223 }, { "epoch": 0.03, "learning_rate": 1.99994941410761e-05, "loss": 0.8784, "step": 224 }, { "epoch": 0.03, "learning_rate": 1.999944481741974e-05, "loss": 0.8081, "step": 225 }, { "epoch": 0.03, "learning_rate": 1.999939319972667e-05, "loss": 0.897, "step": 226 }, { "epoch": 0.03, "learning_rate": 1.9999339288008736e-05, "loss": 0.8481, "step": 227 }, { "epoch": 0.03, "learning_rate": 1.99992830822783e-05, "loss": 0.9092, "step": 228 }, { "epoch": 0.03, "learning_rate": 1.999922458254826e-05, "loss": 0.8784, "step": 229 }, { "epoch": 0.03, "learning_rate": 1.9999163788832035e-05, "loss": 0.8774, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.999910070114357e-05, "loss": 0.9155, "step": 231 }, { "epoch": 0.03, "learning_rate": 1.9999035319497343e-05, "loss": 0.8872, "step": 232 }, { "epoch": 0.03, "learning_rate": 1.9998967643908354e-05, "loss": 0.8589, "step": 233 }, { "epoch": 0.03, "learning_rate": 1.9998897674392123e-05, "loss": 0.9795, "step": 234 }, { "epoch": 0.03, "learning_rate": 1.9998825410964706e-05, "loss": 0.8188, "step": 235 }, { "epoch": 0.03, "learning_rate": 1.999875085364268e-05, "loss": 0.8882, "step": 236 }, { "epoch": 0.04, "learning_rate": 1.9998674002443156e-05, "loss": 0.8794, "step": 237 }, { "epoch": 0.04, "learning_rate": 1.9998594857383756e-05, "loss": 0.8618, "step": 238 }, { "epoch": 0.04, "learning_rate": 1.9998513418482642e-05, "loss": 0.8618, "step": 239 }, { "epoch": 0.04, "learning_rate": 1.9998429685758495e-05, "loss": 0.8755, "step": 240 }, { "epoch": 0.04, "learning_rate": 1.9998343659230526e-05, "loss": 0.3245, "step": 241 }, { "epoch": 0.04, "learning_rate": 1.999825533891847e-05, "loss": 0.877, "step": 242 }, { "epoch": 0.04, "learning_rate": 1.9998164724842593e-05, "loss": 0.8506, "step": 243 }, { "epoch": 0.04, "learning_rate": 1.999807181702368e-05, "loss": 0.8394, "step": 244 }, { "epoch": 0.04, "learning_rate": 1.9997976615483042e-05, "loss": 0.2776, "step": 245 }, { "epoch": 0.04, "learning_rate": 1.9997879120242527e-05, "loss": 0.8818, "step": 246 }, { "epoch": 0.04, "learning_rate": 1.99977793313245e-05, "loss": 0.8975, "step": 247 }, { "epoch": 0.04, "learning_rate": 1.999767724875185e-05, "loss": 0.8794, "step": 248 }, { "epoch": 0.04, "learning_rate": 1.9997572872548e-05, "loss": 0.8833, "step": 249 }, { "epoch": 0.04, "learning_rate": 1.9997466202736895e-05, "loss": 0.8857, "step": 250 }, { "epoch": 0.04, "learning_rate": 1.9997357239343008e-05, "loss": 0.8638, "step": 251 }, { "epoch": 0.04, "learning_rate": 1.9997245982391335e-05, "loss": 0.9189, "step": 252 }, { "epoch": 0.04, "learning_rate": 1.99971324319074e-05, "loss": 0.856, "step": 253 }, { "epoch": 0.04, "learning_rate": 1.9997016587917256e-05, "loss": 0.874, "step": 254 }, { "epoch": 0.04, "learning_rate": 1.9996898450447476e-05, "loss": 0.8291, "step": 255 }, { "epoch": 0.04, "learning_rate": 1.9996778019525164e-05, "loss": 0.9268, "step": 256 }, { "epoch": 0.04, "learning_rate": 1.9996655295177953e-05, "loss": 0.8481, "step": 257 }, { "epoch": 0.04, "learning_rate": 1.9996530277433993e-05, "loss": 0.9272, "step": 258 }, { "epoch": 0.04, "learning_rate": 1.9996402966321962e-05, "loss": 0.8418, "step": 259 }, { "epoch": 0.04, "learning_rate": 1.9996273361871076e-05, "loss": 0.3203, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.999614146411106e-05, "loss": 0.9077, "step": 261 }, { "epoch": 0.04, "learning_rate": 1.9996007273072183e-05, "loss": 0.8496, "step": 262 }, { "epoch": 0.04, "learning_rate": 1.9995870788785223e-05, "loss": 0.8013, "step": 263 }, { "epoch": 0.04, "learning_rate": 1.9995732011281493e-05, "loss": 0.8809, "step": 264 }, { "epoch": 0.04, "learning_rate": 1.9995590940592833e-05, "loss": 0.8032, "step": 265 }, { "epoch": 0.04, "learning_rate": 1.9995447576751605e-05, "loss": 0.3406, "step": 266 }, { "epoch": 0.04, "learning_rate": 1.99953019197907e-05, "loss": 0.9082, "step": 267 }, { "epoch": 0.04, "learning_rate": 1.999515396974353e-05, "loss": 0.8906, "step": 268 }, { "epoch": 0.04, "learning_rate": 1.9995003726644045e-05, "loss": 0.8516, "step": 269 }, { "epoch": 0.04, "learning_rate": 1.9994851190526712e-05, "loss": 0.835, "step": 270 }, { "epoch": 0.04, "learning_rate": 1.999469636142652e-05, "loss": 0.8452, "step": 271 }, { "epoch": 0.04, "learning_rate": 1.9994539239378988e-05, "loss": 0.8101, "step": 272 }, { "epoch": 0.04, "learning_rate": 1.999437982442017e-05, "loss": 0.8955, "step": 273 }, { "epoch": 0.04, "learning_rate": 1.9994218116586633e-05, "loss": 0.8916, "step": 274 }, { "epoch": 0.04, "learning_rate": 1.999405411591548e-05, "loss": 0.79, "step": 275 }, { "epoch": 0.04, "learning_rate": 1.999388782244433e-05, "loss": 0.8799, "step": 276 }, { "epoch": 0.04, "learning_rate": 1.9993719236211336e-05, "loss": 0.8794, "step": 277 }, { "epoch": 0.04, "learning_rate": 1.9993548357255172e-05, "loss": 0.8149, "step": 278 }, { "epoch": 0.04, "learning_rate": 1.999337518561505e-05, "loss": 0.8838, "step": 279 }, { "epoch": 0.04, "learning_rate": 1.9993199721330684e-05, "loss": 0.8652, "step": 280 }, { "epoch": 0.04, "learning_rate": 1.9993021964442336e-05, "loss": 0.8853, "step": 281 }, { "epoch": 0.04, "learning_rate": 1.9992841914990792e-05, "loss": 0.9355, "step": 282 }, { "epoch": 0.04, "learning_rate": 1.999265957301735e-05, "loss": 0.8936, "step": 283 }, { "epoch": 0.04, "learning_rate": 1.999247493856384e-05, "loss": 0.2738, "step": 284 }, { "epoch": 0.04, "learning_rate": 1.9992288011672628e-05, "loss": 0.8428, "step": 285 }, { "epoch": 0.04, "learning_rate": 1.9992098792386595e-05, "loss": 0.9155, "step": 286 }, { "epoch": 0.04, "learning_rate": 1.9991907280749148e-05, "loss": 0.8354, "step": 287 }, { "epoch": 0.04, "learning_rate": 1.9991713476804227e-05, "loss": 0.9146, "step": 288 }, { "epoch": 0.04, "learning_rate": 1.9991517380596294e-05, "loss": 0.8823, "step": 289 }, { "epoch": 0.04, "learning_rate": 1.9991318992170334e-05, "loss": 0.8926, "step": 290 }, { "epoch": 0.04, "learning_rate": 1.9991118311571862e-05, "loss": 0.8774, "step": 291 }, { "epoch": 0.04, "learning_rate": 1.9990915338846918e-05, "loss": 0.9404, "step": 292 }, { "epoch": 0.04, "learning_rate": 1.9990710074042066e-05, "loss": 0.8799, "step": 293 }, { "epoch": 0.04, "learning_rate": 1.99905025172044e-05, "loss": 0.3264, "step": 294 }, { "epoch": 0.04, "learning_rate": 1.9990292668381527e-05, "loss": 0.8174, "step": 295 }, { "epoch": 0.04, "learning_rate": 1.9990080527621606e-05, "loss": 0.8843, "step": 296 }, { "epoch": 0.04, "learning_rate": 1.998986609497329e-05, "loss": 0.8203, "step": 297 }, { "epoch": 0.04, "learning_rate": 1.9989649370485784e-05, "loss": 0.8618, "step": 298 }, { "epoch": 0.04, "learning_rate": 1.9989430354208803e-05, "loss": 0.8818, "step": 299 }, { "epoch": 0.04, "learning_rate": 1.9989209046192596e-05, "loss": 0.9019, "step": 300 }, { "epoch": 0.04, "learning_rate": 1.998898544648793e-05, "loss": 0.8589, "step": 301 }, { "epoch": 0.04, "learning_rate": 1.9988759555146107e-05, "loss": 0.897, "step": 302 }, { "epoch": 0.04, "learning_rate": 1.998853137221895e-05, "loss": 0.8447, "step": 303 }, { "epoch": 0.04, "learning_rate": 1.9988300897758802e-05, "loss": 0.8633, "step": 304 }, { "epoch": 0.05, "learning_rate": 1.9988068131818545e-05, "loss": 0.8726, "step": 305 }, { "epoch": 0.05, "learning_rate": 1.998783307445158e-05, "loss": 0.8584, "step": 306 }, { "epoch": 0.05, "learning_rate": 1.9987595725711823e-05, "loss": 0.8198, "step": 307 }, { "epoch": 0.05, "learning_rate": 1.9987356085653738e-05, "loss": 0.9429, "step": 308 }, { "epoch": 0.05, "learning_rate": 1.9987114154332292e-05, "loss": 0.8638, "step": 309 }, { "epoch": 0.05, "learning_rate": 1.9986869931802993e-05, "loss": 0.9043, "step": 310 }, { "epoch": 0.05, "learning_rate": 1.9986623418121872e-05, "loss": 0.8403, "step": 311 }, { "epoch": 0.05, "learning_rate": 1.998637461334548e-05, "loss": 0.896, "step": 312 }, { "epoch": 0.05, "learning_rate": 1.9986123517530894e-05, "loss": 0.8735, "step": 313 }, { "epoch": 0.05, "learning_rate": 1.9985870130735726e-05, "loss": 0.3135, "step": 314 }, { "epoch": 0.05, "learning_rate": 1.99856144530181e-05, "loss": 0.875, "step": 315 }, { "epoch": 0.05, "learning_rate": 1.9985356484436682e-05, "loss": 0.8672, "step": 316 }, { "epoch": 0.05, "learning_rate": 1.998509622505065e-05, "loss": 0.874, "step": 317 }, { "epoch": 0.05, "learning_rate": 1.9984833674919707e-05, "loss": 0.9321, "step": 318 }, { "epoch": 0.05, "learning_rate": 1.998456883410409e-05, "loss": 0.8564, "step": 319 }, { "epoch": 0.05, "learning_rate": 1.9984301702664557e-05, "loss": 0.895, "step": 320 }, { "epoch": 0.05, "learning_rate": 1.9984032280662393e-05, "loss": 0.8999, "step": 321 }, { "epoch": 0.05, "learning_rate": 1.998376056815941e-05, "loss": 0.7822, "step": 322 }, { "epoch": 0.05, "learning_rate": 1.998348656521794e-05, "loss": 0.8813, "step": 323 }, { "epoch": 0.05, "learning_rate": 1.9983210271900845e-05, "loss": 0.9331, "step": 324 }, { "epoch": 0.05, "learning_rate": 1.9982931688271508e-05, "loss": 0.8721, "step": 325 }, { "epoch": 0.05, "learning_rate": 1.998265081439385e-05, "loss": 0.8599, "step": 326 }, { "epoch": 0.05, "learning_rate": 1.9982367650332297e-05, "loss": 0.8716, "step": 327 }, { "epoch": 0.05, "learning_rate": 1.998208219615182e-05, "loss": 0.8955, "step": 328 }, { "epoch": 0.05, "learning_rate": 1.99817944519179e-05, "loss": 0.8774, "step": 329 }, { "epoch": 0.05, "learning_rate": 1.9981504417696557e-05, "loss": 0.8403, "step": 330 }, { "epoch": 0.05, "learning_rate": 1.9981212093554325e-05, "loss": 0.8203, "step": 331 }, { "epoch": 0.05, "learning_rate": 1.9980917479558268e-05, "loss": 0.8843, "step": 332 }, { "epoch": 0.05, "learning_rate": 1.9980620575775977e-05, "loss": 0.9019, "step": 333 }, { "epoch": 0.05, "learning_rate": 1.9980321382275568e-05, "loss": 0.8394, "step": 334 }, { "epoch": 0.05, "learning_rate": 1.9980019899125674e-05, "loss": 0.9087, "step": 335 }, { "epoch": 0.05, "learning_rate": 1.997971612639547e-05, "loss": 0.7954, "step": 336 }, { "epoch": 0.05, "learning_rate": 1.997941006415464e-05, "loss": 0.8413, "step": 337 }, { "epoch": 0.05, "learning_rate": 1.99791017124734e-05, "loss": 0.8965, "step": 338 }, { "epoch": 0.05, "learning_rate": 1.9978791071422494e-05, "loss": 0.9009, "step": 339 }, { "epoch": 0.05, "learning_rate": 1.9978478141073183e-05, "loss": 0.8901, "step": 340 }, { "epoch": 0.05, "learning_rate": 1.9978162921497268e-05, "loss": 0.9341, "step": 341 }, { "epoch": 0.05, "learning_rate": 1.9977845412767053e-05, "loss": 0.8477, "step": 342 }, { "epoch": 0.05, "learning_rate": 1.9977525614955388e-05, "loss": 0.9106, "step": 343 }, { "epoch": 0.05, "learning_rate": 1.9977203528135635e-05, "loss": 0.9248, "step": 344 }, { "epoch": 0.05, "learning_rate": 1.9976879152381692e-05, "loss": 0.8491, "step": 345 }, { "epoch": 0.05, "learning_rate": 1.9976552487767975e-05, "loss": 0.8096, "step": 346 }, { "epoch": 0.05, "learning_rate": 1.997622353436942e-05, "loss": 0.8115, "step": 347 }, { "epoch": 0.05, "learning_rate": 1.99758922922615e-05, "loss": 0.9131, "step": 348 }, { "epoch": 0.05, "learning_rate": 1.9975558761520205e-05, "loss": 0.8984, "step": 349 }, { "epoch": 0.05, "learning_rate": 1.9975222942222054e-05, "loss": 0.9204, "step": 350 }, { "epoch": 0.05, "learning_rate": 1.997488483444409e-05, "loss": 0.9321, "step": 351 }, { "epoch": 0.05, "learning_rate": 1.997454443826388e-05, "loss": 0.8989, "step": 352 }, { "epoch": 0.05, "learning_rate": 1.997420175375951e-05, "loss": 0.8818, "step": 353 }, { "epoch": 0.05, "learning_rate": 1.9973856781009607e-05, "loss": 0.8882, "step": 354 }, { "epoch": 0.05, "learning_rate": 1.997350952009331e-05, "loss": 0.873, "step": 355 }, { "epoch": 0.05, "learning_rate": 1.9973159971090285e-05, "loss": 0.3008, "step": 356 }, { "epoch": 0.05, "learning_rate": 1.9972808134080726e-05, "loss": 0.9346, "step": 357 }, { "epoch": 0.05, "learning_rate": 1.997245400914535e-05, "loss": 0.8237, "step": 358 }, { "epoch": 0.05, "learning_rate": 1.9972097596365395e-05, "loss": 0.8608, "step": 359 }, { "epoch": 0.05, "learning_rate": 1.9971738895822632e-05, "loss": 0.8833, "step": 360 }, { "epoch": 0.05, "learning_rate": 1.9971377907599354e-05, "loss": 0.8257, "step": 361 }, { "epoch": 0.05, "learning_rate": 1.9971014631778376e-05, "loss": 0.9102, "step": 362 }, { "epoch": 0.05, "learning_rate": 1.997064906844304e-05, "loss": 0.8496, "step": 363 }, { "epoch": 0.05, "learning_rate": 1.9970281217677207e-05, "loss": 0.8906, "step": 364 }, { "epoch": 0.05, "learning_rate": 1.9969911079565274e-05, "loss": 0.8481, "step": 365 }, { "epoch": 0.05, "learning_rate": 1.9969538654192158e-05, "loss": 0.9092, "step": 366 }, { "epoch": 0.05, "learning_rate": 1.9969163941643296e-05, "loss": 0.8765, "step": 367 }, { "epoch": 0.05, "learning_rate": 1.996878694200465e-05, "loss": 0.9229, "step": 368 }, { "epoch": 0.05, "learning_rate": 1.9968407655362716e-05, "loss": 0.916, "step": 369 }, { "epoch": 0.05, "learning_rate": 1.9968026081804508e-05, "loss": 0.8369, "step": 370 }, { "epoch": 0.05, "learning_rate": 1.996764222141756e-05, "loss": 0.8848, "step": 371 }, { "epoch": 0.06, "learning_rate": 1.9967256074289944e-05, "loss": 0.8867, "step": 372 }, { "epoch": 0.06, "learning_rate": 1.996686764051024e-05, "loss": 0.8291, "step": 373 }, { "epoch": 0.06, "learning_rate": 1.9966476920167568e-05, "loss": 0.9209, "step": 374 }, { "epoch": 0.06, "learning_rate": 1.9966083913351563e-05, "loss": 0.8833, "step": 375 }, { "epoch": 0.06, "learning_rate": 1.9965688620152382e-05, "loss": 0.9253, "step": 376 }, { "epoch": 0.06, "learning_rate": 1.996529104066072e-05, "loss": 0.8623, "step": 377 }, { "epoch": 0.06, "learning_rate": 1.9964891174967786e-05, "loss": 0.9282, "step": 378 }, { "epoch": 0.06, "learning_rate": 1.9964489023165313e-05, "loss": 0.876, "step": 379 }, { "epoch": 0.06, "learning_rate": 1.996408458534556e-05, "loss": 0.8042, "step": 380 }, { "epoch": 0.06, "learning_rate": 1.996367786160132e-05, "loss": 0.8154, "step": 381 }, { "epoch": 0.06, "learning_rate": 1.9963268852025893e-05, "loss": 0.9121, "step": 382 }, { "epoch": 0.06, "learning_rate": 1.9962857556713117e-05, "loss": 0.8438, "step": 383 }, { "epoch": 0.06, "learning_rate": 1.9962443975757352e-05, "loss": 0.8628, "step": 384 }, { "epoch": 0.06, "learning_rate": 1.9962028109253474e-05, "loss": 0.897, "step": 385 }, { "epoch": 0.06, "learning_rate": 1.996160995729689e-05, "loss": 0.8599, "step": 386 }, { "epoch": 0.06, "learning_rate": 1.996118951998354e-05, "loss": 0.8633, "step": 387 }, { "epoch": 0.06, "learning_rate": 1.996076679740987e-05, "loss": 0.8721, "step": 388 }, { "epoch": 0.06, "learning_rate": 1.9960341789672863e-05, "loss": 0.8472, "step": 389 }, { "epoch": 0.06, "learning_rate": 1.995991449687002e-05, "loss": 0.9287, "step": 390 }, { "epoch": 0.06, "learning_rate": 1.9959484919099375e-05, "loss": 0.8594, "step": 391 }, { "epoch": 0.06, "learning_rate": 1.9959053056459474e-05, "loss": 0.9004, "step": 392 }, { "epoch": 0.06, "learning_rate": 1.9958618909049398e-05, "loss": 0.8735, "step": 393 }, { "epoch": 0.06, "learning_rate": 1.995818247696874e-05, "loss": 0.8389, "step": 394 }, { "epoch": 0.06, "learning_rate": 1.9957743760317636e-05, "loss": 0.894, "step": 395 }, { "epoch": 0.06, "learning_rate": 1.9957302759196727e-05, "loss": 0.8857, "step": 396 }, { "epoch": 0.06, "learning_rate": 1.9956859473707187e-05, "loss": 0.833, "step": 397 }, { "epoch": 0.06, "learning_rate": 1.9956413903950715e-05, "loss": 0.875, "step": 398 }, { "epoch": 0.06, "learning_rate": 1.995596605002953e-05, "loss": 0.8403, "step": 399 }, { "epoch": 0.06, "learning_rate": 1.995551591204638e-05, "loss": 0.833, "step": 400 }, { "epoch": 0.06, "learning_rate": 1.9955063490104526e-05, "loss": 0.8447, "step": 401 }, { "epoch": 0.06, "learning_rate": 1.995460878430777e-05, "loss": 0.8628, "step": 402 }, { "epoch": 0.06, "learning_rate": 1.9954151794760425e-05, "loss": 0.8789, "step": 403 }, { "epoch": 0.06, "learning_rate": 1.9953692521567334e-05, "loss": 0.3088, "step": 404 }, { "epoch": 0.06, "learning_rate": 1.9953230964833857e-05, "loss": 0.8081, "step": 405 }, { "epoch": 0.06, "learning_rate": 1.9952767124665892e-05, "loss": 0.8525, "step": 406 }, { "epoch": 0.06, "learning_rate": 1.9952301001169842e-05, "loss": 0.9282, "step": 407 }, { "epoch": 0.06, "learning_rate": 1.995183259445265e-05, "loss": 0.3341, "step": 408 }, { "epoch": 0.06, "learning_rate": 1.995136190462177e-05, "loss": 0.8745, "step": 409 }, { "epoch": 0.06, "learning_rate": 1.995088893178519e-05, "loss": 0.8818, "step": 410 }, { "epoch": 0.06, "learning_rate": 1.9950413676051415e-05, "loss": 0.8931, "step": 411 }, { "epoch": 0.06, "learning_rate": 1.9949936137529482e-05, "loss": 0.8843, "step": 412 }, { "epoch": 0.06, "learning_rate": 1.9949456316328942e-05, "loss": 0.9102, "step": 413 }, { "epoch": 0.06, "learning_rate": 1.9948974212559873e-05, "loss": 0.875, "step": 414 }, { "epoch": 0.06, "learning_rate": 1.994848982633288e-05, "loss": 0.8428, "step": 415 }, { "epoch": 0.06, "learning_rate": 1.9948003157759088e-05, "loss": 0.854, "step": 416 }, { "epoch": 0.06, "learning_rate": 1.9947514206950146e-05, "loss": 0.8784, "step": 417 }, { "epoch": 0.06, "learning_rate": 1.994702297401823e-05, "loss": 0.8359, "step": 418 }, { "epoch": 0.06, "learning_rate": 1.9946529459076034e-05, "loss": 0.8691, "step": 419 }, { "epoch": 0.06, "learning_rate": 1.9946033662236778e-05, "loss": 0.894, "step": 420 }, { "epoch": 0.06, "learning_rate": 1.994553558361421e-05, "loss": 0.8521, "step": 421 }, { "epoch": 0.06, "learning_rate": 1.9945035223322593e-05, "loss": 0.8652, "step": 422 }, { "epoch": 0.06, "learning_rate": 1.994453258147672e-05, "loss": 0.8818, "step": 423 }, { "epoch": 0.06, "learning_rate": 1.9944027658191903e-05, "loss": 0.2729, "step": 424 }, { "epoch": 0.06, "learning_rate": 1.9943520453583986e-05, "loss": 0.874, "step": 425 }, { "epoch": 0.06, "learning_rate": 1.9943010967769324e-05, "loss": 0.2968, "step": 426 }, { "epoch": 0.06, "learning_rate": 1.9942499200864805e-05, "loss": 0.897, "step": 427 }, { "epoch": 0.06, "learning_rate": 1.9941985152987834e-05, "loss": 0.9253, "step": 428 }, { "epoch": 0.06, "learning_rate": 1.994146882425634e-05, "loss": 0.8989, "step": 429 }, { "epoch": 0.06, "learning_rate": 1.9940950214788783e-05, "loss": 0.8911, "step": 430 }, { "epoch": 0.06, "learning_rate": 1.9940429324704137e-05, "loss": 0.8911, "step": 431 }, { "epoch": 0.06, "learning_rate": 1.9939906154121902e-05, "loss": 0.9028, "step": 432 }, { "epoch": 0.06, "learning_rate": 1.9939380703162104e-05, "loss": 0.7974, "step": 433 }, { "epoch": 0.06, "learning_rate": 1.993885297194529e-05, "loss": 0.3132, "step": 434 }, { "epoch": 0.06, "learning_rate": 1.9938322960592532e-05, "loss": 0.9512, "step": 435 }, { "epoch": 0.06, "learning_rate": 1.9937790669225417e-05, "loss": 0.876, "step": 436 }, { "epoch": 0.06, "learning_rate": 1.9937256097966068e-05, "loss": 0.8467, "step": 437 }, { "epoch": 0.06, "learning_rate": 1.9936719246937118e-05, "loss": 0.834, "step": 438 }, { "epoch": 0.06, "learning_rate": 1.9936180116261736e-05, "loss": 0.9072, "step": 439 }, { "epoch": 0.07, "learning_rate": 1.9935638706063605e-05, "loss": 0.8828, "step": 440 }, { "epoch": 0.07, "learning_rate": 1.993509501646693e-05, "loss": 0.9473, "step": 441 }, { "epoch": 0.07, "learning_rate": 1.993454904759645e-05, "loss": 0.9697, "step": 442 }, { "epoch": 0.07, "learning_rate": 1.9934000799577414e-05, "loss": 0.8794, "step": 443 }, { "epoch": 0.07, "learning_rate": 1.9933450272535597e-05, "loss": 0.8843, "step": 444 }, { "epoch": 0.07, "learning_rate": 1.99328974665973e-05, "loss": 0.8369, "step": 445 }, { "epoch": 0.07, "learning_rate": 1.993234238188935e-05, "loss": 0.918, "step": 446 }, { "epoch": 0.07, "learning_rate": 1.9931785018539088e-05, "loss": 0.9048, "step": 447 }, { "epoch": 0.07, "learning_rate": 1.9931225376674388e-05, "loss": 0.9136, "step": 448 }, { "epoch": 0.07, "learning_rate": 1.9930663456423633e-05, "loss": 0.8271, "step": 449 }, { "epoch": 0.07, "learning_rate": 1.9930099257915744e-05, "loss": 0.8931, "step": 450 }, { "epoch": 0.07, "learning_rate": 1.9929532781280148e-05, "loss": 0.8652, "step": 451 }, { "epoch": 0.07, "learning_rate": 1.9928964026646816e-05, "loss": 0.938, "step": 452 }, { "epoch": 0.07, "learning_rate": 1.9928392994146228e-05, "loss": 0.8853, "step": 453 }, { "epoch": 0.07, "learning_rate": 1.992781968390938e-05, "loss": 0.856, "step": 454 }, { "epoch": 0.07, "learning_rate": 1.99272440960678e-05, "loss": 0.9199, "step": 455 }, { "epoch": 0.07, "learning_rate": 1.992666623075354e-05, "loss": 0.8906, "step": 456 }, { "epoch": 0.07, "learning_rate": 1.9926086088099176e-05, "loss": 0.9282, "step": 457 }, { "epoch": 0.07, "learning_rate": 1.9925503668237796e-05, "loss": 0.894, "step": 458 }, { "epoch": 0.07, "learning_rate": 1.9924918971303017e-05, "loss": 0.8809, "step": 459 }, { "epoch": 0.07, "learning_rate": 1.9924331997428983e-05, "loss": 0.8638, "step": 460 }, { "epoch": 0.07, "learning_rate": 1.992374274675035e-05, "loss": 0.9131, "step": 461 }, { "epoch": 0.07, "learning_rate": 1.9923151219402308e-05, "loss": 0.9229, "step": 462 }, { "epoch": 0.07, "learning_rate": 1.9922557415520557e-05, "loss": 0.8472, "step": 463 }, { "epoch": 0.07, "learning_rate": 1.9921961335241326e-05, "loss": 0.8804, "step": 464 }, { "epoch": 0.07, "learning_rate": 1.992136297870137e-05, "loss": 0.9624, "step": 465 }, { "epoch": 0.07, "learning_rate": 1.992076234603796e-05, "loss": 0.8574, "step": 466 }, { "epoch": 0.07, "learning_rate": 1.992015943738889e-05, "loss": 0.9253, "step": 467 }, { "epoch": 0.07, "learning_rate": 1.9919554252892473e-05, "loss": 0.9136, "step": 468 }, { "epoch": 0.07, "learning_rate": 1.9918946792687553e-05, "loss": 0.9229, "step": 469 }, { "epoch": 0.07, "learning_rate": 1.9918337056913495e-05, "loss": 0.8999, "step": 470 }, { "epoch": 0.07, "learning_rate": 1.9917725045710176e-05, "loss": 0.8433, "step": 471 }, { "epoch": 0.07, "learning_rate": 1.9917110759218003e-05, "loss": 0.9307, "step": 472 }, { "epoch": 0.07, "learning_rate": 1.9916494197577904e-05, "loss": 0.3005, "step": 473 }, { "epoch": 0.07, "learning_rate": 1.991587536093133e-05, "loss": 0.8569, "step": 474 }, { "epoch": 0.07, "learning_rate": 1.9915254249420245e-05, "loss": 0.9663, "step": 475 }, { "epoch": 0.07, "learning_rate": 1.9914630863187156e-05, "loss": 0.8447, "step": 476 }, { "epoch": 0.07, "learning_rate": 1.9914005202375063e-05, "loss": 0.8584, "step": 477 }, { "epoch": 0.07, "learning_rate": 1.9913377267127515e-05, "loss": 0.8345, "step": 478 }, { "epoch": 0.07, "learning_rate": 1.9912747057588562e-05, "loss": 0.8604, "step": 479 }, { "epoch": 0.07, "learning_rate": 1.991211457390279e-05, "loss": 0.3125, "step": 480 }, { "epoch": 0.07, "learning_rate": 1.9911479816215297e-05, "loss": 0.8813, "step": 481 }, { "epoch": 0.07, "learning_rate": 1.9910842784671706e-05, "loss": 0.873, "step": 482 }, { "epoch": 0.07, "learning_rate": 1.991020347941817e-05, "loss": 0.8721, "step": 483 }, { "epoch": 0.07, "learning_rate": 1.990956190060135e-05, "loss": 0.8901, "step": 484 }, { "epoch": 0.07, "learning_rate": 1.9908918048368435e-05, "loss": 0.8975, "step": 485 }, { "epoch": 0.07, "learning_rate": 1.9908271922867137e-05, "loss": 0.853, "step": 486 }, { "epoch": 0.07, "learning_rate": 1.9907623524245684e-05, "loss": 0.9331, "step": 487 }, { "epoch": 0.07, "learning_rate": 1.990697285265284e-05, "loss": 0.8735, "step": 488 }, { "epoch": 0.07, "learning_rate": 1.9906319908237866e-05, "loss": 0.8828, "step": 489 }, { "epoch": 0.07, "learning_rate": 1.9905664691150567e-05, "loss": 0.8779, "step": 490 }, { "epoch": 0.07, "learning_rate": 1.9905007201541253e-05, "loss": 0.8818, "step": 491 }, { "epoch": 0.07, "learning_rate": 1.9904347439560772e-05, "loss": 0.8696, "step": 492 }, { "epoch": 0.07, "learning_rate": 1.9903685405360478e-05, "loss": 0.8652, "step": 493 }, { "epoch": 0.07, "learning_rate": 1.9903021099092257e-05, "loss": 0.9502, "step": 494 }, { "epoch": 0.07, "learning_rate": 1.9902354520908507e-05, "loss": 0.8569, "step": 495 }, { "epoch": 0.07, "learning_rate": 1.9901685670962155e-05, "loss": 0.9121, "step": 496 }, { "epoch": 0.07, "learning_rate": 1.9901014549406647e-05, "loss": 0.9053, "step": 497 }, { "epoch": 0.07, "learning_rate": 1.9900341156395942e-05, "loss": 0.8018, "step": 498 }, { "epoch": 0.07, "learning_rate": 1.9899665492084536e-05, "loss": 0.8638, "step": 499 }, { "epoch": 0.07, "learning_rate": 1.9898987556627435e-05, "loss": 0.9219, "step": 500 }, { "epoch": 0.07, "learning_rate": 1.989830735018017e-05, "loss": 0.8301, "step": 501 }, { "epoch": 0.07, "learning_rate": 1.9897624872898785e-05, "loss": 0.8428, "step": 502 }, { "epoch": 0.07, "learning_rate": 1.9896940124939862e-05, "loss": 0.8438, "step": 503 }, { "epoch": 0.07, "learning_rate": 1.9896253106460484e-05, "loss": 0.894, "step": 504 }, { "epoch": 0.07, "learning_rate": 1.9895563817618266e-05, "loss": 0.8892, "step": 505 }, { "epoch": 0.07, "learning_rate": 1.9894872258571344e-05, "loss": 0.9521, "step": 506 }, { "epoch": 0.07, "learning_rate": 1.9894178429478376e-05, "loss": 0.8672, "step": 507 }, { "epoch": 0.08, "learning_rate": 1.9893482330498533e-05, "loss": 0.8945, "step": 508 }, { "epoch": 0.08, "learning_rate": 1.9892783961791516e-05, "loss": 0.8926, "step": 509 }, { "epoch": 0.08, "learning_rate": 1.9892083323517535e-05, "loss": 0.8115, "step": 510 }, { "epoch": 0.08, "learning_rate": 1.9891380415837333e-05, "loss": 0.8906, "step": 511 }, { "epoch": 0.08, "learning_rate": 1.9890675238912172e-05, "loss": 0.9106, "step": 512 }, { "epoch": 0.08, "learning_rate": 1.9889967792903822e-05, "loss": 0.8711, "step": 513 }, { "epoch": 0.08, "learning_rate": 1.9889258077974588e-05, "loss": 0.9136, "step": 514 }, { "epoch": 0.08, "learning_rate": 1.9888546094287293e-05, "loss": 0.8271, "step": 515 }, { "epoch": 0.08, "learning_rate": 1.9887831842005276e-05, "loss": 0.8555, "step": 516 }, { "epoch": 0.08, "learning_rate": 1.9887115321292393e-05, "loss": 0.9297, "step": 517 }, { "epoch": 0.08, "learning_rate": 1.9886396532313033e-05, "loss": 0.9194, "step": 518 }, { "epoch": 0.08, "learning_rate": 1.9885675475232094e-05, "loss": 0.8867, "step": 519 }, { "epoch": 0.08, "learning_rate": 1.9884952150214997e-05, "loss": 0.9463, "step": 520 }, { "epoch": 0.08, "learning_rate": 1.9884226557427686e-05, "loss": 0.8843, "step": 521 }, { "epoch": 0.08, "learning_rate": 1.9883498697036624e-05, "loss": 0.7993, "step": 522 }, { "epoch": 0.08, "learning_rate": 1.9882768569208798e-05, "loss": 0.9111, "step": 523 }, { "epoch": 0.08, "learning_rate": 1.9882036174111707e-05, "loss": 0.3484, "step": 524 }, { "epoch": 0.08, "learning_rate": 1.9881301511913372e-05, "loss": 0.8433, "step": 525 }, { "epoch": 0.08, "learning_rate": 1.9880564582782346e-05, "loss": 0.8638, "step": 526 }, { "epoch": 0.08, "learning_rate": 1.987982538688768e-05, "loss": 0.8477, "step": 527 }, { "epoch": 0.08, "learning_rate": 1.987908392439897e-05, "loss": 0.9292, "step": 528 }, { "epoch": 0.08, "learning_rate": 1.987834019548631e-05, "loss": 0.9355, "step": 529 }, { "epoch": 0.08, "learning_rate": 1.987759420032033e-05, "loss": 0.8213, "step": 530 }, { "epoch": 0.08, "learning_rate": 1.9876845939072166e-05, "loss": 0.8398, "step": 531 }, { "epoch": 0.08, "learning_rate": 1.9876095411913492e-05, "loss": 0.9229, "step": 532 }, { "epoch": 0.08, "learning_rate": 1.9875342619016483e-05, "loss": 0.9331, "step": 533 }, { "epoch": 0.08, "learning_rate": 1.9874587560553844e-05, "loss": 0.8818, "step": 534 }, { "epoch": 0.08, "learning_rate": 1.9873830236698798e-05, "loss": 0.8828, "step": 535 }, { "epoch": 0.08, "learning_rate": 1.987307064762509e-05, "loss": 0.9038, "step": 536 }, { "epoch": 0.08, "learning_rate": 1.9872308793506982e-05, "loss": 0.8535, "step": 537 }, { "epoch": 0.08, "learning_rate": 1.9871544674519246e-05, "loss": 0.9277, "step": 538 }, { "epoch": 0.08, "learning_rate": 1.9870778290837198e-05, "loss": 0.9097, "step": 539 }, { "epoch": 0.08, "learning_rate": 1.9870009642636652e-05, "loss": 0.811, "step": 540 }, { "epoch": 0.08, "learning_rate": 1.986923873009395e-05, "loss": 0.8872, "step": 541 }, { "epoch": 0.08, "learning_rate": 1.9868465553385946e-05, "loss": 0.8628, "step": 542 }, { "epoch": 0.08, "learning_rate": 1.986769011269003e-05, "loss": 0.876, "step": 543 }, { "epoch": 0.08, "learning_rate": 1.9866912408184094e-05, "loss": 0.9429, "step": 544 }, { "epoch": 0.08, "learning_rate": 1.9866132440046556e-05, "loss": 0.8887, "step": 545 }, { "epoch": 0.08, "learning_rate": 1.9865350208456354e-05, "loss": 0.8975, "step": 546 }, { "epoch": 0.08, "learning_rate": 1.9864565713592946e-05, "loss": 0.874, "step": 547 }, { "epoch": 0.08, "learning_rate": 1.9863778955636308e-05, "loss": 0.9092, "step": 548 }, { "epoch": 0.08, "learning_rate": 1.9862989934766935e-05, "loss": 0.8818, "step": 549 }, { "epoch": 0.08, "learning_rate": 1.986219865116584e-05, "loss": 0.8735, "step": 550 }, { "epoch": 0.08, "learning_rate": 1.9861405105014558e-05, "loss": 0.8198, "step": 551 }, { "epoch": 0.08, "learning_rate": 1.986060929649514e-05, "loss": 0.8198, "step": 552 }, { "epoch": 0.08, "learning_rate": 1.9859811225790164e-05, "loss": 0.8901, "step": 553 }, { "epoch": 0.08, "learning_rate": 1.9859010893082708e-05, "loss": 0.9009, "step": 554 }, { "epoch": 0.08, "learning_rate": 1.9858208298556394e-05, "loss": 0.8735, "step": 555 }, { "epoch": 0.08, "learning_rate": 1.9857403442395343e-05, "loss": 0.8892, "step": 556 }, { "epoch": 0.08, "learning_rate": 1.98565963247842e-05, "loss": 0.8481, "step": 557 }, { "epoch": 0.08, "learning_rate": 1.9855786945908142e-05, "loss": 0.8735, "step": 558 }, { "epoch": 0.08, "learning_rate": 1.9854975305952844e-05, "loss": 0.7729, "step": 559 }, { "epoch": 0.08, "learning_rate": 1.9854161405104512e-05, "loss": 0.8389, "step": 560 }, { "epoch": 0.08, "learning_rate": 1.9853345243549865e-05, "loss": 0.9175, "step": 561 }, { "epoch": 0.08, "learning_rate": 1.9852526821476155e-05, "loss": 0.9155, "step": 562 }, { "epoch": 0.08, "learning_rate": 1.985170613907113e-05, "loss": 0.8438, "step": 563 }, { "epoch": 0.08, "learning_rate": 1.9850883196523072e-05, "loss": 0.8301, "step": 564 }, { "epoch": 0.08, "learning_rate": 1.9850057994020777e-05, "loss": 0.9258, "step": 565 }, { "epoch": 0.08, "learning_rate": 1.984923053175356e-05, "loss": 0.916, "step": 566 }, { "epoch": 0.08, "learning_rate": 1.9848400809911255e-05, "loss": 0.8818, "step": 567 }, { "epoch": 0.08, "learning_rate": 1.9847568828684217e-05, "loss": 0.3037, "step": 568 }, { "epoch": 0.08, "learning_rate": 1.9846734588263312e-05, "loss": 0.8877, "step": 569 }, { "epoch": 0.08, "learning_rate": 1.9845898088839926e-05, "loss": 0.3188, "step": 570 }, { "epoch": 0.08, "learning_rate": 1.9845059330605974e-05, "loss": 0.8843, "step": 571 }, { "epoch": 0.08, "learning_rate": 1.984421831375387e-05, "loss": 0.8281, "step": 572 }, { "epoch": 0.08, "learning_rate": 1.9843375038476565e-05, "loss": 0.8164, "step": 573 }, { "epoch": 0.08, "learning_rate": 1.9842529504967522e-05, "loss": 0.8784, "step": 574 }, { "epoch": 0.09, "learning_rate": 1.984168171342071e-05, "loss": 0.9058, "step": 575 }, { "epoch": 0.09, "learning_rate": 1.984083166403064e-05, "loss": 0.8535, "step": 576 }, { "epoch": 0.09, "learning_rate": 1.9839979356992318e-05, "loss": 0.8252, "step": 577 }, { "epoch": 0.09, "learning_rate": 1.9839124792501275e-05, "loss": 0.8999, "step": 578 }, { "epoch": 0.09, "learning_rate": 1.983826797075357e-05, "loss": 0.8921, "step": 579 }, { "epoch": 0.09, "learning_rate": 1.9837408891945768e-05, "loss": 0.8921, "step": 580 }, { "epoch": 0.09, "learning_rate": 1.9836547556274954e-05, "loss": 0.8867, "step": 581 }, { "epoch": 0.09, "learning_rate": 1.9835683963938734e-05, "loss": 0.8662, "step": 582 }, { "epoch": 0.09, "learning_rate": 1.9834818115135235e-05, "loss": 0.894, "step": 583 }, { "epoch": 0.09, "learning_rate": 1.9833950010063087e-05, "loss": 0.8921, "step": 584 }, { "epoch": 0.09, "learning_rate": 1.983307964892146e-05, "loss": 0.8306, "step": 585 }, { "epoch": 0.09, "learning_rate": 1.9832207031910017e-05, "loss": 0.8198, "step": 586 }, { "epoch": 0.09, "learning_rate": 1.983133215922896e-05, "loss": 0.8379, "step": 587 }, { "epoch": 0.09, "learning_rate": 1.9830455031078994e-05, "loss": 0.8667, "step": 588 }, { "epoch": 0.09, "learning_rate": 1.9829575647661343e-05, "loss": 0.9136, "step": 589 }, { "epoch": 0.09, "learning_rate": 1.9828694009177764e-05, "loss": 0.9121, "step": 590 }, { "epoch": 0.09, "learning_rate": 1.982781011583051e-05, "loss": 0.8794, "step": 591 }, { "epoch": 0.09, "learning_rate": 1.982692396782236e-05, "loss": 0.894, "step": 592 }, { "epoch": 0.09, "learning_rate": 1.9826035565356615e-05, "loss": 0.8396, "step": 593 }, { "epoch": 0.09, "learning_rate": 1.982514490863709e-05, "loss": 0.8789, "step": 594 }, { "epoch": 0.09, "learning_rate": 1.982425199786811e-05, "loss": 0.8662, "step": 595 }, { "epoch": 0.09, "learning_rate": 1.9823356833254534e-05, "loss": 0.8501, "step": 596 }, { "epoch": 0.09, "learning_rate": 1.9822459415001712e-05, "loss": 0.8521, "step": 597 }, { "epoch": 0.09, "learning_rate": 1.9821559743315543e-05, "loss": 0.9131, "step": 598 }, { "epoch": 0.09, "learning_rate": 1.9820657818402414e-05, "loss": 0.8848, "step": 599 }, { "epoch": 0.09, "learning_rate": 1.981975364046925e-05, "loss": 0.8608, "step": 600 }, { "epoch": 0.09, "learning_rate": 1.9818847209723477e-05, "loss": 0.9194, "step": 601 }, { "epoch": 0.09, "learning_rate": 1.981793852637305e-05, "loss": 0.791, "step": 602 }, { "epoch": 0.09, "learning_rate": 1.9817027590626436e-05, "loss": 0.7981, "step": 603 }, { "epoch": 0.09, "learning_rate": 1.9816114402692612e-05, "loss": 0.8911, "step": 604 }, { "epoch": 0.09, "learning_rate": 1.9815198962781088e-05, "loss": 0.8965, "step": 605 }, { "epoch": 0.09, "learning_rate": 1.9814281271101876e-05, "loss": 0.8936, "step": 606 }, { "epoch": 0.09, "learning_rate": 1.9813361327865507e-05, "loss": 0.9097, "step": 607 }, { "epoch": 0.09, "learning_rate": 1.9812439133283033e-05, "loss": 0.8638, "step": 608 }, { "epoch": 0.09, "learning_rate": 1.981151468756603e-05, "loss": 0.8765, "step": 609 }, { "epoch": 0.09, "learning_rate": 1.9810587990926563e-05, "loss": 0.876, "step": 610 }, { "epoch": 0.09, "learning_rate": 1.9809659043577242e-05, "loss": 0.8315, "step": 611 }, { "epoch": 0.09, "learning_rate": 1.9808727845731182e-05, "loss": 0.8418, "step": 612 }, { "epoch": 0.09, "learning_rate": 1.980779439760202e-05, "loss": 0.8745, "step": 613 }, { "epoch": 0.09, "learning_rate": 1.9806858699403894e-05, "loss": 0.8584, "step": 614 }, { "epoch": 0.09, "learning_rate": 1.9805920751351476e-05, "loss": 0.9448, "step": 615 }, { "epoch": 0.09, "learning_rate": 1.980498055365994e-05, "loss": 0.8896, "step": 616 }, { "epoch": 0.09, "learning_rate": 1.980403810654499e-05, "loss": 0.8838, "step": 617 }, { "epoch": 0.09, "learning_rate": 1.9803093410222838e-05, "loss": 0.8501, "step": 618 }, { "epoch": 0.09, "learning_rate": 1.980214646491021e-05, "loss": 0.8457, "step": 619 }, { "epoch": 0.09, "learning_rate": 1.9801197270824346e-05, "loss": 0.9307, "step": 620 }, { "epoch": 0.09, "learning_rate": 1.9800245828183015e-05, "loss": 0.8799, "step": 621 }, { "epoch": 0.09, "learning_rate": 1.9799292137204494e-05, "loss": 0.915, "step": 622 }, { "epoch": 0.09, "learning_rate": 1.9798336198107567e-05, "loss": 0.9053, "step": 623 }, { "epoch": 0.09, "learning_rate": 1.979737801111155e-05, "loss": 0.896, "step": 624 }, { "epoch": 0.09, "learning_rate": 1.9796417576436264e-05, "loss": 0.8853, "step": 625 }, { "epoch": 0.09, "learning_rate": 1.979545489430205e-05, "loss": 0.8477, "step": 626 }, { "epoch": 0.09, "learning_rate": 1.9794489964929757e-05, "loss": 0.9175, "step": 627 }, { "epoch": 0.09, "learning_rate": 1.979352278854076e-05, "loss": 0.8984, "step": 628 }, { "epoch": 0.09, "learning_rate": 1.979255336535695e-05, "loss": 0.9541, "step": 629 }, { "epoch": 0.09, "learning_rate": 1.9791581695600722e-05, "loss": 0.9165, "step": 630 }, { "epoch": 0.09, "learning_rate": 1.979060777949499e-05, "loss": 0.9688, "step": 631 }, { "epoch": 0.09, "learning_rate": 1.9789631617263198e-05, "loss": 0.9297, "step": 632 }, { "epoch": 0.09, "learning_rate": 1.9788653209129284e-05, "loss": 0.8818, "step": 633 }, { "epoch": 0.09, "learning_rate": 1.9787672555317714e-05, "loss": 0.8955, "step": 634 }, { "epoch": 0.09, "learning_rate": 1.9786689656053467e-05, "loss": 0.8486, "step": 635 }, { "epoch": 0.09, "learning_rate": 1.9785704511562032e-05, "loss": 0.8447, "step": 636 }, { "epoch": 0.09, "learning_rate": 1.9784717122069425e-05, "loss": 0.8618, "step": 637 }, { "epoch": 0.09, "learning_rate": 1.978372748780216e-05, "loss": 0.9434, "step": 638 }, { "epoch": 0.09, "learning_rate": 1.978273560898728e-05, "loss": 0.8662, "step": 639 }, { "epoch": 0.09, "learning_rate": 1.9781741485852338e-05, "loss": 0.9272, "step": 640 }, { "epoch": 0.09, "learning_rate": 1.9780745118625407e-05, "loss": 0.8687, "step": 641 }, { "epoch": 0.09, "learning_rate": 1.977974650753506e-05, "loss": 0.8857, "step": 642 }, { "epoch": 0.1, "learning_rate": 1.9778745652810404e-05, "loss": 0.8535, "step": 643 }, { "epoch": 0.1, "learning_rate": 1.9777742554681044e-05, "loss": 0.8745, "step": 644 }, { "epoch": 0.1, "learning_rate": 1.9776737213377114e-05, "loss": 0.8745, "step": 645 }, { "epoch": 0.1, "learning_rate": 1.9775729629129253e-05, "loss": 0.3535, "step": 646 }, { "epoch": 0.1, "learning_rate": 1.9774719802168615e-05, "loss": 0.8638, "step": 647 }, { "epoch": 0.1, "learning_rate": 1.9773707732726873e-05, "loss": 0.8047, "step": 648 }, { "epoch": 0.1, "learning_rate": 1.9772693421036214e-05, "loss": 0.8906, "step": 649 }, { "epoch": 0.1, "learning_rate": 1.9771676867329334e-05, "loss": 0.9399, "step": 650 }, { "epoch": 0.1, "learning_rate": 1.9770658071839448e-05, "loss": 0.8291, "step": 651 }, { "epoch": 0.1, "learning_rate": 1.9769637034800287e-05, "loss": 0.8838, "step": 652 }, { "epoch": 0.1, "learning_rate": 1.9768613756446092e-05, "loss": 0.9004, "step": 653 }, { "epoch": 0.1, "learning_rate": 1.976758823701162e-05, "loss": 0.9263, "step": 654 }, { "epoch": 0.1, "learning_rate": 1.976656047673214e-05, "loss": 0.9531, "step": 655 }, { "epoch": 0.1, "learning_rate": 1.9765530475843443e-05, "loss": 0.8569, "step": 656 }, { "epoch": 0.1, "learning_rate": 1.9764498234581822e-05, "loss": 0.8447, "step": 657 }, { "epoch": 0.1, "learning_rate": 1.9763463753184092e-05, "loss": 0.8799, "step": 658 }, { "epoch": 0.1, "learning_rate": 1.9762427031887578e-05, "loss": 0.9268, "step": 659 }, { "epoch": 0.1, "learning_rate": 1.9761388070930126e-05, "loss": 0.9248, "step": 660 }, { "epoch": 0.1, "learning_rate": 1.9760346870550086e-05, "loss": 0.8335, "step": 661 }, { "epoch": 0.1, "learning_rate": 1.9759303430986332e-05, "loss": 0.8936, "step": 662 }, { "epoch": 0.1, "learning_rate": 1.975825775247824e-05, "loss": 0.8442, "step": 663 }, { "epoch": 0.1, "learning_rate": 1.9757209835265704e-05, "loss": 0.9058, "step": 664 }, { "epoch": 0.1, "learning_rate": 1.9756159679589143e-05, "loss": 0.9673, "step": 665 }, { "epoch": 0.1, "learning_rate": 1.975510728568947e-05, "loss": 0.9224, "step": 666 }, { "epoch": 0.1, "learning_rate": 1.975405265380813e-05, "loss": 0.9072, "step": 667 }, { "epoch": 0.1, "learning_rate": 1.975299578418707e-05, "loss": 0.8853, "step": 668 }, { "epoch": 0.1, "learning_rate": 1.9751936677068747e-05, "loss": 0.8892, "step": 669 }, { "epoch": 0.1, "learning_rate": 1.9750875332696143e-05, "loss": 0.8813, "step": 670 }, { "epoch": 0.1, "learning_rate": 1.974981175131275e-05, "loss": 0.8447, "step": 671 }, { "epoch": 0.1, "learning_rate": 1.974874593316257e-05, "loss": 0.8853, "step": 672 }, { "epoch": 0.1, "learning_rate": 1.9747677878490116e-05, "loss": 0.9526, "step": 673 }, { "epoch": 0.1, "learning_rate": 1.9746607587540417e-05, "loss": 0.9614, "step": 674 }, { "epoch": 0.1, "learning_rate": 1.9745535060559015e-05, "loss": 0.9111, "step": 675 }, { "epoch": 0.1, "learning_rate": 1.974446029779197e-05, "loss": 0.8994, "step": 676 }, { "epoch": 0.1, "learning_rate": 1.974338329948585e-05, "loss": 0.9272, "step": 677 }, { "epoch": 0.1, "learning_rate": 1.9742304065887732e-05, "loss": 0.9307, "step": 678 }, { "epoch": 0.1, "learning_rate": 1.974122259724521e-05, "loss": 0.8735, "step": 679 }, { "epoch": 0.1, "learning_rate": 1.9740138893806393e-05, "loss": 0.9033, "step": 680 }, { "epoch": 0.1, "learning_rate": 1.97390529558199e-05, "loss": 0.9072, "step": 681 }, { "epoch": 0.1, "learning_rate": 1.9737964783534863e-05, "loss": 0.8809, "step": 682 }, { "epoch": 0.1, "learning_rate": 1.973687437720093e-05, "loss": 0.873, "step": 683 }, { "epoch": 0.1, "learning_rate": 1.973578173706825e-05, "loss": 0.9004, "step": 684 }, { "epoch": 0.1, "learning_rate": 1.9734686863387494e-05, "loss": 0.9424, "step": 685 }, { "epoch": 0.1, "learning_rate": 1.973358975640985e-05, "loss": 0.8745, "step": 686 }, { "epoch": 0.1, "learning_rate": 1.973249041638701e-05, "loss": 0.9136, "step": 687 }, { "epoch": 0.1, "learning_rate": 1.973138884357118e-05, "loss": 0.8682, "step": 688 }, { "epoch": 0.1, "learning_rate": 1.973028503821508e-05, "loss": 0.9336, "step": 689 }, { "epoch": 0.1, "learning_rate": 1.9729179000571937e-05, "loss": 0.8252, "step": 690 }, { "epoch": 0.1, "learning_rate": 1.9728070730895497e-05, "loss": 0.8691, "step": 691 }, { "epoch": 0.1, "learning_rate": 1.9726960229440016e-05, "loss": 0.8867, "step": 692 }, { "epoch": 0.1, "learning_rate": 1.9725847496460256e-05, "loss": 0.9126, "step": 693 }, { "epoch": 0.1, "learning_rate": 1.9724732532211508e-05, "loss": 0.3218, "step": 694 }, { "epoch": 0.1, "learning_rate": 1.9723615336949554e-05, "loss": 0.8496, "step": 695 }, { "epoch": 0.1, "learning_rate": 1.9722495910930693e-05, "loss": 0.8979, "step": 696 }, { "epoch": 0.1, "learning_rate": 1.9721374254411753e-05, "loss": 0.9097, "step": 697 }, { "epoch": 0.1, "learning_rate": 1.9720250367650048e-05, "loss": 0.9102, "step": 698 }, { "epoch": 0.1, "learning_rate": 1.9719124250903422e-05, "loss": 0.9146, "step": 699 }, { "epoch": 0.1, "learning_rate": 1.9717995904430224e-05, "loss": 0.9004, "step": 700 }, { "epoch": 0.1, "learning_rate": 1.9716865328489313e-05, "loss": 0.8208, "step": 701 }, { "epoch": 0.1, "learning_rate": 1.9715732523340065e-05, "loss": 0.8667, "step": 702 }, { "epoch": 0.1, "learning_rate": 1.9714597489242366e-05, "loss": 0.9048, "step": 703 }, { "epoch": 0.1, "learning_rate": 1.9713460226456604e-05, "loss": 0.9004, "step": 704 }, { "epoch": 0.1, "learning_rate": 1.9712320735243686e-05, "loss": 0.3115, "step": 705 }, { "epoch": 0.1, "learning_rate": 1.971117901586504e-05, "loss": 0.9116, "step": 706 }, { "epoch": 0.1, "learning_rate": 1.9710035068582586e-05, "loss": 0.8735, "step": 707 }, { "epoch": 0.1, "learning_rate": 1.9708888893658768e-05, "loss": 0.8989, "step": 708 }, { "epoch": 0.1, "learning_rate": 1.9707740491356535e-05, "loss": 0.854, "step": 709 }, { "epoch": 0.1, "learning_rate": 1.970658986193935e-05, "loss": 0.8721, "step": 710 }, { "epoch": 0.11, "learning_rate": 1.9705437005671188e-05, "loss": 0.8394, "step": 711 }, { "epoch": 0.11, "learning_rate": 1.9704281922816533e-05, "loss": 0.8735, "step": 712 }, { "epoch": 0.11, "learning_rate": 1.9703124613640378e-05, "loss": 0.939, "step": 713 }, { "epoch": 0.11, "learning_rate": 1.970196507840823e-05, "loss": 0.8755, "step": 714 }, { "epoch": 0.11, "learning_rate": 1.9700803317386106e-05, "loss": 0.9131, "step": 715 }, { "epoch": 0.11, "learning_rate": 1.9699639330840532e-05, "loss": 0.9253, "step": 716 }, { "epoch": 0.11, "learning_rate": 1.9698473119038545e-05, "loss": 0.9028, "step": 717 }, { "epoch": 0.11, "learning_rate": 1.9697304682247695e-05, "loss": 0.9316, "step": 718 }, { "epoch": 0.11, "learning_rate": 1.9696134020736036e-05, "loss": 0.9102, "step": 719 }, { "epoch": 0.11, "learning_rate": 1.9694961134772148e-05, "loss": 0.8848, "step": 720 }, { "epoch": 0.11, "learning_rate": 1.9693786024625097e-05, "loss": 0.8853, "step": 721 }, { "epoch": 0.11, "learning_rate": 1.969260869056448e-05, "loss": 0.9312, "step": 722 }, { "epoch": 0.11, "learning_rate": 1.9691429132860396e-05, "loss": 0.9194, "step": 723 }, { "epoch": 0.11, "learning_rate": 1.969024735178345e-05, "loss": 0.9443, "step": 724 }, { "epoch": 0.11, "learning_rate": 1.968906334760477e-05, "loss": 0.8525, "step": 725 }, { "epoch": 0.11, "learning_rate": 1.9687877120595983e-05, "loss": 0.8745, "step": 726 }, { "epoch": 0.11, "learning_rate": 1.9686688671029224e-05, "loss": 0.9351, "step": 727 }, { "epoch": 0.11, "learning_rate": 1.968549799917715e-05, "loss": 0.918, "step": 728 }, { "epoch": 0.11, "learning_rate": 1.968430510531291e-05, "loss": 0.9229, "step": 729 }, { "epoch": 0.11, "learning_rate": 1.968310998971019e-05, "loss": 0.8677, "step": 730 }, { "epoch": 0.11, "learning_rate": 1.9681912652643156e-05, "loss": 0.8706, "step": 731 }, { "epoch": 0.11, "learning_rate": 1.9680713094386503e-05, "loss": 0.8047, "step": 732 }, { "epoch": 0.11, "learning_rate": 1.9679511315215424e-05, "loss": 0.8257, "step": 733 }, { "epoch": 0.11, "learning_rate": 1.967830731540563e-05, "loss": 0.325, "step": 734 }, { "epoch": 0.11, "learning_rate": 1.9677101095233342e-05, "loss": 0.834, "step": 735 }, { "epoch": 0.11, "learning_rate": 1.9675892654975278e-05, "loss": 0.9321, "step": 736 }, { "epoch": 0.11, "learning_rate": 1.9674681994908682e-05, "loss": 0.8574, "step": 737 }, { "epoch": 0.11, "learning_rate": 1.9673469115311297e-05, "loss": 0.8667, "step": 738 }, { "epoch": 0.11, "learning_rate": 1.9672254016461373e-05, "loss": 0.8521, "step": 739 }, { "epoch": 0.11, "learning_rate": 1.967103669863768e-05, "loss": 0.8657, "step": 740 }, { "epoch": 0.11, "learning_rate": 1.966981716211949e-05, "loss": 0.8252, "step": 741 }, { "epoch": 0.11, "learning_rate": 1.966859540718658e-05, "loss": 0.9727, "step": 742 }, { "epoch": 0.11, "learning_rate": 1.9667371434119244e-05, "loss": 0.9014, "step": 743 }, { "epoch": 0.11, "learning_rate": 1.966614524319828e-05, "loss": 0.8193, "step": 744 }, { "epoch": 0.11, "learning_rate": 1.9664916834704995e-05, "loss": 0.8501, "step": 745 }, { "epoch": 0.11, "learning_rate": 1.966368620892121e-05, "loss": 0.9023, "step": 746 }, { "epoch": 0.11, "learning_rate": 1.966245336612925e-05, "loss": 0.8926, "step": 747 }, { "epoch": 0.11, "learning_rate": 1.9661218306611947e-05, "loss": 0.8398, "step": 748 }, { "epoch": 0.11, "learning_rate": 1.9659981030652648e-05, "loss": 0.9136, "step": 749 }, { "epoch": 0.11, "learning_rate": 1.96587415385352e-05, "loss": 0.8359, "step": 750 }, { "epoch": 0.11, "learning_rate": 1.9657499830543964e-05, "loss": 0.9136, "step": 751 }, { "epoch": 0.11, "learning_rate": 1.9656255906963812e-05, "loss": 0.8589, "step": 752 }, { "epoch": 0.11, "learning_rate": 1.965500976808011e-05, "loss": 0.8989, "step": 753 }, { "epoch": 0.11, "learning_rate": 1.9653761414178753e-05, "loss": 0.8618, "step": 754 }, { "epoch": 0.11, "learning_rate": 1.9652510845546133e-05, "loss": 0.3169, "step": 755 }, { "epoch": 0.11, "learning_rate": 1.965125806246915e-05, "loss": 0.8799, "step": 756 }, { "epoch": 0.11, "learning_rate": 1.9650003065235206e-05, "loss": 0.9062, "step": 757 }, { "epoch": 0.11, "learning_rate": 1.9648745854132225e-05, "loss": 0.8506, "step": 758 }, { "epoch": 0.11, "learning_rate": 1.9647486429448635e-05, "loss": 0.8823, "step": 759 }, { "epoch": 0.11, "learning_rate": 1.964622479147336e-05, "loss": 0.9185, "step": 760 }, { "epoch": 0.11, "learning_rate": 1.9644960940495846e-05, "loss": 0.8311, "step": 761 }, { "epoch": 0.11, "learning_rate": 1.964369487680604e-05, "loss": 0.9004, "step": 762 }, { "epoch": 0.11, "learning_rate": 1.9642426600694395e-05, "loss": 0.8579, "step": 763 }, { "epoch": 0.11, "learning_rate": 1.964115611245188e-05, "loss": 0.8296, "step": 764 }, { "epoch": 0.11, "learning_rate": 1.9639883412369962e-05, "loss": 0.8755, "step": 765 }, { "epoch": 0.11, "learning_rate": 1.963860850074062e-05, "loss": 0.8257, "step": 766 }, { "epoch": 0.11, "learning_rate": 1.9637331377856337e-05, "loss": 0.7979, "step": 767 }, { "epoch": 0.11, "learning_rate": 1.9636052044010113e-05, "loss": 0.8428, "step": 768 }, { "epoch": 0.11, "learning_rate": 1.9634770499495443e-05, "loss": 0.873, "step": 769 }, { "epoch": 0.11, "learning_rate": 1.963348674460633e-05, "loss": 0.9165, "step": 770 }, { "epoch": 0.11, "learning_rate": 1.9632200779637302e-05, "loss": 0.8799, "step": 771 }, { "epoch": 0.11, "learning_rate": 1.9630912604883363e-05, "loss": 0.8999, "step": 772 }, { "epoch": 0.11, "learning_rate": 1.9629622220640058e-05, "loss": 0.8525, "step": 773 }, { "epoch": 0.11, "learning_rate": 1.962832962720341e-05, "loss": 0.873, "step": 774 }, { "epoch": 0.11, "learning_rate": 1.9627034824869968e-05, "loss": 0.8491, "step": 775 }, { "epoch": 0.11, "learning_rate": 1.9625737813936777e-05, "loss": 0.9199, "step": 776 }, { "epoch": 0.11, "learning_rate": 1.9624438594701397e-05, "loss": 0.8726, "step": 777 }, { "epoch": 0.12, "learning_rate": 1.9623137167461886e-05, "loss": 0.8354, "step": 778 }, { "epoch": 0.12, "learning_rate": 1.9621833532516813e-05, "loss": 0.853, "step": 779 }, { "epoch": 0.12, "learning_rate": 1.9620527690165258e-05, "loss": 0.8896, "step": 780 }, { "epoch": 0.12, "learning_rate": 1.9619219640706796e-05, "loss": 0.9053, "step": 781 }, { "epoch": 0.12, "learning_rate": 1.961790938444152e-05, "loss": 0.9043, "step": 782 }, { "epoch": 0.12, "learning_rate": 1.9616596921670025e-05, "loss": 0.8271, "step": 783 }, { "epoch": 0.12, "learning_rate": 1.9615282252693407e-05, "loss": 0.9106, "step": 784 }, { "epoch": 0.12, "learning_rate": 1.9613965377813273e-05, "loss": 0.8628, "step": 785 }, { "epoch": 0.12, "learning_rate": 1.9612646297331738e-05, "loss": 0.8059, "step": 786 }, { "epoch": 0.12, "learning_rate": 1.961132501155142e-05, "loss": 0.8628, "step": 787 }, { "epoch": 0.12, "learning_rate": 1.961000152077545e-05, "loss": 0.9146, "step": 788 }, { "epoch": 0.12, "learning_rate": 1.9608675825307442e-05, "loss": 0.8911, "step": 789 }, { "epoch": 0.12, "learning_rate": 1.960734792545155e-05, "loss": 0.8618, "step": 790 }, { "epoch": 0.12, "learning_rate": 1.9606017821512405e-05, "loss": 0.9229, "step": 791 }, { "epoch": 0.12, "learning_rate": 1.9604685513795157e-05, "loss": 0.8516, "step": 792 }, { "epoch": 0.12, "learning_rate": 1.9603351002605465e-05, "loss": 0.8643, "step": 793 }, { "epoch": 0.12, "learning_rate": 1.960201428824948e-05, "loss": 0.8599, "step": 794 }, { "epoch": 0.12, "learning_rate": 1.960067537103387e-05, "loss": 0.9072, "step": 795 }, { "epoch": 0.12, "learning_rate": 1.9599334251265805e-05, "loss": 0.8799, "step": 796 }, { "epoch": 0.12, "learning_rate": 1.959799092925296e-05, "loss": 0.9243, "step": 797 }, { "epoch": 0.12, "learning_rate": 1.9596645405303508e-05, "loss": 0.3169, "step": 798 }, { "epoch": 0.12, "learning_rate": 1.959529767972615e-05, "loss": 0.9116, "step": 799 }, { "epoch": 0.12, "learning_rate": 1.9593947752830057e-05, "loss": 0.9111, "step": 800 }, { "epoch": 0.12, "learning_rate": 1.959259562492494e-05, "loss": 0.8535, "step": 801 }, { "epoch": 0.12, "learning_rate": 1.959124129632099e-05, "loss": 0.8242, "step": 802 }, { "epoch": 0.12, "learning_rate": 1.958988476732892e-05, "loss": 0.916, "step": 803 }, { "epoch": 0.12, "learning_rate": 1.958852603825993e-05, "loss": 0.9634, "step": 804 }, { "epoch": 0.12, "learning_rate": 1.9587165109425746e-05, "loss": 0.8721, "step": 805 }, { "epoch": 0.12, "learning_rate": 1.9585801981138575e-05, "loss": 0.8682, "step": 806 }, { "epoch": 0.12, "learning_rate": 1.9584436653711152e-05, "loss": 0.8423, "step": 807 }, { "epoch": 0.12, "learning_rate": 1.95830691274567e-05, "loss": 0.9048, "step": 808 }, { "epoch": 0.12, "learning_rate": 1.9581699402688956e-05, "loss": 0.8506, "step": 809 }, { "epoch": 0.12, "learning_rate": 1.958032747972215e-05, "loss": 0.856, "step": 810 }, { "epoch": 0.12, "learning_rate": 1.9578953358871032e-05, "loss": 0.8301, "step": 811 }, { "epoch": 0.12, "learning_rate": 1.9577577040450842e-05, "loss": 0.7944, "step": 812 }, { "epoch": 0.12, "learning_rate": 1.9576198524777333e-05, "loss": 0.8545, "step": 813 }, { "epoch": 0.12, "learning_rate": 1.9574817812166758e-05, "loss": 0.8662, "step": 814 }, { "epoch": 0.12, "learning_rate": 1.9573434902935876e-05, "loss": 0.8779, "step": 815 }, { "epoch": 0.12, "learning_rate": 1.9572049797401945e-05, "loss": 0.8657, "step": 816 }, { "epoch": 0.12, "learning_rate": 1.957066249588274e-05, "loss": 0.9087, "step": 817 }, { "epoch": 0.12, "learning_rate": 1.9569272998696517e-05, "loss": 0.8779, "step": 818 }, { "epoch": 0.12, "learning_rate": 1.9567881306162065e-05, "loss": 0.8892, "step": 819 }, { "epoch": 0.12, "learning_rate": 1.9566487418598652e-05, "loss": 0.8179, "step": 820 }, { "epoch": 0.12, "learning_rate": 1.9565091336326055e-05, "loss": 0.8354, "step": 821 }, { "epoch": 0.12, "learning_rate": 1.9563693059664572e-05, "loss": 0.8818, "step": 822 }, { "epoch": 0.12, "learning_rate": 1.9562292588934973e-05, "loss": 0.9204, "step": 823 }, { "epoch": 0.12, "learning_rate": 1.9560889924458566e-05, "loss": 0.7793, "step": 824 }, { "epoch": 0.12, "learning_rate": 1.9559485066557132e-05, "loss": 0.7905, "step": 825 }, { "epoch": 0.12, "learning_rate": 1.9558078015552973e-05, "loss": 0.8774, "step": 826 }, { "epoch": 0.12, "learning_rate": 1.9556668771768894e-05, "loss": 0.8623, "step": 827 }, { "epoch": 0.12, "learning_rate": 1.9555257335528193e-05, "loss": 0.3352, "step": 828 }, { "epoch": 0.12, "learning_rate": 1.9553843707154682e-05, "loss": 0.874, "step": 829 }, { "epoch": 0.12, "learning_rate": 1.9552427886972665e-05, "loss": 0.8921, "step": 830 }, { "epoch": 0.12, "learning_rate": 1.9551009875306955e-05, "loss": 0.8535, "step": 831 }, { "epoch": 0.12, "learning_rate": 1.954958967248287e-05, "loss": 0.853, "step": 832 }, { "epoch": 0.12, "learning_rate": 1.9548167278826224e-05, "loss": 0.873, "step": 833 }, { "epoch": 0.12, "learning_rate": 1.9546742694663343e-05, "loss": 0.8911, "step": 834 }, { "epoch": 0.12, "learning_rate": 1.9545315920321045e-05, "loss": 0.8223, "step": 835 }, { "epoch": 0.12, "learning_rate": 1.954388695612666e-05, "loss": 0.8408, "step": 836 }, { "epoch": 0.12, "learning_rate": 1.954245580240801e-05, "loss": 0.8628, "step": 837 }, { "epoch": 0.12, "learning_rate": 1.954102245949343e-05, "loss": 0.7507, "step": 838 }, { "epoch": 0.12, "learning_rate": 1.953958692771175e-05, "loss": 0.9194, "step": 839 }, { "epoch": 0.12, "learning_rate": 1.9538149207392306e-05, "loss": 0.8525, "step": 840 }, { "epoch": 0.12, "learning_rate": 1.9536709298864937e-05, "loss": 0.9116, "step": 841 }, { "epoch": 0.12, "learning_rate": 1.9535267202459972e-05, "loss": 0.854, "step": 842 }, { "epoch": 0.12, "learning_rate": 1.9533822918508263e-05, "loss": 0.3035, "step": 843 }, { "epoch": 0.12, "learning_rate": 1.9532376447341143e-05, "loss": 0.8403, "step": 844 }, { "epoch": 0.12, "learning_rate": 1.9530927789290467e-05, "loss": 0.8252, "step": 845 }, { "epoch": 0.13, "learning_rate": 1.952947694468857e-05, "loss": 0.9106, "step": 846 }, { "epoch": 0.13, "learning_rate": 1.9528023913868305e-05, "loss": 0.8462, "step": 847 }, { "epoch": 0.13, "learning_rate": 1.952656869716302e-05, "loss": 0.8784, "step": 848 }, { "epoch": 0.13, "learning_rate": 1.9525111294906566e-05, "loss": 0.8481, "step": 849 }, { "epoch": 0.13, "learning_rate": 1.9523651707433295e-05, "loss": 0.9019, "step": 850 }, { "epoch": 0.13, "learning_rate": 1.9522189935078062e-05, "loss": 0.897, "step": 851 }, { "epoch": 0.13, "learning_rate": 1.952072597817622e-05, "loss": 0.8848, "step": 852 }, { "epoch": 0.13, "learning_rate": 1.951925983706362e-05, "loss": 0.8398, "step": 853 }, { "epoch": 0.13, "learning_rate": 1.9517791512076628e-05, "loss": 0.8428, "step": 854 }, { "epoch": 0.13, "learning_rate": 1.9516321003552096e-05, "loss": 0.8979, "step": 855 }, { "epoch": 0.13, "learning_rate": 1.9514848311827383e-05, "loss": 0.8677, "step": 856 }, { "epoch": 0.13, "learning_rate": 1.951337343724035e-05, "loss": 0.8271, "step": 857 }, { "epoch": 0.13, "learning_rate": 1.9511896380129357e-05, "loss": 0.8496, "step": 858 }, { "epoch": 0.13, "learning_rate": 1.9510417140833266e-05, "loss": 0.8867, "step": 859 }, { "epoch": 0.13, "learning_rate": 1.9508935719691438e-05, "loss": 0.8745, "step": 860 }, { "epoch": 0.13, "learning_rate": 1.9507452117043736e-05, "loss": 0.3181, "step": 861 }, { "epoch": 0.13, "learning_rate": 1.950596633323052e-05, "loss": 0.3424, "step": 862 }, { "epoch": 0.13, "learning_rate": 1.9504478368592658e-05, "loss": 0.8115, "step": 863 }, { "epoch": 0.13, "learning_rate": 1.9502988223471506e-05, "loss": 0.9004, "step": 864 }, { "epoch": 0.13, "learning_rate": 1.950149589820894e-05, "loss": 0.833, "step": 865 }, { "epoch": 0.13, "learning_rate": 1.950000139314731e-05, "loss": 0.8892, "step": 866 }, { "epoch": 0.13, "learning_rate": 1.9498504708629486e-05, "loss": 0.8257, "step": 867 }, { "epoch": 0.13, "learning_rate": 1.9497005844998835e-05, "loss": 0.8652, "step": 868 }, { "epoch": 0.13, "learning_rate": 1.949550480259922e-05, "loss": 0.8301, "step": 869 }, { "epoch": 0.13, "learning_rate": 1.9494001581775e-05, "loss": 0.9253, "step": 870 }, { "epoch": 0.13, "learning_rate": 1.949249618287104e-05, "loss": 0.8687, "step": 871 }, { "epoch": 0.13, "learning_rate": 1.949098860623271e-05, "loss": 0.8389, "step": 872 }, { "epoch": 0.13, "learning_rate": 1.9489478852205862e-05, "loss": 0.8481, "step": 873 }, { "epoch": 0.13, "learning_rate": 1.9487966921136865e-05, "loss": 0.9106, "step": 874 }, { "epoch": 0.13, "learning_rate": 1.9486452813372586e-05, "loss": 0.8066, "step": 875 }, { "epoch": 0.13, "learning_rate": 1.9484936529260374e-05, "loss": 0.8447, "step": 876 }, { "epoch": 0.13, "learning_rate": 1.94834180691481e-05, "loss": 0.8291, "step": 877 }, { "epoch": 0.13, "learning_rate": 1.9481897433384115e-05, "loss": 0.8682, "step": 878 }, { "epoch": 0.13, "learning_rate": 1.948037462231728e-05, "loss": 0.3254, "step": 879 }, { "epoch": 0.13, "learning_rate": 1.947884963629696e-05, "loss": 0.3164, "step": 880 }, { "epoch": 0.13, "learning_rate": 1.9477322475673003e-05, "loss": 0.8208, "step": 881 }, { "epoch": 0.13, "learning_rate": 1.947579314079577e-05, "loss": 0.8408, "step": 882 }, { "epoch": 0.13, "learning_rate": 1.947426163201611e-05, "loss": 0.876, "step": 883 }, { "epoch": 0.13, "learning_rate": 1.9472727949685383e-05, "loss": 0.9121, "step": 884 }, { "epoch": 0.13, "learning_rate": 1.9471192094155436e-05, "loss": 0.8906, "step": 885 }, { "epoch": 0.13, "learning_rate": 1.9469654065778622e-05, "loss": 0.9097, "step": 886 }, { "epoch": 0.13, "learning_rate": 1.946811386490779e-05, "loss": 0.8848, "step": 887 }, { "epoch": 0.13, "learning_rate": 1.946657149189629e-05, "loss": 0.8179, "step": 888 }, { "epoch": 0.13, "learning_rate": 1.946502694709796e-05, "loss": 0.8022, "step": 889 }, { "epoch": 0.13, "learning_rate": 1.9463480230867147e-05, "loss": 0.8833, "step": 890 }, { "epoch": 0.13, "learning_rate": 1.9461931343558697e-05, "loss": 0.854, "step": 891 }, { "epoch": 0.13, "learning_rate": 1.9460380285527945e-05, "loss": 0.8867, "step": 892 }, { "epoch": 0.13, "learning_rate": 1.9458827057130734e-05, "loss": 0.8716, "step": 893 }, { "epoch": 0.13, "learning_rate": 1.9457271658723396e-05, "loss": 0.9043, "step": 894 }, { "epoch": 0.13, "learning_rate": 1.9455714090662767e-05, "loss": 0.3218, "step": 895 }, { "epoch": 0.13, "learning_rate": 1.945415435330618e-05, "loss": 0.8398, "step": 896 }, { "epoch": 0.13, "learning_rate": 1.945259244701146e-05, "loss": 0.7881, "step": 897 }, { "epoch": 0.13, "learning_rate": 1.9451028372136936e-05, "loss": 0.3018, "step": 898 }, { "epoch": 0.13, "learning_rate": 1.9449462129041438e-05, "loss": 0.814, "step": 899 }, { "epoch": 0.13, "learning_rate": 1.9447893718084275e-05, "loss": 0.8506, "step": 900 }, { "epoch": 0.13, "learning_rate": 1.944632313962528e-05, "loss": 0.8462, "step": 901 }, { "epoch": 0.13, "learning_rate": 1.9444750394024762e-05, "loss": 0.8984, "step": 902 }, { "epoch": 0.13, "learning_rate": 1.9443175481643536e-05, "loss": 0.833, "step": 903 }, { "epoch": 0.13, "learning_rate": 1.944159840284291e-05, "loss": 0.9243, "step": 904 }, { "epoch": 0.13, "learning_rate": 1.94400191579847e-05, "loss": 0.7993, "step": 905 }, { "epoch": 0.13, "learning_rate": 1.94384377474312e-05, "loss": 0.7703, "step": 906 }, { "epoch": 0.13, "learning_rate": 1.943685417154522e-05, "loss": 0.8701, "step": 907 }, { "epoch": 0.13, "learning_rate": 1.9435268430690056e-05, "loss": 0.856, "step": 908 }, { "epoch": 0.13, "learning_rate": 1.94336805252295e-05, "loss": 0.9292, "step": 909 }, { "epoch": 0.13, "learning_rate": 1.9432090455527847e-05, "loss": 0.897, "step": 910 }, { "epoch": 0.13, "learning_rate": 1.9430498221949884e-05, "loss": 0.855, "step": 911 }, { "epoch": 0.13, "learning_rate": 1.9428903824860895e-05, "loss": 0.8613, "step": 912 }, { "epoch": 0.14, "learning_rate": 1.942730726462666e-05, "loss": 0.9023, "step": 913 }, { "epoch": 0.14, "learning_rate": 1.9425708541613457e-05, "loss": 0.8921, "step": 914 }, { "epoch": 0.14, "learning_rate": 1.942410765618806e-05, "loss": 0.8423, "step": 915 }, { "epoch": 0.14, "learning_rate": 1.9422504608717737e-05, "loss": 0.8936, "step": 916 }, { "epoch": 0.14, "learning_rate": 1.942089939957026e-05, "loss": 0.8364, "step": 917 }, { "epoch": 0.14, "learning_rate": 1.9419292029113878e-05, "loss": 0.8779, "step": 918 }, { "epoch": 0.14, "learning_rate": 1.9417682497717356e-05, "loss": 0.8916, "step": 919 }, { "epoch": 0.14, "learning_rate": 1.9416070805749947e-05, "loss": 0.917, "step": 920 }, { "epoch": 0.14, "learning_rate": 1.9414456953581396e-05, "loss": 0.8301, "step": 921 }, { "epoch": 0.14, "learning_rate": 1.941284094158195e-05, "loss": 0.8755, "step": 922 }, { "epoch": 0.14, "learning_rate": 1.941122277012235e-05, "loss": 0.8994, "step": 923 }, { "epoch": 0.14, "learning_rate": 1.940960243957383e-05, "loss": 0.9229, "step": 924 }, { "epoch": 0.14, "learning_rate": 1.940797995030812e-05, "loss": 0.8818, "step": 925 }, { "epoch": 0.14, "learning_rate": 1.940635530269744e-05, "loss": 0.8975, "step": 926 }, { "epoch": 0.14, "learning_rate": 1.9404728497114523e-05, "loss": 0.8164, "step": 927 }, { "epoch": 0.14, "learning_rate": 1.9403099533932573e-05, "loss": 0.8755, "step": 928 }, { "epoch": 0.14, "learning_rate": 1.9401468413525306e-05, "loss": 0.8726, "step": 929 }, { "epoch": 0.14, "learning_rate": 1.939983513626693e-05, "loss": 0.873, "step": 930 }, { "epoch": 0.14, "learning_rate": 1.9398199702532143e-05, "loss": 0.8594, "step": 931 }, { "epoch": 0.14, "learning_rate": 1.9396562112696137e-05, "loss": 0.8643, "step": 932 }, { "epoch": 0.14, "learning_rate": 1.939492236713461e-05, "loss": 0.8413, "step": 933 }, { "epoch": 0.14, "learning_rate": 1.9393280466223737e-05, "loss": 0.8755, "step": 934 }, { "epoch": 0.14, "learning_rate": 1.9391636410340206e-05, "loss": 0.8242, "step": 935 }, { "epoch": 0.14, "learning_rate": 1.9389990199861186e-05, "loss": 0.8955, "step": 936 }, { "epoch": 0.14, "learning_rate": 1.9388341835164346e-05, "loss": 0.9155, "step": 937 }, { "epoch": 0.14, "learning_rate": 1.9386691316627845e-05, "loss": 0.8555, "step": 938 }, { "epoch": 0.14, "learning_rate": 1.938503864463034e-05, "loss": 0.9229, "step": 939 }, { "epoch": 0.14, "learning_rate": 1.938338381955099e-05, "loss": 0.8804, "step": 940 }, { "epoch": 0.14, "learning_rate": 1.9381726841769422e-05, "loss": 0.9937, "step": 941 }, { "epoch": 0.14, "learning_rate": 1.9380067711665787e-05, "loss": 0.8633, "step": 942 }, { "epoch": 0.14, "learning_rate": 1.9378406429620712e-05, "loss": 0.9023, "step": 943 }, { "epoch": 0.14, "learning_rate": 1.9376742996015325e-05, "loss": 0.8501, "step": 944 }, { "epoch": 0.14, "learning_rate": 1.937507741123124e-05, "loss": 0.8628, "step": 945 }, { "epoch": 0.14, "learning_rate": 1.9373409675650573e-05, "loss": 0.8789, "step": 946 }, { "epoch": 0.14, "learning_rate": 1.937173978965593e-05, "loss": 0.8105, "step": 947 }, { "epoch": 0.14, "learning_rate": 1.9370067753630413e-05, "loss": 0.8306, "step": 948 }, { "epoch": 0.14, "learning_rate": 1.9368393567957607e-05, "loss": 0.9316, "step": 949 }, { "epoch": 0.14, "learning_rate": 1.93667172330216e-05, "loss": 0.9072, "step": 950 }, { "epoch": 0.14, "learning_rate": 1.936503874920698e-05, "loss": 0.7761, "step": 951 }, { "epoch": 0.14, "learning_rate": 1.9363358116898804e-05, "loss": 0.8535, "step": 952 }, { "epoch": 0.14, "learning_rate": 1.9361675336482652e-05, "loss": 0.918, "step": 953 }, { "epoch": 0.14, "learning_rate": 1.9359990408344565e-05, "loss": 0.9087, "step": 954 }, { "epoch": 0.14, "learning_rate": 1.9358303332871106e-05, "loss": 0.8164, "step": 955 }, { "epoch": 0.14, "learning_rate": 1.9356614110449314e-05, "loss": 0.8325, "step": 956 }, { "epoch": 0.14, "learning_rate": 1.9354922741466723e-05, "loss": 0.9204, "step": 957 }, { "epoch": 0.14, "learning_rate": 1.935322922631136e-05, "loss": 0.8687, "step": 958 }, { "epoch": 0.14, "learning_rate": 1.9351533565371747e-05, "loss": 0.8442, "step": 959 }, { "epoch": 0.14, "learning_rate": 1.9349835759036898e-05, "loss": 0.8467, "step": 960 }, { "epoch": 0.14, "learning_rate": 1.9348135807696315e-05, "loss": 0.8462, "step": 961 }, { "epoch": 0.14, "learning_rate": 1.934643371173999e-05, "loss": 0.9224, "step": 962 }, { "epoch": 0.14, "learning_rate": 1.9344729471558425e-05, "loss": 0.8867, "step": 963 }, { "epoch": 0.14, "learning_rate": 1.934302308754259e-05, "loss": 0.8057, "step": 964 }, { "epoch": 0.14, "learning_rate": 1.934131456008396e-05, "loss": 0.337, "step": 965 }, { "epoch": 0.14, "learning_rate": 1.9339603889574498e-05, "loss": 0.8721, "step": 966 }, { "epoch": 0.14, "learning_rate": 1.9337891076406667e-05, "loss": 0.8218, "step": 967 }, { "epoch": 0.14, "learning_rate": 1.9336176120973405e-05, "loss": 0.8545, "step": 968 }, { "epoch": 0.14, "learning_rate": 1.9334459023668154e-05, "loss": 0.8306, "step": 969 }, { "epoch": 0.14, "learning_rate": 1.9332739784884844e-05, "loss": 0.8218, "step": 970 }, { "epoch": 0.14, "learning_rate": 1.9331018405017902e-05, "loss": 0.9946, "step": 971 }, { "epoch": 0.14, "learning_rate": 1.9329294884462234e-05, "loss": 0.9102, "step": 972 }, { "epoch": 0.14, "learning_rate": 1.932756922361325e-05, "loss": 0.8511, "step": 973 }, { "epoch": 0.14, "learning_rate": 1.9325841422866833e-05, "loss": 0.8804, "step": 974 }, { "epoch": 0.14, "learning_rate": 1.9324111482619387e-05, "loss": 0.8711, "step": 975 }, { "epoch": 0.14, "learning_rate": 1.9322379403267774e-05, "loss": 0.9482, "step": 976 }, { "epoch": 0.14, "learning_rate": 1.9320645185209364e-05, "loss": 0.8633, "step": 977 }, { "epoch": 0.14, "learning_rate": 1.931890882884202e-05, "loss": 0.9072, "step": 978 }, { "epoch": 0.14, "learning_rate": 1.931717033456409e-05, "loss": 0.8574, "step": 979 }, { "epoch": 0.14, "learning_rate": 1.9315429702774408e-05, "loss": 0.8306, "step": 980 }, { "epoch": 0.15, "learning_rate": 1.9313686933872303e-05, "loss": 0.8015, "step": 981 }, { "epoch": 0.15, "learning_rate": 1.9311942028257604e-05, "loss": 0.8564, "step": 982 }, { "epoch": 0.15, "learning_rate": 1.9310194986330616e-05, "loss": 0.8315, "step": 983 }, { "epoch": 0.15, "learning_rate": 1.9308445808492134e-05, "loss": 0.8452, "step": 984 }, { "epoch": 0.15, "learning_rate": 1.9306694495143453e-05, "loss": 0.9751, "step": 985 }, { "epoch": 0.15, "learning_rate": 1.9304941046686356e-05, "loss": 0.8525, "step": 986 }, { "epoch": 0.15, "learning_rate": 1.9303185463523108e-05, "loss": 0.8262, "step": 987 }, { "epoch": 0.15, "learning_rate": 1.930142774605647e-05, "loss": 0.8237, "step": 988 }, { "epoch": 0.15, "learning_rate": 1.9299667894689695e-05, "loss": 0.8906, "step": 989 }, { "epoch": 0.15, "learning_rate": 1.9297905909826512e-05, "loss": 0.833, "step": 990 }, { "epoch": 0.15, "learning_rate": 1.9296141791871155e-05, "loss": 0.8257, "step": 991 }, { "epoch": 0.15, "learning_rate": 1.9294375541228345e-05, "loss": 0.8774, "step": 992 }, { "epoch": 0.15, "learning_rate": 1.9292607158303283e-05, "loss": 0.895, "step": 993 }, { "epoch": 0.15, "learning_rate": 1.929083664350167e-05, "loss": 0.8901, "step": 994 }, { "epoch": 0.15, "learning_rate": 1.928906399722969e-05, "loss": 0.8794, "step": 995 }, { "epoch": 0.15, "learning_rate": 1.9287289219894013e-05, "loss": 0.9004, "step": 996 }, { "epoch": 0.15, "learning_rate": 1.9285512311901807e-05, "loss": 0.8369, "step": 997 }, { "epoch": 0.15, "learning_rate": 1.928373327366072e-05, "loss": 0.8975, "step": 998 }, { "epoch": 0.15, "learning_rate": 1.9281952105578897e-05, "loss": 0.9014, "step": 999 }, { "epoch": 0.15, "learning_rate": 1.9280168808064964e-05, "loss": 0.9302, "step": 1000 }, { "epoch": 0.15, "learning_rate": 1.9278383381528036e-05, "loss": 0.874, "step": 1001 }, { "epoch": 0.15, "learning_rate": 1.9276595826377728e-05, "loss": 0.896, "step": 1002 }, { "epoch": 0.15, "learning_rate": 1.9274806143024126e-05, "loss": 0.9053, "step": 1003 }, { "epoch": 0.15, "learning_rate": 1.9273014331877814e-05, "loss": 0.8989, "step": 1004 }, { "epoch": 0.15, "learning_rate": 1.9271220393349867e-05, "loss": 0.8555, "step": 1005 }, { "epoch": 0.15, "learning_rate": 1.9269424327851842e-05, "loss": 0.813, "step": 1006 }, { "epoch": 0.15, "learning_rate": 1.9267626135795786e-05, "loss": 0.8433, "step": 1007 }, { "epoch": 0.15, "learning_rate": 1.9265825817594232e-05, "loss": 0.9062, "step": 1008 }, { "epoch": 0.15, "learning_rate": 1.9264023373660203e-05, "loss": 0.9136, "step": 1009 }, { "epoch": 0.15, "learning_rate": 1.926221880440721e-05, "loss": 0.8633, "step": 1010 }, { "epoch": 0.15, "learning_rate": 1.9260412110249248e-05, "loss": 0.8853, "step": 1011 }, { "epoch": 0.15, "learning_rate": 1.925860329160081e-05, "loss": 0.3635, "step": 1012 }, { "epoch": 0.15, "learning_rate": 1.9256792348876862e-05, "loss": 0.8784, "step": 1013 }, { "epoch": 0.15, "learning_rate": 1.9254979282492864e-05, "loss": 0.9209, "step": 1014 }, { "epoch": 0.15, "learning_rate": 1.9253164092864768e-05, "loss": 0.853, "step": 1015 }, { "epoch": 0.15, "learning_rate": 1.9251346780409004e-05, "loss": 0.9985, "step": 1016 }, { "epoch": 0.15, "learning_rate": 1.9249527345542493e-05, "loss": 0.9028, "step": 1017 }, { "epoch": 0.15, "learning_rate": 1.9247705788682646e-05, "loss": 0.8779, "step": 1018 }, { "epoch": 0.15, "learning_rate": 1.9245882110247354e-05, "loss": 0.8467, "step": 1019 }, { "epoch": 0.15, "learning_rate": 1.9244056310655006e-05, "loss": 0.8765, "step": 1020 }, { "epoch": 0.15, "learning_rate": 1.924222839032446e-05, "loss": 0.9126, "step": 1021 }, { "epoch": 0.15, "learning_rate": 1.9240398349675083e-05, "loss": 0.8237, "step": 1022 }, { "epoch": 0.15, "learning_rate": 1.923856618912671e-05, "loss": 0.9023, "step": 1023 }, { "epoch": 0.15, "learning_rate": 1.9236731909099666e-05, "loss": 1.0049, "step": 1024 }, { "epoch": 0.15, "learning_rate": 1.9234895510014767e-05, "loss": 0.9854, "step": 1025 }, { "epoch": 0.15, "learning_rate": 1.9233056992293314e-05, "loss": 0.8652, "step": 1026 }, { "epoch": 0.15, "learning_rate": 1.923121635635709e-05, "loss": 0.8486, "step": 1027 }, { "epoch": 0.15, "learning_rate": 1.9229373602628375e-05, "loss": 0.8955, "step": 1028 }, { "epoch": 0.15, "learning_rate": 1.922752873152992e-05, "loss": 0.8848, "step": 1029 }, { "epoch": 0.15, "learning_rate": 1.922568174348497e-05, "loss": 0.8257, "step": 1030 }, { "epoch": 0.15, "learning_rate": 1.922383263891726e-05, "loss": 0.9136, "step": 1031 }, { "epoch": 0.15, "learning_rate": 1.9221981418250993e-05, "loss": 0.9258, "step": 1032 }, { "epoch": 0.15, "learning_rate": 1.922012808191088e-05, "loss": 0.8931, "step": 1033 }, { "epoch": 0.15, "learning_rate": 1.9218272630322104e-05, "loss": 0.8813, "step": 1034 }, { "epoch": 0.15, "learning_rate": 1.9216415063910332e-05, "loss": 0.8979, "step": 1035 }, { "epoch": 0.15, "learning_rate": 1.9214555383101724e-05, "loss": 0.8545, "step": 1036 }, { "epoch": 0.15, "learning_rate": 1.9212693588322923e-05, "loss": 0.8496, "step": 1037 }, { "epoch": 0.15, "learning_rate": 1.9210829680001047e-05, "loss": 0.938, "step": 1038 }, { "epoch": 0.15, "learning_rate": 1.920896365856372e-05, "loss": 0.8936, "step": 1039 }, { "epoch": 0.15, "learning_rate": 1.9207095524439027e-05, "loss": 0.8965, "step": 1040 }, { "epoch": 0.15, "learning_rate": 1.9205225278055555e-05, "loss": 0.8938, "step": 1041 }, { "epoch": 0.15, "learning_rate": 1.9203352919842366e-05, "loss": 0.8774, "step": 1042 }, { "epoch": 0.15, "learning_rate": 1.9201478450229012e-05, "loss": 0.8281, "step": 1043 }, { "epoch": 0.15, "learning_rate": 1.919960186964552e-05, "loss": 0.9233, "step": 1044 }, { "epoch": 0.15, "learning_rate": 1.919772317852242e-05, "loss": 0.8364, "step": 1045 }, { "epoch": 0.15, "learning_rate": 1.919584237729071e-05, "loss": 0.9673, "step": 1046 }, { "epoch": 0.15, "learning_rate": 1.9193959466381868e-05, "loss": 0.9351, "step": 1047 }, { "epoch": 0.15, "learning_rate": 1.9192074446227878e-05, "loss": 0.7847, "step": 1048 }, { "epoch": 0.16, "learning_rate": 1.9190187317261184e-05, "loss": 0.8652, "step": 1049 }, { "epoch": 0.16, "learning_rate": 1.918829807991473e-05, "loss": 0.874, "step": 1050 }, { "epoch": 0.16, "learning_rate": 1.9186406734621937e-05, "loss": 0.854, "step": 1051 }, { "epoch": 0.16, "learning_rate": 1.918451328181671e-05, "loss": 0.9189, "step": 1052 }, { "epoch": 0.16, "learning_rate": 1.918261772193344e-05, "loss": 0.9072, "step": 1053 }, { "epoch": 0.16, "learning_rate": 1.9180720055407e-05, "loss": 0.8389, "step": 1054 }, { "epoch": 0.16, "learning_rate": 1.917882028267274e-05, "loss": 0.8677, "step": 1055 }, { "epoch": 0.16, "learning_rate": 1.9176918404166506e-05, "loss": 0.8628, "step": 1056 }, { "epoch": 0.16, "learning_rate": 1.9175014420324613e-05, "loss": 0.7964, "step": 1057 }, { "epoch": 0.16, "learning_rate": 1.9173108331583874e-05, "loss": 0.9087, "step": 1058 }, { "epoch": 0.16, "learning_rate": 1.9171200138381572e-05, "loss": 0.3232, "step": 1059 }, { "epoch": 0.16, "learning_rate": 1.916928984115548e-05, "loss": 0.8008, "step": 1060 }, { "epoch": 0.16, "learning_rate": 1.916737744034385e-05, "loss": 0.8574, "step": 1061 }, { "epoch": 0.16, "learning_rate": 1.916546293638542e-05, "loss": 0.8921, "step": 1062 }, { "epoch": 0.16, "learning_rate": 1.9163546329719406e-05, "loss": 0.8838, "step": 1063 }, { "epoch": 0.16, "learning_rate": 1.916162762078551e-05, "loss": 0.9072, "step": 1064 }, { "epoch": 0.16, "learning_rate": 1.9159706810023918e-05, "loss": 0.9243, "step": 1065 }, { "epoch": 0.16, "learning_rate": 1.9157783897875292e-05, "loss": 0.2986, "step": 1066 }, { "epoch": 0.16, "learning_rate": 1.9155858884780777e-05, "loss": 0.8691, "step": 1067 }, { "epoch": 0.16, "learning_rate": 1.9153931771182012e-05, "loss": 0.8916, "step": 1068 }, { "epoch": 0.16, "learning_rate": 1.91520025575211e-05, "loss": 0.8052, "step": 1069 }, { "epoch": 0.16, "learning_rate": 1.9150071244240638e-05, "loss": 0.8901, "step": 1070 }, { "epoch": 0.16, "learning_rate": 1.91481378317837e-05, "loss": 0.9097, "step": 1071 }, { "epoch": 0.16, "learning_rate": 1.914620232059384e-05, "loss": 0.8975, "step": 1072 }, { "epoch": 0.16, "learning_rate": 1.9144264711115104e-05, "loss": 0.8555, "step": 1073 }, { "epoch": 0.16, "learning_rate": 1.9142325003792004e-05, "loss": 0.8491, "step": 1074 }, { "epoch": 0.16, "learning_rate": 1.9140383199069542e-05, "loss": 0.8794, "step": 1075 }, { "epoch": 0.16, "learning_rate": 1.91384392973932e-05, "loss": 0.877, "step": 1076 }, { "epoch": 0.16, "learning_rate": 1.9136493299208944e-05, "loss": 0.9429, "step": 1077 }, { "epoch": 0.16, "learning_rate": 1.9134545204963214e-05, "loss": 0.8398, "step": 1078 }, { "epoch": 0.16, "learning_rate": 1.9132595015102936e-05, "loss": 0.8867, "step": 1079 }, { "epoch": 0.16, "learning_rate": 1.9130642730075516e-05, "loss": 0.8745, "step": 1080 }, { "epoch": 0.16, "learning_rate": 1.912868835032884e-05, "loss": 0.9214, "step": 1081 }, { "epoch": 0.16, "learning_rate": 1.9126731876311274e-05, "loss": 0.8574, "step": 1082 }, { "epoch": 0.16, "learning_rate": 1.9124773308471666e-05, "loss": 0.8823, "step": 1083 }, { "epoch": 0.16, "learning_rate": 1.9122812647259348e-05, "loss": 0.8179, "step": 1084 }, { "epoch": 0.16, "learning_rate": 1.912084989312412e-05, "loss": 0.8599, "step": 1085 }, { "epoch": 0.16, "learning_rate": 1.9118885046516274e-05, "loss": 0.9868, "step": 1086 }, { "epoch": 0.16, "learning_rate": 1.911691810788658e-05, "loss": 0.9058, "step": 1087 }, { "epoch": 0.16, "learning_rate": 1.9114949077686277e-05, "loss": 0.3354, "step": 1088 }, { "epoch": 0.16, "learning_rate": 1.9112977956367105e-05, "loss": 0.9995, "step": 1089 }, { "epoch": 0.16, "learning_rate": 1.9111004744381266e-05, "loss": 0.8813, "step": 1090 }, { "epoch": 0.16, "learning_rate": 1.9109029442181443e-05, "loss": 0.8721, "step": 1091 }, { "epoch": 0.16, "learning_rate": 1.9107052050220808e-05, "loss": 0.8604, "step": 1092 }, { "epoch": 0.16, "learning_rate": 1.9105072568953008e-05, "loss": 0.3379, "step": 1093 }, { "epoch": 0.16, "learning_rate": 1.9103090998832167e-05, "loss": 0.9082, "step": 1094 }, { "epoch": 0.16, "learning_rate": 1.910110734031289e-05, "loss": 0.8369, "step": 1095 }, { "epoch": 0.16, "learning_rate": 1.9099121593850255e-05, "loss": 0.9043, "step": 1096 }, { "epoch": 0.16, "learning_rate": 1.9097133759899838e-05, "loss": 0.8872, "step": 1097 }, { "epoch": 0.16, "learning_rate": 1.9095143838917667e-05, "loss": 0.9126, "step": 1098 }, { "epoch": 0.16, "learning_rate": 1.9093151831360268e-05, "loss": 0.8403, "step": 1099 }, { "epoch": 0.16, "learning_rate": 1.9091157737684643e-05, "loss": 0.8994, "step": 1100 }, { "epoch": 0.16, "learning_rate": 1.9089161558348266e-05, "loss": 0.9492, "step": 1101 }, { "epoch": 0.16, "learning_rate": 1.9087163293809093e-05, "loss": 0.8862, "step": 1102 }, { "epoch": 0.16, "learning_rate": 1.9085162944525563e-05, "loss": 1.0015, "step": 1103 }, { "epoch": 0.16, "learning_rate": 1.9083160510956584e-05, "loss": 0.9419, "step": 1104 }, { "epoch": 0.16, "learning_rate": 1.908115599356155e-05, "loss": 1.0, "step": 1105 }, { "epoch": 0.16, "learning_rate": 1.907914939280033e-05, "loss": 0.9478, "step": 1106 }, { "epoch": 0.16, "learning_rate": 1.907714070913327e-05, "loss": 0.915, "step": 1107 }, { "epoch": 0.16, "learning_rate": 1.907512994302119e-05, "loss": 0.9927, "step": 1108 }, { "epoch": 0.16, "learning_rate": 1.9073117094925405e-05, "loss": 0.9517, "step": 1109 }, { "epoch": 0.16, "learning_rate": 1.907110216530769e-05, "loss": 0.9722, "step": 1110 }, { "epoch": 0.16, "learning_rate": 1.9069085154630295e-05, "loss": 0.938, "step": 1111 }, { "epoch": 0.16, "learning_rate": 1.9067066063355967e-05, "loss": 0.978, "step": 1112 }, { "epoch": 0.16, "learning_rate": 1.906504489194791e-05, "loss": 0.9497, "step": 1113 }, { "epoch": 0.16, "learning_rate": 1.906302164086982e-05, "loss": 0.9336, "step": 1114 }, { "epoch": 0.16, "learning_rate": 1.9060996310585862e-05, "loss": 0.9487, "step": 1115 }, { "epoch": 0.17, "learning_rate": 1.9058968901560677e-05, "loss": 1.0186, "step": 1116 }, { "epoch": 0.17, "learning_rate": 1.905693941425939e-05, "loss": 0.9331, "step": 1117 }, { "epoch": 0.17, "learning_rate": 1.9054907849147597e-05, "loss": 0.9468, "step": 1118 }, { "epoch": 0.17, "learning_rate": 1.905287420669137e-05, "loss": 0.8721, "step": 1119 }, { "epoch": 0.17, "learning_rate": 1.9050838487357267e-05, "loss": 0.9385, "step": 1120 }, { "epoch": 0.17, "learning_rate": 1.9048800691612305e-05, "loss": 0.917, "step": 1121 }, { "epoch": 0.17, "learning_rate": 1.9046760819924e-05, "loss": 0.3174, "step": 1122 }, { "epoch": 0.17, "learning_rate": 1.904471887276032e-05, "loss": 0.9097, "step": 1123 }, { "epoch": 0.17, "learning_rate": 1.9042674850589732e-05, "loss": 0.9351, "step": 1124 }, { "epoch": 0.17, "learning_rate": 1.9040628753881155e-05, "loss": 0.895, "step": 1125 }, { "epoch": 0.17, "learning_rate": 1.9038580583104013e-05, "loss": 0.8037, "step": 1126 }, { "epoch": 0.17, "learning_rate": 1.903653033872818e-05, "loss": 0.3344, "step": 1127 }, { "epoch": 0.17, "learning_rate": 1.9034478021224015e-05, "loss": 0.8384, "step": 1128 }, { "epoch": 0.17, "learning_rate": 1.903242363106236e-05, "loss": 0.8613, "step": 1129 }, { "epoch": 0.17, "learning_rate": 1.903036716871452e-05, "loss": 0.8721, "step": 1130 }, { "epoch": 0.17, "learning_rate": 1.902830863465228e-05, "loss": 0.3223, "step": 1131 }, { "epoch": 0.17, "learning_rate": 1.9026248029347908e-05, "loss": 0.3062, "step": 1132 }, { "epoch": 0.17, "learning_rate": 1.9024185353274138e-05, "loss": 0.9019, "step": 1133 }, { "epoch": 0.17, "learning_rate": 1.902212060690418e-05, "loss": 0.8506, "step": 1134 }, { "epoch": 0.17, "learning_rate": 1.902005379071172e-05, "loss": 0.9004, "step": 1135 }, { "epoch": 0.17, "learning_rate": 1.9017984905170923e-05, "loss": 0.8755, "step": 1136 }, { "epoch": 0.17, "learning_rate": 1.9015913950756425e-05, "loss": 0.9067, "step": 1137 }, { "epoch": 0.17, "learning_rate": 1.9013840927943334e-05, "loss": 0.8564, "step": 1138 }, { "epoch": 0.17, "learning_rate": 1.9011765837207237e-05, "loss": 0.8589, "step": 1139 }, { "epoch": 0.17, "learning_rate": 1.900968867902419e-05, "loss": 0.8774, "step": 1140 }, { "epoch": 0.17, "learning_rate": 1.9007609453870738e-05, "loss": 0.8638, "step": 1141 }, { "epoch": 0.17, "learning_rate": 1.9005528162223878e-05, "loss": 0.9458, "step": 1142 }, { "epoch": 0.17, "learning_rate": 1.9003444804561098e-05, "loss": 0.8589, "step": 1143 }, { "epoch": 0.17, "learning_rate": 1.9001359381360354e-05, "loss": 0.8789, "step": 1144 }, { "epoch": 0.17, "learning_rate": 1.8999271893100074e-05, "loss": 0.9253, "step": 1145 }, { "epoch": 0.17, "learning_rate": 1.8997182340259165e-05, "loss": 0.9092, "step": 1146 }, { "epoch": 0.17, "learning_rate": 1.8995090723317e-05, "loss": 0.8784, "step": 1147 }, { "epoch": 0.17, "learning_rate": 1.8992997042753437e-05, "loss": 0.8945, "step": 1148 }, { "epoch": 0.17, "learning_rate": 1.8990901299048798e-05, "loss": 0.8901, "step": 1149 }, { "epoch": 0.17, "learning_rate": 1.8988803492683875e-05, "loss": 0.8618, "step": 1150 }, { "epoch": 0.17, "learning_rate": 1.8986703624139948e-05, "loss": 0.9014, "step": 1151 }, { "epoch": 0.17, "learning_rate": 1.8984601693898756e-05, "loss": 0.9326, "step": 1152 }, { "epoch": 0.17, "learning_rate": 1.898249770244252e-05, "loss": 0.8857, "step": 1153 }, { "epoch": 0.17, "learning_rate": 1.898039165025393e-05, "loss": 0.8589, "step": 1154 }, { "epoch": 0.17, "learning_rate": 1.897828353781614e-05, "loss": 0.895, "step": 1155 }, { "epoch": 0.17, "learning_rate": 1.89761733656128e-05, "loss": 0.3257, "step": 1156 }, { "epoch": 0.17, "learning_rate": 1.8974061134128008e-05, "loss": 0.8838, "step": 1157 }, { "epoch": 0.17, "learning_rate": 1.8971946843846348e-05, "loss": 0.9048, "step": 1158 }, { "epoch": 0.17, "learning_rate": 1.896983049525287e-05, "loss": 0.8613, "step": 1159 }, { "epoch": 0.17, "learning_rate": 1.89677120888331e-05, "loss": 0.8872, "step": 1160 }, { "epoch": 0.17, "learning_rate": 1.896559162507304e-05, "loss": 0.9282, "step": 1161 }, { "epoch": 0.17, "learning_rate": 1.8963469104459157e-05, "loss": 0.896, "step": 1162 }, { "epoch": 0.17, "learning_rate": 1.896134452747839e-05, "loss": 0.8604, "step": 1163 }, { "epoch": 0.17, "learning_rate": 1.8959217894618146e-05, "loss": 0.8555, "step": 1164 }, { "epoch": 0.17, "learning_rate": 1.8957089206366325e-05, "loss": 0.8813, "step": 1165 }, { "epoch": 0.17, "learning_rate": 1.895495846321127e-05, "loss": 0.8706, "step": 1166 }, { "epoch": 0.17, "learning_rate": 1.8952825665641808e-05, "loss": 0.8823, "step": 1167 }, { "epoch": 0.17, "learning_rate": 1.895069081414725e-05, "loss": 0.9097, "step": 1168 }, { "epoch": 0.17, "learning_rate": 1.8948553909217354e-05, "loss": 0.2866, "step": 1169 }, { "epoch": 0.17, "learning_rate": 1.8946414951342368e-05, "loss": 0.8677, "step": 1170 }, { "epoch": 0.17, "learning_rate": 1.8944273941012998e-05, "loss": 0.8623, "step": 1171 }, { "epoch": 0.17, "learning_rate": 1.8942130878720434e-05, "loss": 0.9229, "step": 1172 }, { "epoch": 0.17, "learning_rate": 1.8939985764956323e-05, "loss": 0.8813, "step": 1173 }, { "epoch": 0.17, "learning_rate": 1.8937838600212792e-05, "loss": 0.8667, "step": 1174 }, { "epoch": 0.17, "learning_rate": 1.8935689384982433e-05, "loss": 0.8608, "step": 1175 }, { "epoch": 0.17, "learning_rate": 1.893353811975832e-05, "loss": 0.8652, "step": 1176 }, { "epoch": 0.17, "learning_rate": 1.893138480503398e-05, "loss": 0.8892, "step": 1177 }, { "epoch": 0.17, "learning_rate": 1.892922944130342e-05, "loss": 0.9312, "step": 1178 }, { "epoch": 0.17, "learning_rate": 1.892707202906112e-05, "loss": 0.917, "step": 1179 }, { "epoch": 0.17, "learning_rate": 1.8924912568802023e-05, "loss": 0.9028, "step": 1180 }, { "epoch": 0.17, "learning_rate": 1.8922751061021545e-05, "loss": 0.8379, "step": 1181 }, { "epoch": 0.17, "learning_rate": 1.8920587506215567e-05, "loss": 0.8022, "step": 1182 }, { "epoch": 0.17, "learning_rate": 1.891842190488045e-05, "loss": 0.8618, "step": 1183 }, { "epoch": 0.18, "learning_rate": 1.8916254257513017e-05, "loss": 0.8589, "step": 1184 }, { "epoch": 0.18, "learning_rate": 1.8914084564610565e-05, "loss": 0.8721, "step": 1185 }, { "epoch": 0.18, "learning_rate": 1.8911912826670848e-05, "loss": 0.9243, "step": 1186 }, { "epoch": 0.18, "learning_rate": 1.8909739044192107e-05, "loss": 0.8452, "step": 1187 }, { "epoch": 0.18, "learning_rate": 1.8907563217673038e-05, "loss": 0.8599, "step": 1188 }, { "epoch": 0.18, "learning_rate": 1.8905385347612814e-05, "loss": 0.8774, "step": 1189 }, { "epoch": 0.18, "learning_rate": 1.8903205434511072e-05, "loss": 0.9326, "step": 1190 }, { "epoch": 0.18, "learning_rate": 1.8901023478867926e-05, "loss": 0.9209, "step": 1191 }, { "epoch": 0.18, "learning_rate": 1.8898839481183943e-05, "loss": 0.8823, "step": 1192 }, { "epoch": 0.18, "learning_rate": 1.8896653441960175e-05, "loss": 0.8882, "step": 1193 }, { "epoch": 0.18, "learning_rate": 1.889446536169813e-05, "loss": 0.8516, "step": 1194 }, { "epoch": 0.18, "learning_rate": 1.88922752408998e-05, "loss": 0.814, "step": 1195 }, { "epoch": 0.18, "learning_rate": 1.889008308006762e-05, "loss": 0.8809, "step": 1196 }, { "epoch": 0.18, "learning_rate": 1.888788887970452e-05, "loss": 0.8643, "step": 1197 }, { "epoch": 0.18, "learning_rate": 1.8885692640313875e-05, "loss": 0.8237, "step": 1198 }, { "epoch": 0.18, "learning_rate": 1.8883494362399547e-05, "loss": 0.8828, "step": 1199 }, { "epoch": 0.18, "learning_rate": 1.888129404646585e-05, "loss": 0.8184, "step": 1200 }, { "epoch": 0.18, "learning_rate": 1.8879091693017582e-05, "loss": 0.8804, "step": 1201 }, { "epoch": 0.18, "learning_rate": 1.8876887302559992e-05, "loss": 0.8877, "step": 1202 }, { "epoch": 0.18, "learning_rate": 1.88746808755988e-05, "loss": 0.9463, "step": 1203 }, { "epoch": 0.18, "learning_rate": 1.8872472412640207e-05, "loss": 0.9243, "step": 1204 }, { "epoch": 0.18, "learning_rate": 1.887026191419086e-05, "loss": 0.9272, "step": 1205 }, { "epoch": 0.18, "learning_rate": 1.8868049380757895e-05, "loss": 0.9209, "step": 1206 }, { "epoch": 0.18, "learning_rate": 1.886583481284889e-05, "loss": 0.8584, "step": 1207 }, { "epoch": 0.18, "learning_rate": 1.8863618210971912e-05, "loss": 0.8372, "step": 1208 }, { "epoch": 0.18, "learning_rate": 1.8861399575635486e-05, "loss": 0.9062, "step": 1209 }, { "epoch": 0.18, "learning_rate": 1.8859178907348602e-05, "loss": 0.8145, "step": 1210 }, { "epoch": 0.18, "learning_rate": 1.8856956206620717e-05, "loss": 0.3381, "step": 1211 }, { "epoch": 0.18, "learning_rate": 1.885473147396175e-05, "loss": 0.3209, "step": 1212 }, { "epoch": 0.18, "learning_rate": 1.88525047098821e-05, "loss": 0.8613, "step": 1213 }, { "epoch": 0.18, "learning_rate": 1.8850275914892622e-05, "loss": 0.8691, "step": 1214 }, { "epoch": 0.18, "learning_rate": 1.8848045089504633e-05, "loss": 0.8398, "step": 1215 }, { "epoch": 0.18, "learning_rate": 1.8845812234229924e-05, "loss": 0.8691, "step": 1216 }, { "epoch": 0.18, "learning_rate": 1.8843577349580747e-05, "loss": 0.8604, "step": 1217 }, { "epoch": 0.18, "learning_rate": 1.8841340436069825e-05, "loss": 0.8726, "step": 1218 }, { "epoch": 0.18, "learning_rate": 1.8839101494210338e-05, "loss": 0.8916, "step": 1219 }, { "epoch": 0.18, "learning_rate": 1.883686052451594e-05, "loss": 0.9453, "step": 1220 }, { "epoch": 0.18, "learning_rate": 1.8834617527500743e-05, "loss": 0.897, "step": 1221 }, { "epoch": 0.18, "learning_rate": 1.883237250367933e-05, "loss": 0.8501, "step": 1222 }, { "epoch": 0.18, "learning_rate": 1.8830125453566742e-05, "loss": 0.8599, "step": 1223 }, { "epoch": 0.18, "learning_rate": 1.8827876377678494e-05, "loss": 0.855, "step": 1224 }, { "epoch": 0.18, "learning_rate": 1.8825625276530558e-05, "loss": 0.8984, "step": 1225 }, { "epoch": 0.18, "learning_rate": 1.8823372150639375e-05, "loss": 0.8516, "step": 1226 }, { "epoch": 0.18, "learning_rate": 1.882111700052185e-05, "loss": 0.8545, "step": 1227 }, { "epoch": 0.18, "learning_rate": 1.881885982669535e-05, "loss": 0.7876, "step": 1228 }, { "epoch": 0.18, "learning_rate": 1.8816600629677705e-05, "loss": 0.8647, "step": 1229 }, { "epoch": 0.18, "learning_rate": 1.8814339409987217e-05, "loss": 0.8774, "step": 1230 }, { "epoch": 0.18, "learning_rate": 1.8812076168142647e-05, "loss": 0.8394, "step": 1231 }, { "epoch": 0.18, "learning_rate": 1.880981090466321e-05, "loss": 0.8174, "step": 1232 }, { "epoch": 0.18, "learning_rate": 1.8807543620068606e-05, "loss": 0.8408, "step": 1233 }, { "epoch": 0.18, "learning_rate": 1.8805274314878986e-05, "loss": 0.8872, "step": 1234 }, { "epoch": 0.18, "learning_rate": 1.880300298961496e-05, "loss": 0.8462, "step": 1235 }, { "epoch": 0.18, "learning_rate": 1.880072964479761e-05, "loss": 0.353, "step": 1236 }, { "epoch": 0.18, "learning_rate": 1.8798454280948483e-05, "loss": 0.8188, "step": 1237 }, { "epoch": 0.18, "learning_rate": 1.8796176898589575e-05, "loss": 0.8354, "step": 1238 }, { "epoch": 0.18, "learning_rate": 1.879389749824336e-05, "loss": 0.8843, "step": 1239 }, { "epoch": 0.18, "learning_rate": 1.8791616080432777e-05, "loss": 0.2764, "step": 1240 }, { "epoch": 0.18, "learning_rate": 1.8789332645681212e-05, "loss": 0.8384, "step": 1241 }, { "epoch": 0.18, "learning_rate": 1.8787047194512524e-05, "loss": 0.9053, "step": 1242 }, { "epoch": 0.18, "learning_rate": 1.8784759727451033e-05, "loss": 0.3406, "step": 1243 }, { "epoch": 0.18, "learning_rate": 1.878247024502152e-05, "loss": 0.9492, "step": 1244 }, { "epoch": 0.18, "learning_rate": 1.8780178747749237e-05, "loss": 0.8701, "step": 1245 }, { "epoch": 0.18, "learning_rate": 1.877788523615988e-05, "loss": 0.8726, "step": 1246 }, { "epoch": 0.18, "learning_rate": 1.8775589710779627e-05, "loss": 0.896, "step": 1247 }, { "epoch": 0.18, "learning_rate": 1.8773292172135106e-05, "loss": 0.8857, "step": 1248 }, { "epoch": 0.18, "learning_rate": 1.8770992620753413e-05, "loss": 0.8428, "step": 1249 }, { "epoch": 0.18, "learning_rate": 1.8768691057162097e-05, "loss": 0.895, "step": 1250 }, { "epoch": 0.19, "learning_rate": 1.8766387481889178e-05, "loss": 0.8936, "step": 1251 }, { "epoch": 0.19, "learning_rate": 1.8764081895463137e-05, "loss": 0.7729, "step": 1252 }, { "epoch": 0.19, "learning_rate": 1.8761774298412905e-05, "loss": 0.8857, "step": 1253 }, { "epoch": 0.19, "learning_rate": 1.875946469126789e-05, "loss": 0.8262, "step": 1254 }, { "epoch": 0.19, "learning_rate": 1.8757153074557953e-05, "loss": 0.8262, "step": 1255 }, { "epoch": 0.19, "learning_rate": 1.875483944881341e-05, "loss": 0.873, "step": 1256 }, { "epoch": 0.19, "learning_rate": 1.8752523814565053e-05, "loss": 0.876, "step": 1257 }, { "epoch": 0.19, "learning_rate": 1.8750206172344125e-05, "loss": 0.875, "step": 1258 }, { "epoch": 0.19, "learning_rate": 1.874788652268233e-05, "loss": 0.8906, "step": 1259 }, { "epoch": 0.19, "learning_rate": 1.874556486611183e-05, "loss": 0.9116, "step": 1260 }, { "epoch": 0.19, "learning_rate": 1.8743241203165253e-05, "loss": 0.8359, "step": 1261 }, { "epoch": 0.19, "learning_rate": 1.874091553437569e-05, "loss": 0.8169, "step": 1262 }, { "epoch": 0.19, "learning_rate": 1.8738587860276685e-05, "loss": 0.8589, "step": 1263 }, { "epoch": 0.19, "learning_rate": 1.8736258181402244e-05, "loss": 0.8594, "step": 1264 }, { "epoch": 0.19, "learning_rate": 1.873392649828683e-05, "loss": 0.8916, "step": 1265 }, { "epoch": 0.19, "learning_rate": 1.8731592811465377e-05, "loss": 0.9092, "step": 1266 }, { "epoch": 0.19, "learning_rate": 1.8729257121473262e-05, "loss": 0.9438, "step": 1267 }, { "epoch": 0.19, "learning_rate": 1.872691942884634e-05, "loss": 0.8242, "step": 1268 }, { "epoch": 0.19, "learning_rate": 1.872457973412091e-05, "loss": 0.874, "step": 1269 }, { "epoch": 0.19, "learning_rate": 1.872223803783374e-05, "loss": 0.8853, "step": 1270 }, { "epoch": 0.19, "learning_rate": 1.8719894340522048e-05, "loss": 0.8032, "step": 1271 }, { "epoch": 0.19, "learning_rate": 1.871754864272352e-05, "loss": 0.8462, "step": 1272 }, { "epoch": 0.19, "learning_rate": 1.87152009449763e-05, "loss": 0.8716, "step": 1273 }, { "epoch": 0.19, "learning_rate": 1.8712851247818985e-05, "loss": 0.8311, "step": 1274 }, { "epoch": 0.19, "learning_rate": 1.8710499551790632e-05, "loss": 0.854, "step": 1275 }, { "epoch": 0.19, "learning_rate": 1.8708145857430766e-05, "loss": 0.8833, "step": 1276 }, { "epoch": 0.19, "learning_rate": 1.870579016527936e-05, "loss": 0.8726, "step": 1277 }, { "epoch": 0.19, "learning_rate": 1.8703432475876844e-05, "loss": 0.8628, "step": 1278 }, { "epoch": 0.19, "learning_rate": 1.8701072789764118e-05, "loss": 0.8535, "step": 1279 }, { "epoch": 0.19, "learning_rate": 1.8698711107482522e-05, "loss": 0.8716, "step": 1280 }, { "epoch": 0.19, "learning_rate": 1.869634742957388e-05, "loss": 0.3582, "step": 1281 }, { "epoch": 0.19, "learning_rate": 1.8693981756580442e-05, "loss": 0.8188, "step": 1282 }, { "epoch": 0.19, "learning_rate": 1.8691614089044946e-05, "loss": 0.8643, "step": 1283 }, { "epoch": 0.19, "learning_rate": 1.8689244427510564e-05, "loss": 0.8872, "step": 1284 }, { "epoch": 0.19, "learning_rate": 1.868687277252094e-05, "loss": 0.8364, "step": 1285 }, { "epoch": 0.19, "learning_rate": 1.8684499124620167e-05, "loss": 0.8311, "step": 1286 }, { "epoch": 0.19, "learning_rate": 1.8682123484352804e-05, "loss": 0.8032, "step": 1287 }, { "epoch": 0.19, "learning_rate": 1.867974585226386e-05, "loss": 0.8179, "step": 1288 }, { "epoch": 0.19, "learning_rate": 1.86773662288988e-05, "loss": 0.9058, "step": 1289 }, { "epoch": 0.19, "learning_rate": 1.8674984614803553e-05, "loss": 0.9097, "step": 1290 }, { "epoch": 0.19, "learning_rate": 1.8672601010524497e-05, "loss": 0.8506, "step": 1291 }, { "epoch": 0.19, "learning_rate": 1.8670215416608467e-05, "loss": 0.9141, "step": 1292 }, { "epoch": 0.19, "learning_rate": 1.8667827833602767e-05, "loss": 0.7959, "step": 1293 }, { "epoch": 0.19, "learning_rate": 1.866543826205514e-05, "loss": 0.8579, "step": 1294 }, { "epoch": 0.19, "learning_rate": 1.8663046702513795e-05, "loss": 0.8374, "step": 1295 }, { "epoch": 0.19, "learning_rate": 1.8660653155527396e-05, "loss": 0.8296, "step": 1296 }, { "epoch": 0.19, "learning_rate": 1.865825762164506e-05, "loss": 0.8271, "step": 1297 }, { "epoch": 0.19, "learning_rate": 1.8655860101416362e-05, "loss": 0.8257, "step": 1298 }, { "epoch": 0.19, "learning_rate": 1.8653460595391335e-05, "loss": 0.8291, "step": 1299 }, { "epoch": 0.19, "learning_rate": 1.865105910412046e-05, "loss": 0.8696, "step": 1300 }, { "epoch": 0.19, "learning_rate": 1.8648655628154687e-05, "loss": 0.9443, "step": 1301 }, { "epoch": 0.19, "learning_rate": 1.8646250168045402e-05, "loss": 0.7661, "step": 1302 }, { "epoch": 0.19, "learning_rate": 1.8643842724344468e-05, "loss": 0.833, "step": 1303 }, { "epoch": 0.19, "learning_rate": 1.8641433297604183e-05, "loss": 0.8208, "step": 1304 }, { "epoch": 0.19, "learning_rate": 1.8639021888377313e-05, "loss": 0.8862, "step": 1305 }, { "epoch": 0.19, "learning_rate": 1.8636608497217077e-05, "loss": 0.8828, "step": 1306 }, { "epoch": 0.19, "learning_rate": 1.863419312467714e-05, "loss": 0.9048, "step": 1307 }, { "epoch": 0.19, "learning_rate": 1.863177577131164e-05, "loss": 0.9473, "step": 1308 }, { "epoch": 0.19, "learning_rate": 1.862935643767514e-05, "loss": 0.8599, "step": 1309 }, { "epoch": 0.19, "learning_rate": 1.862693512432269e-05, "loss": 0.8848, "step": 1310 }, { "epoch": 0.19, "learning_rate": 1.862451183180977e-05, "loss": 0.8706, "step": 1311 }, { "epoch": 0.19, "learning_rate": 1.862208656069233e-05, "loss": 0.8022, "step": 1312 }, { "epoch": 0.19, "learning_rate": 1.861965931152676e-05, "loss": 0.8994, "step": 1313 }, { "epoch": 0.19, "learning_rate": 1.861723008486992e-05, "loss": 0.9263, "step": 1314 }, { "epoch": 0.19, "learning_rate": 1.8614798881279107e-05, "loss": 0.8745, "step": 1315 }, { "epoch": 0.19, "learning_rate": 1.8612365701312075e-05, "loss": 0.8545, "step": 1316 }, { "epoch": 0.19, "learning_rate": 1.8609930545527048e-05, "loss": 0.9302, "step": 1317 }, { "epoch": 0.19, "learning_rate": 1.8607493414482683e-05, "loss": 0.8682, "step": 1318 }, { "epoch": 0.2, "learning_rate": 1.8605054308738095e-05, "loss": 0.9097, "step": 1319 }, { "epoch": 0.2, "learning_rate": 1.8602613228852862e-05, "loss": 0.7866, "step": 1320 }, { "epoch": 0.2, "learning_rate": 1.8600170175387004e-05, "loss": 0.8921, "step": 1321 }, { "epoch": 0.2, "learning_rate": 1.8597725148900997e-05, "loss": 0.8545, "step": 1322 }, { "epoch": 0.2, "learning_rate": 1.859527814995577e-05, "loss": 0.8501, "step": 1323 }, { "epoch": 0.2, "learning_rate": 1.859282917911271e-05, "loss": 0.9102, "step": 1324 }, { "epoch": 0.2, "learning_rate": 1.8590378236933642e-05, "loss": 0.7197, "step": 1325 }, { "epoch": 0.2, "learning_rate": 1.8587925323980863e-05, "loss": 0.8574, "step": 1326 }, { "epoch": 0.2, "learning_rate": 1.8585470440817103e-05, "loss": 0.8647, "step": 1327 }, { "epoch": 0.2, "learning_rate": 1.8583013588005553e-05, "loss": 0.8921, "step": 1328 }, { "epoch": 0.2, "learning_rate": 1.858055476610986e-05, "loss": 0.8613, "step": 1329 }, { "epoch": 0.2, "learning_rate": 1.8578093975694116e-05, "loss": 0.8687, "step": 1330 }, { "epoch": 0.2, "learning_rate": 1.8575631217322864e-05, "loss": 0.8008, "step": 1331 }, { "epoch": 0.2, "learning_rate": 1.8573166491561108e-05, "loss": 0.7446, "step": 1332 }, { "epoch": 0.2, "learning_rate": 1.857069979897429e-05, "loss": 0.8745, "step": 1333 }, { "epoch": 0.2, "learning_rate": 1.8568231140128307e-05, "loss": 0.8481, "step": 1334 }, { "epoch": 0.2, "learning_rate": 1.856576051558952e-05, "loss": 0.8594, "step": 1335 }, { "epoch": 0.2, "learning_rate": 1.8563287925924725e-05, "loss": 0.8711, "step": 1336 }, { "epoch": 0.2, "learning_rate": 1.8560813371701174e-05, "loss": 0.8589, "step": 1337 }, { "epoch": 0.2, "learning_rate": 1.8558336853486573e-05, "loss": 0.792, "step": 1338 }, { "epoch": 0.2, "learning_rate": 1.8555858371849075e-05, "loss": 0.8589, "step": 1339 }, { "epoch": 0.2, "learning_rate": 1.8553377927357283e-05, "loss": 0.8979, "step": 1340 }, { "epoch": 0.2, "learning_rate": 1.855089552058025e-05, "loss": 0.9106, "step": 1341 }, { "epoch": 0.2, "learning_rate": 1.8548411152087483e-05, "loss": 0.856, "step": 1342 }, { "epoch": 0.2, "learning_rate": 1.854592482244894e-05, "loss": 0.8091, "step": 1343 }, { "epoch": 0.2, "learning_rate": 1.8543436532235024e-05, "loss": 0.3523, "step": 1344 }, { "epoch": 0.2, "learning_rate": 1.8540946282016587e-05, "loss": 0.9346, "step": 1345 }, { "epoch": 0.2, "learning_rate": 1.8538454072364935e-05, "loss": 0.8735, "step": 1346 }, { "epoch": 0.2, "learning_rate": 1.853595990385182e-05, "loss": 0.9155, "step": 1347 }, { "epoch": 0.2, "learning_rate": 1.853346377704945e-05, "loss": 0.9131, "step": 1348 }, { "epoch": 0.2, "learning_rate": 1.8530965692530472e-05, "loss": 0.9087, "step": 1349 }, { "epoch": 0.2, "learning_rate": 1.852846565086799e-05, "loss": 0.8374, "step": 1350 }, { "epoch": 0.2, "learning_rate": 1.8525963652635556e-05, "loss": 0.364, "step": 1351 }, { "epoch": 0.2, "learning_rate": 1.8523459698407164e-05, "loss": 0.8457, "step": 1352 }, { "epoch": 0.2, "learning_rate": 1.852095378875727e-05, "loss": 0.8716, "step": 1353 }, { "epoch": 0.2, "learning_rate": 1.8518445924260765e-05, "loss": 0.8154, "step": 1354 }, { "epoch": 0.2, "learning_rate": 1.8515936105493e-05, "loss": 0.792, "step": 1355 }, { "epoch": 0.2, "learning_rate": 1.8513424333029757e-05, "loss": 0.8838, "step": 1356 }, { "epoch": 0.2, "learning_rate": 1.8510910607447293e-05, "loss": 0.8843, "step": 1357 }, { "epoch": 0.2, "learning_rate": 1.8508394929322287e-05, "loss": 0.896, "step": 1358 }, { "epoch": 0.2, "learning_rate": 1.8505877299231877e-05, "loss": 0.8735, "step": 1359 }, { "epoch": 0.2, "learning_rate": 1.8503357717753658e-05, "loss": 0.9038, "step": 1360 }, { "epoch": 0.2, "learning_rate": 1.8500836185465652e-05, "loss": 0.8174, "step": 1361 }, { "epoch": 0.2, "learning_rate": 1.849831270294635e-05, "loss": 0.8711, "step": 1362 }, { "epoch": 0.2, "learning_rate": 1.8495787270774676e-05, "loss": 0.8809, "step": 1363 }, { "epoch": 0.2, "learning_rate": 1.849325988953e-05, "loss": 0.8501, "step": 1364 }, { "epoch": 0.2, "learning_rate": 1.8490730559792153e-05, "loss": 0.3105, "step": 1365 }, { "epoch": 0.2, "learning_rate": 1.84881992821414e-05, "loss": 0.8911, "step": 1366 }, { "epoch": 0.2, "learning_rate": 1.848566605715846e-05, "loss": 0.8203, "step": 1367 }, { "epoch": 0.2, "learning_rate": 1.8483130885424493e-05, "loss": 0.343, "step": 1368 }, { "epoch": 0.2, "learning_rate": 1.8480593767521116e-05, "loss": 0.854, "step": 1369 }, { "epoch": 0.2, "learning_rate": 1.8478054704030376e-05, "loss": 0.8433, "step": 1370 }, { "epoch": 0.2, "learning_rate": 1.8475513695534784e-05, "loss": 0.8857, "step": 1371 }, { "epoch": 0.2, "learning_rate": 1.8472970742617284e-05, "loss": 0.9023, "step": 1372 }, { "epoch": 0.2, "learning_rate": 1.8470425845861274e-05, "loss": 0.8911, "step": 1373 }, { "epoch": 0.2, "learning_rate": 1.8467879005850595e-05, "loss": 0.9204, "step": 1374 }, { "epoch": 0.2, "learning_rate": 1.8465330223169532e-05, "loss": 0.9023, "step": 1375 }, { "epoch": 0.2, "learning_rate": 1.8462779498402817e-05, "loss": 0.8379, "step": 1376 }, { "epoch": 0.2, "learning_rate": 1.846022683213563e-05, "loss": 0.8838, "step": 1377 }, { "epoch": 0.2, "learning_rate": 1.8457672224953595e-05, "loss": 0.2905, "step": 1378 }, { "epoch": 0.2, "learning_rate": 1.8455115677442782e-05, "loss": 0.8184, "step": 1379 }, { "epoch": 0.2, "learning_rate": 1.84525571901897e-05, "loss": 0.8633, "step": 1380 }, { "epoch": 0.2, "learning_rate": 1.844999676378131e-05, "loss": 0.8857, "step": 1381 }, { "epoch": 0.2, "learning_rate": 1.8447434398805018e-05, "loss": 0.8721, "step": 1382 }, { "epoch": 0.2, "learning_rate": 1.844487009584867e-05, "loss": 0.9297, "step": 1383 }, { "epoch": 0.2, "learning_rate": 1.8442303855500562e-05, "loss": 0.8252, "step": 1384 }, { "epoch": 0.2, "learning_rate": 1.843973567834943e-05, "loss": 0.8838, "step": 1385 }, { "epoch": 0.2, "learning_rate": 1.8437165564984455e-05, "loss": 0.8457, "step": 1386 }, { "epoch": 0.21, "learning_rate": 1.8434593515995265e-05, "loss": 0.814, "step": 1387 }, { "epoch": 0.21, "learning_rate": 1.843201953197193e-05, "loss": 0.8335, "step": 1388 }, { "epoch": 0.21, "learning_rate": 1.8429443613504962e-05, "loss": 0.8169, "step": 1389 }, { "epoch": 0.21, "learning_rate": 1.8426865761185324e-05, "loss": 0.8232, "step": 1390 }, { "epoch": 0.21, "learning_rate": 1.842428597560441e-05, "loss": 0.8296, "step": 1391 }, { "epoch": 0.21, "learning_rate": 1.842170425735407e-05, "loss": 0.8428, "step": 1392 }, { "epoch": 0.21, "learning_rate": 1.841912060702659e-05, "loss": 0.8887, "step": 1393 }, { "epoch": 0.21, "learning_rate": 1.841653502521471e-05, "loss": 0.8569, "step": 1394 }, { "epoch": 0.21, "learning_rate": 1.841394751251159e-05, "loss": 0.8193, "step": 1395 }, { "epoch": 0.21, "learning_rate": 1.8411358069510864e-05, "loss": 0.8813, "step": 1396 }, { "epoch": 0.21, "learning_rate": 1.8408766696806578e-05, "loss": 0.9092, "step": 1397 }, { "epoch": 0.21, "learning_rate": 1.840617339499325e-05, "loss": 0.897, "step": 1398 }, { "epoch": 0.21, "learning_rate": 1.840357816466581e-05, "loss": 0.8193, "step": 1399 }, { "epoch": 0.21, "learning_rate": 1.8400981006419663e-05, "loss": 0.8223, "step": 1400 }, { "epoch": 0.21, "learning_rate": 1.8398381920850626e-05, "loss": 0.8638, "step": 1401 }, { "epoch": 0.21, "learning_rate": 1.8395780908554983e-05, "loss": 0.8833, "step": 1402 }, { "epoch": 0.21, "learning_rate": 1.839317797012944e-05, "loss": 0.875, "step": 1403 }, { "epoch": 0.21, "learning_rate": 1.8390573106171158e-05, "loss": 0.8511, "step": 1404 }, { "epoch": 0.21, "learning_rate": 1.8387966317277733e-05, "loss": 0.8184, "step": 1405 }, { "epoch": 0.21, "learning_rate": 1.838535760404721e-05, "loss": 0.8398, "step": 1406 }, { "epoch": 0.21, "learning_rate": 1.8382746967078063e-05, "loss": 0.8281, "step": 1407 }, { "epoch": 0.21, "learning_rate": 1.8380134406969218e-05, "loss": 0.916, "step": 1408 }, { "epoch": 0.21, "learning_rate": 1.8377519924320045e-05, "loss": 0.8433, "step": 1409 }, { "epoch": 0.21, "learning_rate": 1.8374903519730347e-05, "loss": 0.7871, "step": 1410 }, { "epoch": 0.21, "learning_rate": 1.837228519380036e-05, "loss": 0.8486, "step": 1411 }, { "epoch": 0.21, "learning_rate": 1.8369664947130787e-05, "loss": 0.8521, "step": 1412 }, { "epoch": 0.21, "learning_rate": 1.8367042780322744e-05, "loss": 0.8306, "step": 1413 }, { "epoch": 0.21, "learning_rate": 1.8364418693977803e-05, "loss": 0.8989, "step": 1414 }, { "epoch": 0.21, "learning_rate": 1.8361792688697972e-05, "loss": 0.8096, "step": 1415 }, { "epoch": 0.21, "learning_rate": 1.8359164765085698e-05, "loss": 0.8345, "step": 1416 }, { "epoch": 0.21, "learning_rate": 1.8356534923743875e-05, "loss": 0.8965, "step": 1417 }, { "epoch": 0.21, "learning_rate": 1.8353903165275825e-05, "loss": 0.8662, "step": 1418 }, { "epoch": 0.21, "learning_rate": 1.8351269490285323e-05, "loss": 0.8569, "step": 1419 }, { "epoch": 0.21, "learning_rate": 1.8348633899376567e-05, "loss": 0.8179, "step": 1420 }, { "epoch": 0.21, "learning_rate": 1.834599639315422e-05, "loss": 0.8916, "step": 1421 }, { "epoch": 0.21, "learning_rate": 1.8343356972223357e-05, "loss": 0.8462, "step": 1422 }, { "epoch": 0.21, "learning_rate": 1.834071563718951e-05, "loss": 0.8857, "step": 1423 }, { "epoch": 0.21, "learning_rate": 1.8338072388658642e-05, "loss": 0.7886, "step": 1424 }, { "epoch": 0.21, "learning_rate": 1.833542722723716e-05, "loss": 0.9185, "step": 1425 }, { "epoch": 0.21, "learning_rate": 1.8332780153531905e-05, "loss": 0.8921, "step": 1426 }, { "epoch": 0.21, "learning_rate": 1.833013116815016e-05, "loss": 0.8926, "step": 1427 }, { "epoch": 0.21, "learning_rate": 1.8327480271699647e-05, "loss": 0.8638, "step": 1428 }, { "epoch": 0.21, "learning_rate": 1.8324827464788525e-05, "loss": 0.8848, "step": 1429 }, { "epoch": 0.21, "learning_rate": 1.8322172748025386e-05, "loss": 0.8462, "step": 1430 }, { "epoch": 0.21, "learning_rate": 1.8319516122019274e-05, "loss": 0.8423, "step": 1431 }, { "epoch": 0.21, "learning_rate": 1.8316857587379656e-05, "loss": 0.8906, "step": 1432 }, { "epoch": 0.21, "learning_rate": 1.831419714471645e-05, "loss": 0.8511, "step": 1433 }, { "epoch": 0.21, "learning_rate": 1.831153479464e-05, "loss": 0.8525, "step": 1434 }, { "epoch": 0.21, "learning_rate": 1.8308870537761094e-05, "loss": 0.8687, "step": 1435 }, { "epoch": 0.21, "learning_rate": 1.8306204374690955e-05, "loss": 0.9082, "step": 1436 }, { "epoch": 0.21, "learning_rate": 1.830353630604125e-05, "loss": 0.873, "step": 1437 }, { "epoch": 0.21, "learning_rate": 1.830086633242407e-05, "loss": 0.8379, "step": 1438 }, { "epoch": 0.21, "learning_rate": 1.8298194454451957e-05, "loss": 0.8428, "step": 1439 }, { "epoch": 0.21, "learning_rate": 1.829552067273788e-05, "loss": 0.8711, "step": 1440 }, { "epoch": 0.21, "learning_rate": 1.8292844987895247e-05, "loss": 0.9595, "step": 1441 }, { "epoch": 0.21, "learning_rate": 1.829016740053791e-05, "loss": 0.8354, "step": 1442 }, { "epoch": 0.21, "learning_rate": 1.8287487911280147e-05, "loss": 0.8467, "step": 1443 }, { "epoch": 0.21, "learning_rate": 1.8284806520736677e-05, "loss": 0.7793, "step": 1444 }, { "epoch": 0.21, "learning_rate": 1.8282123229522654e-05, "loss": 0.874, "step": 1445 }, { "epoch": 0.21, "learning_rate": 1.8279438038253675e-05, "loss": 0.9229, "step": 1446 }, { "epoch": 0.21, "learning_rate": 1.827675094754576e-05, "loss": 0.8286, "step": 1447 }, { "epoch": 0.21, "learning_rate": 1.8274061958015377e-05, "loss": 0.8369, "step": 1448 }, { "epoch": 0.21, "learning_rate": 1.8271371070279418e-05, "loss": 0.8901, "step": 1449 }, { "epoch": 0.21, "learning_rate": 1.8268678284955222e-05, "loss": 0.874, "step": 1450 }, { "epoch": 0.21, "learning_rate": 1.826598360266056e-05, "loss": 0.8696, "step": 1451 }, { "epoch": 0.21, "learning_rate": 1.8263287024013628e-05, "loss": 0.854, "step": 1452 }, { "epoch": 0.21, "learning_rate": 1.8260588549633072e-05, "loss": 0.8613, "step": 1453 }, { "epoch": 0.22, "learning_rate": 1.8257888180137966e-05, "loss": 0.8784, "step": 1454 }, { "epoch": 0.22, "learning_rate": 1.8255185916147817e-05, "loss": 0.8813, "step": 1455 }, { "epoch": 0.22, "learning_rate": 1.8252481758282573e-05, "loss": 0.854, "step": 1456 }, { "epoch": 0.22, "learning_rate": 1.82497757071626e-05, "loss": 0.8564, "step": 1457 }, { "epoch": 0.22, "learning_rate": 1.824706776340873e-05, "loss": 0.8989, "step": 1458 }, { "epoch": 0.22, "learning_rate": 1.824435792764219e-05, "loss": 0.8525, "step": 1459 }, { "epoch": 0.22, "learning_rate": 1.8241646200484676e-05, "loss": 0.8345, "step": 1460 }, { "epoch": 0.22, "learning_rate": 1.8238932582558294e-05, "loss": 0.8364, "step": 1461 }, { "epoch": 0.22, "learning_rate": 1.823621707448559e-05, "loss": 0.8086, "step": 1462 }, { "epoch": 0.22, "learning_rate": 1.8233499676889556e-05, "loss": 0.833, "step": 1463 }, { "epoch": 0.22, "learning_rate": 1.8230780390393606e-05, "loss": 0.9136, "step": 1464 }, { "epoch": 0.22, "learning_rate": 1.822805921562158e-05, "loss": 0.8892, "step": 1465 }, { "epoch": 0.22, "learning_rate": 1.8225336153197765e-05, "loss": 0.8716, "step": 1466 }, { "epoch": 0.22, "learning_rate": 1.822261120374688e-05, "loss": 0.8076, "step": 1467 }, { "epoch": 0.22, "learning_rate": 1.821988436789407e-05, "loss": 0.8701, "step": 1468 }, { "epoch": 0.22, "learning_rate": 1.8217155646264915e-05, "loss": 0.9385, "step": 1469 }, { "epoch": 0.22, "learning_rate": 1.8214425039485428e-05, "loss": 0.8809, "step": 1470 }, { "epoch": 0.22, "learning_rate": 1.821169254818206e-05, "loss": 0.8481, "step": 1471 }, { "epoch": 0.22, "learning_rate": 1.8208958172981685e-05, "loss": 0.8257, "step": 1472 }, { "epoch": 0.22, "learning_rate": 1.820622191451161e-05, "loss": 0.834, "step": 1473 }, { "epoch": 0.22, "learning_rate": 1.8203483773399587e-05, "loss": 0.8301, "step": 1474 }, { "epoch": 0.22, "learning_rate": 1.8200743750273783e-05, "loss": 0.3228, "step": 1475 }, { "epoch": 0.22, "learning_rate": 1.8198001845762805e-05, "loss": 0.8765, "step": 1476 }, { "epoch": 0.22, "learning_rate": 1.8195258060495693e-05, "loss": 0.7993, "step": 1477 }, { "epoch": 0.22, "learning_rate": 1.819251239510192e-05, "loss": 0.895, "step": 1478 }, { "epoch": 0.22, "learning_rate": 1.818976485021138e-05, "loss": 0.8408, "step": 1479 }, { "epoch": 0.22, "learning_rate": 1.8187015426454402e-05, "loss": 0.9082, "step": 1480 }, { "epoch": 0.22, "learning_rate": 1.818426412446176e-05, "loss": 0.8608, "step": 1481 }, { "epoch": 0.22, "learning_rate": 1.8181510944864642e-05, "loss": 0.3131, "step": 1482 }, { "epoch": 0.22, "learning_rate": 1.8178755888294673e-05, "loss": 0.8589, "step": 1483 }, { "epoch": 0.22, "learning_rate": 1.8175998955383906e-05, "loss": 0.8447, "step": 1484 }, { "epoch": 0.22, "learning_rate": 1.817324014676483e-05, "loss": 0.9111, "step": 1485 }, { "epoch": 0.22, "learning_rate": 1.8170479463070362e-05, "loss": 0.8843, "step": 1486 }, { "epoch": 0.22, "learning_rate": 1.8167716904933842e-05, "loss": 0.8506, "step": 1487 }, { "epoch": 0.22, "learning_rate": 1.8164952472989047e-05, "loss": 0.8701, "step": 1488 }, { "epoch": 0.22, "learning_rate": 1.816218616787019e-05, "loss": 0.8716, "step": 1489 }, { "epoch": 0.22, "learning_rate": 1.8159417990211904e-05, "loss": 0.9302, "step": 1490 }, { "epoch": 0.22, "learning_rate": 1.815664794064925e-05, "loss": 0.8643, "step": 1491 }, { "epoch": 0.22, "learning_rate": 1.8153876019817725e-05, "loss": 0.897, "step": 1492 }, { "epoch": 0.22, "learning_rate": 1.8151102228353257e-05, "loss": 0.8633, "step": 1493 }, { "epoch": 0.22, "learning_rate": 1.8148326566892197e-05, "loss": 0.8774, "step": 1494 }, { "epoch": 0.22, "learning_rate": 1.8145549036071327e-05, "loss": 0.8682, "step": 1495 }, { "epoch": 0.22, "learning_rate": 1.8142769636527852e-05, "loss": 0.8569, "step": 1496 }, { "epoch": 0.22, "learning_rate": 1.8139988368899423e-05, "loss": 0.9365, "step": 1497 }, { "epoch": 0.22, "learning_rate": 1.81372052338241e-05, "loss": 0.8638, "step": 1498 }, { "epoch": 0.22, "learning_rate": 1.8134420231940384e-05, "loss": 0.9297, "step": 1499 }, { "epoch": 0.22, "learning_rate": 1.81316333638872e-05, "loss": 0.9189, "step": 1500 }, { "epoch": 0.22, "learning_rate": 1.8128844630303896e-05, "loss": 0.8384, "step": 1501 }, { "epoch": 0.22, "learning_rate": 1.812605403183026e-05, "loss": 0.8149, "step": 1502 }, { "epoch": 0.22, "learning_rate": 1.8123261569106502e-05, "loss": 0.8428, "step": 1503 }, { "epoch": 0.22, "learning_rate": 1.812046724277325e-05, "loss": 0.8647, "step": 1504 }, { "epoch": 0.22, "learning_rate": 1.8117671053471576e-05, "loss": 0.8574, "step": 1505 }, { "epoch": 0.22, "learning_rate": 1.811487300184297e-05, "loss": 0.894, "step": 1506 }, { "epoch": 0.22, "learning_rate": 1.811207308852935e-05, "loss": 0.8286, "step": 1507 }, { "epoch": 0.22, "learning_rate": 1.8109271314173062e-05, "loss": 0.8574, "step": 1508 }, { "epoch": 0.22, "learning_rate": 1.8106467679416877e-05, "loss": 0.877, "step": 1509 }, { "epoch": 0.22, "learning_rate": 1.8103662184904e-05, "loss": 0.8335, "step": 1510 }, { "epoch": 0.22, "learning_rate": 1.8100854831278052e-05, "loss": 0.8247, "step": 1511 }, { "epoch": 0.22, "learning_rate": 1.8098045619183092e-05, "loss": 0.8198, "step": 1512 }, { "epoch": 0.22, "learning_rate": 1.8095234549263592e-05, "loss": 0.8828, "step": 1513 }, { "epoch": 0.22, "learning_rate": 1.8092421622164464e-05, "loss": 0.8813, "step": 1514 }, { "epoch": 0.22, "learning_rate": 1.8089606838531034e-05, "loss": 0.8813, "step": 1515 }, { "epoch": 0.22, "learning_rate": 1.8086790199009067e-05, "loss": 0.873, "step": 1516 }, { "epoch": 0.22, "learning_rate": 1.808397170424474e-05, "loss": 0.8491, "step": 1517 }, { "epoch": 0.22, "learning_rate": 1.808115135488467e-05, "loss": 0.8652, "step": 1518 }, { "epoch": 0.22, "learning_rate": 1.8078329151575874e-05, "loss": 0.8564, "step": 1519 }, { "epoch": 0.22, "learning_rate": 1.807550509496583e-05, "loss": 0.9204, "step": 1520 }, { "epoch": 0.22, "learning_rate": 1.8072679185702416e-05, "loss": 0.9033, "step": 1521 }, { "epoch": 0.23, "learning_rate": 1.8069851424433943e-05, "loss": 0.8726, "step": 1522 }, { "epoch": 0.23, "learning_rate": 1.806702181180914e-05, "loss": 0.8379, "step": 1523 }, { "epoch": 0.23, "learning_rate": 1.8064190348477173e-05, "loss": 0.8599, "step": 1524 }, { "epoch": 0.23, "learning_rate": 1.8061357035087627e-05, "loss": 0.8154, "step": 1525 }, { "epoch": 0.23, "learning_rate": 1.8058521872290505e-05, "loss": 0.9033, "step": 1526 }, { "epoch": 0.23, "learning_rate": 1.8055684860736246e-05, "loss": 0.8872, "step": 1527 }, { "epoch": 0.23, "learning_rate": 1.80528460010757e-05, "loss": 0.8989, "step": 1528 }, { "epoch": 0.23, "learning_rate": 1.8050005293960157e-05, "loss": 0.877, "step": 1529 }, { "epoch": 0.23, "learning_rate": 1.8047162740041314e-05, "loss": 0.8496, "step": 1530 }, { "epoch": 0.23, "learning_rate": 1.8044318339971302e-05, "loss": 0.8911, "step": 1531 }, { "epoch": 0.23, "learning_rate": 1.8041472094402676e-05, "loss": 0.8384, "step": 1532 }, { "epoch": 0.23, "learning_rate": 1.8038624003988406e-05, "loss": 0.8936, "step": 1533 }, { "epoch": 0.23, "learning_rate": 1.803577406938189e-05, "loss": 0.875, "step": 1534 }, { "epoch": 0.23, "learning_rate": 1.803292229123696e-05, "loss": 0.9087, "step": 1535 }, { "epoch": 0.23, "learning_rate": 1.803006867020785e-05, "loss": 0.8247, "step": 1536 }, { "epoch": 0.23, "learning_rate": 1.8027213206949232e-05, "loss": 0.8179, "step": 1537 }, { "epoch": 0.23, "learning_rate": 1.8024355902116198e-05, "loss": 0.7744, "step": 1538 }, { "epoch": 0.23, "learning_rate": 1.8021496756364256e-05, "loss": 0.8442, "step": 1539 }, { "epoch": 0.23, "learning_rate": 1.8018635770349343e-05, "loss": 0.8428, "step": 1540 }, { "epoch": 0.23, "learning_rate": 1.8015772944727814e-05, "loss": 0.856, "step": 1541 }, { "epoch": 0.23, "learning_rate": 1.8012908280156452e-05, "loss": 0.8491, "step": 1542 }, { "epoch": 0.23, "learning_rate": 1.801004177729246e-05, "loss": 0.8721, "step": 1543 }, { "epoch": 0.23, "learning_rate": 1.8007173436793453e-05, "loss": 0.9004, "step": 1544 }, { "epoch": 0.23, "learning_rate": 1.800430325931748e-05, "loss": 0.8574, "step": 1545 }, { "epoch": 0.23, "learning_rate": 1.8001431245523008e-05, "loss": 0.8696, "step": 1546 }, { "epoch": 0.23, "learning_rate": 1.7998557396068923e-05, "loss": 0.8389, "step": 1547 }, { "epoch": 0.23, "learning_rate": 1.799568171161453e-05, "loss": 0.8647, "step": 1548 }, { "epoch": 0.23, "learning_rate": 1.7992804192819565e-05, "loss": 0.9087, "step": 1549 }, { "epoch": 0.23, "learning_rate": 1.798992484034417e-05, "loss": 0.9365, "step": 1550 }, { "epoch": 0.23, "learning_rate": 1.798704365484892e-05, "loss": 0.8521, "step": 1551 }, { "epoch": 0.23, "learning_rate": 1.7984160636994808e-05, "loss": 0.9175, "step": 1552 }, { "epoch": 0.23, "learning_rate": 1.7981275787443243e-05, "loss": 0.8223, "step": 1553 }, { "epoch": 0.23, "learning_rate": 1.7978389106856056e-05, "loss": 0.8442, "step": 1554 }, { "epoch": 0.23, "learning_rate": 1.7975500595895503e-05, "loss": 0.8215, "step": 1555 }, { "epoch": 0.23, "learning_rate": 1.7972610255224246e-05, "loss": 0.8975, "step": 1556 }, { "epoch": 0.23, "learning_rate": 1.796971808550539e-05, "loss": 0.8481, "step": 1557 }, { "epoch": 0.23, "learning_rate": 1.7966824087402438e-05, "loss": 0.9209, "step": 1558 }, { "epoch": 0.23, "learning_rate": 1.796392826157932e-05, "loss": 0.8057, "step": 1559 }, { "epoch": 0.23, "learning_rate": 1.7961030608700395e-05, "loss": 0.3083, "step": 1560 }, { "epoch": 0.23, "learning_rate": 1.7958131129430417e-05, "loss": 0.8662, "step": 1561 }, { "epoch": 0.23, "learning_rate": 1.795522982443459e-05, "loss": 0.3489, "step": 1562 }, { "epoch": 0.23, "learning_rate": 1.795232669437851e-05, "loss": 0.791, "step": 1563 }, { "epoch": 0.23, "learning_rate": 1.7949421739928205e-05, "loss": 0.8301, "step": 1564 }, { "epoch": 0.23, "learning_rate": 1.7946514961750123e-05, "loss": 0.8652, "step": 1565 }, { "epoch": 0.23, "learning_rate": 1.7943606360511122e-05, "loss": 0.915, "step": 1566 }, { "epoch": 0.23, "learning_rate": 1.7940695936878486e-05, "loss": 0.7979, "step": 1567 }, { "epoch": 0.23, "learning_rate": 1.793778369151991e-05, "loss": 0.8042, "step": 1568 }, { "epoch": 0.23, "learning_rate": 1.7934869625103517e-05, "loss": 0.9282, "step": 1569 }, { "epoch": 0.23, "learning_rate": 1.793195373829784e-05, "loss": 0.8145, "step": 1570 }, { "epoch": 0.23, "learning_rate": 1.7929036031771825e-05, "loss": 0.3401, "step": 1571 }, { "epoch": 0.23, "learning_rate": 1.792611650619485e-05, "loss": 0.813, "step": 1572 }, { "epoch": 0.23, "learning_rate": 1.7923195162236694e-05, "loss": 0.8848, "step": 1573 }, { "epoch": 0.23, "learning_rate": 1.792027200056757e-05, "loss": 0.8599, "step": 1574 }, { "epoch": 0.23, "learning_rate": 1.7917347021858092e-05, "loss": 0.9072, "step": 1575 }, { "epoch": 0.23, "learning_rate": 1.7914420226779303e-05, "loss": 0.8604, "step": 1576 }, { "epoch": 0.23, "learning_rate": 1.7911491616002656e-05, "loss": 0.8398, "step": 1577 }, { "epoch": 0.23, "learning_rate": 1.7908561190200022e-05, "loss": 0.9121, "step": 1578 }, { "epoch": 0.23, "learning_rate": 1.790562895004369e-05, "loss": 0.8711, "step": 1579 }, { "epoch": 0.23, "learning_rate": 1.790269489620636e-05, "loss": 0.8364, "step": 1580 }, { "epoch": 0.23, "learning_rate": 1.7899759029361156e-05, "loss": 0.897, "step": 1581 }, { "epoch": 0.23, "learning_rate": 1.7896821350181613e-05, "loss": 0.8872, "step": 1582 }, { "epoch": 0.23, "learning_rate": 1.7893881859341684e-05, "loss": 0.8633, "step": 1583 }, { "epoch": 0.23, "learning_rate": 1.7890940557515735e-05, "loss": 0.8687, "step": 1584 }, { "epoch": 0.23, "learning_rate": 1.7887997445378547e-05, "loss": 0.8623, "step": 1585 }, { "epoch": 0.23, "learning_rate": 1.7885052523605324e-05, "loss": 0.8633, "step": 1586 }, { "epoch": 0.23, "learning_rate": 1.7882105792871675e-05, "loss": 0.9087, "step": 1587 }, { "epoch": 0.23, "learning_rate": 1.787915725385363e-05, "loss": 0.855, "step": 1588 }, { "epoch": 0.23, "learning_rate": 1.7876206907227628e-05, "loss": 0.8384, "step": 1589 }, { "epoch": 0.24, "learning_rate": 1.787325475367053e-05, "loss": 0.8433, "step": 1590 }, { "epoch": 0.24, "learning_rate": 1.787030079385961e-05, "loss": 0.8672, "step": 1591 }, { "epoch": 0.24, "learning_rate": 1.7867345028472556e-05, "loss": 0.8003, "step": 1592 }, { "epoch": 0.24, "learning_rate": 1.7864387458187466e-05, "loss": 0.9243, "step": 1593 }, { "epoch": 0.24, "learning_rate": 1.7861428083682855e-05, "loss": 0.873, "step": 1594 }, { "epoch": 0.24, "learning_rate": 1.785846690563765e-05, "loss": 0.8418, "step": 1595 }, { "epoch": 0.24, "learning_rate": 1.7855503924731205e-05, "loss": 0.8789, "step": 1596 }, { "epoch": 0.24, "learning_rate": 1.785253914164326e-05, "loss": 0.3291, "step": 1597 }, { "epoch": 0.24, "learning_rate": 1.7849572557054e-05, "loss": 0.8296, "step": 1598 }, { "epoch": 0.24, "learning_rate": 1.7846604171643997e-05, "loss": 0.876, "step": 1599 }, { "epoch": 0.24, "learning_rate": 1.784363398609425e-05, "loss": 0.8623, "step": 1600 }, { "epoch": 0.24, "learning_rate": 1.7840662001086174e-05, "loss": 0.8418, "step": 1601 }, { "epoch": 0.24, "learning_rate": 1.7837688217301584e-05, "loss": 0.8403, "step": 1602 }, { "epoch": 0.24, "learning_rate": 1.7834712635422718e-05, "loss": 0.8623, "step": 1603 }, { "epoch": 0.24, "learning_rate": 1.7831735256132226e-05, "loss": 0.875, "step": 1604 }, { "epoch": 0.24, "learning_rate": 1.782875608011316e-05, "loss": 0.877, "step": 1605 }, { "epoch": 0.24, "learning_rate": 1.7825775108049003e-05, "loss": 0.8667, "step": 1606 }, { "epoch": 0.24, "learning_rate": 1.7822792340623623e-05, "loss": 0.8472, "step": 1607 }, { "epoch": 0.24, "learning_rate": 1.7819807778521335e-05, "loss": 0.8647, "step": 1608 }, { "epoch": 0.24, "learning_rate": 1.781682142242683e-05, "loss": 0.9053, "step": 1609 }, { "epoch": 0.24, "learning_rate": 1.7813833273025237e-05, "loss": 0.8613, "step": 1610 }, { "epoch": 0.24, "learning_rate": 1.7810843331002085e-05, "loss": 0.8516, "step": 1611 }, { "epoch": 0.24, "learning_rate": 1.7807851597043315e-05, "loss": 0.8584, "step": 1612 }, { "epoch": 0.24, "learning_rate": 1.7804858071835273e-05, "loss": 0.8667, "step": 1613 }, { "epoch": 0.24, "learning_rate": 1.7801862756064735e-05, "loss": 0.8179, "step": 1614 }, { "epoch": 0.24, "learning_rate": 1.7798865650418868e-05, "loss": 0.8896, "step": 1615 }, { "epoch": 0.24, "learning_rate": 1.7795866755585258e-05, "loss": 0.8633, "step": 1616 }, { "epoch": 0.24, "learning_rate": 1.77928660722519e-05, "loss": 0.896, "step": 1617 }, { "epoch": 0.24, "learning_rate": 1.7789863601107203e-05, "loss": 0.8818, "step": 1618 }, { "epoch": 0.24, "learning_rate": 1.778685934283998e-05, "loss": 0.9409, "step": 1619 }, { "epoch": 0.24, "learning_rate": 1.778385329813946e-05, "loss": 0.8696, "step": 1620 }, { "epoch": 0.24, "learning_rate": 1.7780845467695277e-05, "loss": 0.875, "step": 1621 }, { "epoch": 0.24, "learning_rate": 1.7777835852197477e-05, "loss": 0.8296, "step": 1622 }, { "epoch": 0.24, "learning_rate": 1.7774824452336516e-05, "loss": 0.8848, "step": 1623 }, { "epoch": 0.24, "learning_rate": 1.7771811268803258e-05, "loss": 0.8745, "step": 1624 }, { "epoch": 0.24, "learning_rate": 1.7768796302288973e-05, "loss": 0.9316, "step": 1625 }, { "epoch": 0.24, "learning_rate": 1.7765779553485352e-05, "loss": 0.9092, "step": 1626 }, { "epoch": 0.24, "learning_rate": 1.776276102308448e-05, "loss": 0.8511, "step": 1627 }, { "epoch": 0.24, "learning_rate": 1.775974071177886e-05, "loss": 0.875, "step": 1628 }, { "epoch": 0.24, "learning_rate": 1.7756718620261402e-05, "loss": 0.8184, "step": 1629 }, { "epoch": 0.24, "learning_rate": 1.7753694749225422e-05, "loss": 0.8535, "step": 1630 }, { "epoch": 0.24, "learning_rate": 1.7750669099364643e-05, "loss": 0.8569, "step": 1631 }, { "epoch": 0.24, "learning_rate": 1.7747641671373203e-05, "loss": 0.8506, "step": 1632 }, { "epoch": 0.24, "learning_rate": 1.774461246594564e-05, "loss": 0.8613, "step": 1633 }, { "epoch": 0.24, "learning_rate": 1.7741581483776906e-05, "loss": 0.8315, "step": 1634 }, { "epoch": 0.24, "learning_rate": 1.773854872556236e-05, "loss": 0.8965, "step": 1635 }, { "epoch": 0.24, "learning_rate": 1.7735514191997763e-05, "loss": 0.894, "step": 1636 }, { "epoch": 0.24, "learning_rate": 1.7732477883779287e-05, "loss": 0.8301, "step": 1637 }, { "epoch": 0.24, "learning_rate": 1.772943980160351e-05, "loss": 0.3223, "step": 1638 }, { "epoch": 0.24, "learning_rate": 1.7726399946167424e-05, "loss": 0.8354, "step": 1639 }, { "epoch": 0.24, "learning_rate": 1.772335831816841e-05, "loss": 0.877, "step": 1640 }, { "epoch": 0.24, "learning_rate": 1.772031491830428e-05, "loss": 0.8379, "step": 1641 }, { "epoch": 0.24, "learning_rate": 1.7717269747273234e-05, "loss": 0.3325, "step": 1642 }, { "epoch": 0.24, "learning_rate": 1.7714222805773885e-05, "loss": 0.8208, "step": 1643 }, { "epoch": 0.24, "learning_rate": 1.7711174094505248e-05, "loss": 0.8955, "step": 1644 }, { "epoch": 0.24, "learning_rate": 1.770812361416675e-05, "loss": 0.8745, "step": 1645 }, { "epoch": 0.24, "learning_rate": 1.7705071365458225e-05, "loss": 0.8462, "step": 1646 }, { "epoch": 0.24, "learning_rate": 1.7702017349079907e-05, "loss": 0.8721, "step": 1647 }, { "epoch": 0.24, "learning_rate": 1.7698961565732432e-05, "loss": 0.8691, "step": 1648 }, { "epoch": 0.24, "learning_rate": 1.769590401611685e-05, "loss": 0.8735, "step": 1649 }, { "epoch": 0.24, "learning_rate": 1.7692844700934615e-05, "loss": 0.9346, "step": 1650 }, { "epoch": 0.24, "learning_rate": 1.768978362088759e-05, "loss": 0.8555, "step": 1651 }, { "epoch": 0.24, "learning_rate": 1.768672077667802e-05, "loss": 0.8887, "step": 1652 }, { "epoch": 0.24, "learning_rate": 1.7683656169008587e-05, "loss": 0.8687, "step": 1653 }, { "epoch": 0.24, "learning_rate": 1.7680589798582356e-05, "loss": 0.8535, "step": 1654 }, { "epoch": 0.24, "learning_rate": 1.7677521666102805e-05, "loss": 0.8384, "step": 1655 }, { "epoch": 0.24, "learning_rate": 1.767445177227381e-05, "loss": 0.7847, "step": 1656 }, { "epoch": 0.25, "learning_rate": 1.767138011779966e-05, "loss": 0.3116, "step": 1657 }, { "epoch": 0.25, "learning_rate": 1.7668306703385038e-05, "loss": 0.8838, "step": 1658 }, { "epoch": 0.25, "learning_rate": 1.7665231529735042e-05, "loss": 0.814, "step": 1659 }, { "epoch": 0.25, "learning_rate": 1.766215459755516e-05, "loss": 0.7642, "step": 1660 }, { "epoch": 0.25, "learning_rate": 1.7659075907551296e-05, "loss": 0.3298, "step": 1661 }, { "epoch": 0.25, "learning_rate": 1.7655995460429747e-05, "loss": 0.7808, "step": 1662 }, { "epoch": 0.25, "learning_rate": 1.765291325689723e-05, "loss": 0.8608, "step": 1663 }, { "epoch": 0.25, "learning_rate": 1.7649829297660836e-05, "loss": 0.7949, "step": 1664 }, { "epoch": 0.25, "learning_rate": 1.7646743583428087e-05, "loss": 0.8857, "step": 1665 }, { "epoch": 0.25, "learning_rate": 1.7643656114906895e-05, "loss": 0.8438, "step": 1666 }, { "epoch": 0.25, "learning_rate": 1.7640566892805573e-05, "loss": 0.8999, "step": 1667 }, { "epoch": 0.25, "learning_rate": 1.7637475917832843e-05, "loss": 0.854, "step": 1668 }, { "epoch": 0.25, "learning_rate": 1.763438319069782e-05, "loss": 0.8989, "step": 1669 }, { "epoch": 0.25, "learning_rate": 1.763128871211003e-05, "loss": 0.877, "step": 1670 }, { "epoch": 0.25, "learning_rate": 1.76281924827794e-05, "loss": 0.8516, "step": 1671 }, { "epoch": 0.25, "learning_rate": 1.762509450341625e-05, "loss": 0.8569, "step": 1672 }, { "epoch": 0.25, "learning_rate": 1.762199477473131e-05, "loss": 0.7871, "step": 1673 }, { "epoch": 0.25, "learning_rate": 1.7618893297435713e-05, "loss": 0.9155, "step": 1674 }, { "epoch": 0.25, "learning_rate": 1.761579007224098e-05, "loss": 0.8442, "step": 1675 }, { "epoch": 0.25, "learning_rate": 1.761268509985905e-05, "loss": 0.8701, "step": 1676 }, { "epoch": 0.25, "learning_rate": 1.7609578381002248e-05, "loss": 0.8716, "step": 1677 }, { "epoch": 0.25, "learning_rate": 1.760646991638331e-05, "loss": 0.8755, "step": 1678 }, { "epoch": 0.25, "learning_rate": 1.7603359706715366e-05, "loss": 0.3669, "step": 1679 }, { "epoch": 0.25, "learning_rate": 1.7600247752711952e-05, "loss": 0.8281, "step": 1680 }, { "epoch": 0.25, "learning_rate": 1.7597134055087e-05, "loss": 0.811, "step": 1681 }, { "epoch": 0.25, "learning_rate": 1.7594018614554845e-05, "loss": 0.9541, "step": 1682 }, { "epoch": 0.25, "learning_rate": 1.7590901431830218e-05, "loss": 0.856, "step": 1683 }, { "epoch": 0.25, "learning_rate": 1.758778250762825e-05, "loss": 0.8638, "step": 1684 }, { "epoch": 0.25, "learning_rate": 1.7584661842664478e-05, "loss": 0.8179, "step": 1685 }, { "epoch": 0.25, "learning_rate": 1.7581539437654833e-05, "loss": 0.8906, "step": 1686 }, { "epoch": 0.25, "learning_rate": 1.7578415293315646e-05, "loss": 0.8638, "step": 1687 }, { "epoch": 0.25, "learning_rate": 1.7575289410363642e-05, "loss": 0.8564, "step": 1688 }, { "epoch": 0.25, "learning_rate": 1.7572161789515955e-05, "loss": 0.9253, "step": 1689 }, { "epoch": 0.25, "learning_rate": 1.7569032431490108e-05, "loss": 0.8804, "step": 1690 }, { "epoch": 0.25, "learning_rate": 1.7565901337004035e-05, "loss": 0.8638, "step": 1691 }, { "epoch": 0.25, "learning_rate": 1.7562768506776055e-05, "loss": 0.811, "step": 1692 }, { "epoch": 0.25, "learning_rate": 1.755963394152489e-05, "loss": 0.8506, "step": 1693 }, { "epoch": 0.25, "learning_rate": 1.7556497641969658e-05, "loss": 0.8154, "step": 1694 }, { "epoch": 0.25, "learning_rate": 1.7553359608829886e-05, "loss": 0.8584, "step": 1695 }, { "epoch": 0.25, "learning_rate": 1.7550219842825486e-05, "loss": 0.8398, "step": 1696 }, { "epoch": 0.25, "learning_rate": 1.754707834467677e-05, "loss": 0.8735, "step": 1697 }, { "epoch": 0.25, "learning_rate": 1.754393511510445e-05, "loss": 0.7793, "step": 1698 }, { "epoch": 0.25, "learning_rate": 1.754079015482964e-05, "loss": 0.8413, "step": 1699 }, { "epoch": 0.25, "learning_rate": 1.7537643464573838e-05, "loss": 0.853, "step": 1700 }, { "epoch": 0.25, "learning_rate": 1.7534495045058947e-05, "loss": 0.895, "step": 1701 }, { "epoch": 0.25, "learning_rate": 1.753134489700727e-05, "loss": 0.8521, "step": 1702 }, { "epoch": 0.25, "learning_rate": 1.7528193021141502e-05, "loss": 0.8989, "step": 1703 }, { "epoch": 0.25, "learning_rate": 1.7525039418184732e-05, "loss": 0.8374, "step": 1704 }, { "epoch": 0.25, "learning_rate": 1.7521884088860453e-05, "loss": 0.8799, "step": 1705 }, { "epoch": 0.25, "learning_rate": 1.7518727033892542e-05, "loss": 0.7993, "step": 1706 }, { "epoch": 0.25, "learning_rate": 1.7515568254005287e-05, "loss": 0.9331, "step": 1707 }, { "epoch": 0.25, "learning_rate": 1.751240774992336e-05, "loss": 0.79, "step": 1708 }, { "epoch": 0.25, "learning_rate": 1.7509245522371834e-05, "loss": 0.8882, "step": 1709 }, { "epoch": 0.25, "learning_rate": 1.7506081572076176e-05, "loss": 0.8262, "step": 1710 }, { "epoch": 0.25, "learning_rate": 1.750291589976224e-05, "loss": 0.915, "step": 1711 }, { "epoch": 0.25, "learning_rate": 1.74997485061563e-05, "loss": 0.8452, "step": 1712 }, { "epoch": 0.25, "learning_rate": 1.749657939198499e-05, "loss": 0.8354, "step": 1713 }, { "epoch": 0.25, "learning_rate": 1.749340855797537e-05, "loss": 0.9102, "step": 1714 }, { "epoch": 0.25, "learning_rate": 1.749023600485488e-05, "loss": 0.3506, "step": 1715 }, { "epoch": 0.25, "learning_rate": 1.7487061733351348e-05, "loss": 0.8457, "step": 1716 }, { "epoch": 0.25, "learning_rate": 1.7483885744193006e-05, "loss": 0.8457, "step": 1717 }, { "epoch": 0.25, "learning_rate": 1.7480708038108485e-05, "loss": 0.813, "step": 1718 }, { "epoch": 0.25, "learning_rate": 1.7477528615826793e-05, "loss": 0.8467, "step": 1719 }, { "epoch": 0.25, "learning_rate": 1.747434747807735e-05, "loss": 0.936, "step": 1720 }, { "epoch": 0.25, "learning_rate": 1.7471164625589957e-05, "loss": 0.8843, "step": 1721 }, { "epoch": 0.25, "learning_rate": 1.7467980059094817e-05, "loss": 0.833, "step": 1722 }, { "epoch": 0.25, "learning_rate": 1.7464793779322512e-05, "loss": 0.8213, "step": 1723 }, { "epoch": 0.25, "learning_rate": 1.746160578700404e-05, "loss": 0.834, "step": 1724 }, { "epoch": 0.26, "learning_rate": 1.745841608287077e-05, "loss": 0.7974, "step": 1725 }, { "epoch": 0.26, "learning_rate": 1.745522466765447e-05, "loss": 0.8657, "step": 1726 }, { "epoch": 0.26, "learning_rate": 1.7452031542087313e-05, "loss": 0.8711, "step": 1727 }, { "epoch": 0.26, "learning_rate": 1.7448836706901846e-05, "loss": 0.856, "step": 1728 }, { "epoch": 0.26, "learning_rate": 1.744564016283102e-05, "loss": 0.9116, "step": 1729 }, { "epoch": 0.26, "learning_rate": 1.7442441910608174e-05, "loss": 0.8457, "step": 1730 }, { "epoch": 0.26, "learning_rate": 1.743924195096704e-05, "loss": 0.8535, "step": 1731 }, { "epoch": 0.26, "learning_rate": 1.7436040284641742e-05, "loss": 0.3364, "step": 1732 }, { "epoch": 0.26, "learning_rate": 1.743283691236679e-05, "loss": 0.8345, "step": 1733 }, { "epoch": 0.26, "learning_rate": 1.7429631834877098e-05, "loss": 0.8789, "step": 1734 }, { "epoch": 0.26, "learning_rate": 1.7426425052907956e-05, "loss": 0.3196, "step": 1735 }, { "epoch": 0.26, "learning_rate": 1.742321656719506e-05, "loss": 0.8638, "step": 1736 }, { "epoch": 0.26, "learning_rate": 1.7420006378474483e-05, "loss": 0.8774, "step": 1737 }, { "epoch": 0.26, "learning_rate": 1.7416794487482693e-05, "loss": 0.9121, "step": 1738 }, { "epoch": 0.26, "learning_rate": 1.7413580894956558e-05, "loss": 0.8149, "step": 1739 }, { "epoch": 0.26, "learning_rate": 1.7410365601633326e-05, "loss": 0.8472, "step": 1740 }, { "epoch": 0.26, "learning_rate": 1.7407148608250635e-05, "loss": 0.8594, "step": 1741 }, { "epoch": 0.26, "learning_rate": 1.740392991554652e-05, "loss": 0.8691, "step": 1742 }, { "epoch": 0.26, "learning_rate": 1.74007095242594e-05, "loss": 0.8276, "step": 1743 }, { "epoch": 0.26, "learning_rate": 1.7397487435128084e-05, "loss": 0.7842, "step": 1744 }, { "epoch": 0.26, "learning_rate": 1.7394263648891777e-05, "loss": 0.8945, "step": 1745 }, { "epoch": 0.26, "learning_rate": 1.7391038166290065e-05, "loss": 0.854, "step": 1746 }, { "epoch": 0.26, "learning_rate": 1.7387810988062924e-05, "loss": 0.8418, "step": 1747 }, { "epoch": 0.26, "learning_rate": 1.7384582114950726e-05, "loss": 0.7676, "step": 1748 }, { "epoch": 0.26, "learning_rate": 1.7381351547694226e-05, "loss": 0.8838, "step": 1749 }, { "epoch": 0.26, "learning_rate": 1.737811928703457e-05, "loss": 0.8506, "step": 1750 }, { "epoch": 0.26, "learning_rate": 1.7374885333713293e-05, "loss": 0.8398, "step": 1751 }, { "epoch": 0.26, "learning_rate": 1.7371649688472315e-05, "loss": 0.8823, "step": 1752 }, { "epoch": 0.26, "learning_rate": 1.736841235205394e-05, "loss": 0.8486, "step": 1753 }, { "epoch": 0.26, "learning_rate": 1.7365173325200875e-05, "loss": 0.8726, "step": 1754 }, { "epoch": 0.26, "learning_rate": 1.7361932608656207e-05, "loss": 0.9214, "step": 1755 }, { "epoch": 0.26, "learning_rate": 1.7358690203163403e-05, "loss": 0.8062, "step": 1756 }, { "epoch": 0.26, "learning_rate": 1.7355446109466326e-05, "loss": 0.8389, "step": 1757 }, { "epoch": 0.26, "learning_rate": 1.7352200328309225e-05, "loss": 0.8667, "step": 1758 }, { "epoch": 0.26, "learning_rate": 1.7348952860436737e-05, "loss": 0.8188, "step": 1759 }, { "epoch": 0.26, "learning_rate": 1.734570370659388e-05, "loss": 0.855, "step": 1760 }, { "epoch": 0.26, "learning_rate": 1.7342452867526067e-05, "loss": 0.873, "step": 1761 }, { "epoch": 0.26, "learning_rate": 1.733920034397909e-05, "loss": 0.9302, "step": 1762 }, { "epoch": 0.26, "learning_rate": 1.7335946136699138e-05, "loss": 0.8125, "step": 1763 }, { "epoch": 0.26, "learning_rate": 1.7332690246432774e-05, "loss": 0.8184, "step": 1764 }, { "epoch": 0.26, "learning_rate": 1.7329432673926953e-05, "loss": 0.9175, "step": 1765 }, { "epoch": 0.26, "learning_rate": 1.732617341992902e-05, "loss": 0.8325, "step": 1766 }, { "epoch": 0.26, "learning_rate": 1.7322912485186695e-05, "loss": 0.8838, "step": 1767 }, { "epoch": 0.26, "learning_rate": 1.7319649870448096e-05, "loss": 0.8511, "step": 1768 }, { "epoch": 0.26, "learning_rate": 1.7316385576461714e-05, "loss": 0.333, "step": 1769 }, { "epoch": 0.26, "learning_rate": 1.731311960397644e-05, "loss": 0.9067, "step": 1770 }, { "epoch": 0.26, "learning_rate": 1.7309851953741532e-05, "loss": 0.8535, "step": 1771 }, { "epoch": 0.26, "learning_rate": 1.730658262650665e-05, "loss": 0.855, "step": 1772 }, { "epoch": 0.26, "learning_rate": 1.7303311623021824e-05, "loss": 0.8623, "step": 1773 }, { "epoch": 0.26, "learning_rate": 1.7300038944037486e-05, "loss": 0.8901, "step": 1774 }, { "epoch": 0.26, "learning_rate": 1.7296764590304435e-05, "loss": 0.9175, "step": 1775 }, { "epoch": 0.26, "learning_rate": 1.7293488562573863e-05, "loss": 0.8306, "step": 1776 }, { "epoch": 0.26, "learning_rate": 1.7290210861597347e-05, "loss": 0.8379, "step": 1777 }, { "epoch": 0.26, "learning_rate": 1.728693148812684e-05, "loss": 0.7632, "step": 1778 }, { "epoch": 0.26, "learning_rate": 1.728365044291469e-05, "loss": 0.856, "step": 1779 }, { "epoch": 0.26, "learning_rate": 1.7280367726713616e-05, "loss": 0.8091, "step": 1780 }, { "epoch": 0.26, "learning_rate": 1.727708334027673e-05, "loss": 0.874, "step": 1781 }, { "epoch": 0.26, "learning_rate": 1.727379728435753e-05, "loss": 0.7964, "step": 1782 }, { "epoch": 0.26, "learning_rate": 1.7270509559709886e-05, "loss": 0.8667, "step": 1783 }, { "epoch": 0.26, "learning_rate": 1.726722016708805e-05, "loss": 0.8208, "step": 1784 }, { "epoch": 0.26, "learning_rate": 1.7263929107246672e-05, "loss": 0.8467, "step": 1785 }, { "epoch": 0.26, "learning_rate": 1.7260636380940773e-05, "loss": 0.8604, "step": 1786 }, { "epoch": 0.26, "learning_rate": 1.7257341988925756e-05, "loss": 0.8833, "step": 1787 }, { "epoch": 0.26, "learning_rate": 1.7254045931957413e-05, "loss": 0.9395, "step": 1788 }, { "epoch": 0.26, "learning_rate": 1.7250748210791906e-05, "loss": 0.8086, "step": 1789 }, { "epoch": 0.26, "learning_rate": 1.724744882618579e-05, "loss": 0.7603, "step": 1790 }, { "epoch": 0.26, "learning_rate": 1.7244147778896003e-05, "loss": 0.8599, "step": 1791 }, { "epoch": 0.27, "learning_rate": 1.724084506967985e-05, "loss": 0.8579, "step": 1792 }, { "epoch": 0.27, "learning_rate": 1.7237540699295038e-05, "loss": 0.8774, "step": 1793 }, { "epoch": 0.27, "learning_rate": 1.7234234668499633e-05, "loss": 0.9395, "step": 1794 }, { "epoch": 0.27, "learning_rate": 1.7230926978052097e-05, "loss": 0.8374, "step": 1795 }, { "epoch": 0.27, "learning_rate": 1.722761762871127e-05, "loss": 0.8491, "step": 1796 }, { "epoch": 0.27, "learning_rate": 1.722430662123637e-05, "loss": 0.8594, "step": 1797 }, { "epoch": 0.27, "learning_rate": 1.7220993956387e-05, "loss": 0.8652, "step": 1798 }, { "epoch": 0.27, "learning_rate": 1.721767963492313e-05, "loss": 0.876, "step": 1799 }, { "epoch": 0.27, "learning_rate": 1.7214363657605126e-05, "loss": 0.8462, "step": 1800 }, { "epoch": 0.27, "learning_rate": 1.721104602519373e-05, "loss": 0.8569, "step": 1801 }, { "epoch": 0.27, "learning_rate": 1.7207726738450055e-05, "loss": 0.8892, "step": 1802 }, { "epoch": 0.27, "learning_rate": 1.7204405798135607e-05, "loss": 0.8286, "step": 1803 }, { "epoch": 0.27, "learning_rate": 1.720108320501226e-05, "loss": 0.8604, "step": 1804 }, { "epoch": 0.27, "learning_rate": 1.7197758959842267e-05, "loss": 0.8979, "step": 1805 }, { "epoch": 0.27, "learning_rate": 1.7194433063388273e-05, "loss": 0.8486, "step": 1806 }, { "epoch": 0.27, "learning_rate": 1.719110551641329e-05, "loss": 0.8643, "step": 1807 }, { "epoch": 0.27, "learning_rate": 1.718777631968071e-05, "loss": 0.8516, "step": 1808 }, { "epoch": 0.27, "learning_rate": 1.718444547395431e-05, "loss": 0.814, "step": 1809 }, { "epoch": 0.27, "learning_rate": 1.7181112979998235e-05, "loss": 0.8335, "step": 1810 }, { "epoch": 0.27, "learning_rate": 1.7177778838577017e-05, "loss": 0.875, "step": 1811 }, { "epoch": 0.27, "learning_rate": 1.717444305045556e-05, "loss": 0.873, "step": 1812 }, { "epoch": 0.27, "learning_rate": 1.7171105616399153e-05, "loss": 0.3816, "step": 1813 }, { "epoch": 0.27, "learning_rate": 1.7167766537173457e-05, "loss": 0.9131, "step": 1814 }, { "epoch": 0.27, "learning_rate": 1.7164425813544507e-05, "loss": 0.9097, "step": 1815 }, { "epoch": 0.27, "learning_rate": 1.716108344627872e-05, "loss": 0.8716, "step": 1816 }, { "epoch": 0.27, "learning_rate": 1.71577394361429e-05, "loss": 0.8364, "step": 1817 }, { "epoch": 0.27, "learning_rate": 1.7154393783904206e-05, "loss": 0.9106, "step": 1818 }, { "epoch": 0.27, "learning_rate": 1.7151046490330193e-05, "loss": 0.9146, "step": 1819 }, { "epoch": 0.27, "learning_rate": 1.714769755618878e-05, "loss": 0.8467, "step": 1820 }, { "epoch": 0.27, "learning_rate": 1.714434698224827e-05, "loss": 0.8667, "step": 1821 }, { "epoch": 0.27, "learning_rate": 1.714099476927734e-05, "loss": 0.8813, "step": 1822 }, { "epoch": 0.27, "learning_rate": 1.713764091804504e-05, "loss": 0.8867, "step": 1823 }, { "epoch": 0.27, "learning_rate": 1.7134285429320803e-05, "loss": 0.8804, "step": 1824 }, { "epoch": 0.27, "learning_rate": 1.7130928303874427e-05, "loss": 0.8281, "step": 1825 }, { "epoch": 0.27, "learning_rate": 1.71275695424761e-05, "loss": 0.7705, "step": 1826 }, { "epoch": 0.27, "learning_rate": 1.712420914589637e-05, "loss": 0.8779, "step": 1827 }, { "epoch": 0.27, "learning_rate": 1.7120847114906168e-05, "loss": 0.855, "step": 1828 }, { "epoch": 0.27, "learning_rate": 1.7117483450276803e-05, "loss": 0.9287, "step": 1829 }, { "epoch": 0.27, "learning_rate": 1.711411815277995e-05, "loss": 0.8794, "step": 1830 }, { "epoch": 0.27, "learning_rate": 1.711075122318767e-05, "loss": 0.8452, "step": 1831 }, { "epoch": 0.27, "learning_rate": 1.7107382662272384e-05, "loss": 0.3099, "step": 1832 }, { "epoch": 0.27, "learning_rate": 1.71040124708069e-05, "loss": 0.8677, "step": 1833 }, { "epoch": 0.27, "learning_rate": 1.7100640649564396e-05, "loss": 0.3528, "step": 1834 }, { "epoch": 0.27, "learning_rate": 1.7097267199318416e-05, "loss": 0.939, "step": 1835 }, { "epoch": 0.27, "learning_rate": 1.7093892120842894e-05, "loss": 0.873, "step": 1836 }, { "epoch": 0.27, "learning_rate": 1.7090515414912126e-05, "loss": 0.9043, "step": 1837 }, { "epoch": 0.27, "learning_rate": 1.7087137082300776e-05, "loss": 0.8525, "step": 1838 }, { "epoch": 0.27, "learning_rate": 1.70837571237839e-05, "loss": 0.8506, "step": 1839 }, { "epoch": 0.27, "learning_rate": 1.708037554013691e-05, "loss": 0.9004, "step": 1840 }, { "epoch": 0.27, "learning_rate": 1.7076992332135595e-05, "loss": 0.8394, "step": 1841 }, { "epoch": 0.27, "learning_rate": 1.7073607500556127e-05, "loss": 0.8789, "step": 1842 }, { "epoch": 0.27, "learning_rate": 1.707022104617503e-05, "loss": 0.8687, "step": 1843 }, { "epoch": 0.27, "learning_rate": 1.7066832969769222e-05, "loss": 0.8271, "step": 1844 }, { "epoch": 0.27, "learning_rate": 1.706344327211598e-05, "loss": 0.8403, "step": 1845 }, { "epoch": 0.27, "learning_rate": 1.7060051953992956e-05, "loss": 0.8848, "step": 1846 }, { "epoch": 0.27, "learning_rate": 1.7056659016178173e-05, "loss": 0.873, "step": 1847 }, { "epoch": 0.27, "learning_rate": 1.7053264459450023e-05, "loss": 0.8804, "step": 1848 }, { "epoch": 0.27, "learning_rate": 1.704986828458728e-05, "loss": 0.8501, "step": 1849 }, { "epoch": 0.27, "learning_rate": 1.7046470492369086e-05, "loss": 0.8857, "step": 1850 }, { "epoch": 0.27, "learning_rate": 1.704307108357494e-05, "loss": 0.8613, "step": 1851 }, { "epoch": 0.27, "learning_rate": 1.7039670058984725e-05, "loss": 0.8745, "step": 1852 }, { "epoch": 0.27, "learning_rate": 1.7036267419378695e-05, "loss": 0.8496, "step": 1853 }, { "epoch": 0.27, "learning_rate": 1.7032863165537465e-05, "loss": 0.8511, "step": 1854 }, { "epoch": 0.27, "learning_rate": 1.7029457298242035e-05, "loss": 0.8315, "step": 1855 }, { "epoch": 0.27, "learning_rate": 1.702604981827376e-05, "loss": 0.8325, "step": 1856 }, { "epoch": 0.27, "learning_rate": 1.702264072641438e-05, "loss": 0.3618, "step": 1857 }, { "epoch": 0.27, "learning_rate": 1.7019230023445987e-05, "loss": 0.8921, "step": 1858 }, { "epoch": 0.27, "learning_rate": 1.7015817710151058e-05, "loss": 0.8687, "step": 1859 }, { "epoch": 0.28, "learning_rate": 1.7012403787312433e-05, "loss": 0.8057, "step": 1860 }, { "epoch": 0.28, "learning_rate": 1.7008988255713317e-05, "loss": 0.8604, "step": 1861 }, { "epoch": 0.28, "learning_rate": 1.70055711161373e-05, "loss": 0.9106, "step": 1862 }, { "epoch": 0.28, "learning_rate": 1.7002152369368317e-05, "loss": 0.8286, "step": 1863 }, { "epoch": 0.28, "learning_rate": 1.6998732016190694e-05, "loss": 0.8354, "step": 1864 }, { "epoch": 0.28, "learning_rate": 1.699531005738911e-05, "loss": 0.9365, "step": 1865 }, { "epoch": 0.28, "learning_rate": 1.6991886493748625e-05, "loss": 0.9043, "step": 1866 }, { "epoch": 0.28, "learning_rate": 1.6988461326054652e-05, "loss": 0.8389, "step": 1867 }, { "epoch": 0.28, "learning_rate": 1.698503455509299e-05, "loss": 0.8765, "step": 1868 }, { "epoch": 0.28, "learning_rate": 1.698160618164979e-05, "loss": 0.8491, "step": 1869 }, { "epoch": 0.28, "learning_rate": 1.697817620651158e-05, "loss": 0.8115, "step": 1870 }, { "epoch": 0.28, "learning_rate": 1.697474463046525e-05, "loss": 0.3389, "step": 1871 }, { "epoch": 0.28, "learning_rate": 1.6971311454298062e-05, "loss": 0.8462, "step": 1872 }, { "epoch": 0.28, "learning_rate": 1.6967876678797647e-05, "loss": 0.8564, "step": 1873 }, { "epoch": 0.28, "learning_rate": 1.696444030475199e-05, "loss": 0.9316, "step": 1874 }, { "epoch": 0.28, "learning_rate": 1.6961002332949456e-05, "loss": 0.8184, "step": 1875 }, { "epoch": 0.28, "learning_rate": 1.6957562764178774e-05, "loss": 0.9019, "step": 1876 }, { "epoch": 0.28, "learning_rate": 1.6954121599229035e-05, "loss": 0.8564, "step": 1877 }, { "epoch": 0.28, "learning_rate": 1.69506788388897e-05, "loss": 0.8652, "step": 1878 }, { "epoch": 0.28, "learning_rate": 1.6947234483950593e-05, "loss": 0.8809, "step": 1879 }, { "epoch": 0.28, "learning_rate": 1.6943788535201907e-05, "loss": 0.9229, "step": 1880 }, { "epoch": 0.28, "learning_rate": 1.6940340993434197e-05, "loss": 0.7974, "step": 1881 }, { "epoch": 0.28, "learning_rate": 1.693689185943839e-05, "loss": 0.894, "step": 1882 }, { "epoch": 0.28, "learning_rate": 1.6933441134005774e-05, "loss": 0.7449, "step": 1883 }, { "epoch": 0.28, "learning_rate": 1.692998881792799e-05, "loss": 0.8389, "step": 1884 }, { "epoch": 0.28, "learning_rate": 1.6926534911997075e-05, "loss": 0.918, "step": 1885 }, { "epoch": 0.28, "learning_rate": 1.6923079417005396e-05, "loss": 0.856, "step": 1886 }, { "epoch": 0.28, "learning_rate": 1.691962233374571e-05, "loss": 0.8135, "step": 1887 }, { "epoch": 0.28, "learning_rate": 1.6916163663011124e-05, "loss": 0.9634, "step": 1888 }, { "epoch": 0.28, "learning_rate": 1.6912703405595116e-05, "loss": 0.8721, "step": 1889 }, { "epoch": 0.28, "learning_rate": 1.6909241562291522e-05, "loss": 0.9316, "step": 1890 }, { "epoch": 0.28, "learning_rate": 1.690577813389455e-05, "loss": 0.8853, "step": 1891 }, { "epoch": 0.28, "learning_rate": 1.6902313121198764e-05, "loss": 0.8765, "step": 1892 }, { "epoch": 0.28, "learning_rate": 1.68988465249991e-05, "loss": 0.8892, "step": 1893 }, { "epoch": 0.28, "learning_rate": 1.6895378346090843e-05, "loss": 0.877, "step": 1894 }, { "epoch": 0.28, "learning_rate": 1.6891908585269655e-05, "loss": 0.8672, "step": 1895 }, { "epoch": 0.28, "learning_rate": 1.688843724333156e-05, "loss": 0.874, "step": 1896 }, { "epoch": 0.28, "learning_rate": 1.6884964321072938e-05, "loss": 0.8228, "step": 1897 }, { "epoch": 0.28, "learning_rate": 1.6881489819290532e-05, "loss": 0.8765, "step": 1898 }, { "epoch": 0.28, "learning_rate": 1.6878013738781454e-05, "loss": 0.8662, "step": 1899 }, { "epoch": 0.28, "learning_rate": 1.687453608034317e-05, "loss": 0.8462, "step": 1900 }, { "epoch": 0.28, "learning_rate": 1.6871056844773512e-05, "loss": 0.8569, "step": 1901 }, { "epoch": 0.28, "learning_rate": 1.6867576032870677e-05, "loss": 0.8652, "step": 1902 }, { "epoch": 0.28, "learning_rate": 1.686409364543321e-05, "loss": 0.8857, "step": 1903 }, { "epoch": 0.28, "learning_rate": 1.686060968326005e-05, "loss": 0.8687, "step": 1904 }, { "epoch": 0.28, "learning_rate": 1.6857124147150454e-05, "loss": 0.8711, "step": 1905 }, { "epoch": 0.28, "learning_rate": 1.6853637037904066e-05, "loss": 0.8789, "step": 1906 }, { "epoch": 0.28, "learning_rate": 1.6850148356320894e-05, "loss": 0.8408, "step": 1907 }, { "epoch": 0.28, "learning_rate": 1.684665810320129e-05, "loss": 0.8691, "step": 1908 }, { "epoch": 0.28, "learning_rate": 1.6843166279345976e-05, "loss": 0.8579, "step": 1909 }, { "epoch": 0.28, "learning_rate": 1.683967288555604e-05, "loss": 0.9092, "step": 1910 }, { "epoch": 0.28, "learning_rate": 1.6836177922632918e-05, "loss": 0.8418, "step": 1911 }, { "epoch": 0.28, "learning_rate": 1.6832681391378414e-05, "loss": 0.832, "step": 1912 }, { "epoch": 0.28, "learning_rate": 1.6829183292594692e-05, "loss": 0.8291, "step": 1913 }, { "epoch": 0.28, "learning_rate": 1.6825683627084272e-05, "loss": 0.8486, "step": 1914 }, { "epoch": 0.28, "learning_rate": 1.682218239565003e-05, "loss": 0.8755, "step": 1915 }, { "epoch": 0.28, "learning_rate": 1.681867959909521e-05, "loss": 0.3298, "step": 1916 }, { "epoch": 0.28, "learning_rate": 1.681517523822341e-05, "loss": 0.8843, "step": 1917 }, { "epoch": 0.28, "learning_rate": 1.681166931383859e-05, "loss": 0.8286, "step": 1918 }, { "epoch": 0.28, "learning_rate": 1.6808161826745068e-05, "loss": 0.8301, "step": 1919 }, { "epoch": 0.28, "learning_rate": 1.6804652777747513e-05, "loss": 0.834, "step": 1920 }, { "epoch": 0.28, "learning_rate": 1.680114216765096e-05, "loss": 0.8926, "step": 1921 }, { "epoch": 0.28, "learning_rate": 1.6797629997260802e-05, "loss": 0.8359, "step": 1922 }, { "epoch": 0.28, "learning_rate": 1.679411626738279e-05, "loss": 0.8945, "step": 1923 }, { "epoch": 0.28, "learning_rate": 1.6790600978823032e-05, "loss": 0.8057, "step": 1924 }, { "epoch": 0.28, "learning_rate": 1.6787084132387987e-05, "loss": 0.8677, "step": 1925 }, { "epoch": 0.28, "learning_rate": 1.6783565728884483e-05, "loss": 0.8545, "step": 1926 }, { "epoch": 0.28, "learning_rate": 1.6780045769119694e-05, "loss": 0.8623, "step": 1927 }, { "epoch": 0.29, "learning_rate": 1.6776524253901162e-05, "loss": 0.8281, "step": 1928 }, { "epoch": 0.29, "learning_rate": 1.677300118403678e-05, "loss": 0.877, "step": 1929 }, { "epoch": 0.29, "learning_rate": 1.67694765603348e-05, "loss": 0.9126, "step": 1930 }, { "epoch": 0.29, "learning_rate": 1.6765950383603815e-05, "loss": 0.9175, "step": 1931 }, { "epoch": 0.29, "learning_rate": 1.6762422654652806e-05, "loss": 0.8467, "step": 1932 }, { "epoch": 0.29, "learning_rate": 1.675889337429108e-05, "loss": 0.8687, "step": 1933 }, { "epoch": 0.29, "learning_rate": 1.6755362543328317e-05, "loss": 0.8745, "step": 1934 }, { "epoch": 0.29, "learning_rate": 1.6751830162574544e-05, "loss": 0.8213, "step": 1935 }, { "epoch": 0.29, "learning_rate": 1.674829623284015e-05, "loss": 0.835, "step": 1936 }, { "epoch": 0.29, "learning_rate": 1.6744760754935878e-05, "loss": 0.9497, "step": 1937 }, { "epoch": 0.29, "learning_rate": 1.674122372967282e-05, "loss": 0.7827, "step": 1938 }, { "epoch": 0.29, "learning_rate": 1.6737685157862428e-05, "loss": 0.8311, "step": 1939 }, { "epoch": 0.29, "learning_rate": 1.6734145040316515e-05, "loss": 0.8149, "step": 1940 }, { "epoch": 0.29, "learning_rate": 1.6730603377847236e-05, "loss": 0.9189, "step": 1941 }, { "epoch": 0.29, "learning_rate": 1.6727060171267102e-05, "loss": 0.8867, "step": 1942 }, { "epoch": 0.29, "learning_rate": 1.6723515421388992e-05, "loss": 0.8765, "step": 1943 }, { "epoch": 0.29, "learning_rate": 1.6719969129026128e-05, "loss": 0.8691, "step": 1944 }, { "epoch": 0.29, "learning_rate": 1.6716421294992087e-05, "loss": 0.8032, "step": 1945 }, { "epoch": 0.29, "learning_rate": 1.6712871920100796e-05, "loss": 0.8433, "step": 1946 }, { "epoch": 0.29, "learning_rate": 1.6709321005166545e-05, "loss": 0.8301, "step": 1947 }, { "epoch": 0.29, "learning_rate": 1.670576855100397e-05, "loss": 0.8247, "step": 1948 }, { "epoch": 0.29, "learning_rate": 1.6702214558428062e-05, "loss": 0.8115, "step": 1949 }, { "epoch": 0.29, "learning_rate": 1.6698659028254164e-05, "loss": 0.8398, "step": 1950 }, { "epoch": 0.29, "learning_rate": 1.6695101961297978e-05, "loss": 0.8501, "step": 1951 }, { "epoch": 0.29, "learning_rate": 1.669154335837555e-05, "loss": 0.9126, "step": 1952 }, { "epoch": 0.29, "learning_rate": 1.668798322030328e-05, "loss": 0.8804, "step": 1953 }, { "epoch": 0.29, "learning_rate": 1.6684421547897925e-05, "loss": 0.877, "step": 1954 }, { "epoch": 0.29, "learning_rate": 1.6680858341976596e-05, "loss": 0.8164, "step": 1955 }, { "epoch": 0.29, "learning_rate": 1.6677293603356738e-05, "loss": 0.8447, "step": 1956 }, { "epoch": 0.29, "learning_rate": 1.6673727332856172e-05, "loss": 0.8828, "step": 1957 }, { "epoch": 0.29, "learning_rate": 1.667015953129305e-05, "loss": 0.8447, "step": 1958 }, { "epoch": 0.29, "learning_rate": 1.666659019948589e-05, "loss": 0.7935, "step": 1959 }, { "epoch": 0.29, "learning_rate": 1.6663019338253556e-05, "loss": 0.7988, "step": 1960 }, { "epoch": 0.29, "learning_rate": 1.665944694841526e-05, "loss": 0.8511, "step": 1961 }, { "epoch": 0.29, "learning_rate": 1.665587303079057e-05, "loss": 0.8354, "step": 1962 }, { "epoch": 0.29, "learning_rate": 1.6652297586199395e-05, "loss": 0.896, "step": 1963 }, { "epoch": 0.29, "learning_rate": 1.6648720615462007e-05, "loss": 0.7881, "step": 1964 }, { "epoch": 0.29, "learning_rate": 1.6645142119399014e-05, "loss": 0.8926, "step": 1965 }, { "epoch": 0.29, "learning_rate": 1.664156209883139e-05, "loss": 0.8535, "step": 1966 }, { "epoch": 0.29, "learning_rate": 1.6637980554580447e-05, "loss": 0.8555, "step": 1967 }, { "epoch": 0.29, "learning_rate": 1.663439748746785e-05, "loss": 0.8105, "step": 1968 }, { "epoch": 0.29, "learning_rate": 1.6630812898315615e-05, "loss": 0.7466, "step": 1969 }, { "epoch": 0.29, "learning_rate": 1.6627226787946104e-05, "loss": 0.8677, "step": 1970 }, { "epoch": 0.29, "learning_rate": 1.6623639157182028e-05, "loss": 0.8271, "step": 1971 }, { "epoch": 0.29, "learning_rate": 1.6620050006846452e-05, "loss": 0.8359, "step": 1972 }, { "epoch": 0.29, "learning_rate": 1.6616459337762784e-05, "loss": 0.8521, "step": 1973 }, { "epoch": 0.29, "learning_rate": 1.6612867150754776e-05, "loss": 0.8506, "step": 1974 }, { "epoch": 0.29, "learning_rate": 1.6609273446646548e-05, "loss": 0.8657, "step": 1975 }, { "epoch": 0.29, "learning_rate": 1.6605678226262547e-05, "loss": 0.8867, "step": 1976 }, { "epoch": 0.29, "learning_rate": 1.6602081490427577e-05, "loss": 0.8555, "step": 1977 }, { "epoch": 0.29, "learning_rate": 1.6598483239966783e-05, "loss": 0.8643, "step": 1978 }, { "epoch": 0.29, "learning_rate": 1.6594883475705673e-05, "loss": 0.895, "step": 1979 }, { "epoch": 0.29, "learning_rate": 1.659128219847008e-05, "loss": 0.8184, "step": 1980 }, { "epoch": 0.29, "learning_rate": 1.6587679409086207e-05, "loss": 0.832, "step": 1981 }, { "epoch": 0.29, "learning_rate": 1.6584075108380587e-05, "loss": 0.8633, "step": 1982 }, { "epoch": 0.29, "learning_rate": 1.6580469297180107e-05, "loss": 0.8423, "step": 1983 }, { "epoch": 0.29, "learning_rate": 1.6576861976312e-05, "loss": 0.8872, "step": 1984 }, { "epoch": 0.29, "learning_rate": 1.6573253146603843e-05, "loss": 0.8599, "step": 1985 }, { "epoch": 0.29, "learning_rate": 1.6569642808883562e-05, "loss": 0.9106, "step": 1986 }, { "epoch": 0.29, "learning_rate": 1.6566030963979428e-05, "loss": 0.8242, "step": 1987 }, { "epoch": 0.29, "learning_rate": 1.6562417612720055e-05, "loss": 0.8223, "step": 1988 }, { "epoch": 0.29, "learning_rate": 1.655880275593441e-05, "loss": 0.7573, "step": 1989 }, { "epoch": 0.29, "learning_rate": 1.6555186394451794e-05, "loss": 0.793, "step": 1990 }, { "epoch": 0.29, "learning_rate": 1.655156852910186e-05, "loss": 0.8682, "step": 1991 }, { "epoch": 0.29, "learning_rate": 1.6547949160714614e-05, "loss": 0.7881, "step": 1992 }, { "epoch": 0.29, "learning_rate": 1.6544328290120392e-05, "loss": 0.8999, "step": 1993 }, { "epoch": 0.29, "learning_rate": 1.654070591814988e-05, "loss": 0.332, "step": 1994 }, { "epoch": 0.3, "learning_rate": 1.6537082045634116e-05, "loss": 0.8774, "step": 1995 }, { "epoch": 0.3, "learning_rate": 1.653345667340447e-05, "loss": 0.8066, "step": 1996 }, { "epoch": 0.3, "learning_rate": 1.6529829802292665e-05, "loss": 0.875, "step": 1997 }, { "epoch": 0.3, "learning_rate": 1.652620143313076e-05, "loss": 0.8057, "step": 1998 }, { "epoch": 0.3, "learning_rate": 1.6522571566751165e-05, "loss": 0.8936, "step": 1999 }, { "epoch": 0.3, "learning_rate": 1.6518940203986636e-05, "loss": 0.8813, "step": 2000 }, { "epoch": 0.3, "learning_rate": 1.6515307345670263e-05, "loss": 0.8232, "step": 2001 }, { "epoch": 0.3, "learning_rate": 1.6511672992635478e-05, "loss": 0.9043, "step": 2002 }, { "epoch": 0.3, "learning_rate": 1.650803714571607e-05, "loss": 0.8804, "step": 2003 }, { "epoch": 0.3, "learning_rate": 1.6504399805746157e-05, "loss": 0.8057, "step": 2004 }, { "epoch": 0.3, "learning_rate": 1.6500760973560205e-05, "loss": 0.8979, "step": 2005 }, { "epoch": 0.3, "learning_rate": 1.6497120649993022e-05, "loss": 0.8545, "step": 2006 }, { "epoch": 0.3, "learning_rate": 1.6493478835879763e-05, "loss": 0.8267, "step": 2007 }, { "epoch": 0.3, "learning_rate": 1.648983553205591e-05, "loss": 0.7324, "step": 2008 }, { "epoch": 0.3, "learning_rate": 1.6486190739357307e-05, "loss": 0.8223, "step": 2009 }, { "epoch": 0.3, "learning_rate": 1.648254445862012e-05, "loss": 0.2996, "step": 2010 }, { "epoch": 0.3, "learning_rate": 1.6478896690680875e-05, "loss": 0.8091, "step": 2011 }, { "epoch": 0.3, "learning_rate": 1.647524743637642e-05, "loss": 0.8384, "step": 2012 }, { "epoch": 0.3, "learning_rate": 1.6471596696543964e-05, "loss": 0.8438, "step": 2013 }, { "epoch": 0.3, "learning_rate": 1.6467944472021035e-05, "loss": 0.8145, "step": 2014 }, { "epoch": 0.3, "learning_rate": 1.6464290763645522e-05, "loss": 0.8735, "step": 2015 }, { "epoch": 0.3, "learning_rate": 1.6460635572255644e-05, "loss": 0.8833, "step": 2016 }, { "epoch": 0.3, "learning_rate": 1.6456978898689958e-05, "loss": 0.3386, "step": 2017 }, { "epoch": 0.3, "learning_rate": 1.645332074378737e-05, "loss": 0.8149, "step": 2018 }, { "epoch": 0.3, "learning_rate": 1.6449661108387118e-05, "loss": 0.8691, "step": 2019 }, { "epoch": 0.3, "learning_rate": 1.6445999993328784e-05, "loss": 0.833, "step": 2020 }, { "epoch": 0.3, "learning_rate": 1.6442337399452286e-05, "loss": 0.7534, "step": 2021 }, { "epoch": 0.3, "learning_rate": 1.6438673327597885e-05, "loss": 0.8608, "step": 2022 }, { "epoch": 0.3, "learning_rate": 1.6435007778606177e-05, "loss": 0.8667, "step": 2023 }, { "epoch": 0.3, "learning_rate": 1.6431340753318102e-05, "loss": 0.8325, "step": 2024 }, { "epoch": 0.3, "learning_rate": 1.6427672252574934e-05, "loss": 0.8564, "step": 2025 }, { "epoch": 0.3, "learning_rate": 1.6424002277218287e-05, "loss": 0.8867, "step": 2026 }, { "epoch": 0.3, "learning_rate": 1.6420330828090114e-05, "loss": 0.8101, "step": 2027 }, { "epoch": 0.3, "learning_rate": 1.6416657906032706e-05, "loss": 0.8271, "step": 2028 }, { "epoch": 0.3, "learning_rate": 1.641298351188869e-05, "loss": 0.7754, "step": 2029 }, { "epoch": 0.3, "learning_rate": 1.6409307646501032e-05, "loss": 0.8647, "step": 2030 }, { "epoch": 0.3, "learning_rate": 1.640563031071304e-05, "loss": 0.8442, "step": 2031 }, { "epoch": 0.3, "learning_rate": 1.6401951505368353e-05, "loss": 0.8535, "step": 2032 }, { "epoch": 0.3, "learning_rate": 1.6398271231310948e-05, "loss": 0.8237, "step": 2033 }, { "epoch": 0.3, "learning_rate": 1.639458948938514e-05, "loss": 0.8398, "step": 2034 }, { "epoch": 0.3, "learning_rate": 1.6390906280435582e-05, "loss": 0.897, "step": 2035 }, { "epoch": 0.3, "learning_rate": 1.6387221605307263e-05, "loss": 0.8921, "step": 2036 }, { "epoch": 0.3, "learning_rate": 1.6383535464845507e-05, "loss": 0.8477, "step": 2037 }, { "epoch": 0.3, "learning_rate": 1.6379847859895977e-05, "loss": 0.3503, "step": 2038 }, { "epoch": 0.3, "learning_rate": 1.6376158791304667e-05, "loss": 0.9106, "step": 2039 }, { "epoch": 0.3, "learning_rate": 1.6372468259917913e-05, "loss": 0.8223, "step": 2040 }, { "epoch": 0.3, "learning_rate": 1.6368776266582383e-05, "loss": 0.8691, "step": 2041 }, { "epoch": 0.3, "learning_rate": 1.6365082812145077e-05, "loss": 0.8286, "step": 2042 }, { "epoch": 0.3, "learning_rate": 1.636138789745334e-05, "loss": 0.3453, "step": 2043 }, { "epoch": 0.3, "learning_rate": 1.635769152335484e-05, "loss": 0.9048, "step": 2044 }, { "epoch": 0.3, "learning_rate": 1.6353993690697595e-05, "loss": 0.9111, "step": 2045 }, { "epoch": 0.3, "learning_rate": 1.6350294400329935e-05, "loss": 0.7764, "step": 2046 }, { "epoch": 0.3, "learning_rate": 1.6346593653100547e-05, "loss": 0.8809, "step": 2047 }, { "epoch": 0.3, "learning_rate": 1.6342891449858444e-05, "loss": 0.8452, "step": 2048 }, { "epoch": 0.3, "learning_rate": 1.6339187791452968e-05, "loss": 0.7554, "step": 2049 }, { "epoch": 0.3, "learning_rate": 1.63354826787338e-05, "loss": 0.8057, "step": 2050 }, { "epoch": 0.3, "learning_rate": 1.6331776112550956e-05, "loss": 0.812, "step": 2051 }, { "epoch": 0.3, "learning_rate": 1.632806809375478e-05, "loss": 0.8408, "step": 2052 }, { "epoch": 0.3, "learning_rate": 1.6324358623195954e-05, "loss": 0.8701, "step": 2053 }, { "epoch": 0.3, "learning_rate": 1.632064770172549e-05, "loss": 0.8652, "step": 2054 }, { "epoch": 0.3, "learning_rate": 1.631693533019474e-05, "loss": 0.8052, "step": 2055 }, { "epoch": 0.3, "learning_rate": 1.631322150945537e-05, "loss": 0.8833, "step": 2056 }, { "epoch": 0.3, "learning_rate": 1.630950624035941e-05, "loss": 0.8335, "step": 2057 }, { "epoch": 0.3, "learning_rate": 1.6305789523759186e-05, "loss": 0.3381, "step": 2058 }, { "epoch": 0.3, "learning_rate": 1.630207136050738e-05, "loss": 0.8086, "step": 2059 }, { "epoch": 0.3, "learning_rate": 1.6298351751457008e-05, "loss": 0.8447, "step": 2060 }, { "epoch": 0.3, "learning_rate": 1.6294630697461396e-05, "loss": 0.8687, "step": 2061 }, { "epoch": 0.3, "learning_rate": 1.629090819937422e-05, "loss": 0.8413, "step": 2062 }, { "epoch": 0.31, "learning_rate": 1.628718425804949e-05, "loss": 0.895, "step": 2063 }, { "epoch": 0.31, "learning_rate": 1.628345887434153e-05, "loss": 0.7568, "step": 2064 }, { "epoch": 0.31, "learning_rate": 1.6279732049105e-05, "loss": 0.8359, "step": 2065 }, { "epoch": 0.31, "learning_rate": 1.6276003783194913e-05, "loss": 0.8369, "step": 2066 }, { "epoch": 0.31, "learning_rate": 1.6272274077466573e-05, "loss": 0.8608, "step": 2067 }, { "epoch": 0.31, "learning_rate": 1.626854293277565e-05, "loss": 0.8223, "step": 2068 }, { "epoch": 0.31, "learning_rate": 1.6264810349978125e-05, "loss": 0.8535, "step": 2069 }, { "epoch": 0.31, "learning_rate": 1.626107632993031e-05, "loss": 0.8525, "step": 2070 }, { "epoch": 0.31, "learning_rate": 1.625734087348886e-05, "loss": 0.8623, "step": 2071 }, { "epoch": 0.31, "learning_rate": 1.6253603981510742e-05, "loss": 0.8403, "step": 2072 }, { "epoch": 0.31, "learning_rate": 1.624986565485326e-05, "loss": 0.8755, "step": 2073 }, { "epoch": 0.31, "learning_rate": 1.6246125894374058e-05, "loss": 0.7979, "step": 2074 }, { "epoch": 0.31, "learning_rate": 1.6242384700931082e-05, "loss": 0.8086, "step": 2075 }, { "epoch": 0.31, "learning_rate": 1.6238642075382638e-05, "loss": 0.8481, "step": 2076 }, { "epoch": 0.31, "learning_rate": 1.6234898018587336e-05, "loss": 0.8276, "step": 2077 }, { "epoch": 0.31, "learning_rate": 1.623115253140413e-05, "loss": 0.8364, "step": 2078 }, { "epoch": 0.31, "learning_rate": 1.6227405614692295e-05, "loss": 0.8057, "step": 2079 }, { "epoch": 0.31, "learning_rate": 1.622365726931143e-05, "loss": 0.8481, "step": 2080 }, { "epoch": 0.31, "learning_rate": 1.6219907496121474e-05, "loss": 0.7905, "step": 2081 }, { "epoch": 0.31, "learning_rate": 1.6216156295982682e-05, "loss": 0.3625, "step": 2082 }, { "epoch": 0.31, "learning_rate": 1.6212403669755642e-05, "loss": 0.7891, "step": 2083 }, { "epoch": 0.31, "learning_rate": 1.6208649618301268e-05, "loss": 0.877, "step": 2084 }, { "epoch": 0.31, "learning_rate": 1.6204894142480803e-05, "loss": 0.894, "step": 2085 }, { "epoch": 0.31, "learning_rate": 1.6201137243155815e-05, "loss": 0.8838, "step": 2086 }, { "epoch": 0.31, "learning_rate": 1.6197378921188193e-05, "loss": 0.8105, "step": 2087 }, { "epoch": 0.31, "learning_rate": 1.619361917744016e-05, "loss": 0.8652, "step": 2088 }, { "epoch": 0.31, "learning_rate": 1.6189858012774267e-05, "loss": 0.8564, "step": 2089 }, { "epoch": 0.31, "learning_rate": 1.6186095428053382e-05, "loss": 0.9126, "step": 2090 }, { "epoch": 0.31, "learning_rate": 1.618233142414071e-05, "loss": 0.8462, "step": 2091 }, { "epoch": 0.31, "learning_rate": 1.6178566001899768e-05, "loss": 0.8569, "step": 2092 }, { "epoch": 0.31, "learning_rate": 1.617479916219441e-05, "loss": 0.8979, "step": 2093 }, { "epoch": 0.31, "learning_rate": 1.6171030905888808e-05, "loss": 0.8467, "step": 2094 }, { "epoch": 0.31, "learning_rate": 1.616726123384746e-05, "loss": 0.812, "step": 2095 }, { "epoch": 0.31, "learning_rate": 1.6163490146935196e-05, "loss": 0.8252, "step": 2096 }, { "epoch": 0.31, "learning_rate": 1.6159717646017162e-05, "loss": 0.8887, "step": 2097 }, { "epoch": 0.31, "learning_rate": 1.615594373195884e-05, "loss": 0.8638, "step": 2098 }, { "epoch": 0.31, "learning_rate": 1.6152168405626013e-05, "loss": 0.8325, "step": 2099 }, { "epoch": 0.31, "learning_rate": 1.614839166788481e-05, "loss": 0.8218, "step": 2100 }, { "epoch": 0.31, "learning_rate": 1.6144613519601682e-05, "loss": 0.8335, "step": 2101 }, { "epoch": 0.31, "learning_rate": 1.6140833961643386e-05, "loss": 0.9028, "step": 2102 }, { "epoch": 0.31, "learning_rate": 1.6137052994877026e-05, "loss": 0.791, "step": 2103 }, { "epoch": 0.31, "learning_rate": 1.6133270620170014e-05, "loss": 0.8389, "step": 2104 }, { "epoch": 0.31, "learning_rate": 1.6129486838390088e-05, "loss": 0.8218, "step": 2105 }, { "epoch": 0.31, "learning_rate": 1.612570165040531e-05, "loss": 0.8789, "step": 2106 }, { "epoch": 0.31, "learning_rate": 1.6121915057084064e-05, "loss": 0.8208, "step": 2107 }, { "epoch": 0.31, "learning_rate": 1.6118127059295055e-05, "loss": 0.7383, "step": 2108 }, { "epoch": 0.31, "learning_rate": 1.6114337657907316e-05, "loss": 0.9082, "step": 2109 }, { "epoch": 0.31, "learning_rate": 1.6110546853790197e-05, "loss": 0.8569, "step": 2110 }, { "epoch": 0.31, "learning_rate": 1.6106754647813367e-05, "loss": 0.8765, "step": 2111 }, { "epoch": 0.31, "learning_rate": 1.6102961040846824e-05, "loss": 0.7988, "step": 2112 }, { "epoch": 0.31, "learning_rate": 1.609916603376088e-05, "loss": 0.8862, "step": 2113 }, { "epoch": 0.31, "learning_rate": 1.609536962742617e-05, "loss": 0.9263, "step": 2114 }, { "epoch": 0.31, "learning_rate": 1.6091571822713667e-05, "loss": 0.853, "step": 2115 }, { "epoch": 0.31, "learning_rate": 1.6087772620494628e-05, "loss": 0.8706, "step": 2116 }, { "epoch": 0.31, "learning_rate": 1.6083972021640666e-05, "loss": 0.8477, "step": 2117 }, { "epoch": 0.31, "learning_rate": 1.6080170027023702e-05, "loss": 0.8281, "step": 2118 }, { "epoch": 0.31, "learning_rate": 1.6076366637515968e-05, "loss": 0.8735, "step": 2119 }, { "epoch": 0.31, "learning_rate": 1.6072561853990028e-05, "loss": 0.3308, "step": 2120 }, { "epoch": 0.31, "learning_rate": 1.606875567731876e-05, "loss": 0.8511, "step": 2121 }, { "epoch": 0.31, "learning_rate": 1.606494810837537e-05, "loss": 0.8574, "step": 2122 }, { "epoch": 0.31, "learning_rate": 1.6061139148033364e-05, "loss": 0.8398, "step": 2123 }, { "epoch": 0.31, "learning_rate": 1.6057328797166592e-05, "loss": 0.8691, "step": 2124 }, { "epoch": 0.31, "learning_rate": 1.6053517056649206e-05, "loss": 0.8555, "step": 2125 }, { "epoch": 0.31, "learning_rate": 1.6049703927355684e-05, "loss": 0.7671, "step": 2126 }, { "epoch": 0.31, "learning_rate": 1.6045889410160812e-05, "loss": 0.8174, "step": 2127 }, { "epoch": 0.31, "learning_rate": 1.6042073505939718e-05, "loss": 0.7925, "step": 2128 }, { "epoch": 0.31, "learning_rate": 1.6038256215567823e-05, "loss": 0.8682, "step": 2129 }, { "epoch": 0.31, "learning_rate": 1.6034437539920876e-05, "loss": 0.8579, "step": 2130 }, { "epoch": 0.32, "learning_rate": 1.6030617479874944e-05, "loss": 0.8389, "step": 2131 }, { "epoch": 0.32, "learning_rate": 1.6026796036306413e-05, "loss": 0.8003, "step": 2132 }, { "epoch": 0.32, "learning_rate": 1.602297321009199e-05, "loss": 0.8857, "step": 2133 }, { "epoch": 0.32, "learning_rate": 1.601914900210868e-05, "loss": 0.8818, "step": 2134 }, { "epoch": 0.32, "learning_rate": 1.6015323413233838e-05, "loss": 0.7783, "step": 2135 }, { "epoch": 0.32, "learning_rate": 1.6011496444345102e-05, "loss": 0.8379, "step": 2136 }, { "epoch": 0.32, "learning_rate": 1.6007668096320445e-05, "loss": 0.8027, "step": 2137 }, { "epoch": 0.32, "learning_rate": 1.6003838370038155e-05, "loss": 0.8643, "step": 2138 }, { "epoch": 0.32, "learning_rate": 1.6000007266376837e-05, "loss": 0.2986, "step": 2139 }, { "epoch": 0.32, "learning_rate": 1.59961747862154e-05, "loss": 0.321, "step": 2140 }, { "epoch": 0.32, "learning_rate": 1.5992340930433084e-05, "loss": 0.3191, "step": 2141 }, { "epoch": 0.32, "learning_rate": 1.598850569990944e-05, "loss": 0.8306, "step": 2142 }, { "epoch": 0.32, "learning_rate": 1.598466909552433e-05, "loss": 0.8481, "step": 2143 }, { "epoch": 0.32, "learning_rate": 1.598083111815793e-05, "loss": 0.8823, "step": 2144 }, { "epoch": 0.32, "learning_rate": 1.5976991768690743e-05, "loss": 0.8823, "step": 2145 }, { "epoch": 0.32, "learning_rate": 1.5973151048003574e-05, "loss": 0.8696, "step": 2146 }, { "epoch": 0.32, "learning_rate": 1.596930895697755e-05, "loss": 0.8882, "step": 2147 }, { "epoch": 0.32, "learning_rate": 1.5965465496494107e-05, "loss": 0.8818, "step": 2148 }, { "epoch": 0.32, "learning_rate": 1.5961620667434997e-05, "loss": 0.9053, "step": 2149 }, { "epoch": 0.32, "learning_rate": 1.595777447068229e-05, "loss": 0.7856, "step": 2150 }, { "epoch": 0.32, "learning_rate": 1.595392690711837e-05, "loss": 0.8223, "step": 2151 }, { "epoch": 0.32, "learning_rate": 1.5950077977625924e-05, "loss": 0.9175, "step": 2152 }, { "epoch": 0.32, "learning_rate": 1.594622768308796e-05, "loss": 0.8755, "step": 2153 }, { "epoch": 0.32, "learning_rate": 1.5942376024387806e-05, "loss": 0.8994, "step": 2154 }, { "epoch": 0.32, "learning_rate": 1.5938523002409083e-05, "loss": 0.8247, "step": 2155 }, { "epoch": 0.32, "learning_rate": 1.593466861803575e-05, "loss": 0.8242, "step": 2156 }, { "epoch": 0.32, "learning_rate": 1.593081287215206e-05, "loss": 0.7808, "step": 2157 }, { "epoch": 0.32, "learning_rate": 1.5926955765642587e-05, "loss": 0.8242, "step": 2158 }, { "epoch": 0.32, "learning_rate": 1.5923097299392213e-05, "loss": 0.8599, "step": 2159 }, { "epoch": 0.32, "learning_rate": 1.5919237474286134e-05, "loss": 0.8335, "step": 2160 }, { "epoch": 0.32, "learning_rate": 1.5915376291209854e-05, "loss": 0.8525, "step": 2161 }, { "epoch": 0.32, "learning_rate": 1.5911513751049197e-05, "loss": 0.8076, "step": 2162 }, { "epoch": 0.32, "learning_rate": 1.5907649854690292e-05, "loss": 0.8506, "step": 2163 }, { "epoch": 0.32, "learning_rate": 1.5903784603019575e-05, "loss": 0.8926, "step": 2164 }, { "epoch": 0.32, "learning_rate": 1.5899917996923803e-05, "loss": 0.8892, "step": 2165 }, { "epoch": 0.32, "learning_rate": 1.5896050037290038e-05, "loss": 0.8247, "step": 2166 }, { "epoch": 0.32, "learning_rate": 1.5892180725005656e-05, "loss": 0.8501, "step": 2167 }, { "epoch": 0.32, "learning_rate": 1.5888310060958338e-05, "loss": 0.9023, "step": 2168 }, { "epoch": 0.32, "learning_rate": 1.5884438046036072e-05, "loss": 0.897, "step": 2169 }, { "epoch": 0.32, "learning_rate": 1.5880564681127172e-05, "loss": 0.7539, "step": 2170 }, { "epoch": 0.32, "learning_rate": 1.587668996712025e-05, "loss": 0.9092, "step": 2171 }, { "epoch": 0.32, "learning_rate": 1.587281390490422e-05, "loss": 0.8296, "step": 2172 }, { "epoch": 0.32, "learning_rate": 1.5868936495368322e-05, "loss": 0.8477, "step": 2173 }, { "epoch": 0.32, "learning_rate": 1.5865057739402098e-05, "loss": 0.8203, "step": 2174 }, { "epoch": 0.32, "learning_rate": 1.5861177637895397e-05, "loss": 0.76, "step": 2175 }, { "epoch": 0.32, "learning_rate": 1.5857296191738373e-05, "loss": 0.8989, "step": 2176 }, { "epoch": 0.32, "learning_rate": 1.58534134018215e-05, "loss": 0.8384, "step": 2177 }, { "epoch": 0.32, "learning_rate": 1.5849529269035547e-05, "loss": 0.8691, "step": 2178 }, { "epoch": 0.32, "learning_rate": 1.5845643794271604e-05, "loss": 0.832, "step": 2179 }, { "epoch": 0.32, "learning_rate": 1.5841756978421064e-05, "loss": 0.7969, "step": 2180 }, { "epoch": 0.32, "learning_rate": 1.5837868822375617e-05, "loss": 0.9062, "step": 2181 }, { "epoch": 0.32, "learning_rate": 1.583397932702728e-05, "loss": 0.8188, "step": 2182 }, { "epoch": 0.32, "learning_rate": 1.583008849326836e-05, "loss": 0.8247, "step": 2183 }, { "epoch": 0.32, "learning_rate": 1.5826196321991484e-05, "loss": 0.8374, "step": 2184 }, { "epoch": 0.32, "learning_rate": 1.5822302814089577e-05, "loss": 0.3231, "step": 2185 }, { "epoch": 0.32, "learning_rate": 1.581840797045587e-05, "loss": 0.8481, "step": 2186 }, { "epoch": 0.32, "learning_rate": 1.581451179198391e-05, "loss": 0.9204, "step": 2187 }, { "epoch": 0.32, "learning_rate": 1.5810614279567536e-05, "loss": 0.855, "step": 2188 }, { "epoch": 0.32, "learning_rate": 1.5806715434100916e-05, "loss": 0.8809, "step": 2189 }, { "epoch": 0.32, "learning_rate": 1.580281525647849e-05, "loss": 0.8599, "step": 2190 }, { "epoch": 0.32, "learning_rate": 1.5798913747595038e-05, "loss": 0.8594, "step": 2191 }, { "epoch": 0.32, "learning_rate": 1.5795010908345628e-05, "loss": 0.8188, "step": 2192 }, { "epoch": 0.32, "learning_rate": 1.5791106739625627e-05, "loss": 0.8208, "step": 2193 }, { "epoch": 0.32, "learning_rate": 1.5787201242330725e-05, "loss": 0.8896, "step": 2194 }, { "epoch": 0.32, "learning_rate": 1.57832944173569e-05, "loss": 0.8682, "step": 2195 }, { "epoch": 0.32, "learning_rate": 1.5779386265600444e-05, "loss": 0.8047, "step": 2196 }, { "epoch": 0.32, "learning_rate": 1.577547678795795e-05, "loss": 0.8501, "step": 2197 }, { "epoch": 0.33, "learning_rate": 1.5771565985326323e-05, "loss": 0.8447, "step": 2198 }, { "epoch": 0.33, "learning_rate": 1.576765385860276e-05, "loss": 0.8906, "step": 2199 }, { "epoch": 0.33, "learning_rate": 1.5763740408684766e-05, "loss": 0.854, "step": 2200 }, { "epoch": 0.33, "learning_rate": 1.575982563647015e-05, "loss": 0.3479, "step": 2201 }, { "epoch": 0.33, "learning_rate": 1.575590954285703e-05, "loss": 0.8022, "step": 2202 }, { "epoch": 0.33, "learning_rate": 1.575199212874382e-05, "loss": 0.9082, "step": 2203 }, { "epoch": 0.33, "learning_rate": 1.5748073395029236e-05, "loss": 0.7964, "step": 2204 }, { "epoch": 0.33, "learning_rate": 1.57441533426123e-05, "loss": 0.8442, "step": 2205 }, { "epoch": 0.33, "learning_rate": 1.574023197239234e-05, "loss": 0.8638, "step": 2206 }, { "epoch": 0.33, "learning_rate": 1.5736309285268982e-05, "loss": 0.834, "step": 2207 }, { "epoch": 0.33, "learning_rate": 1.5732385282142153e-05, "loss": 0.874, "step": 2208 }, { "epoch": 0.33, "learning_rate": 1.5728459963912088e-05, "loss": 0.8091, "step": 2209 }, { "epoch": 0.33, "learning_rate": 1.572453333147931e-05, "loss": 0.8232, "step": 2210 }, { "epoch": 0.33, "learning_rate": 1.572060538574466e-05, "loss": 0.8154, "step": 2211 }, { "epoch": 0.33, "learning_rate": 1.5716676127609277e-05, "loss": 0.8408, "step": 2212 }, { "epoch": 0.33, "learning_rate": 1.5712745557974588e-05, "loss": 0.8428, "step": 2213 }, { "epoch": 0.33, "learning_rate": 1.5708813677742334e-05, "loss": 0.8059, "step": 2214 }, { "epoch": 0.33, "learning_rate": 1.5704880487814553e-05, "loss": 0.8262, "step": 2215 }, { "epoch": 0.33, "learning_rate": 1.5700945989093587e-05, "loss": 0.8516, "step": 2216 }, { "epoch": 0.33, "learning_rate": 1.5697010182482067e-05, "loss": 0.8184, "step": 2217 }, { "epoch": 0.33, "learning_rate": 1.5693073068882942e-05, "loss": 0.8735, "step": 2218 }, { "epoch": 0.33, "learning_rate": 1.568913464919944e-05, "loss": 0.8354, "step": 2219 }, { "epoch": 0.33, "learning_rate": 1.5685194924335102e-05, "loss": 0.8838, "step": 2220 }, { "epoch": 0.33, "learning_rate": 1.568125389519377e-05, "loss": 0.8306, "step": 2221 }, { "epoch": 0.33, "learning_rate": 1.5677311562679575e-05, "loss": 0.8413, "step": 2222 }, { "epoch": 0.33, "learning_rate": 1.567336792769696e-05, "loss": 0.9092, "step": 2223 }, { "epoch": 0.33, "learning_rate": 1.566942299115065e-05, "loss": 0.8032, "step": 2224 }, { "epoch": 0.33, "learning_rate": 1.5665476753945682e-05, "loss": 0.7979, "step": 2225 }, { "epoch": 0.33, "learning_rate": 1.5661529216987393e-05, "loss": 0.7837, "step": 2226 }, { "epoch": 0.33, "learning_rate": 1.5657580381181404e-05, "loss": 0.8569, "step": 2227 }, { "epoch": 0.33, "learning_rate": 1.5653630247433653e-05, "loss": 0.8306, "step": 2228 }, { "epoch": 0.33, "learning_rate": 1.5649678816650357e-05, "loss": 0.7881, "step": 2229 }, { "epoch": 0.33, "learning_rate": 1.564572608973804e-05, "loss": 0.8594, "step": 2230 }, { "epoch": 0.33, "learning_rate": 1.5641772067603526e-05, "loss": 0.8535, "step": 2231 }, { "epoch": 0.33, "learning_rate": 1.5637816751153932e-05, "loss": 0.8936, "step": 2232 }, { "epoch": 0.33, "learning_rate": 1.563386014129667e-05, "loss": 0.874, "step": 2233 }, { "epoch": 0.33, "learning_rate": 1.562990223893945e-05, "loss": 0.8213, "step": 2234 }, { "epoch": 0.33, "learning_rate": 1.5625943044990284e-05, "loss": 0.8076, "step": 2235 }, { "epoch": 0.33, "learning_rate": 1.5621982560357473e-05, "loss": 0.8774, "step": 2236 }, { "epoch": 0.33, "learning_rate": 1.561802078594962e-05, "loss": 0.9302, "step": 2237 }, { "epoch": 0.33, "learning_rate": 1.5614057722675618e-05, "loss": 0.7983, "step": 2238 }, { "epoch": 0.33, "learning_rate": 1.5610093371444665e-05, "loss": 0.8174, "step": 2239 }, { "epoch": 0.33, "learning_rate": 1.5606127733166237e-05, "loss": 0.8745, "step": 2240 }, { "epoch": 0.33, "learning_rate": 1.5602160808750126e-05, "loss": 0.8223, "step": 2241 }, { "epoch": 0.33, "learning_rate": 1.5598192599106404e-05, "loss": 0.8335, "step": 2242 }, { "epoch": 0.33, "learning_rate": 1.5594223105145446e-05, "loss": 0.8457, "step": 2243 }, { "epoch": 0.33, "learning_rate": 1.5590252327777923e-05, "loss": 0.8535, "step": 2244 }, { "epoch": 0.33, "learning_rate": 1.558628026791479e-05, "loss": 0.813, "step": 2245 }, { "epoch": 0.33, "learning_rate": 1.5582306926467302e-05, "loss": 0.9058, "step": 2246 }, { "epoch": 0.33, "learning_rate": 1.5578332304347016e-05, "loss": 0.8848, "step": 2247 }, { "epoch": 0.33, "learning_rate": 1.557435640246577e-05, "loss": 0.9009, "step": 2248 }, { "epoch": 0.33, "learning_rate": 1.55703792217357e-05, "loss": 0.8179, "step": 2249 }, { "epoch": 0.33, "learning_rate": 1.556640076306924e-05, "loss": 0.8672, "step": 2250 }, { "epoch": 0.33, "learning_rate": 1.5562421027379116e-05, "loss": 0.8589, "step": 2251 }, { "epoch": 0.33, "learning_rate": 1.555844001557834e-05, "loss": 0.8643, "step": 2252 }, { "epoch": 0.33, "learning_rate": 1.555445772858022e-05, "loss": 0.8662, "step": 2253 }, { "epoch": 0.33, "learning_rate": 1.5550474167298364e-05, "loss": 0.853, "step": 2254 }, { "epoch": 0.33, "learning_rate": 1.5546489332646658e-05, "loss": 0.8784, "step": 2255 }, { "epoch": 0.33, "learning_rate": 1.55425032255393e-05, "loss": 0.8936, "step": 2256 }, { "epoch": 0.33, "learning_rate": 1.553851584689076e-05, "loss": 0.8281, "step": 2257 }, { "epoch": 0.33, "learning_rate": 1.5534527197615804e-05, "loss": 0.9126, "step": 2258 }, { "epoch": 0.33, "learning_rate": 1.5530537278629507e-05, "loss": 0.7949, "step": 2259 }, { "epoch": 0.33, "learning_rate": 1.552654609084721e-05, "loss": 0.8325, "step": 2260 }, { "epoch": 0.33, "learning_rate": 1.5522553635184567e-05, "loss": 0.832, "step": 2261 }, { "epoch": 0.33, "learning_rate": 1.5518559912557497e-05, "loss": 0.8877, "step": 2262 }, { "epoch": 0.33, "learning_rate": 1.5514564923882245e-05, "loss": 0.8232, "step": 2263 }, { "epoch": 0.33, "learning_rate": 1.5510568670075313e-05, "loss": 0.8418, "step": 2264 }, { "epoch": 0.33, "learning_rate": 1.5506571152053512e-05, "loss": 0.7817, "step": 2265 }, { "epoch": 0.34, "learning_rate": 1.550257237073394e-05, "loss": 0.7729, "step": 2266 }, { "epoch": 0.34, "learning_rate": 1.5498572327033984e-05, "loss": 0.8887, "step": 2267 }, { "epoch": 0.34, "learning_rate": 1.549457102187131e-05, "loss": 0.7915, "step": 2268 }, { "epoch": 0.34, "learning_rate": 1.549056845616389e-05, "loss": 0.7847, "step": 2269 }, { "epoch": 0.34, "learning_rate": 1.5486564630829983e-05, "loss": 0.9185, "step": 2270 }, { "epoch": 0.34, "learning_rate": 1.5482559546788127e-05, "loss": 0.3032, "step": 2271 }, { "epoch": 0.34, "learning_rate": 1.547855320495715e-05, "loss": 0.8511, "step": 2272 }, { "epoch": 0.34, "learning_rate": 1.547454560625618e-05, "loss": 0.835, "step": 2273 }, { "epoch": 0.34, "learning_rate": 1.5470536751604622e-05, "loss": 0.8496, "step": 2274 }, { "epoch": 0.34, "learning_rate": 1.5466526641922174e-05, "loss": 0.8398, "step": 2275 }, { "epoch": 0.34, "learning_rate": 1.5462515278128822e-05, "loss": 0.8076, "step": 2276 }, { "epoch": 0.34, "learning_rate": 1.5458502661144835e-05, "loss": 0.8877, "step": 2277 }, { "epoch": 0.34, "learning_rate": 1.5454488791890777e-05, "loss": 0.8203, "step": 2278 }, { "epoch": 0.34, "learning_rate": 1.5450473671287492e-05, "loss": 0.8423, "step": 2279 }, { "epoch": 0.34, "learning_rate": 1.5446457300256118e-05, "loss": 0.8354, "step": 2280 }, { "epoch": 0.34, "learning_rate": 1.5442439679718075e-05, "loss": 0.3486, "step": 2281 }, { "epoch": 0.34, "learning_rate": 1.5438420810595073e-05, "loss": 0.8232, "step": 2282 }, { "epoch": 0.34, "learning_rate": 1.54344006938091e-05, "loss": 0.8696, "step": 2283 }, { "epoch": 0.34, "learning_rate": 1.543037933028245e-05, "loss": 0.8413, "step": 2284 }, { "epoch": 0.34, "learning_rate": 1.542635672093767e-05, "loss": 0.832, "step": 2285 }, { "epoch": 0.34, "learning_rate": 1.542233286669763e-05, "loss": 0.853, "step": 2286 }, { "epoch": 0.34, "learning_rate": 1.541830776848546e-05, "loss": 0.8921, "step": 2287 }, { "epoch": 0.34, "learning_rate": 1.5414281427224588e-05, "loss": 0.9307, "step": 2288 }, { "epoch": 0.34, "learning_rate": 1.5410253843838717e-05, "loss": 0.9541, "step": 2289 }, { "epoch": 0.34, "learning_rate": 1.5406225019251846e-05, "loss": 0.813, "step": 2290 }, { "epoch": 0.34, "learning_rate": 1.540219495438825e-05, "loss": 0.8569, "step": 2291 }, { "epoch": 0.34, "learning_rate": 1.5398163650172495e-05, "loss": 0.8755, "step": 2292 }, { "epoch": 0.34, "learning_rate": 1.5394131107529427e-05, "loss": 0.8623, "step": 2293 }, { "epoch": 0.34, "learning_rate": 1.5390097327384176e-05, "loss": 0.8574, "step": 2294 }, { "epoch": 0.34, "learning_rate": 1.538606231066216e-05, "loss": 0.8789, "step": 2295 }, { "epoch": 0.34, "learning_rate": 1.538202605828907e-05, "loss": 0.894, "step": 2296 }, { "epoch": 0.34, "learning_rate": 1.5377988571190903e-05, "loss": 0.8193, "step": 2297 }, { "epoch": 0.34, "learning_rate": 1.5373949850293915e-05, "loss": 0.8584, "step": 2298 }, { "epoch": 0.34, "learning_rate": 1.5369909896524657e-05, "loss": 0.8706, "step": 2299 }, { "epoch": 0.34, "learning_rate": 1.5365868710809958e-05, "loss": 0.9121, "step": 2300 }, { "epoch": 0.34, "learning_rate": 1.5361826294076938e-05, "loss": 0.8931, "step": 2301 }, { "epoch": 0.34, "learning_rate": 1.5357782647252984e-05, "loss": 0.8267, "step": 2302 }, { "epoch": 0.34, "learning_rate": 1.5353737771265785e-05, "loss": 0.8008, "step": 2303 }, { "epoch": 0.34, "learning_rate": 1.53496916670433e-05, "loss": 0.9185, "step": 2304 }, { "epoch": 0.34, "learning_rate": 1.5345644335513773e-05, "loss": 0.9072, "step": 2305 }, { "epoch": 0.34, "learning_rate": 1.5341595777605718e-05, "loss": 0.8901, "step": 2306 }, { "epoch": 0.34, "learning_rate": 1.5337545994247948e-05, "loss": 0.856, "step": 2307 }, { "epoch": 0.34, "learning_rate": 1.5333494986369554e-05, "loss": 0.8086, "step": 2308 }, { "epoch": 0.34, "learning_rate": 1.5329442754899897e-05, "loss": 0.9014, "step": 2309 }, { "epoch": 0.34, "learning_rate": 1.532538930076863e-05, "loss": 0.8555, "step": 2310 }, { "epoch": 0.34, "learning_rate": 1.5321334624905677e-05, "loss": 0.876, "step": 2311 }, { "epoch": 0.34, "learning_rate": 1.5317278728241252e-05, "loss": 0.896, "step": 2312 }, { "epoch": 0.34, "learning_rate": 1.531322161170584e-05, "loss": 0.8721, "step": 2313 }, { "epoch": 0.34, "learning_rate": 1.5309163276230215e-05, "loss": 0.8696, "step": 2314 }, { "epoch": 0.34, "learning_rate": 1.530510372274542e-05, "loss": 0.8101, "step": 2315 }, { "epoch": 0.34, "learning_rate": 1.5301042952182787e-05, "loss": 0.8643, "step": 2316 }, { "epoch": 0.34, "learning_rate": 1.5296980965473918e-05, "loss": 0.8774, "step": 2317 }, { "epoch": 0.34, "learning_rate": 1.529291776355071e-05, "loss": 0.8442, "step": 2318 }, { "epoch": 0.34, "learning_rate": 1.528885334734532e-05, "loss": 0.8428, "step": 2319 }, { "epoch": 0.34, "learning_rate": 1.528478771779019e-05, "loss": 0.8135, "step": 2320 }, { "epoch": 0.34, "learning_rate": 1.528072087581805e-05, "loss": 0.8643, "step": 2321 }, { "epoch": 0.34, "learning_rate": 1.5276652822361894e-05, "loss": 0.9058, "step": 2322 }, { "epoch": 0.34, "learning_rate": 1.5272583558355005e-05, "loss": 0.8247, "step": 2323 }, { "epoch": 0.34, "learning_rate": 1.5268513084730935e-05, "loss": 0.9053, "step": 2324 }, { "epoch": 0.34, "learning_rate": 1.5264441402423518e-05, "loss": 0.916, "step": 2325 }, { "epoch": 0.34, "learning_rate": 1.5260368512366865e-05, "loss": 0.8037, "step": 2326 }, { "epoch": 0.34, "learning_rate": 1.525629441549537e-05, "loss": 0.8486, "step": 2327 }, { "epoch": 0.34, "learning_rate": 1.5252219112743685e-05, "loss": 0.8691, "step": 2328 }, { "epoch": 0.34, "learning_rate": 1.5248142605046758e-05, "loss": 0.8896, "step": 2329 }, { "epoch": 0.34, "learning_rate": 1.524406489333981e-05, "loss": 0.9043, "step": 2330 }, { "epoch": 0.34, "learning_rate": 1.5239985978558333e-05, "loss": 0.8042, "step": 2331 }, { "epoch": 0.34, "learning_rate": 1.5235905861638094e-05, "loss": 0.832, "step": 2332 }, { "epoch": 0.35, "learning_rate": 1.5231824543515141e-05, "loss": 0.8271, "step": 2333 }, { "epoch": 0.35, "learning_rate": 1.5227742025125794e-05, "loss": 0.8081, "step": 2334 }, { "epoch": 0.35, "learning_rate": 1.5223658307406654e-05, "loss": 0.855, "step": 2335 }, { "epoch": 0.35, "learning_rate": 1.5219573391294587e-05, "loss": 0.7393, "step": 2336 }, { "epoch": 0.35, "learning_rate": 1.5215487277726741e-05, "loss": 0.8706, "step": 2337 }, { "epoch": 0.35, "learning_rate": 1.521139996764054e-05, "loss": 0.8452, "step": 2338 }, { "epoch": 0.35, "learning_rate": 1.520731146197368e-05, "loss": 0.8569, "step": 2339 }, { "epoch": 0.35, "learning_rate": 1.5203221761664131e-05, "loss": 0.8623, "step": 2340 }, { "epoch": 0.35, "learning_rate": 1.5199130867650134e-05, "loss": 0.8418, "step": 2341 }, { "epoch": 0.35, "learning_rate": 1.5195038780870211e-05, "loss": 0.8481, "step": 2342 }, { "epoch": 0.35, "learning_rate": 1.5190945502263152e-05, "loss": 0.8042, "step": 2343 }, { "epoch": 0.35, "learning_rate": 1.5186851032768025e-05, "loss": 0.8306, "step": 2344 }, { "epoch": 0.35, "learning_rate": 1.5182755373324162e-05, "loss": 0.3555, "step": 2345 }, { "epoch": 0.35, "learning_rate": 1.5178658524871183e-05, "loss": 0.7915, "step": 2346 }, { "epoch": 0.35, "learning_rate": 1.5174560488348964e-05, "loss": 0.7549, "step": 2347 }, { "epoch": 0.35, "learning_rate": 1.5170461264697669e-05, "loss": 0.3352, "step": 2348 }, { "epoch": 0.35, "learning_rate": 1.5166360854857724e-05, "loss": 0.8184, "step": 2349 }, { "epoch": 0.35, "learning_rate": 1.5162259259769831e-05, "loss": 0.8848, "step": 2350 }, { "epoch": 0.35, "learning_rate": 1.515815648037496e-05, "loss": 0.8574, "step": 2351 }, { "epoch": 0.35, "learning_rate": 1.5154052517614361e-05, "loss": 0.8545, "step": 2352 }, { "epoch": 0.35, "learning_rate": 1.5149947372429546e-05, "loss": 0.8599, "step": 2353 }, { "epoch": 0.35, "learning_rate": 1.5145841045762304e-05, "loss": 0.8604, "step": 2354 }, { "epoch": 0.35, "learning_rate": 1.5141733538554694e-05, "loss": 0.8428, "step": 2355 }, { "epoch": 0.35, "learning_rate": 1.5137624851749047e-05, "loss": 0.8135, "step": 2356 }, { "epoch": 0.35, "learning_rate": 1.5133514986287962e-05, "loss": 0.8701, "step": 2357 }, { "epoch": 0.35, "learning_rate": 1.5129403943114311e-05, "loss": 0.8462, "step": 2358 }, { "epoch": 0.35, "learning_rate": 1.512529172317123e-05, "loss": 0.9102, "step": 2359 }, { "epoch": 0.35, "learning_rate": 1.5121178327402137e-05, "loss": 0.7793, "step": 2360 }, { "epoch": 0.35, "learning_rate": 1.5117063756750706e-05, "loss": 0.7681, "step": 2361 }, { "epoch": 0.35, "learning_rate": 1.5112948012160888e-05, "loss": 0.8667, "step": 2362 }, { "epoch": 0.35, "learning_rate": 1.5108831094576909e-05, "loss": 0.811, "step": 2363 }, { "epoch": 0.35, "learning_rate": 1.5104713004943245e-05, "loss": 0.8506, "step": 2364 }, { "epoch": 0.35, "learning_rate": 1.5100593744204665e-05, "loss": 0.8433, "step": 2365 }, { "epoch": 0.35, "learning_rate": 1.509647331330619e-05, "loss": 0.8652, "step": 2366 }, { "epoch": 0.35, "learning_rate": 1.5092351713193116e-05, "loss": 0.8452, "step": 2367 }, { "epoch": 0.35, "learning_rate": 1.5088228944811002e-05, "loss": 0.8594, "step": 2368 }, { "epoch": 0.35, "learning_rate": 1.5084105009105684e-05, "loss": 0.7832, "step": 2369 }, { "epoch": 0.35, "learning_rate": 1.5079979907023257e-05, "loss": 0.8779, "step": 2370 }, { "epoch": 0.35, "learning_rate": 1.5075853639510087e-05, "loss": 0.8472, "step": 2371 }, { "epoch": 0.35, "learning_rate": 1.5071726207512814e-05, "loss": 0.8521, "step": 2372 }, { "epoch": 0.35, "learning_rate": 1.506759761197833e-05, "loss": 0.8125, "step": 2373 }, { "epoch": 0.35, "learning_rate": 1.50634678538538e-05, "loss": 0.8779, "step": 2374 }, { "epoch": 0.35, "learning_rate": 1.5059336934086672e-05, "loss": 0.8477, "step": 2375 }, { "epoch": 0.35, "learning_rate": 1.5055204853624634e-05, "loss": 0.8662, "step": 2376 }, { "epoch": 0.35, "learning_rate": 1.5051071613415663e-05, "loss": 0.8755, "step": 2377 }, { "epoch": 0.35, "learning_rate": 1.5046937214407979e-05, "loss": 0.8267, "step": 2378 }, { "epoch": 0.35, "learning_rate": 1.5042801657550095e-05, "loss": 0.7886, "step": 2379 }, { "epoch": 0.35, "learning_rate": 1.5038664943790768e-05, "loss": 0.853, "step": 2380 }, { "epoch": 0.35, "learning_rate": 1.503452707407903e-05, "loss": 0.8613, "step": 2381 }, { "epoch": 0.35, "learning_rate": 1.5030388049364177e-05, "loss": 0.8477, "step": 2382 }, { "epoch": 0.35, "learning_rate": 1.5026247870595763e-05, "loss": 0.854, "step": 2383 }, { "epoch": 0.35, "learning_rate": 1.5022106538723625e-05, "loss": 0.834, "step": 2384 }, { "epoch": 0.35, "learning_rate": 1.5017964054697838e-05, "loss": 0.8423, "step": 2385 }, { "epoch": 0.35, "learning_rate": 1.5013820419468767e-05, "loss": 0.8477, "step": 2386 }, { "epoch": 0.35, "learning_rate": 1.5009675633987027e-05, "loss": 0.8081, "step": 2387 }, { "epoch": 0.35, "learning_rate": 1.5005529699203494e-05, "loss": 0.873, "step": 2388 }, { "epoch": 0.35, "learning_rate": 1.5001382616069325e-05, "loss": 0.8608, "step": 2389 }, { "epoch": 0.35, "learning_rate": 1.4997234385535917e-05, "loss": 0.8042, "step": 2390 }, { "epoch": 0.35, "learning_rate": 1.4993085008554951e-05, "loss": 0.8584, "step": 2391 }, { "epoch": 0.35, "learning_rate": 1.4988934486078355e-05, "loss": 0.7705, "step": 2392 }, { "epoch": 0.35, "learning_rate": 1.4984782819058334e-05, "loss": 0.356, "step": 2393 }, { "epoch": 0.35, "learning_rate": 1.4980630008447343e-05, "loss": 0.8364, "step": 2394 }, { "epoch": 0.35, "learning_rate": 1.4976476055198109e-05, "loss": 0.8398, "step": 2395 }, { "epoch": 0.35, "learning_rate": 1.4972320960263614e-05, "loss": 0.9092, "step": 2396 }, { "epoch": 0.35, "learning_rate": 1.4968164724597103e-05, "loss": 0.9053, "step": 2397 }, { "epoch": 0.35, "learning_rate": 1.496400734915209e-05, "loss": 0.875, "step": 2398 }, { "epoch": 0.35, "learning_rate": 1.4959848834882344e-05, "loss": 0.8584, "step": 2399 }, { "epoch": 0.35, "learning_rate": 1.4955689182741893e-05, "loss": 0.8735, "step": 2400 }, { "epoch": 0.36, "learning_rate": 1.4951528393685033e-05, "loss": 0.8853, "step": 2401 }, { "epoch": 0.36, "learning_rate": 1.4947366468666314e-05, "loss": 0.8721, "step": 2402 }, { "epoch": 0.36, "learning_rate": 1.4943203408640552e-05, "loss": 0.7871, "step": 2403 }, { "epoch": 0.36, "learning_rate": 1.4939039214562823e-05, "loss": 0.3411, "step": 2404 }, { "epoch": 0.36, "learning_rate": 1.4934873887388458e-05, "loss": 0.8037, "step": 2405 }, { "epoch": 0.36, "learning_rate": 1.4930707428073052e-05, "loss": 0.812, "step": 2406 }, { "epoch": 0.36, "learning_rate": 1.4926539837572462e-05, "loss": 0.812, "step": 2407 }, { "epoch": 0.36, "learning_rate": 1.49223711168428e-05, "loss": 0.3472, "step": 2408 }, { "epoch": 0.36, "learning_rate": 1.4918201266840437e-05, "loss": 0.7954, "step": 2409 }, { "epoch": 0.36, "learning_rate": 1.4914030288522006e-05, "loss": 0.895, "step": 2410 }, { "epoch": 0.36, "learning_rate": 1.4909858182844399e-05, "loss": 0.855, "step": 2411 }, { "epoch": 0.36, "learning_rate": 1.4905684950764768e-05, "loss": 0.8735, "step": 2412 }, { "epoch": 0.36, "learning_rate": 1.4901510593240514e-05, "loss": 0.8525, "step": 2413 }, { "epoch": 0.36, "learning_rate": 1.4897335111229307e-05, "loss": 0.8711, "step": 2414 }, { "epoch": 0.36, "learning_rate": 1.4893158505689071e-05, "loss": 0.8252, "step": 2415 }, { "epoch": 0.36, "learning_rate": 1.4888980777577988e-05, "loss": 0.8481, "step": 2416 }, { "epoch": 0.36, "learning_rate": 1.4884801927854501e-05, "loss": 0.9155, "step": 2417 }, { "epoch": 0.36, "learning_rate": 1.4880621957477299e-05, "loss": 0.9092, "step": 2418 }, { "epoch": 0.36, "learning_rate": 1.4876440867405341e-05, "loss": 0.8105, "step": 2419 }, { "epoch": 0.36, "learning_rate": 1.4872258658597835e-05, "loss": 0.8833, "step": 2420 }, { "epoch": 0.36, "learning_rate": 1.4868075332014253e-05, "loss": 0.8833, "step": 2421 }, { "epoch": 0.36, "learning_rate": 1.4863890888614314e-05, "loss": 0.7876, "step": 2422 }, { "epoch": 0.36, "learning_rate": 1.4859705329357999e-05, "loss": 0.7812, "step": 2423 }, { "epoch": 0.36, "learning_rate": 1.4855518655205546e-05, "loss": 0.8135, "step": 2424 }, { "epoch": 0.36, "learning_rate": 1.4851330867117444e-05, "loss": 0.8403, "step": 2425 }, { "epoch": 0.36, "learning_rate": 1.4847141966054438e-05, "loss": 0.9087, "step": 2426 }, { "epoch": 0.36, "learning_rate": 1.484295195297754e-05, "loss": 0.8359, "step": 2427 }, { "epoch": 0.36, "learning_rate": 1.4838760828847995e-05, "loss": 0.8413, "step": 2428 }, { "epoch": 0.36, "learning_rate": 1.483456859462733e-05, "loss": 0.8105, "step": 2429 }, { "epoch": 0.36, "learning_rate": 1.48303752512773e-05, "loss": 0.8501, "step": 2430 }, { "epoch": 0.36, "learning_rate": 1.4826180799759934e-05, "loss": 0.8862, "step": 2431 }, { "epoch": 0.36, "learning_rate": 1.4821985241037506e-05, "loss": 0.7983, "step": 2432 }, { "epoch": 0.36, "learning_rate": 1.481778857607254e-05, "loss": 0.8516, "step": 2433 }, { "epoch": 0.36, "learning_rate": 1.4813590805827831e-05, "loss": 0.8169, "step": 2434 }, { "epoch": 0.36, "learning_rate": 1.4809391931266411e-05, "loss": 0.8945, "step": 2435 }, { "epoch": 0.36, "learning_rate": 1.480519195335157e-05, "loss": 0.9263, "step": 2436 }, { "epoch": 0.36, "learning_rate": 1.4800990873046847e-05, "loss": 0.8252, "step": 2437 }, { "epoch": 0.36, "learning_rate": 1.4796788691316051e-05, "loss": 0.8687, "step": 2438 }, { "epoch": 0.36, "learning_rate": 1.4792585409123219e-05, "loss": 0.8315, "step": 2439 }, { "epoch": 0.36, "learning_rate": 1.4788381027432663e-05, "loss": 0.8452, "step": 2440 }, { "epoch": 0.36, "learning_rate": 1.4784175547208932e-05, "loss": 0.7612, "step": 2441 }, { "epoch": 0.36, "learning_rate": 1.477996896941683e-05, "loss": 0.8696, "step": 2442 }, { "epoch": 0.36, "learning_rate": 1.4775761295021418e-05, "loss": 0.8286, "step": 2443 }, { "epoch": 0.36, "learning_rate": 1.4771552524988008e-05, "loss": 0.897, "step": 2444 }, { "epoch": 0.36, "learning_rate": 1.4767342660282155e-05, "loss": 0.8672, "step": 2445 }, { "epoch": 0.36, "learning_rate": 1.4763131701869675e-05, "loss": 0.8418, "step": 2446 }, { "epoch": 0.36, "learning_rate": 1.4758919650716632e-05, "loss": 0.8208, "step": 2447 }, { "epoch": 0.36, "learning_rate": 1.4754706507789336e-05, "loss": 0.8403, "step": 2448 }, { "epoch": 0.36, "learning_rate": 1.4750492274054353e-05, "loss": 0.8521, "step": 2449 }, { "epoch": 0.36, "learning_rate": 1.47462769504785e-05, "loss": 0.7749, "step": 2450 }, { "epoch": 0.36, "learning_rate": 1.4742060538028833e-05, "loss": 0.8242, "step": 2451 }, { "epoch": 0.36, "learning_rate": 1.4737843037672677e-05, "loss": 0.8867, "step": 2452 }, { "epoch": 0.36, "learning_rate": 1.4733624450377589e-05, "loss": 0.8237, "step": 2453 }, { "epoch": 0.36, "learning_rate": 1.4729404777111383e-05, "loss": 0.8252, "step": 2454 }, { "epoch": 0.36, "learning_rate": 1.4725184018842123e-05, "loss": 0.8477, "step": 2455 }, { "epoch": 0.36, "learning_rate": 1.4720962176538117e-05, "loss": 0.7783, "step": 2456 }, { "epoch": 0.36, "learning_rate": 1.4716739251167931e-05, "loss": 0.8467, "step": 2457 }, { "epoch": 0.36, "learning_rate": 1.4712515243700367e-05, "loss": 0.8291, "step": 2458 }, { "epoch": 0.36, "learning_rate": 1.4708290155104483e-05, "loss": 0.8979, "step": 2459 }, { "epoch": 0.36, "learning_rate": 1.4704063986349587e-05, "loss": 0.7915, "step": 2460 }, { "epoch": 0.36, "learning_rate": 1.469983673840523e-05, "loss": 0.7944, "step": 2461 }, { "epoch": 0.36, "learning_rate": 1.4695608412241209e-05, "loss": 0.7983, "step": 2462 }, { "epoch": 0.36, "learning_rate": 1.4691379008827578e-05, "loss": 0.7632, "step": 2463 }, { "epoch": 0.36, "learning_rate": 1.4687148529134621e-05, "loss": 0.854, "step": 2464 }, { "epoch": 0.36, "learning_rate": 1.4682916974132889e-05, "loss": 0.3185, "step": 2465 }, { "epoch": 0.36, "learning_rate": 1.4678684344793166e-05, "loss": 0.8467, "step": 2466 }, { "epoch": 0.36, "learning_rate": 1.4674450642086486e-05, "loss": 0.8354, "step": 2467 }, { "epoch": 0.36, "learning_rate": 1.4670215866984134e-05, "loss": 0.8242, "step": 2468 }, { "epoch": 0.37, "learning_rate": 1.4665980020457631e-05, "loss": 0.8325, "step": 2469 }, { "epoch": 0.37, "learning_rate": 1.4661743103478759e-05, "loss": 0.8398, "step": 2470 }, { "epoch": 0.37, "learning_rate": 1.4657505117019523e-05, "loss": 0.9058, "step": 2471 }, { "epoch": 0.37, "learning_rate": 1.46532660620522e-05, "loss": 0.3374, "step": 2472 }, { "epoch": 0.37, "learning_rate": 1.4649025939549289e-05, "loss": 0.8105, "step": 2473 }, { "epoch": 0.37, "learning_rate": 1.4644784750483546e-05, "loss": 0.8628, "step": 2474 }, { "epoch": 0.37, "learning_rate": 1.4640542495827974e-05, "loss": 0.874, "step": 2475 }, { "epoch": 0.37, "learning_rate": 1.4636299176555813e-05, "loss": 0.8472, "step": 2476 }, { "epoch": 0.37, "learning_rate": 1.4632054793640547e-05, "loss": 0.7842, "step": 2477 }, { "epoch": 0.37, "learning_rate": 1.4627809348055908e-05, "loss": 0.8506, "step": 2478 }, { "epoch": 0.37, "learning_rate": 1.4623562840775874e-05, "loss": 0.8853, "step": 2479 }, { "epoch": 0.37, "learning_rate": 1.4619315272774662e-05, "loss": 0.7827, "step": 2480 }, { "epoch": 0.37, "learning_rate": 1.4615066645026735e-05, "loss": 0.8257, "step": 2481 }, { "epoch": 0.37, "learning_rate": 1.4610816958506796e-05, "loss": 0.8335, "step": 2482 }, { "epoch": 0.37, "learning_rate": 1.4606566214189793e-05, "loss": 0.853, "step": 2483 }, { "epoch": 0.37, "learning_rate": 1.4602314413050915e-05, "loss": 0.8965, "step": 2484 }, { "epoch": 0.37, "learning_rate": 1.4598061556065598e-05, "loss": 0.8252, "step": 2485 }, { "epoch": 0.37, "learning_rate": 1.4593807644209519e-05, "loss": 0.8369, "step": 2486 }, { "epoch": 0.37, "learning_rate": 1.458955267845859e-05, "loss": 0.8696, "step": 2487 }, { "epoch": 0.37, "learning_rate": 1.458529665978897e-05, "loss": 0.7935, "step": 2488 }, { "epoch": 0.37, "learning_rate": 1.4581039589177065e-05, "loss": 0.8535, "step": 2489 }, { "epoch": 0.37, "learning_rate": 1.457678146759951e-05, "loss": 0.832, "step": 2490 }, { "epoch": 0.37, "learning_rate": 1.4572522296033194e-05, "loss": 0.8857, "step": 2491 }, { "epoch": 0.37, "learning_rate": 1.4568262075455237e-05, "loss": 0.8779, "step": 2492 }, { "epoch": 0.37, "learning_rate": 1.4564000806843009e-05, "loss": 0.8535, "step": 2493 }, { "epoch": 0.37, "learning_rate": 1.4559738491174104e-05, "loss": 0.8701, "step": 2494 }, { "epoch": 0.37, "learning_rate": 1.455547512942638e-05, "loss": 0.8413, "step": 2495 }, { "epoch": 0.37, "learning_rate": 1.4551210722577911e-05, "loss": 0.873, "step": 2496 }, { "epoch": 0.37, "learning_rate": 1.454694527160703e-05, "loss": 0.8188, "step": 2497 }, { "epoch": 0.37, "learning_rate": 1.4542678777492298e-05, "loss": 0.8364, "step": 2498 }, { "epoch": 0.37, "learning_rate": 1.4538411241212518e-05, "loss": 0.8618, "step": 2499 }, { "epoch": 0.37, "learning_rate": 1.4534142663746736e-05, "loss": 0.876, "step": 2500 }, { "epoch": 0.37, "learning_rate": 1.452987304607423e-05, "loss": 0.8877, "step": 2501 }, { "epoch": 0.37, "learning_rate": 1.4525602389174523e-05, "loss": 0.8638, "step": 2502 }, { "epoch": 0.37, "learning_rate": 1.4521330694027372e-05, "loss": 0.8384, "step": 2503 }, { "epoch": 0.37, "learning_rate": 1.4517057961612777e-05, "loss": 0.834, "step": 2504 }, { "epoch": 0.37, "learning_rate": 1.4512784192910965e-05, "loss": 0.8486, "step": 2505 }, { "epoch": 0.37, "learning_rate": 1.4508509388902421e-05, "loss": 0.8877, "step": 2506 }, { "epoch": 0.37, "learning_rate": 1.4504233550567844e-05, "loss": 0.7734, "step": 2507 }, { "epoch": 0.37, "learning_rate": 1.449995667888819e-05, "loss": 0.7603, "step": 2508 }, { "epoch": 0.37, "learning_rate": 1.4495678774844634e-05, "loss": 0.8013, "step": 2509 }, { "epoch": 0.37, "learning_rate": 1.4491399839418608e-05, "loss": 0.7742, "step": 2510 }, { "epoch": 0.37, "learning_rate": 1.4487119873591761e-05, "loss": 0.8564, "step": 2511 }, { "epoch": 0.37, "learning_rate": 1.4482838878345992e-05, "loss": 0.8486, "step": 2512 }, { "epoch": 0.37, "learning_rate": 1.4478556854663435e-05, "loss": 0.8149, "step": 2513 }, { "epoch": 0.37, "learning_rate": 1.4474273803526446e-05, "loss": 0.7637, "step": 2514 }, { "epoch": 0.37, "learning_rate": 1.446998972591764e-05, "loss": 0.8398, "step": 2515 }, { "epoch": 0.37, "learning_rate": 1.4465704622819843e-05, "loss": 0.3525, "step": 2516 }, { "epoch": 0.37, "learning_rate": 1.4461418495216137e-05, "loss": 0.8462, "step": 2517 }, { "epoch": 0.37, "learning_rate": 1.4457131344089824e-05, "loss": 0.8164, "step": 2518 }, { "epoch": 0.37, "learning_rate": 1.445284317042445e-05, "loss": 0.8643, "step": 2519 }, { "epoch": 0.37, "learning_rate": 1.444855397520379e-05, "loss": 0.8203, "step": 2520 }, { "epoch": 0.37, "learning_rate": 1.4444263759411858e-05, "loss": 0.7915, "step": 2521 }, { "epoch": 0.37, "learning_rate": 1.4439972524032898e-05, "loss": 0.8442, "step": 2522 }, { "epoch": 0.37, "learning_rate": 1.4435680270051392e-05, "loss": 0.811, "step": 2523 }, { "epoch": 0.37, "learning_rate": 1.4431386998452052e-05, "loss": 0.8022, "step": 2524 }, { "epoch": 0.37, "learning_rate": 1.4427092710219826e-05, "loss": 0.8379, "step": 2525 }, { "epoch": 0.37, "learning_rate": 1.4422797406339893e-05, "loss": 0.8672, "step": 2526 }, { "epoch": 0.37, "learning_rate": 1.4418501087797667e-05, "loss": 0.8398, "step": 2527 }, { "epoch": 0.37, "learning_rate": 1.4414203755578791e-05, "loss": 0.8286, "step": 2528 }, { "epoch": 0.37, "learning_rate": 1.4409905410669147e-05, "loss": 0.8535, "step": 2529 }, { "epoch": 0.37, "learning_rate": 1.440560605405485e-05, "loss": 0.8281, "step": 2530 }, { "epoch": 0.37, "learning_rate": 1.4401305686722234e-05, "loss": 0.8589, "step": 2531 }, { "epoch": 0.37, "learning_rate": 1.4397004309657877e-05, "loss": 0.7939, "step": 2532 }, { "epoch": 0.37, "learning_rate": 1.4392701923848586e-05, "loss": 0.7803, "step": 2533 }, { "epoch": 0.37, "learning_rate": 1.4388398530281403e-05, "loss": 0.8447, "step": 2534 }, { "epoch": 0.37, "learning_rate": 1.4384094129943589e-05, "loss": 0.8457, "step": 2535 }, { "epoch": 0.38, "learning_rate": 1.437978872382265e-05, "loss": 0.8872, "step": 2536 }, { "epoch": 0.38, "learning_rate": 1.4375482312906314e-05, "loss": 0.8379, "step": 2537 }, { "epoch": 0.38, "learning_rate": 1.4371174898182547e-05, "loss": 0.8716, "step": 2538 }, { "epoch": 0.38, "learning_rate": 1.4366866480639532e-05, "loss": 0.8804, "step": 2539 }, { "epoch": 0.38, "learning_rate": 1.4362557061265698e-05, "loss": 0.8345, "step": 2540 }, { "epoch": 0.38, "learning_rate": 1.4358246641049696e-05, "loss": 0.8286, "step": 2541 }, { "epoch": 0.38, "learning_rate": 1.4353935220980404e-05, "loss": 0.7832, "step": 2542 }, { "epoch": 0.38, "learning_rate": 1.4349622802046933e-05, "loss": 0.8359, "step": 2543 }, { "epoch": 0.38, "learning_rate": 1.4345309385238624e-05, "loss": 0.7554, "step": 2544 }, { "epoch": 0.38, "learning_rate": 1.4340994971545046e-05, "loss": 0.8169, "step": 2545 }, { "epoch": 0.38, "learning_rate": 1.4336679561955993e-05, "loss": 0.8652, "step": 2546 }, { "epoch": 0.38, "learning_rate": 1.4332363157461498e-05, "loss": 0.7754, "step": 2547 }, { "epoch": 0.38, "learning_rate": 1.4328045759051805e-05, "loss": 0.8506, "step": 2548 }, { "epoch": 0.38, "learning_rate": 1.4323727367717404e-05, "loss": 0.8623, "step": 2549 }, { "epoch": 0.38, "learning_rate": 1.4319407984449e-05, "loss": 0.8354, "step": 2550 }, { "epoch": 0.38, "learning_rate": 1.4315087610237535e-05, "loss": 0.791, "step": 2551 }, { "epoch": 0.38, "learning_rate": 1.4310766246074168e-05, "loss": 0.8228, "step": 2552 }, { "epoch": 0.38, "learning_rate": 1.4306443892950297e-05, "loss": 0.8379, "step": 2553 }, { "epoch": 0.38, "learning_rate": 1.4302120551857535e-05, "loss": 0.856, "step": 2554 }, { "epoch": 0.38, "learning_rate": 1.4297796223787734e-05, "loss": 0.8789, "step": 2555 }, { "epoch": 0.38, "learning_rate": 1.4293470909732958e-05, "loss": 0.7954, "step": 2556 }, { "epoch": 0.38, "learning_rate": 1.4289144610685512e-05, "loss": 0.8525, "step": 2557 }, { "epoch": 0.38, "learning_rate": 1.4284817327637916e-05, "loss": 0.8452, "step": 2558 }, { "epoch": 0.38, "learning_rate": 1.4280489061582917e-05, "loss": 0.8638, "step": 2559 }, { "epoch": 0.38, "learning_rate": 1.4276159813513499e-05, "loss": 0.813, "step": 2560 }, { "epoch": 0.38, "learning_rate": 1.4271829584422854e-05, "loss": 0.8911, "step": 2561 }, { "epoch": 0.38, "learning_rate": 1.4267498375304417e-05, "loss": 0.894, "step": 2562 }, { "epoch": 0.38, "learning_rate": 1.4263166187151826e-05, "loss": 0.8267, "step": 2563 }, { "epoch": 0.38, "learning_rate": 1.4258833020958965e-05, "loss": 0.7842, "step": 2564 }, { "epoch": 0.38, "learning_rate": 1.425449887771993e-05, "loss": 0.8242, "step": 2565 }, { "epoch": 0.38, "learning_rate": 1.4250163758429045e-05, "loss": 0.7793, "step": 2566 }, { "epoch": 0.38, "learning_rate": 1.424582766408086e-05, "loss": 0.8604, "step": 2567 }, { "epoch": 0.38, "learning_rate": 1.4241490595670142e-05, "loss": 0.8408, "step": 2568 }, { "epoch": 0.38, "learning_rate": 1.4237152554191889e-05, "loss": 0.8765, "step": 2569 }, { "epoch": 0.38, "learning_rate": 1.4232813540641319e-05, "loss": 0.877, "step": 2570 }, { "epoch": 0.38, "learning_rate": 1.422847355601387e-05, "loss": 0.7983, "step": 2571 }, { "epoch": 0.38, "learning_rate": 1.4224132601305209e-05, "loss": 0.8633, "step": 2572 }, { "epoch": 0.38, "learning_rate": 1.4219790677511219e-05, "loss": 0.8877, "step": 2573 }, { "epoch": 0.38, "learning_rate": 1.421544778562801e-05, "loss": 0.7529, "step": 2574 }, { "epoch": 0.38, "learning_rate": 1.4211103926651915e-05, "loss": 0.8833, "step": 2575 }, { "epoch": 0.38, "learning_rate": 1.4206759101579481e-05, "loss": 0.8618, "step": 2576 }, { "epoch": 0.38, "learning_rate": 1.4202413311407488e-05, "loss": 0.8569, "step": 2577 }, { "epoch": 0.38, "learning_rate": 1.4198066557132928e-05, "loss": 0.7979, "step": 2578 }, { "epoch": 0.38, "learning_rate": 1.419371883975302e-05, "loss": 0.8247, "step": 2579 }, { "epoch": 0.38, "learning_rate": 1.41893701602652e-05, "loss": 0.7852, "step": 2580 }, { "epoch": 0.38, "learning_rate": 1.4185020519667126e-05, "loss": 0.8789, "step": 2581 }, { "epoch": 0.38, "learning_rate": 1.4180669918956676e-05, "loss": 0.7852, "step": 2582 }, { "epoch": 0.38, "learning_rate": 1.4176318359131955e-05, "loss": 0.8115, "step": 2583 }, { "epoch": 0.38, "learning_rate": 1.4171965841191278e-05, "loss": 0.8687, "step": 2584 }, { "epoch": 0.38, "learning_rate": 1.4167612366133184e-05, "loss": 0.7319, "step": 2585 }, { "epoch": 0.38, "learning_rate": 1.4163257934956427e-05, "loss": 0.8652, "step": 2586 }, { "epoch": 0.38, "learning_rate": 1.4158902548659996e-05, "loss": 0.876, "step": 2587 }, { "epoch": 0.38, "learning_rate": 1.4154546208243078e-05, "loss": 0.8413, "step": 2588 }, { "epoch": 0.38, "learning_rate": 1.4150188914705099e-05, "loss": 0.9585, "step": 2589 }, { "epoch": 0.38, "learning_rate": 1.414583066904568e-05, "loss": 0.814, "step": 2590 }, { "epoch": 0.38, "learning_rate": 1.4141471472264682e-05, "loss": 0.8438, "step": 2591 }, { "epoch": 0.38, "learning_rate": 1.4137111325362181e-05, "loss": 0.8486, "step": 2592 }, { "epoch": 0.38, "learning_rate": 1.413275022933846e-05, "loss": 0.8164, "step": 2593 }, { "epoch": 0.38, "learning_rate": 1.4128388185194026e-05, "loss": 0.834, "step": 2594 }, { "epoch": 0.38, "learning_rate": 1.4124025193929602e-05, "loss": 0.8223, "step": 2595 }, { "epoch": 0.38, "learning_rate": 1.4119661256546135e-05, "loss": 0.9126, "step": 2596 }, { "epoch": 0.38, "learning_rate": 1.411529637404478e-05, "loss": 0.8345, "step": 2597 }, { "epoch": 0.38, "learning_rate": 1.4110930547426917e-05, "loss": 0.855, "step": 2598 }, { "epoch": 0.38, "learning_rate": 1.4106563777694134e-05, "loss": 0.8379, "step": 2599 }, { "epoch": 0.38, "learning_rate": 1.4102196065848239e-05, "loss": 0.833, "step": 2600 }, { "epoch": 0.38, "learning_rate": 1.4097827412891258e-05, "loss": 0.7964, "step": 2601 }, { "epoch": 0.38, "learning_rate": 1.4093457819825436e-05, "loss": 0.8335, "step": 2602 }, { "epoch": 0.38, "learning_rate": 1.4089087287653222e-05, "loss": 0.856, "step": 2603 }, { "epoch": 0.39, "learning_rate": 1.4084715817377292e-05, "loss": 0.8223, "step": 2604 }, { "epoch": 0.39, "learning_rate": 1.4080343410000532e-05, "loss": 0.8384, "step": 2605 }, { "epoch": 0.39, "learning_rate": 1.4075970066526044e-05, "loss": 0.8979, "step": 2606 }, { "epoch": 0.39, "learning_rate": 1.4071595787957148e-05, "loss": 0.3518, "step": 2607 }, { "epoch": 0.39, "learning_rate": 1.4067220575297369e-05, "loss": 0.8423, "step": 2608 }, { "epoch": 0.39, "learning_rate": 1.4062844429550457e-05, "loss": 0.874, "step": 2609 }, { "epoch": 0.39, "learning_rate": 1.4058467351720371e-05, "loss": 0.792, "step": 2610 }, { "epoch": 0.39, "learning_rate": 1.4054089342811286e-05, "loss": 0.8613, "step": 2611 }, { "epoch": 0.39, "learning_rate": 1.4049710403827583e-05, "loss": 0.8599, "step": 2612 }, { "epoch": 0.39, "learning_rate": 1.4045330535773868e-05, "loss": 0.7954, "step": 2613 }, { "epoch": 0.39, "learning_rate": 1.4040949739654954e-05, "loss": 0.8311, "step": 2614 }, { "epoch": 0.39, "learning_rate": 1.403656801647587e-05, "loss": 0.8301, "step": 2615 }, { "epoch": 0.39, "learning_rate": 1.4032185367241846e-05, "loss": 0.8887, "step": 2616 }, { "epoch": 0.39, "learning_rate": 1.4027801792958343e-05, "loss": 0.8267, "step": 2617 }, { "epoch": 0.39, "learning_rate": 1.4023417294631019e-05, "loss": 0.8315, "step": 2618 }, { "epoch": 0.39, "learning_rate": 1.4019031873265756e-05, "loss": 0.8232, "step": 2619 }, { "epoch": 0.39, "learning_rate": 1.4014645529868634e-05, "loss": 0.7344, "step": 2620 }, { "epoch": 0.39, "learning_rate": 1.4010258265445957e-05, "loss": 0.8755, "step": 2621 }, { "epoch": 0.39, "learning_rate": 1.4005870081004233e-05, "loss": 0.8511, "step": 2622 }, { "epoch": 0.39, "learning_rate": 1.4001480977550186e-05, "loss": 0.8223, "step": 2623 }, { "epoch": 0.39, "learning_rate": 1.3997090956090751e-05, "loss": 0.8164, "step": 2624 }, { "epoch": 0.39, "learning_rate": 1.3992700017633063e-05, "loss": 0.8843, "step": 2625 }, { "epoch": 0.39, "learning_rate": 1.398830816318448e-05, "loss": 0.8105, "step": 2626 }, { "epoch": 0.39, "learning_rate": 1.3983915393752565e-05, "loss": 0.8687, "step": 2627 }, { "epoch": 0.39, "learning_rate": 1.3979521710345094e-05, "loss": 0.7861, "step": 2628 }, { "epoch": 0.39, "learning_rate": 1.3975127113970047e-05, "loss": 0.8286, "step": 2629 }, { "epoch": 0.39, "learning_rate": 1.3970731605635614e-05, "loss": 0.8018, "step": 2630 }, { "epoch": 0.39, "learning_rate": 1.3966335186350199e-05, "loss": 0.8667, "step": 2631 }, { "epoch": 0.39, "learning_rate": 1.3961937857122418e-05, "loss": 0.8184, "step": 2632 }, { "epoch": 0.39, "learning_rate": 1.3957539618961084e-05, "loss": 0.8457, "step": 2633 }, { "epoch": 0.39, "learning_rate": 1.3953140472875228e-05, "loss": 0.8364, "step": 2634 }, { "epoch": 0.39, "learning_rate": 1.3948740419874085e-05, "loss": 0.9224, "step": 2635 }, { "epoch": 0.39, "learning_rate": 1.3944339460967098e-05, "loss": 0.8267, "step": 2636 }, { "epoch": 0.39, "learning_rate": 1.393993759716392e-05, "loss": 0.8647, "step": 2637 }, { "epoch": 0.39, "learning_rate": 1.3935534829474414e-05, "loss": 0.8569, "step": 2638 }, { "epoch": 0.39, "learning_rate": 1.3931131158908644e-05, "loss": 0.3097, "step": 2639 }, { "epoch": 0.39, "learning_rate": 1.3926726586476883e-05, "loss": 0.875, "step": 2640 }, { "epoch": 0.39, "learning_rate": 1.3922321113189617e-05, "loss": 0.8901, "step": 2641 }, { "epoch": 0.39, "learning_rate": 1.3917914740057527e-05, "loss": 0.8511, "step": 2642 }, { "epoch": 0.39, "learning_rate": 1.3913507468091515e-05, "loss": 0.8242, "step": 2643 }, { "epoch": 0.39, "learning_rate": 1.3909099298302677e-05, "loss": 0.8281, "step": 2644 }, { "epoch": 0.39, "learning_rate": 1.390469023170232e-05, "loss": 0.8071, "step": 2645 }, { "epoch": 0.39, "learning_rate": 1.3900280269301957e-05, "loss": 0.7793, "step": 2646 }, { "epoch": 0.39, "learning_rate": 1.3895869412113308e-05, "loss": 0.8755, "step": 2647 }, { "epoch": 0.39, "learning_rate": 1.3891457661148289e-05, "loss": 0.3286, "step": 2648 }, { "epoch": 0.39, "learning_rate": 1.3887045017419032e-05, "loss": 0.8154, "step": 2649 }, { "epoch": 0.39, "learning_rate": 1.3882631481937872e-05, "loss": 0.835, "step": 2650 }, { "epoch": 0.39, "learning_rate": 1.3878217055717345e-05, "loss": 0.7852, "step": 2651 }, { "epoch": 0.39, "learning_rate": 1.3873801739770192e-05, "loss": 0.8691, "step": 2652 }, { "epoch": 0.39, "learning_rate": 1.3869385535109358e-05, "loss": 0.8892, "step": 2653 }, { "epoch": 0.39, "learning_rate": 1.3864968442747995e-05, "loss": 0.8145, "step": 2654 }, { "epoch": 0.39, "learning_rate": 1.3860550463699456e-05, "loss": 0.8569, "step": 2655 }, { "epoch": 0.39, "learning_rate": 1.3856131598977298e-05, "loss": 0.8164, "step": 2656 }, { "epoch": 0.39, "learning_rate": 1.385171184959528e-05, "loss": 0.791, "step": 2657 }, { "epoch": 0.39, "learning_rate": 1.3847291216567364e-05, "loss": 0.354, "step": 2658 }, { "epoch": 0.39, "learning_rate": 1.384286970090772e-05, "loss": 0.834, "step": 2659 }, { "epoch": 0.39, "learning_rate": 1.3838447303630713e-05, "loss": 0.8608, "step": 2660 }, { "epoch": 0.39, "learning_rate": 1.3834024025750914e-05, "loss": 0.7793, "step": 2661 }, { "epoch": 0.39, "learning_rate": 1.3829599868283094e-05, "loss": 0.832, "step": 2662 }, { "epoch": 0.39, "learning_rate": 1.3825174832242232e-05, "loss": 0.8286, "step": 2663 }, { "epoch": 0.39, "learning_rate": 1.38207489186435e-05, "loss": 0.8013, "step": 2664 }, { "epoch": 0.39, "learning_rate": 1.3816322128502276e-05, "loss": 0.8481, "step": 2665 }, { "epoch": 0.39, "learning_rate": 1.381189446283414e-05, "loss": 0.8286, "step": 2666 }, { "epoch": 0.39, "learning_rate": 1.3807465922654863e-05, "loss": 0.8154, "step": 2667 }, { "epoch": 0.39, "learning_rate": 1.3803036508980436e-05, "loss": 0.7695, "step": 2668 }, { "epoch": 0.39, "learning_rate": 1.3798606222827033e-05, "loss": 0.8809, "step": 2669 }, { "epoch": 0.39, "learning_rate": 1.3794175065211035e-05, "loss": 0.8936, "step": 2670 }, { "epoch": 0.4, "learning_rate": 1.3789743037149026e-05, "loss": 0.7544, "step": 2671 }, { "epoch": 0.4, "learning_rate": 1.378531013965778e-05, "loss": 0.7788, "step": 2672 }, { "epoch": 0.4, "learning_rate": 1.3780876373754282e-05, "loss": 0.8711, "step": 2673 }, { "epoch": 0.4, "learning_rate": 1.3776441740455706e-05, "loss": 0.8081, "step": 2674 }, { "epoch": 0.4, "learning_rate": 1.3772006240779435e-05, "loss": 0.8706, "step": 2675 }, { "epoch": 0.4, "learning_rate": 1.3767569875743036e-05, "loss": 0.8096, "step": 2676 }, { "epoch": 0.4, "learning_rate": 1.3763132646364294e-05, "loss": 0.8857, "step": 2677 }, { "epoch": 0.4, "learning_rate": 1.375869455366118e-05, "loss": 0.7861, "step": 2678 }, { "epoch": 0.4, "learning_rate": 1.3754255598651862e-05, "loss": 0.8579, "step": 2679 }, { "epoch": 0.4, "learning_rate": 1.374981578235471e-05, "loss": 0.7798, "step": 2680 }, { "epoch": 0.4, "learning_rate": 1.374537510578829e-05, "loss": 0.8496, "step": 2681 }, { "epoch": 0.4, "learning_rate": 1.3740933569971368e-05, "loss": 0.7729, "step": 2682 }, { "epoch": 0.4, "learning_rate": 1.3736491175922909e-05, "loss": 0.8599, "step": 2683 }, { "epoch": 0.4, "learning_rate": 1.373204792466206e-05, "loss": 0.8364, "step": 2684 }, { "epoch": 0.4, "learning_rate": 1.3727603817208186e-05, "loss": 0.8389, "step": 2685 }, { "epoch": 0.4, "learning_rate": 1.3723158854580834e-05, "loss": 0.8154, "step": 2686 }, { "epoch": 0.4, "learning_rate": 1.3718713037799752e-05, "loss": 0.811, "step": 2687 }, { "epoch": 0.4, "learning_rate": 1.3714266367884883e-05, "loss": 0.8623, "step": 2688 }, { "epoch": 0.4, "learning_rate": 1.3709818845856366e-05, "loss": 0.7759, "step": 2689 }, { "epoch": 0.4, "learning_rate": 1.3705370472734537e-05, "loss": 0.8965, "step": 2690 }, { "epoch": 0.4, "learning_rate": 1.3700921249539923e-05, "loss": 0.8638, "step": 2691 }, { "epoch": 0.4, "learning_rate": 1.3696471177293253e-05, "loss": 0.8594, "step": 2692 }, { "epoch": 0.4, "learning_rate": 1.3692020257015441e-05, "loss": 0.8892, "step": 2693 }, { "epoch": 0.4, "learning_rate": 1.3687568489727606e-05, "loss": 0.3252, "step": 2694 }, { "epoch": 0.4, "learning_rate": 1.3683115876451054e-05, "loss": 0.8311, "step": 2695 }, { "epoch": 0.4, "learning_rate": 1.3678662418207289e-05, "loss": 0.8604, "step": 2696 }, { "epoch": 0.4, "learning_rate": 1.3674208116018007e-05, "loss": 0.8481, "step": 2697 }, { "epoch": 0.4, "learning_rate": 1.3669752970905095e-05, "loss": 0.8174, "step": 2698 }, { "epoch": 0.4, "learning_rate": 1.3665296983890639e-05, "loss": 0.8423, "step": 2699 }, { "epoch": 0.4, "learning_rate": 1.366084015599692e-05, "loss": 0.7905, "step": 2700 }, { "epoch": 0.4, "learning_rate": 1.3656382488246399e-05, "loss": 0.8105, "step": 2701 }, { "epoch": 0.4, "learning_rate": 1.3651923981661741e-05, "loss": 0.7964, "step": 2702 }, { "epoch": 0.4, "learning_rate": 1.3647464637265803e-05, "loss": 0.8887, "step": 2703 }, { "epoch": 0.4, "learning_rate": 1.364300445608163e-05, "loss": 0.7964, "step": 2704 }, { "epoch": 0.4, "learning_rate": 1.3638543439132464e-05, "loss": 0.9229, "step": 2705 }, { "epoch": 0.4, "learning_rate": 1.363408158744173e-05, "loss": 0.8511, "step": 2706 }, { "epoch": 0.4, "learning_rate": 1.3629618902033053e-05, "loss": 0.8501, "step": 2707 }, { "epoch": 0.4, "learning_rate": 1.3625155383930246e-05, "loss": 0.8081, "step": 2708 }, { "epoch": 0.4, "learning_rate": 1.3620691034157314e-05, "loss": 0.832, "step": 2709 }, { "epoch": 0.4, "learning_rate": 1.361622585373845e-05, "loss": 0.875, "step": 2710 }, { "epoch": 0.4, "learning_rate": 1.3611759843698043e-05, "loss": 0.8647, "step": 2711 }, { "epoch": 0.4, "learning_rate": 1.3607293005060663e-05, "loss": 0.8188, "step": 2712 }, { "epoch": 0.4, "learning_rate": 1.3602825338851082e-05, "loss": 0.8638, "step": 2713 }, { "epoch": 0.4, "learning_rate": 1.3598356846094253e-05, "loss": 0.8994, "step": 2714 }, { "epoch": 0.4, "learning_rate": 1.3593887527815327e-05, "loss": 0.8213, "step": 2715 }, { "epoch": 0.4, "learning_rate": 1.358941738503963e-05, "loss": 0.877, "step": 2716 }, { "epoch": 0.4, "learning_rate": 1.3584946418792688e-05, "loss": 0.856, "step": 2717 }, { "epoch": 0.4, "learning_rate": 1.358047463010022e-05, "loss": 0.7471, "step": 2718 }, { "epoch": 0.4, "learning_rate": 1.3576002019988123e-05, "loss": 0.8496, "step": 2719 }, { "epoch": 0.4, "learning_rate": 1.3571528589482492e-05, "loss": 0.7363, "step": 2720 }, { "epoch": 0.4, "learning_rate": 1.3567054339609595e-05, "loss": 0.7954, "step": 2721 }, { "epoch": 0.4, "learning_rate": 1.3562579271395913e-05, "loss": 0.8604, "step": 2722 }, { "epoch": 0.4, "learning_rate": 1.3558103385868087e-05, "loss": 0.8472, "step": 2723 }, { "epoch": 0.4, "learning_rate": 1.3553626684052966e-05, "loss": 0.8042, "step": 2724 }, { "epoch": 0.4, "learning_rate": 1.3549149166977573e-05, "loss": 0.8311, "step": 2725 }, { "epoch": 0.4, "learning_rate": 1.354467083566913e-05, "loss": 0.8447, "step": 2726 }, { "epoch": 0.4, "learning_rate": 1.3540191691155036e-05, "loss": 0.7656, "step": 2727 }, { "epoch": 0.4, "learning_rate": 1.3535711734462883e-05, "loss": 0.7988, "step": 2728 }, { "epoch": 0.4, "learning_rate": 1.3531230966620444e-05, "loss": 0.855, "step": 2729 }, { "epoch": 0.4, "learning_rate": 1.352674938865568e-05, "loss": 0.8071, "step": 2730 }, { "epoch": 0.4, "learning_rate": 1.3522267001596742e-05, "loss": 0.8384, "step": 2731 }, { "epoch": 0.4, "learning_rate": 1.351778380647196e-05, "loss": 0.8018, "step": 2732 }, { "epoch": 0.4, "learning_rate": 1.3513299804309856e-05, "loss": 0.895, "step": 2733 }, { "epoch": 0.4, "learning_rate": 1.3508814996139128e-05, "loss": 0.7739, "step": 2734 }, { "epoch": 0.4, "learning_rate": 1.3504329382988671e-05, "loss": 0.877, "step": 2735 }, { "epoch": 0.4, "learning_rate": 1.3499842965887552e-05, "loss": 0.8477, "step": 2736 }, { "epoch": 0.4, "learning_rate": 1.3495355745865038e-05, "loss": 0.8423, "step": 2737 }, { "epoch": 0.4, "learning_rate": 1.3490867723950559e-05, "loss": 0.9014, "step": 2738 }, { "epoch": 0.41, "learning_rate": 1.3486378901173747e-05, "loss": 0.8315, "step": 2739 }, { "epoch": 0.41, "learning_rate": 1.3481889278564414e-05, "loss": 0.853, "step": 2740 }, { "epoch": 0.41, "learning_rate": 1.347739885715255e-05, "loss": 0.8184, "step": 2741 }, { "epoch": 0.41, "learning_rate": 1.3472907637968331e-05, "loss": 0.9121, "step": 2742 }, { "epoch": 0.41, "learning_rate": 1.3468415622042117e-05, "loss": 0.7944, "step": 2743 }, { "epoch": 0.41, "learning_rate": 1.3463922810404448e-05, "loss": 0.8018, "step": 2744 }, { "epoch": 0.41, "learning_rate": 1.3459429204086056e-05, "loss": 0.7764, "step": 2745 }, { "epoch": 0.41, "learning_rate": 1.345493480411784e-05, "loss": 0.9062, "step": 2746 }, { "epoch": 0.41, "learning_rate": 1.3450439611530892e-05, "loss": 0.7637, "step": 2747 }, { "epoch": 0.41, "learning_rate": 1.3445943627356481e-05, "loss": 0.8652, "step": 2748 }, { "epoch": 0.41, "learning_rate": 1.3441446852626064e-05, "loss": 0.8672, "step": 2749 }, { "epoch": 0.41, "learning_rate": 1.3436949288371275e-05, "loss": 0.8096, "step": 2750 }, { "epoch": 0.41, "learning_rate": 1.3432450935623922e-05, "loss": 0.8535, "step": 2751 }, { "epoch": 0.41, "learning_rate": 1.342795179541601e-05, "loss": 0.8101, "step": 2752 }, { "epoch": 0.41, "learning_rate": 1.3423451868779707e-05, "loss": 0.8149, "step": 2753 }, { "epoch": 0.41, "learning_rate": 1.3418951156747374e-05, "loss": 0.8447, "step": 2754 }, { "epoch": 0.41, "learning_rate": 1.3414449660351552e-05, "loss": 0.8242, "step": 2755 }, { "epoch": 0.41, "learning_rate": 1.3409947380624953e-05, "loss": 0.7524, "step": 2756 }, { "epoch": 0.41, "learning_rate": 1.3405444318600471e-05, "loss": 0.7881, "step": 2757 }, { "epoch": 0.41, "learning_rate": 1.3400940475311193e-05, "loss": 0.8228, "step": 2758 }, { "epoch": 0.41, "learning_rate": 1.3396435851790364e-05, "loss": 0.8076, "step": 2759 }, { "epoch": 0.41, "learning_rate": 1.3391930449071424e-05, "loss": 0.8589, "step": 2760 }, { "epoch": 0.41, "learning_rate": 1.3387424268187982e-05, "loss": 0.8345, "step": 2761 }, { "epoch": 0.41, "learning_rate": 1.3382917310173835e-05, "loss": 0.7798, "step": 2762 }, { "epoch": 0.41, "learning_rate": 1.3378409576062952e-05, "loss": 0.8076, "step": 2763 }, { "epoch": 0.41, "learning_rate": 1.3373901066889477e-05, "loss": 0.8574, "step": 2764 }, { "epoch": 0.41, "learning_rate": 1.3369391783687742e-05, "loss": 0.7749, "step": 2765 }, { "epoch": 0.41, "learning_rate": 1.3364881727492247e-05, "loss": 0.8467, "step": 2766 }, { "epoch": 0.41, "learning_rate": 1.3360370899337674e-05, "loss": 0.8057, "step": 2767 }, { "epoch": 0.41, "learning_rate": 1.3355859300258878e-05, "loss": 0.8398, "step": 2768 }, { "epoch": 0.41, "learning_rate": 1.3351346931290899e-05, "loss": 0.8032, "step": 2769 }, { "epoch": 0.41, "learning_rate": 1.3346833793468943e-05, "loss": 0.8252, "step": 2770 }, { "epoch": 0.41, "learning_rate": 1.3342319887828402e-05, "loss": 0.8525, "step": 2771 }, { "epoch": 0.41, "learning_rate": 1.3337805215404837e-05, "loss": 0.3821, "step": 2772 }, { "epoch": 0.41, "learning_rate": 1.3333289777233993e-05, "loss": 0.771, "step": 2773 }, { "epoch": 0.41, "learning_rate": 1.3328773574351779e-05, "loss": 0.9141, "step": 2774 }, { "epoch": 0.41, "learning_rate": 1.3324256607794292e-05, "loss": 0.8569, "step": 2775 }, { "epoch": 0.41, "learning_rate": 1.3319738878597792e-05, "loss": 0.8267, "step": 2776 }, { "epoch": 0.41, "learning_rate": 1.3315220387798728e-05, "loss": 0.7993, "step": 2777 }, { "epoch": 0.41, "learning_rate": 1.3310701136433708e-05, "loss": 0.9004, "step": 2778 }, { "epoch": 0.41, "learning_rate": 1.3306181125539528e-05, "loss": 0.8496, "step": 2779 }, { "epoch": 0.41, "learning_rate": 1.330166035615315e-05, "loss": 0.8423, "step": 2780 }, { "epoch": 0.41, "learning_rate": 1.3297138829311713e-05, "loss": 0.8364, "step": 2781 }, { "epoch": 0.41, "learning_rate": 1.329261654605253e-05, "loss": 0.8203, "step": 2782 }, { "epoch": 0.41, "learning_rate": 1.3288093507413086e-05, "loss": 0.8633, "step": 2783 }, { "epoch": 0.41, "learning_rate": 1.3283569714431042e-05, "loss": 0.9351, "step": 2784 }, { "epoch": 0.41, "learning_rate": 1.3279045168144228e-05, "loss": 0.8472, "step": 2785 }, { "epoch": 0.41, "learning_rate": 1.3274519869590656e-05, "loss": 0.71, "step": 2786 }, { "epoch": 0.41, "learning_rate": 1.3269993819808493e-05, "loss": 0.769, "step": 2787 }, { "epoch": 0.41, "learning_rate": 1.3265467019836095e-05, "loss": 0.8931, "step": 2788 }, { "epoch": 0.41, "learning_rate": 1.3260939470711984e-05, "loss": 0.8369, "step": 2789 }, { "epoch": 0.41, "learning_rate": 1.3256411173474854e-05, "loss": 0.8154, "step": 2790 }, { "epoch": 0.41, "learning_rate": 1.325188212916357e-05, "loss": 0.8408, "step": 2791 }, { "epoch": 0.41, "learning_rate": 1.3247352338817172e-05, "loss": 0.8354, "step": 2792 }, { "epoch": 0.41, "learning_rate": 1.3242821803474861e-05, "loss": 0.875, "step": 2793 }, { "epoch": 0.41, "learning_rate": 1.3238290524176023e-05, "loss": 0.7456, "step": 2794 }, { "epoch": 0.41, "learning_rate": 1.3233758501960205e-05, "loss": 0.7817, "step": 2795 }, { "epoch": 0.41, "learning_rate": 1.3229225737867126e-05, "loss": 0.8511, "step": 2796 }, { "epoch": 0.41, "learning_rate": 1.3224692232936685e-05, "loss": 0.8716, "step": 2797 }, { "epoch": 0.41, "learning_rate": 1.3220157988208926e-05, "loss": 0.8096, "step": 2798 }, { "epoch": 0.41, "learning_rate": 1.3215623004724096e-05, "loss": 0.8354, "step": 2799 }, { "epoch": 0.41, "learning_rate": 1.3211087283522586e-05, "loss": 0.8369, "step": 2800 }, { "epoch": 0.41, "learning_rate": 1.3206550825644965e-05, "loss": 0.8423, "step": 2801 }, { "epoch": 0.41, "learning_rate": 1.3202013632131973e-05, "loss": 0.894, "step": 2802 }, { "epoch": 0.41, "learning_rate": 1.3197475704024516e-05, "loss": 0.8052, "step": 2803 }, { "epoch": 0.41, "learning_rate": 1.319293704236367e-05, "loss": 0.7622, "step": 2804 }, { "epoch": 0.41, "learning_rate": 1.3188397648190676e-05, "loss": 0.8247, "step": 2805 }, { "epoch": 0.41, "learning_rate": 1.3183857522546948e-05, "loss": 0.9395, "step": 2806 }, { "epoch": 0.42, "learning_rate": 1.3179316666474063e-05, "loss": 0.8838, "step": 2807 }, { "epoch": 0.42, "learning_rate": 1.3174775081013768e-05, "loss": 0.8242, "step": 2808 }, { "epoch": 0.42, "learning_rate": 1.3170232767207982e-05, "loss": 0.8232, "step": 2809 }, { "epoch": 0.42, "learning_rate": 1.3165689726098779e-05, "loss": 0.8242, "step": 2810 }, { "epoch": 0.42, "learning_rate": 1.3161145958728412e-05, "loss": 0.813, "step": 2811 }, { "epoch": 0.42, "learning_rate": 1.3156601466139293e-05, "loss": 0.832, "step": 2812 }, { "epoch": 0.42, "learning_rate": 1.3152056249374008e-05, "loss": 0.8936, "step": 2813 }, { "epoch": 0.42, "learning_rate": 1.3147510309475301e-05, "loss": 0.8594, "step": 2814 }, { "epoch": 0.42, "learning_rate": 1.3142963647486084e-05, "loss": 0.8423, "step": 2815 }, { "epoch": 0.42, "learning_rate": 1.3138416264449439e-05, "loss": 0.8481, "step": 2816 }, { "epoch": 0.42, "learning_rate": 1.3133868161408605e-05, "loss": 0.8555, "step": 2817 }, { "epoch": 0.42, "learning_rate": 1.3129319339406998e-05, "loss": 0.7896, "step": 2818 }, { "epoch": 0.42, "learning_rate": 1.3124769799488189e-05, "loss": 0.8594, "step": 2819 }, { "epoch": 0.42, "learning_rate": 1.3120219542695916e-05, "loss": 0.3273, "step": 2820 }, { "epoch": 0.42, "learning_rate": 1.3115668570074083e-05, "loss": 0.79, "step": 2821 }, { "epoch": 0.42, "learning_rate": 1.3111116882666764e-05, "loss": 0.8125, "step": 2822 }, { "epoch": 0.42, "learning_rate": 1.310656448151818e-05, "loss": 0.833, "step": 2823 }, { "epoch": 0.42, "learning_rate": 1.3102011367672734e-05, "loss": 0.8345, "step": 2824 }, { "epoch": 0.42, "learning_rate": 1.3097457542174983e-05, "loss": 0.7666, "step": 2825 }, { "epoch": 0.42, "learning_rate": 1.3092903006069651e-05, "loss": 0.9297, "step": 2826 }, { "epoch": 0.42, "learning_rate": 1.308834776040162e-05, "loss": 0.8765, "step": 2827 }, { "epoch": 0.42, "learning_rate": 1.308379180621594e-05, "loss": 0.8784, "step": 2828 }, { "epoch": 0.42, "learning_rate": 1.307923514455782e-05, "loss": 0.8257, "step": 2829 }, { "epoch": 0.42, "learning_rate": 1.3074677776472635e-05, "loss": 0.8794, "step": 2830 }, { "epoch": 0.42, "learning_rate": 1.3070119703005922e-05, "loss": 0.8389, "step": 2831 }, { "epoch": 0.42, "learning_rate": 1.3065560925203373e-05, "loss": 0.3099, "step": 2832 }, { "epoch": 0.42, "learning_rate": 1.3061001444110852e-05, "loss": 0.8276, "step": 2833 }, { "epoch": 0.42, "learning_rate": 1.3056441260774371e-05, "loss": 0.8608, "step": 2834 }, { "epoch": 0.42, "learning_rate": 1.3051880376240117e-05, "loss": 0.8003, "step": 2835 }, { "epoch": 0.42, "learning_rate": 1.3047318791554431e-05, "loss": 0.752, "step": 2836 }, { "epoch": 0.42, "learning_rate": 1.3042756507763813e-05, "loss": 0.8062, "step": 2837 }, { "epoch": 0.42, "learning_rate": 1.3038193525914928e-05, "loss": 0.8149, "step": 2838 }, { "epoch": 0.42, "learning_rate": 1.3033629847054598e-05, "loss": 0.8013, "step": 2839 }, { "epoch": 0.42, "learning_rate": 1.3029065472229806e-05, "loss": 0.8359, "step": 2840 }, { "epoch": 0.42, "learning_rate": 1.3024500402487697e-05, "loss": 0.7896, "step": 2841 }, { "epoch": 0.42, "learning_rate": 1.3019934638875565e-05, "loss": 0.8813, "step": 2842 }, { "epoch": 0.42, "learning_rate": 1.3015368182440878e-05, "loss": 0.834, "step": 2843 }, { "epoch": 0.42, "learning_rate": 1.3010801034231255e-05, "loss": 0.8213, "step": 2844 }, { "epoch": 0.42, "learning_rate": 1.3006233195294474e-05, "loss": 0.8662, "step": 2845 }, { "epoch": 0.42, "learning_rate": 1.3001664666678475e-05, "loss": 0.8501, "step": 2846 }, { "epoch": 0.42, "learning_rate": 1.2997095449431348e-05, "loss": 0.8213, "step": 2847 }, { "epoch": 0.42, "learning_rate": 1.299252554460135e-05, "loss": 0.8589, "step": 2848 }, { "epoch": 0.42, "learning_rate": 1.298795495323689e-05, "loss": 0.8076, "step": 2849 }, { "epoch": 0.42, "learning_rate": 1.298338367638654e-05, "loss": 0.854, "step": 2850 }, { "epoch": 0.42, "learning_rate": 1.2978811715099023e-05, "loss": 0.8286, "step": 2851 }, { "epoch": 0.42, "learning_rate": 1.2974239070423222e-05, "loss": 0.834, "step": 2852 }, { "epoch": 0.42, "learning_rate": 1.296966574340818e-05, "loss": 0.6997, "step": 2853 }, { "epoch": 0.42, "learning_rate": 1.296509173510309e-05, "loss": 0.8608, "step": 2854 }, { "epoch": 0.42, "learning_rate": 1.2960517046557304e-05, "loss": 0.7734, "step": 2855 }, { "epoch": 0.42, "learning_rate": 1.2955941678820332e-05, "loss": 0.8975, "step": 2856 }, { "epoch": 0.42, "learning_rate": 1.295136563294184e-05, "loss": 0.8047, "step": 2857 }, { "epoch": 0.42, "learning_rate": 1.2946788909971646e-05, "loss": 0.7896, "step": 2858 }, { "epoch": 0.42, "learning_rate": 1.2942211510959726e-05, "loss": 0.8408, "step": 2859 }, { "epoch": 0.42, "learning_rate": 1.2937633436956208e-05, "loss": 0.8394, "step": 2860 }, { "epoch": 0.42, "learning_rate": 1.293305468901138e-05, "loss": 0.7637, "step": 2861 }, { "epoch": 0.42, "learning_rate": 1.2928475268175683e-05, "loss": 0.855, "step": 2862 }, { "epoch": 0.42, "learning_rate": 1.292389517549971e-05, "loss": 0.8682, "step": 2863 }, { "epoch": 0.42, "learning_rate": 1.2919314412034207e-05, "loss": 0.8438, "step": 2864 }, { "epoch": 0.42, "learning_rate": 1.2914732978830077e-05, "loss": 0.8267, "step": 2865 }, { "epoch": 0.42, "learning_rate": 1.2910150876938381e-05, "loss": 0.8755, "step": 2866 }, { "epoch": 0.42, "learning_rate": 1.2905568107410325e-05, "loss": 0.7661, "step": 2867 }, { "epoch": 0.42, "learning_rate": 1.290098467129727e-05, "loss": 0.895, "step": 2868 }, { "epoch": 0.42, "learning_rate": 1.2896400569650734e-05, "loss": 0.8682, "step": 2869 }, { "epoch": 0.42, "learning_rate": 1.2891815803522378e-05, "loss": 0.8442, "step": 2870 }, { "epoch": 0.42, "learning_rate": 1.2887230373964034e-05, "loss": 0.8506, "step": 2871 }, { "epoch": 0.42, "learning_rate": 1.2882644282027668e-05, "loss": 0.8335, "step": 2872 }, { "epoch": 0.42, "learning_rate": 1.2878057528765407e-05, "loss": 0.8145, "step": 2873 }, { "epoch": 0.43, "learning_rate": 1.2873470115229523e-05, "loss": 0.9297, "step": 2874 }, { "epoch": 0.43, "learning_rate": 1.2868882042472448e-05, "loss": 0.8867, "step": 2875 }, { "epoch": 0.43, "learning_rate": 1.2864293311546762e-05, "loss": 0.8374, "step": 2876 }, { "epoch": 0.43, "learning_rate": 1.2859703923505194e-05, "loss": 0.8477, "step": 2877 }, { "epoch": 0.43, "learning_rate": 1.2855113879400625e-05, "loss": 0.8926, "step": 2878 }, { "epoch": 0.43, "learning_rate": 1.2850523180286084e-05, "loss": 0.8867, "step": 2879 }, { "epoch": 0.43, "learning_rate": 1.2845931827214755e-05, "loss": 0.8486, "step": 2880 }, { "epoch": 0.43, "learning_rate": 1.284133982123997e-05, "loss": 0.8657, "step": 2881 }, { "epoch": 0.43, "learning_rate": 1.283674716341521e-05, "loss": 0.8677, "step": 2882 }, { "epoch": 0.43, "learning_rate": 1.2832153854794105e-05, "loss": 0.8613, "step": 2883 }, { "epoch": 0.43, "learning_rate": 1.2827559896430437e-05, "loss": 0.8853, "step": 2884 }, { "epoch": 0.43, "learning_rate": 1.2822965289378134e-05, "loss": 0.8662, "step": 2885 }, { "epoch": 0.43, "learning_rate": 1.2818370034691277e-05, "loss": 0.8345, "step": 2886 }, { "epoch": 0.43, "learning_rate": 1.281377413342409e-05, "loss": 0.9038, "step": 2887 }, { "epoch": 0.43, "learning_rate": 1.2809177586630948e-05, "loss": 0.8398, "step": 2888 }, { "epoch": 0.43, "learning_rate": 1.2804580395366375e-05, "loss": 0.8755, "step": 2889 }, { "epoch": 0.43, "learning_rate": 1.2799982560685042e-05, "loss": 0.8232, "step": 2890 }, { "epoch": 0.43, "learning_rate": 1.279538408364177e-05, "loss": 0.9043, "step": 2891 }, { "epoch": 0.43, "learning_rate": 1.2790784965291522e-05, "loss": 0.8779, "step": 2892 }, { "epoch": 0.43, "learning_rate": 1.2786185206689412e-05, "loss": 0.8325, "step": 2893 }, { "epoch": 0.43, "learning_rate": 1.27815848088907e-05, "loss": 0.853, "step": 2894 }, { "epoch": 0.43, "learning_rate": 1.2776983772950797e-05, "loss": 0.8359, "step": 2895 }, { "epoch": 0.43, "learning_rate": 1.2772382099925248e-05, "loss": 0.8711, "step": 2896 }, { "epoch": 0.43, "learning_rate": 1.276777979086976e-05, "loss": 0.8994, "step": 2897 }, { "epoch": 0.43, "learning_rate": 1.276317684684017e-05, "loss": 0.3552, "step": 2898 }, { "epoch": 0.43, "learning_rate": 1.275857326889248e-05, "loss": 0.7876, "step": 2899 }, { "epoch": 0.43, "learning_rate": 1.2753969058082817e-05, "loss": 0.8145, "step": 2900 }, { "epoch": 0.43, "learning_rate": 1.2749364215467464e-05, "loss": 0.8804, "step": 2901 }, { "epoch": 0.43, "learning_rate": 1.274475874210285e-05, "loss": 0.8833, "step": 2902 }, { "epoch": 0.43, "learning_rate": 1.2740152639045546e-05, "loss": 0.8223, "step": 2903 }, { "epoch": 0.43, "learning_rate": 1.2735545907352264e-05, "loss": 0.8818, "step": 2904 }, { "epoch": 0.43, "learning_rate": 1.2730938548079873e-05, "loss": 0.8301, "step": 2905 }, { "epoch": 0.43, "learning_rate": 1.2726330562285362e-05, "loss": 0.8672, "step": 2906 }, { "epoch": 0.43, "learning_rate": 1.2721721951025892e-05, "loss": 0.812, "step": 2907 }, { "epoch": 0.43, "learning_rate": 1.2717112715358748e-05, "loss": 0.8418, "step": 2908 }, { "epoch": 0.43, "learning_rate": 1.2712502856341364e-05, "loss": 0.7603, "step": 2909 }, { "epoch": 0.43, "learning_rate": 1.270789237503132e-05, "loss": 0.791, "step": 2910 }, { "epoch": 0.43, "learning_rate": 1.2703281272486334e-05, "loss": 0.8169, "step": 2911 }, { "epoch": 0.43, "learning_rate": 1.2698669549764272e-05, "loss": 0.897, "step": 2912 }, { "epoch": 0.43, "learning_rate": 1.2694057207923134e-05, "loss": 0.8086, "step": 2913 }, { "epoch": 0.43, "learning_rate": 1.268944424802107e-05, "loss": 0.8608, "step": 2914 }, { "epoch": 0.43, "learning_rate": 1.2684830671116364e-05, "loss": 0.7881, "step": 2915 }, { "epoch": 0.43, "learning_rate": 1.2680216478267453e-05, "loss": 0.8403, "step": 2916 }, { "epoch": 0.43, "learning_rate": 1.2675601670532905e-05, "loss": 0.8198, "step": 2917 }, { "epoch": 0.43, "learning_rate": 1.2670986248971433e-05, "loss": 0.8545, "step": 2918 }, { "epoch": 0.43, "learning_rate": 1.266637021464189e-05, "loss": 0.3522, "step": 2919 }, { "epoch": 0.43, "learning_rate": 1.2661753568603273e-05, "loss": 0.8311, "step": 2920 }, { "epoch": 0.43, "learning_rate": 1.265713631191471e-05, "loss": 0.875, "step": 2921 }, { "epoch": 0.43, "learning_rate": 1.2652518445635479e-05, "loss": 0.8579, "step": 2922 }, { "epoch": 0.43, "learning_rate": 1.2647899970825e-05, "loss": 0.8818, "step": 2923 }, { "epoch": 0.43, "learning_rate": 1.2643280888542815e-05, "loss": 0.8154, "step": 2924 }, { "epoch": 0.43, "learning_rate": 1.263866119984863e-05, "loss": 0.9102, "step": 2925 }, { "epoch": 0.43, "learning_rate": 1.2634040905802267e-05, "loss": 0.8335, "step": 2926 }, { "epoch": 0.43, "learning_rate": 1.2629420007463705e-05, "loss": 0.8506, "step": 2927 }, { "epoch": 0.43, "learning_rate": 1.2624798505893048e-05, "loss": 0.8267, "step": 2928 }, { "epoch": 0.43, "learning_rate": 1.2620176402150546e-05, "loss": 0.8115, "step": 2929 }, { "epoch": 0.43, "learning_rate": 1.2615553697296585e-05, "loss": 0.9009, "step": 2930 }, { "epoch": 0.43, "learning_rate": 1.2610930392391694e-05, "loss": 0.8574, "step": 2931 }, { "epoch": 0.43, "learning_rate": 1.2606306488496526e-05, "loss": 0.79, "step": 2932 }, { "epoch": 0.43, "learning_rate": 1.260168198667189e-05, "loss": 0.8228, "step": 2933 }, { "epoch": 0.43, "learning_rate": 1.2597056887978718e-05, "loss": 0.8315, "step": 2934 }, { "epoch": 0.43, "learning_rate": 1.2592431193478085e-05, "loss": 0.3303, "step": 2935 }, { "epoch": 0.43, "learning_rate": 1.2587804904231198e-05, "loss": 0.8042, "step": 2936 }, { "epoch": 0.43, "learning_rate": 1.2583178021299407e-05, "loss": 0.8594, "step": 2937 }, { "epoch": 0.43, "learning_rate": 1.257855054574419e-05, "loss": 0.8696, "step": 2938 }, { "epoch": 0.43, "learning_rate": 1.2573922478627173e-05, "loss": 0.8535, "step": 2939 }, { "epoch": 0.43, "learning_rate": 1.2569293821010109e-05, "loss": 0.8423, "step": 2940 }, { "epoch": 0.43, "learning_rate": 1.2564664573954883e-05, "loss": 0.8286, "step": 2941 }, { "epoch": 0.44, "learning_rate": 1.2560034738523524e-05, "loss": 0.832, "step": 2942 }, { "epoch": 0.44, "learning_rate": 1.255540431577819e-05, "loss": 0.7871, "step": 2943 }, { "epoch": 0.44, "learning_rate": 1.2550773306781181e-05, "loss": 0.8765, "step": 2944 }, { "epoch": 0.44, "learning_rate": 1.254614171259492e-05, "loss": 0.8232, "step": 2945 }, { "epoch": 0.44, "learning_rate": 1.2541509534281974e-05, "loss": 0.8062, "step": 2946 }, { "epoch": 0.44, "learning_rate": 1.253687677290504e-05, "loss": 0.8608, "step": 2947 }, { "epoch": 0.44, "learning_rate": 1.2532243429526951e-05, "loss": 0.8247, "step": 2948 }, { "epoch": 0.44, "learning_rate": 1.252760950521067e-05, "loss": 0.8877, "step": 2949 }, { "epoch": 0.44, "learning_rate": 1.2522975001019298e-05, "loss": 0.8828, "step": 2950 }, { "epoch": 0.44, "learning_rate": 1.251833991801606e-05, "loss": 0.8193, "step": 2951 }, { "epoch": 0.44, "learning_rate": 1.2513704257264327e-05, "loss": 0.8569, "step": 2952 }, { "epoch": 0.44, "learning_rate": 1.2509068019827592e-05, "loss": 0.9185, "step": 2953 }, { "epoch": 0.44, "learning_rate": 1.2504431206769487e-05, "loss": 0.8584, "step": 2954 }, { "epoch": 0.44, "learning_rate": 1.2499793819153772e-05, "loss": 0.8257, "step": 2955 }, { "epoch": 0.44, "learning_rate": 1.2495155858044332e-05, "loss": 0.8726, "step": 2956 }, { "epoch": 0.44, "learning_rate": 1.2490517324505205e-05, "loss": 0.8247, "step": 2957 }, { "epoch": 0.44, "learning_rate": 1.2485878219600537e-05, "loss": 0.8511, "step": 2958 }, { "epoch": 0.44, "learning_rate": 1.248123854439462e-05, "loss": 0.8721, "step": 2959 }, { "epoch": 0.44, "learning_rate": 1.2476598299951866e-05, "loss": 0.8433, "step": 2960 }, { "epoch": 0.44, "learning_rate": 1.247195748733683e-05, "loss": 0.8516, "step": 2961 }, { "epoch": 0.44, "learning_rate": 1.2467316107614185e-05, "loss": 0.8525, "step": 2962 }, { "epoch": 0.44, "learning_rate": 1.2462674161848742e-05, "loss": 0.811, "step": 2963 }, { "epoch": 0.44, "learning_rate": 1.245803165110544e-05, "loss": 0.8367, "step": 2964 }, { "epoch": 0.44, "learning_rate": 1.2453388576449343e-05, "loss": 0.7827, "step": 2965 }, { "epoch": 0.44, "learning_rate": 1.2448744938945656e-05, "loss": 0.7959, "step": 2966 }, { "epoch": 0.44, "learning_rate": 1.2444100739659702e-05, "loss": 0.8638, "step": 2967 }, { "epoch": 0.44, "learning_rate": 1.2439455979656931e-05, "loss": 0.3643, "step": 2968 }, { "epoch": 0.44, "learning_rate": 1.2434810660002937e-05, "loss": 0.8413, "step": 2969 }, { "epoch": 0.44, "learning_rate": 1.2430164781763422e-05, "loss": 0.8394, "step": 2970 }, { "epoch": 0.44, "learning_rate": 1.2425518346004237e-05, "loss": 0.8394, "step": 2971 }, { "epoch": 0.44, "learning_rate": 1.2420871353791348e-05, "loss": 0.318, "step": 2972 }, { "epoch": 0.44, "learning_rate": 1.2416223806190846e-05, "loss": 0.8413, "step": 2973 }, { "epoch": 0.44, "learning_rate": 1.2411575704268957e-05, "loss": 0.8457, "step": 2974 }, { "epoch": 0.44, "learning_rate": 1.2406927049092034e-05, "loss": 0.8535, "step": 2975 }, { "epoch": 0.44, "learning_rate": 1.2402277841726555e-05, "loss": 0.8735, "step": 2976 }, { "epoch": 0.44, "learning_rate": 1.2397628083239122e-05, "loss": 0.8193, "step": 2977 }, { "epoch": 0.44, "learning_rate": 1.2392977774696466e-05, "loss": 0.8457, "step": 2978 }, { "epoch": 0.44, "learning_rate": 1.2388326917165445e-05, "loss": 0.8311, "step": 2979 }, { "epoch": 0.44, "learning_rate": 1.2383675511713045e-05, "loss": 0.3267, "step": 2980 }, { "epoch": 0.44, "learning_rate": 1.2379023559406368e-05, "loss": 0.8662, "step": 2981 }, { "epoch": 0.44, "learning_rate": 1.2374371061312655e-05, "loss": 0.8477, "step": 2982 }, { "epoch": 0.44, "learning_rate": 1.236971801849926e-05, "loss": 0.8389, "step": 2983 }, { "epoch": 0.44, "learning_rate": 1.2365064432033674e-05, "loss": 0.8359, "step": 2984 }, { "epoch": 0.44, "learning_rate": 1.2360410302983497e-05, "loss": 0.8203, "step": 2985 }, { "epoch": 0.44, "learning_rate": 1.2355755632416469e-05, "loss": 0.8521, "step": 2986 }, { "epoch": 0.44, "learning_rate": 1.2351100421400444e-05, "loss": 0.8413, "step": 2987 }, { "epoch": 0.44, "learning_rate": 1.2346444671003408e-05, "loss": 0.874, "step": 2988 }, { "epoch": 0.44, "learning_rate": 1.2341788382293467e-05, "loss": 0.875, "step": 2989 }, { "epoch": 0.44, "learning_rate": 1.2337131556338843e-05, "loss": 0.8594, "step": 2990 }, { "epoch": 0.44, "learning_rate": 1.2332474194207895e-05, "loss": 0.814, "step": 2991 }, { "epoch": 0.44, "learning_rate": 1.2327816296969095e-05, "loss": 0.855, "step": 2992 }, { "epoch": 0.44, "learning_rate": 1.2323157865691045e-05, "loss": 0.8809, "step": 2993 }, { "epoch": 0.44, "learning_rate": 1.2318498901442461e-05, "loss": 0.8784, "step": 2994 }, { "epoch": 0.44, "learning_rate": 1.231383940529219e-05, "loss": 0.8645, "step": 2995 }, { "epoch": 0.44, "learning_rate": 1.2309179378309188e-05, "loss": 0.9399, "step": 2996 }, { "epoch": 0.44, "learning_rate": 1.2304518821562554e-05, "loss": 0.8994, "step": 2997 }, { "epoch": 0.44, "learning_rate": 1.2299857736121487e-05, "loss": 0.855, "step": 2998 }, { "epoch": 0.44, "learning_rate": 1.2295196123055325e-05, "loss": 0.9102, "step": 2999 }, { "epoch": 0.44, "learning_rate": 1.2290533983433508e-05, "loss": 0.9121, "step": 3000 }, { "epoch": 0.44, "learning_rate": 1.2285871318325611e-05, "loss": 0.8931, "step": 3001 }, { "epoch": 0.44, "learning_rate": 1.2281208128801331e-05, "loss": 0.8652, "step": 3002 }, { "epoch": 0.44, "learning_rate": 1.2276544415930476e-05, "loss": 0.9165, "step": 3003 }, { "epoch": 0.44, "learning_rate": 1.227188018078298e-05, "loss": 0.9033, "step": 3004 }, { "epoch": 0.44, "learning_rate": 1.2267215424428894e-05, "loss": 0.8486, "step": 3005 }, { "epoch": 0.44, "learning_rate": 1.2262550147938389e-05, "loss": 0.875, "step": 3006 }, { "epoch": 0.44, "learning_rate": 1.2257884352381755e-05, "loss": 0.8276, "step": 3007 }, { "epoch": 0.44, "learning_rate": 1.2253218038829404e-05, "loss": 0.8999, "step": 3008 }, { "epoch": 0.44, "learning_rate": 1.2248551208351866e-05, "loss": 0.855, "step": 3009 }, { "epoch": 0.45, "learning_rate": 1.2243883862019787e-05, "loss": 0.8901, "step": 3010 }, { "epoch": 0.45, "learning_rate": 1.223921600090393e-05, "loss": 0.897, "step": 3011 }, { "epoch": 0.45, "learning_rate": 1.2234547626075185e-05, "loss": 0.8979, "step": 3012 }, { "epoch": 0.45, "learning_rate": 1.222987873860455e-05, "loss": 0.897, "step": 3013 }, { "epoch": 0.45, "learning_rate": 1.2225209339563144e-05, "loss": 0.8325, "step": 3014 }, { "epoch": 0.45, "learning_rate": 1.2220539430022206e-05, "loss": 0.856, "step": 3015 }, { "epoch": 0.45, "learning_rate": 1.221586901105309e-05, "loss": 0.7322, "step": 3016 }, { "epoch": 0.45, "learning_rate": 1.2211198083727262e-05, "loss": 0.8779, "step": 3017 }, { "epoch": 0.45, "learning_rate": 1.2206526649116315e-05, "loss": 0.8008, "step": 3018 }, { "epoch": 0.45, "learning_rate": 1.2201854708291949e-05, "loss": 0.8081, "step": 3019 }, { "epoch": 0.45, "learning_rate": 1.2197182262325987e-05, "loss": 0.3074, "step": 3020 }, { "epoch": 0.45, "learning_rate": 1.2192509312290362e-05, "loss": 0.8896, "step": 3021 }, { "epoch": 0.45, "learning_rate": 1.2187835859257126e-05, "loss": 0.8193, "step": 3022 }, { "epoch": 0.45, "learning_rate": 1.2183161904298447e-05, "loss": 0.8013, "step": 3023 }, { "epoch": 0.45, "learning_rate": 1.2178487448486607e-05, "loss": 0.8457, "step": 3024 }, { "epoch": 0.45, "learning_rate": 1.2173812492894001e-05, "loss": 0.8945, "step": 3025 }, { "epoch": 0.45, "learning_rate": 1.2169137038593142e-05, "loss": 0.3496, "step": 3026 }, { "epoch": 0.45, "learning_rate": 1.2164461086656656e-05, "loss": 0.9067, "step": 3027 }, { "epoch": 0.45, "learning_rate": 1.2159784638157282e-05, "loss": 0.8784, "step": 3028 }, { "epoch": 0.45, "learning_rate": 1.2155107694167875e-05, "loss": 0.9009, "step": 3029 }, { "epoch": 0.45, "learning_rate": 1.2150430255761402e-05, "loss": 0.8735, "step": 3030 }, { "epoch": 0.45, "learning_rate": 1.2145752324010948e-05, "loss": 0.8936, "step": 3031 }, { "epoch": 0.45, "learning_rate": 1.2141073899989699e-05, "loss": 0.8672, "step": 3032 }, { "epoch": 0.45, "learning_rate": 1.2136394984770967e-05, "loss": 0.8657, "step": 3033 }, { "epoch": 0.45, "learning_rate": 1.2131715579428175e-05, "loss": 0.8682, "step": 3034 }, { "epoch": 0.45, "learning_rate": 1.2127035685034852e-05, "loss": 0.8857, "step": 3035 }, { "epoch": 0.45, "learning_rate": 1.2122355302664643e-05, "loss": 0.8198, "step": 3036 }, { "epoch": 0.45, "learning_rate": 1.2117674433391302e-05, "loss": 0.8315, "step": 3037 }, { "epoch": 0.45, "learning_rate": 1.2112993078288702e-05, "loss": 0.3191, "step": 3038 }, { "epoch": 0.45, "learning_rate": 1.2108311238430819e-05, "loss": 0.8848, "step": 3039 }, { "epoch": 0.45, "learning_rate": 1.2103628914891747e-05, "loss": 0.8188, "step": 3040 }, { "epoch": 0.45, "learning_rate": 1.2098946108745682e-05, "loss": 0.9067, "step": 3041 }, { "epoch": 0.45, "learning_rate": 1.2094262821066944e-05, "loss": 0.877, "step": 3042 }, { "epoch": 0.45, "learning_rate": 1.2089579052929952e-05, "loss": 0.3726, "step": 3043 }, { "epoch": 0.45, "learning_rate": 1.2084894805409242e-05, "loss": 0.9238, "step": 3044 }, { "epoch": 0.45, "learning_rate": 1.2080210079579452e-05, "loss": 0.8071, "step": 3045 }, { "epoch": 0.45, "learning_rate": 1.2075524876515339e-05, "loss": 0.8433, "step": 3046 }, { "epoch": 0.45, "learning_rate": 1.2070839197291764e-05, "loss": 0.8438, "step": 3047 }, { "epoch": 0.45, "learning_rate": 1.20661530429837e-05, "loss": 0.9468, "step": 3048 }, { "epoch": 0.45, "learning_rate": 1.2061466414666228e-05, "loss": 0.8018, "step": 3049 }, { "epoch": 0.45, "learning_rate": 1.2056779313414536e-05, "loss": 0.8364, "step": 3050 }, { "epoch": 0.45, "learning_rate": 1.2052091740303919e-05, "loss": 0.856, "step": 3051 }, { "epoch": 0.45, "learning_rate": 1.2047403696409787e-05, "loss": 0.8311, "step": 3052 }, { "epoch": 0.45, "learning_rate": 1.2042715182807659e-05, "loss": 0.8467, "step": 3053 }, { "epoch": 0.45, "learning_rate": 1.2038026200573148e-05, "loss": 0.3369, "step": 3054 }, { "epoch": 0.45, "learning_rate": 1.2033336750781985e-05, "loss": 0.8662, "step": 3055 }, { "epoch": 0.45, "learning_rate": 1.2028646834510012e-05, "loss": 0.8369, "step": 3056 }, { "epoch": 0.45, "learning_rate": 1.202395645283317e-05, "loss": 0.9014, "step": 3057 }, { "epoch": 0.45, "learning_rate": 1.2019265606827507e-05, "loss": 0.8315, "step": 3058 }, { "epoch": 0.45, "learning_rate": 1.2014574297569182e-05, "loss": 0.3065, "step": 3059 }, { "epoch": 0.45, "learning_rate": 1.200988252613446e-05, "loss": 0.8604, "step": 3060 }, { "epoch": 0.45, "learning_rate": 1.200519029359971e-05, "loss": 0.8462, "step": 3061 }, { "epoch": 0.45, "learning_rate": 1.2000497601041401e-05, "loss": 0.8076, "step": 3062 }, { "epoch": 0.45, "learning_rate": 1.1995804449536122e-05, "loss": 0.8662, "step": 3063 }, { "epoch": 0.45, "learning_rate": 1.1991110840160554e-05, "loss": 0.8735, "step": 3064 }, { "epoch": 0.45, "learning_rate": 1.198641677399149e-05, "loss": 0.8623, "step": 3065 }, { "epoch": 0.45, "learning_rate": 1.1981722252105827e-05, "loss": 0.7993, "step": 3066 }, { "epoch": 0.45, "learning_rate": 1.1977027275580561e-05, "loss": 0.8662, "step": 3067 }, { "epoch": 0.45, "learning_rate": 1.1972331845492801e-05, "loss": 0.8418, "step": 3068 }, { "epoch": 0.45, "learning_rate": 1.1967635962919754e-05, "loss": 0.7881, "step": 3069 }, { "epoch": 0.45, "learning_rate": 1.1962939628938735e-05, "loss": 0.8213, "step": 3070 }, { "epoch": 0.45, "learning_rate": 1.1958242844627155e-05, "loss": 0.8218, "step": 3071 }, { "epoch": 0.45, "learning_rate": 1.1953545611062536e-05, "loss": 0.8042, "step": 3072 }, { "epoch": 0.45, "learning_rate": 1.1948847929322498e-05, "loss": 0.9072, "step": 3073 }, { "epoch": 0.45, "learning_rate": 1.1944149800484774e-05, "loss": 0.8115, "step": 3074 }, { "epoch": 0.45, "learning_rate": 1.1939451225627184e-05, "loss": 0.8892, "step": 3075 }, { "epoch": 0.45, "learning_rate": 1.1934752205827663e-05, "loss": 0.8164, "step": 3076 }, { "epoch": 0.46, "learning_rate": 1.1930052742164234e-05, "loss": 0.8486, "step": 3077 }, { "epoch": 0.46, "learning_rate": 1.1925352835715045e-05, "loss": 0.8579, "step": 3078 }, { "epoch": 0.46, "learning_rate": 1.1920652487558322e-05, "loss": 0.8496, "step": 3079 }, { "epoch": 0.46, "learning_rate": 1.1915951698772403e-05, "loss": 0.8823, "step": 3080 }, { "epoch": 0.46, "learning_rate": 1.1911250470435731e-05, "loss": 0.8657, "step": 3081 }, { "epoch": 0.46, "learning_rate": 1.1906548803626839e-05, "loss": 0.9292, "step": 3082 }, { "epoch": 0.46, "learning_rate": 1.1901846699424374e-05, "loss": 0.3149, "step": 3083 }, { "epoch": 0.46, "learning_rate": 1.1897144158907066e-05, "loss": 0.8892, "step": 3084 }, { "epoch": 0.46, "learning_rate": 1.1892441183153762e-05, "loss": 0.8057, "step": 3085 }, { "epoch": 0.46, "learning_rate": 1.18877377732434e-05, "loss": 0.8223, "step": 3086 }, { "epoch": 0.46, "learning_rate": 1.1883033930255018e-05, "loss": 0.8428, "step": 3087 }, { "epoch": 0.46, "learning_rate": 1.1878329655267758e-05, "loss": 0.8574, "step": 3088 }, { "epoch": 0.46, "learning_rate": 1.1873624949360853e-05, "loss": 0.811, "step": 3089 }, { "epoch": 0.46, "learning_rate": 1.1868919813613645e-05, "loss": 0.8105, "step": 3090 }, { "epoch": 0.46, "learning_rate": 1.1864214249105565e-05, "loss": 0.2864, "step": 3091 }, { "epoch": 0.46, "learning_rate": 1.185950825691615e-05, "loss": 0.855, "step": 3092 }, { "epoch": 0.46, "learning_rate": 1.1854801838125032e-05, "loss": 0.855, "step": 3093 }, { "epoch": 0.46, "learning_rate": 1.1850094993811936e-05, "loss": 0.7344, "step": 3094 }, { "epoch": 0.46, "learning_rate": 1.1845387725056694e-05, "loss": 0.7959, "step": 3095 }, { "epoch": 0.46, "learning_rate": 1.1840680032939226e-05, "loss": 0.9033, "step": 3096 }, { "epoch": 0.46, "learning_rate": 1.1835971918539562e-05, "loss": 0.854, "step": 3097 }, { "epoch": 0.46, "learning_rate": 1.1831263382937814e-05, "loss": 0.7764, "step": 3098 }, { "epoch": 0.46, "learning_rate": 1.1826554427214198e-05, "loss": 0.7266, "step": 3099 }, { "epoch": 0.46, "learning_rate": 1.1821845052449026e-05, "loss": 0.8452, "step": 3100 }, { "epoch": 0.46, "learning_rate": 1.1817135259722707e-05, "loss": 0.8262, "step": 3101 }, { "epoch": 0.46, "learning_rate": 1.1812425050115749e-05, "loss": 0.8901, "step": 3102 }, { "epoch": 0.46, "learning_rate": 1.180771442470874e-05, "loss": 0.8926, "step": 3103 }, { "epoch": 0.46, "learning_rate": 1.1803003384582382e-05, "loss": 0.8726, "step": 3104 }, { "epoch": 0.46, "learning_rate": 1.1798291930817468e-05, "loss": 0.835, "step": 3105 }, { "epoch": 0.46, "learning_rate": 1.1793580064494878e-05, "loss": 0.8779, "step": 3106 }, { "epoch": 0.46, "learning_rate": 1.178886778669559e-05, "loss": 0.853, "step": 3107 }, { "epoch": 0.46, "learning_rate": 1.1784155098500682e-05, "loss": 0.8618, "step": 3108 }, { "epoch": 0.46, "learning_rate": 1.1779442000991321e-05, "loss": 0.8525, "step": 3109 }, { "epoch": 0.46, "learning_rate": 1.177472849524877e-05, "loss": 0.9023, "step": 3110 }, { "epoch": 0.46, "learning_rate": 1.1770014582354378e-05, "loss": 0.8701, "step": 3111 }, { "epoch": 0.46, "learning_rate": 1.17653002633896e-05, "loss": 0.8706, "step": 3112 }, { "epoch": 0.46, "learning_rate": 1.176058553943598e-05, "loss": 0.8462, "step": 3113 }, { "epoch": 0.46, "learning_rate": 1.1755870411575147e-05, "loss": 0.8857, "step": 3114 }, { "epoch": 0.46, "learning_rate": 1.1751154880888835e-05, "loss": 0.936, "step": 3115 }, { "epoch": 0.46, "learning_rate": 1.1746438948458858e-05, "loss": 0.8618, "step": 3116 }, { "epoch": 0.46, "learning_rate": 1.1741722615367132e-05, "loss": 0.3625, "step": 3117 }, { "epoch": 0.46, "learning_rate": 1.1737005882695658e-05, "loss": 0.8091, "step": 3118 }, { "epoch": 0.46, "learning_rate": 1.1732288751526537e-05, "loss": 0.9121, "step": 3119 }, { "epoch": 0.46, "learning_rate": 1.1727571222941952e-05, "loss": 0.7036, "step": 3120 }, { "epoch": 0.46, "learning_rate": 1.1722853298024184e-05, "loss": 0.7686, "step": 3121 }, { "epoch": 0.46, "learning_rate": 1.17181349778556e-05, "loss": 0.8608, "step": 3122 }, { "epoch": 0.46, "learning_rate": 1.1713416263518663e-05, "loss": 0.8613, "step": 3123 }, { "epoch": 0.46, "learning_rate": 1.1708697156095918e-05, "loss": 0.8237, "step": 3124 }, { "epoch": 0.46, "learning_rate": 1.1703977656670014e-05, "loss": 0.8545, "step": 3125 }, { "epoch": 0.46, "learning_rate": 1.1699257766323673e-05, "loss": 0.8691, "step": 3126 }, { "epoch": 0.46, "learning_rate": 1.169453748613972e-05, "loss": 0.8384, "step": 3127 }, { "epoch": 0.46, "learning_rate": 1.1689816817201065e-05, "loss": 0.8105, "step": 3128 }, { "epoch": 0.46, "learning_rate": 1.1685095760590706e-05, "loss": 0.3374, "step": 3129 }, { "epoch": 0.46, "learning_rate": 1.1680374317391734e-05, "loss": 0.8623, "step": 3130 }, { "epoch": 0.46, "learning_rate": 1.1675652488687318e-05, "loss": 0.8335, "step": 3131 }, { "epoch": 0.46, "learning_rate": 1.1670930275560728e-05, "loss": 0.8452, "step": 3132 }, { "epoch": 0.46, "learning_rate": 1.1666207679095316e-05, "loss": 0.9067, "step": 3133 }, { "epoch": 0.46, "learning_rate": 1.1661484700374528e-05, "loss": 0.306, "step": 3134 }, { "epoch": 0.46, "learning_rate": 1.1656761340481883e-05, "loss": 0.363, "step": 3135 }, { "epoch": 0.46, "learning_rate": 1.1652037600501007e-05, "loss": 0.8486, "step": 3136 }, { "epoch": 0.46, "learning_rate": 1.1647313481515596e-05, "loss": 0.325, "step": 3137 }, { "epoch": 0.46, "learning_rate": 1.1642588984609446e-05, "loss": 0.8252, "step": 3138 }, { "epoch": 0.46, "learning_rate": 1.163786411086643e-05, "loss": 0.856, "step": 3139 }, { "epoch": 0.46, "learning_rate": 1.1633138861370512e-05, "loss": 0.8823, "step": 3140 }, { "epoch": 0.46, "learning_rate": 1.1628413237205745e-05, "loss": 0.8257, "step": 3141 }, { "epoch": 0.46, "learning_rate": 1.1623687239456265e-05, "loss": 0.8394, "step": 3142 }, { "epoch": 0.46, "learning_rate": 1.1618960869206287e-05, "loss": 0.9087, "step": 3143 }, { "epoch": 0.46, "learning_rate": 1.1614234127540122e-05, "loss": 0.8472, "step": 3144 }, { "epoch": 0.47, "learning_rate": 1.1609507015542166e-05, "loss": 0.8306, "step": 3145 }, { "epoch": 0.47, "learning_rate": 1.1604779534296888e-05, "loss": 0.897, "step": 3146 }, { "epoch": 0.47, "learning_rate": 1.1600051684888858e-05, "loss": 0.8257, "step": 3147 }, { "epoch": 0.47, "learning_rate": 1.1595323468402715e-05, "loss": 0.8491, "step": 3148 }, { "epoch": 0.47, "learning_rate": 1.1590594885923192e-05, "loss": 0.8809, "step": 3149 }, { "epoch": 0.47, "learning_rate": 1.1585865938535106e-05, "loss": 0.8452, "step": 3150 }, { "epoch": 0.47, "learning_rate": 1.1581136627323355e-05, "loss": 0.8599, "step": 3151 }, { "epoch": 0.47, "learning_rate": 1.1576406953372916e-05, "loss": 0.8418, "step": 3152 }, { "epoch": 0.47, "learning_rate": 1.1571676917768859e-05, "loss": 0.7678, "step": 3153 }, { "epoch": 0.47, "learning_rate": 1.1566946521596323e-05, "loss": 0.8818, "step": 3154 }, { "epoch": 0.47, "learning_rate": 1.1562215765940553e-05, "loss": 0.8315, "step": 3155 }, { "epoch": 0.47, "learning_rate": 1.1557484651886848e-05, "loss": 0.8716, "step": 3156 }, { "epoch": 0.47, "learning_rate": 1.1552753180520612e-05, "loss": 0.8501, "step": 3157 }, { "epoch": 0.47, "learning_rate": 1.1548021352927317e-05, "loss": 0.7852, "step": 3158 }, { "epoch": 0.47, "learning_rate": 1.1543289170192524e-05, "loss": 0.7979, "step": 3159 }, { "epoch": 0.47, "learning_rate": 1.1538556633401876e-05, "loss": 0.8359, "step": 3160 }, { "epoch": 0.47, "learning_rate": 1.1533823743641091e-05, "loss": 0.8257, "step": 3161 }, { "epoch": 0.47, "learning_rate": 1.1529090501995972e-05, "loss": 0.7209, "step": 3162 }, { "epoch": 0.47, "learning_rate": 1.15243569095524e-05, "loss": 0.8911, "step": 3163 }, { "epoch": 0.47, "learning_rate": 1.1519622967396347e-05, "loss": 0.8911, "step": 3164 }, { "epoch": 0.47, "learning_rate": 1.151488867661385e-05, "loss": 0.3517, "step": 3165 }, { "epoch": 0.47, "learning_rate": 1.1510154038291035e-05, "loss": 0.854, "step": 3166 }, { "epoch": 0.47, "learning_rate": 1.1505419053514102e-05, "loss": 0.8311, "step": 3167 }, { "epoch": 0.47, "learning_rate": 1.1500683723369339e-05, "loss": 0.8359, "step": 3168 }, { "epoch": 0.47, "learning_rate": 1.1495948048943108e-05, "loss": 0.8667, "step": 3169 }, { "epoch": 0.47, "learning_rate": 1.149121203132185e-05, "loss": 0.856, "step": 3170 }, { "epoch": 0.47, "learning_rate": 1.1486475671592084e-05, "loss": 0.8198, "step": 3171 }, { "epoch": 0.47, "learning_rate": 1.1481738970840409e-05, "loss": 0.835, "step": 3172 }, { "epoch": 0.47, "learning_rate": 1.14770019301535e-05, "loss": 0.8286, "step": 3173 }, { "epoch": 0.47, "learning_rate": 1.1472264550618118e-05, "loss": 0.8267, "step": 3174 }, { "epoch": 0.47, "learning_rate": 1.1467526833321087e-05, "loss": 0.8286, "step": 3175 }, { "epoch": 0.47, "learning_rate": 1.1462788779349323e-05, "loss": 0.8066, "step": 3176 }, { "epoch": 0.47, "learning_rate": 1.1458050389789812e-05, "loss": 0.8232, "step": 3177 }, { "epoch": 0.47, "learning_rate": 1.1453311665729618e-05, "loss": 0.3433, "step": 3178 }, { "epoch": 0.47, "learning_rate": 1.1448572608255885e-05, "loss": 0.8848, "step": 3179 }, { "epoch": 0.47, "learning_rate": 1.1443833218455824e-05, "loss": 0.832, "step": 3180 }, { "epoch": 0.47, "learning_rate": 1.1439093497416735e-05, "loss": 0.8105, "step": 3181 }, { "epoch": 0.47, "learning_rate": 1.1434353446225983e-05, "loss": 0.7964, "step": 3182 }, { "epoch": 0.47, "learning_rate": 1.142961306597102e-05, "loss": 0.8218, "step": 3183 }, { "epoch": 0.47, "learning_rate": 1.142487235773936e-05, "loss": 0.8481, "step": 3184 }, { "epoch": 0.47, "learning_rate": 1.1420131322618601e-05, "loss": 0.8633, "step": 3185 }, { "epoch": 0.47, "learning_rate": 1.1415389961696416e-05, "loss": 0.3457, "step": 3186 }, { "epoch": 0.47, "learning_rate": 1.1410648276060553e-05, "loss": 0.8306, "step": 3187 }, { "epoch": 0.47, "learning_rate": 1.1405906266798827e-05, "loss": 0.8418, "step": 3188 }, { "epoch": 0.47, "learning_rate": 1.140116393499914e-05, "loss": 0.874, "step": 3189 }, { "epoch": 0.47, "learning_rate": 1.139642128174945e-05, "loss": 0.8164, "step": 3190 }, { "epoch": 0.47, "learning_rate": 1.1391678308137807e-05, "loss": 0.8149, "step": 3191 }, { "epoch": 0.47, "learning_rate": 1.138693501525233e-05, "loss": 0.8545, "step": 3192 }, { "epoch": 0.47, "learning_rate": 1.1382191404181201e-05, "loss": 0.8315, "step": 3193 }, { "epoch": 0.47, "learning_rate": 1.1377447476012686e-05, "loss": 0.8438, "step": 3194 }, { "epoch": 0.47, "learning_rate": 1.1372703231835119e-05, "loss": 0.853, "step": 3195 }, { "epoch": 0.47, "learning_rate": 1.1367958672736908e-05, "loss": 0.9263, "step": 3196 }, { "epoch": 0.47, "learning_rate": 1.1363213799806531e-05, "loss": 0.7393, "step": 3197 }, { "epoch": 0.47, "learning_rate": 1.1358468614132543e-05, "loss": 0.7637, "step": 3198 }, { "epoch": 0.47, "learning_rate": 1.135372311680356e-05, "loss": 0.8169, "step": 3199 }, { "epoch": 0.47, "learning_rate": 1.1348977308908289e-05, "loss": 0.7822, "step": 3200 }, { "epoch": 0.47, "learning_rate": 1.1344231191535488e-05, "loss": 0.8047, "step": 3201 }, { "epoch": 0.47, "learning_rate": 1.1339484765773995e-05, "loss": 0.9111, "step": 3202 }, { "epoch": 0.47, "learning_rate": 1.133473803271272e-05, "loss": 0.8687, "step": 3203 }, { "epoch": 0.47, "learning_rate": 1.1329990993440638e-05, "loss": 0.8398, "step": 3204 }, { "epoch": 0.47, "learning_rate": 1.13252436490468e-05, "loss": 0.8579, "step": 3205 }, { "epoch": 0.47, "learning_rate": 1.1320496000620325e-05, "loss": 0.8184, "step": 3206 }, { "epoch": 0.47, "learning_rate": 1.1315748049250404e-05, "loss": 0.8384, "step": 3207 }, { "epoch": 0.47, "learning_rate": 1.1310999796026285e-05, "loss": 0.7983, "step": 3208 }, { "epoch": 0.47, "learning_rate": 1.130625124203731e-05, "loss": 0.8384, "step": 3209 }, { "epoch": 0.47, "learning_rate": 1.1301502388372862e-05, "loss": 0.8545, "step": 3210 }, { "epoch": 0.47, "learning_rate": 1.1296753236122416e-05, "loss": 0.8633, "step": 3211 }, { "epoch": 0.48, "learning_rate": 1.1292003786375496e-05, "loss": 0.8311, "step": 3212 }, { "epoch": 0.48, "learning_rate": 1.128725404022171e-05, "loss": 0.8433, "step": 3213 }, { "epoch": 0.48, "learning_rate": 1.1282503998750726e-05, "loss": 0.79, "step": 3214 }, { "epoch": 0.48, "learning_rate": 1.1277753663052284e-05, "loss": 0.8555, "step": 3215 }, { "epoch": 0.48, "learning_rate": 1.1273003034216186e-05, "loss": 0.8491, "step": 3216 }, { "epoch": 0.48, "learning_rate": 1.1268252113332302e-05, "loss": 0.9131, "step": 3217 }, { "epoch": 0.48, "learning_rate": 1.1263500901490576e-05, "loss": 0.3213, "step": 3218 }, { "epoch": 0.48, "learning_rate": 1.1258749399781013e-05, "loss": 0.8711, "step": 3219 }, { "epoch": 0.48, "learning_rate": 1.1253997609293684e-05, "loss": 0.833, "step": 3220 }, { "epoch": 0.48, "learning_rate": 1.1249245531118725e-05, "loss": 0.8647, "step": 3221 }, { "epoch": 0.48, "learning_rate": 1.1244493166346344e-05, "loss": 0.8496, "step": 3222 }, { "epoch": 0.48, "learning_rate": 1.1239740516066813e-05, "loss": 0.8359, "step": 3223 }, { "epoch": 0.48, "learning_rate": 1.1234987581370468e-05, "loss": 0.8706, "step": 3224 }, { "epoch": 0.48, "learning_rate": 1.1230234363347708e-05, "loss": 0.8188, "step": 3225 }, { "epoch": 0.48, "learning_rate": 1.1225480863088997e-05, "loss": 0.8794, "step": 3226 }, { "epoch": 0.48, "learning_rate": 1.122072708168487e-05, "loss": 0.7739, "step": 3227 }, { "epoch": 0.48, "learning_rate": 1.1215973020225924e-05, "loss": 0.7935, "step": 3228 }, { "epoch": 0.48, "learning_rate": 1.1211218679802812e-05, "loss": 0.3257, "step": 3229 }, { "epoch": 0.48, "learning_rate": 1.1206464061506259e-05, "loss": 0.813, "step": 3230 }, { "epoch": 0.48, "learning_rate": 1.1201709166427059e-05, "loss": 0.8091, "step": 3231 }, { "epoch": 0.48, "learning_rate": 1.119695399565606e-05, "loss": 0.792, "step": 3232 }, { "epoch": 0.48, "learning_rate": 1.1192198550284172e-05, "loss": 0.7969, "step": 3233 }, { "epoch": 0.48, "learning_rate": 1.1187442831402378e-05, "loss": 0.7842, "step": 3234 }, { "epoch": 0.48, "learning_rate": 1.1182686840101711e-05, "loss": 0.7866, "step": 3235 }, { "epoch": 0.48, "learning_rate": 1.1177930577473281e-05, "loss": 0.8218, "step": 3236 }, { "epoch": 0.48, "learning_rate": 1.1173174044608249e-05, "loss": 0.8115, "step": 3237 }, { "epoch": 0.48, "learning_rate": 1.116841724259784e-05, "loss": 0.897, "step": 3238 }, { "epoch": 0.48, "learning_rate": 1.1163660172533346e-05, "loss": 0.8403, "step": 3239 }, { "epoch": 0.48, "learning_rate": 1.1158902835506114e-05, "loss": 0.7734, "step": 3240 }, { "epoch": 0.48, "learning_rate": 1.1154145232607558e-05, "loss": 0.8325, "step": 3241 }, { "epoch": 0.48, "learning_rate": 1.1149387364929148e-05, "loss": 0.7842, "step": 3242 }, { "epoch": 0.48, "learning_rate": 1.1144629233562418e-05, "loss": 0.8276, "step": 3243 }, { "epoch": 0.48, "learning_rate": 1.1139870839598959e-05, "loss": 0.8599, "step": 3244 }, { "epoch": 0.48, "learning_rate": 1.113511218413043e-05, "loss": 0.9126, "step": 3245 }, { "epoch": 0.48, "learning_rate": 1.1130353268248539e-05, "loss": 0.8301, "step": 3246 }, { "epoch": 0.48, "learning_rate": 1.1125594093045062e-05, "loss": 0.8774, "step": 3247 }, { "epoch": 0.48, "learning_rate": 1.1120834659611832e-05, "loss": 0.8037, "step": 3248 }, { "epoch": 0.48, "learning_rate": 1.111607496904074e-05, "loss": 0.8003, "step": 3249 }, { "epoch": 0.48, "learning_rate": 1.111131502242374e-05, "loss": 0.79, "step": 3250 }, { "epoch": 0.48, "learning_rate": 1.1106554820852837e-05, "loss": 0.8105, "step": 3251 }, { "epoch": 0.48, "learning_rate": 1.1101794365420103e-05, "loss": 0.8882, "step": 3252 }, { "epoch": 0.48, "learning_rate": 1.1097033657217662e-05, "loss": 0.77, "step": 3253 }, { "epoch": 0.48, "learning_rate": 1.1092272697337703e-05, "loss": 0.7637, "step": 3254 }, { "epoch": 0.48, "learning_rate": 1.1087511486872461e-05, "loss": 0.8716, "step": 3255 }, { "epoch": 0.48, "learning_rate": 1.1082750026914246e-05, "loss": 0.856, "step": 3256 }, { "epoch": 0.48, "learning_rate": 1.1077988318555405e-05, "loss": 0.8643, "step": 3257 }, { "epoch": 0.48, "learning_rate": 1.1073226362888358e-05, "loss": 0.7903, "step": 3258 }, { "epoch": 0.48, "learning_rate": 1.1068464161005575e-05, "loss": 0.9312, "step": 3259 }, { "epoch": 0.48, "learning_rate": 1.1063701713999582e-05, "loss": 0.9556, "step": 3260 }, { "epoch": 0.48, "learning_rate": 1.1058939022962962e-05, "loss": 0.9697, "step": 3261 }, { "epoch": 0.48, "learning_rate": 1.1054176088988352e-05, "loss": 1.0029, "step": 3262 }, { "epoch": 0.48, "learning_rate": 1.1049412913168456e-05, "loss": 0.9741, "step": 3263 }, { "epoch": 0.48, "learning_rate": 1.1044649496596018e-05, "loss": 0.9268, "step": 3264 }, { "epoch": 0.48, "learning_rate": 1.1039885840363845e-05, "loss": 0.9146, "step": 3265 }, { "epoch": 0.48, "learning_rate": 1.1035121945564795e-05, "loss": 0.9858, "step": 3266 }, { "epoch": 0.48, "learning_rate": 1.1030357813291787e-05, "loss": 0.9961, "step": 3267 }, { "epoch": 0.48, "learning_rate": 1.1025593444637797e-05, "loss": 0.9585, "step": 3268 }, { "epoch": 0.48, "learning_rate": 1.1020828840695836e-05, "loss": 0.9585, "step": 3269 }, { "epoch": 0.48, "learning_rate": 1.1016064002558993e-05, "loss": 0.3335, "step": 3270 }, { "epoch": 0.48, "learning_rate": 1.1011298931320393e-05, "loss": 0.9517, "step": 3271 }, { "epoch": 0.48, "learning_rate": 1.1006533628073229e-05, "loss": 0.9385, "step": 3272 }, { "epoch": 0.48, "learning_rate": 1.1001768093910734e-05, "loss": 0.9453, "step": 3273 }, { "epoch": 0.48, "learning_rate": 1.0997002329926201e-05, "loss": 0.8896, "step": 3274 }, { "epoch": 0.48, "learning_rate": 1.0992236337212976e-05, "loss": 0.8965, "step": 3275 }, { "epoch": 0.48, "learning_rate": 1.0987470116864454e-05, "loss": 0.8418, "step": 3276 }, { "epoch": 0.48, "learning_rate": 1.0982703669974087e-05, "loss": 0.7949, "step": 3277 }, { "epoch": 0.48, "learning_rate": 1.097793699763537e-05, "loss": 0.8701, "step": 3278 }, { "epoch": 0.48, "learning_rate": 1.0973170100941865e-05, "loss": 0.7666, "step": 3279 }, { "epoch": 0.49, "learning_rate": 1.0968402980987164e-05, "loss": 0.8389, "step": 3280 }, { "epoch": 0.49, "learning_rate": 1.0963635638864938e-05, "loss": 0.8398, "step": 3281 }, { "epoch": 0.49, "learning_rate": 1.095886807566888e-05, "loss": 0.7983, "step": 3282 }, { "epoch": 0.49, "learning_rate": 1.0954100292492758e-05, "loss": 0.8301, "step": 3283 }, { "epoch": 0.49, "learning_rate": 1.094933229043037e-05, "loss": 0.874, "step": 3284 }, { "epoch": 0.49, "learning_rate": 1.0944564070575576e-05, "loss": 0.8901, "step": 3285 }, { "epoch": 0.49, "learning_rate": 1.093979563402229e-05, "loss": 0.7676, "step": 3286 }, { "epoch": 0.49, "learning_rate": 1.0935026981864462e-05, "loss": 0.8691, "step": 3287 }, { "epoch": 0.49, "learning_rate": 1.093025811519611e-05, "loss": 0.918, "step": 3288 }, { "epoch": 0.49, "learning_rate": 1.0925489035111275e-05, "loss": 0.8403, "step": 3289 }, { "epoch": 0.49, "learning_rate": 1.0920719742704071e-05, "loss": 0.8096, "step": 3290 }, { "epoch": 0.49, "learning_rate": 1.0915950239068653e-05, "loss": 0.8467, "step": 3291 }, { "epoch": 0.49, "learning_rate": 1.0911180525299224e-05, "loss": 0.7642, "step": 3292 }, { "epoch": 0.49, "learning_rate": 1.090641060249003e-05, "loss": 0.8198, "step": 3293 }, { "epoch": 0.49, "learning_rate": 1.090164047173537e-05, "loss": 0.8501, "step": 3294 }, { "epoch": 0.49, "learning_rate": 1.0896870134129592e-05, "loss": 0.8882, "step": 3295 }, { "epoch": 0.49, "learning_rate": 1.089209959076709e-05, "loss": 0.8403, "step": 3296 }, { "epoch": 0.49, "learning_rate": 1.0887328842742307e-05, "loss": 0.7856, "step": 3297 }, { "epoch": 0.49, "learning_rate": 1.0882557891149725e-05, "loss": 0.8237, "step": 3298 }, { "epoch": 0.49, "learning_rate": 1.0877786737083882e-05, "loss": 0.8711, "step": 3299 }, { "epoch": 0.49, "learning_rate": 1.0873015381639363e-05, "loss": 0.8718, "step": 3300 }, { "epoch": 0.49, "learning_rate": 1.0868243825910786e-05, "loss": 0.8965, "step": 3301 }, { "epoch": 0.49, "learning_rate": 1.0863472070992834e-05, "loss": 0.338, "step": 3302 }, { "epoch": 0.49, "learning_rate": 1.0858700117980217e-05, "loss": 0.8159, "step": 3303 }, { "epoch": 0.49, "learning_rate": 1.0853927967967705e-05, "loss": 0.8496, "step": 3304 }, { "epoch": 0.49, "learning_rate": 1.0849155622050107e-05, "loss": 0.8237, "step": 3305 }, { "epoch": 0.49, "learning_rate": 1.0844383081322276e-05, "loss": 0.8799, "step": 3306 }, { "epoch": 0.49, "learning_rate": 1.0839610346879112e-05, "loss": 0.8691, "step": 3307 }, { "epoch": 0.49, "learning_rate": 1.0834837419815558e-05, "loss": 0.8975, "step": 3308 }, { "epoch": 0.49, "learning_rate": 1.0830064301226603e-05, "loss": 0.8647, "step": 3309 }, { "epoch": 0.49, "learning_rate": 1.0825290992207278e-05, "loss": 0.8452, "step": 3310 }, { "epoch": 0.49, "learning_rate": 1.0820517493852655e-05, "loss": 0.8296, "step": 3311 }, { "epoch": 0.49, "learning_rate": 1.0815743807257859e-05, "loss": 0.8843, "step": 3312 }, { "epoch": 0.49, "learning_rate": 1.081096993351805e-05, "loss": 0.8438, "step": 3313 }, { "epoch": 0.49, "learning_rate": 1.0806195873728429e-05, "loss": 0.8286, "step": 3314 }, { "epoch": 0.49, "learning_rate": 1.080142162898425e-05, "loss": 0.8027, "step": 3315 }, { "epoch": 0.49, "learning_rate": 1.0796647200380795e-05, "loss": 0.856, "step": 3316 }, { "epoch": 0.49, "learning_rate": 1.0791872589013404e-05, "loss": 0.8237, "step": 3317 }, { "epoch": 0.49, "learning_rate": 1.0787097795977447e-05, "loss": 0.855, "step": 3318 }, { "epoch": 0.49, "learning_rate": 1.0782322822368343e-05, "loss": 0.8696, "step": 3319 }, { "epoch": 0.49, "learning_rate": 1.077754766928155e-05, "loss": 0.9126, "step": 3320 }, { "epoch": 0.49, "learning_rate": 1.0772772337812557e-05, "loss": 0.8159, "step": 3321 }, { "epoch": 0.49, "learning_rate": 1.0767996829056914e-05, "loss": 0.8574, "step": 3322 }, { "epoch": 0.49, "learning_rate": 1.0763221144110196e-05, "loss": 0.8237, "step": 3323 }, { "epoch": 0.49, "learning_rate": 1.0758445284068027e-05, "loss": 0.8755, "step": 3324 }, { "epoch": 0.49, "learning_rate": 1.0753669250026062e-05, "loss": 0.8823, "step": 3325 }, { "epoch": 0.49, "learning_rate": 1.0748893043080008e-05, "loss": 0.8203, "step": 3326 }, { "epoch": 0.49, "learning_rate": 1.07441166643256e-05, "loss": 0.8452, "step": 3327 }, { "epoch": 0.49, "learning_rate": 1.0739340114858623e-05, "loss": 0.8438, "step": 3328 }, { "epoch": 0.49, "learning_rate": 1.073456339577489e-05, "loss": 0.7544, "step": 3329 }, { "epoch": 0.49, "learning_rate": 1.0729786508170261e-05, "loss": 0.8394, "step": 3330 }, { "epoch": 0.49, "learning_rate": 1.0725009453140636e-05, "loss": 0.8252, "step": 3331 }, { "epoch": 0.49, "learning_rate": 1.0720232231781944e-05, "loss": 0.8525, "step": 3332 }, { "epoch": 0.49, "learning_rate": 1.0715454845190161e-05, "loss": 0.8164, "step": 3333 }, { "epoch": 0.49, "learning_rate": 1.0710677294461298e-05, "loss": 0.8076, "step": 3334 }, { "epoch": 0.49, "learning_rate": 1.0705899580691402e-05, "loss": 0.8228, "step": 3335 }, { "epoch": 0.49, "learning_rate": 1.0701121704976561e-05, "loss": 0.3367, "step": 3336 }, { "epoch": 0.49, "learning_rate": 1.0696343668412898e-05, "loss": 0.811, "step": 3337 }, { "epoch": 0.49, "learning_rate": 1.069156547209657e-05, "loss": 0.8193, "step": 3338 }, { "epoch": 0.49, "learning_rate": 1.0686787117123776e-05, "loss": 0.8257, "step": 3339 }, { "epoch": 0.49, "learning_rate": 1.0682008604590748e-05, "loss": 0.332, "step": 3340 }, { "epoch": 0.49, "learning_rate": 1.067722993559376e-05, "loss": 0.79, "step": 3341 }, { "epoch": 0.49, "learning_rate": 1.0672451111229108e-05, "loss": 0.8052, "step": 3342 }, { "epoch": 0.49, "learning_rate": 1.066767213259314e-05, "loss": 0.772, "step": 3343 }, { "epoch": 0.49, "learning_rate": 1.0662893000782228e-05, "loss": 0.8223, "step": 3344 }, { "epoch": 0.49, "learning_rate": 1.0658113716892789e-05, "loss": 0.8457, "step": 3345 }, { "epoch": 0.49, "learning_rate": 1.0653334282021261e-05, "loss": 0.8335, "step": 3346 }, { "epoch": 0.49, "learning_rate": 1.0648554697264131e-05, "loss": 0.6777, "step": 3347 }, { "epoch": 0.5, "learning_rate": 1.064377496371791e-05, "loss": 0.8198, "step": 3348 }, { "epoch": 0.5, "learning_rate": 1.0638995082479151e-05, "loss": 0.8608, "step": 3349 }, { "epoch": 0.5, "learning_rate": 1.0634215054644437e-05, "loss": 0.8379, "step": 3350 }, { "epoch": 0.5, "learning_rate": 1.0629434881310382e-05, "loss": 0.3228, "step": 3351 }, { "epoch": 0.5, "learning_rate": 1.0624654563573635e-05, "loss": 0.7681, "step": 3352 }, { "epoch": 0.5, "learning_rate": 1.0619874102530886e-05, "loss": 0.8242, "step": 3353 }, { "epoch": 0.5, "learning_rate": 1.0615093499278846e-05, "loss": 0.8579, "step": 3354 }, { "epoch": 0.5, "learning_rate": 1.0610312754914262e-05, "loss": 0.7998, "step": 3355 }, { "epoch": 0.5, "learning_rate": 1.0605531870533922e-05, "loss": 0.8667, "step": 3356 }, { "epoch": 0.5, "learning_rate": 1.0600750847234633e-05, "loss": 0.7939, "step": 3357 }, { "epoch": 0.5, "learning_rate": 1.0595969686113245e-05, "loss": 0.8193, "step": 3358 }, { "epoch": 0.5, "learning_rate": 1.0591188388266632e-05, "loss": 0.8618, "step": 3359 }, { "epoch": 0.5, "learning_rate": 1.0586406954791702e-05, "loss": 0.8057, "step": 3360 }, { "epoch": 0.5, "learning_rate": 1.0581625386785392e-05, "loss": 0.3293, "step": 3361 }, { "epoch": 0.5, "learning_rate": 1.057684368534468e-05, "loss": 0.8667, "step": 3362 }, { "epoch": 0.5, "learning_rate": 1.0572061851566557e-05, "loss": 0.7871, "step": 3363 }, { "epoch": 0.5, "learning_rate": 1.0567279886548063e-05, "loss": 0.7954, "step": 3364 }, { "epoch": 0.5, "learning_rate": 1.0562497791386255e-05, "loss": 0.7861, "step": 3365 }, { "epoch": 0.5, "learning_rate": 1.055771556717822e-05, "loss": 0.7939, "step": 3366 }, { "epoch": 0.5, "learning_rate": 1.0552933215021088e-05, "loss": 0.7964, "step": 3367 }, { "epoch": 0.5, "learning_rate": 1.0548150736012002e-05, "loss": 0.8315, "step": 3368 }, { "epoch": 0.5, "learning_rate": 1.0543368131248144e-05, "loss": 0.8535, "step": 3369 }, { "epoch": 0.5, "learning_rate": 1.053858540182672e-05, "loss": 0.3442, "step": 3370 }, { "epoch": 0.5, "learning_rate": 1.0533802548844969e-05, "loss": 0.853, "step": 3371 }, { "epoch": 0.5, "learning_rate": 1.0529019573400154e-05, "loss": 0.8384, "step": 3372 }, { "epoch": 0.5, "learning_rate": 1.0524236476589571e-05, "loss": 0.7617, "step": 3373 }, { "epoch": 0.5, "learning_rate": 1.0519453259510535e-05, "loss": 0.8271, "step": 3374 }, { "epoch": 0.5, "learning_rate": 1.0514669923260398e-05, "loss": 0.7798, "step": 3375 }, { "epoch": 0.5, "learning_rate": 1.050988646893654e-05, "loss": 0.3088, "step": 3376 }, { "epoch": 0.5, "learning_rate": 1.0505102897636358e-05, "loss": 0.8247, "step": 3377 }, { "epoch": 0.5, "learning_rate": 1.0500319210457284e-05, "loss": 0.8823, "step": 3378 }, { "epoch": 0.5, "learning_rate": 1.0495535408496772e-05, "loss": 0.7959, "step": 3379 }, { "epoch": 0.5, "learning_rate": 1.049075149285231e-05, "loss": 0.769, "step": 3380 }, { "epoch": 0.5, "learning_rate": 1.0485967464621401e-05, "loss": 0.8091, "step": 3381 }, { "epoch": 0.5, "learning_rate": 1.0481183324901583e-05, "loss": 0.2905, "step": 3382 }, { "epoch": 0.5, "learning_rate": 1.0476399074790413e-05, "loss": 0.8657, "step": 3383 }, { "epoch": 0.5, "learning_rate": 1.047161471538548e-05, "loss": 0.8896, "step": 3384 }, { "epoch": 0.5, "learning_rate": 1.0466830247784394e-05, "loss": 0.8145, "step": 3385 }, { "epoch": 0.5, "learning_rate": 1.0462045673084788e-05, "loss": 0.8281, "step": 3386 }, { "epoch": 0.5, "learning_rate": 1.0457260992384326e-05, "loss": 0.7734, "step": 3387 }, { "epoch": 0.5, "learning_rate": 1.0452476206780686e-05, "loss": 0.854, "step": 3388 }, { "epoch": 0.5, "learning_rate": 1.0447691317371582e-05, "loss": 0.793, "step": 3389 }, { "epoch": 0.5, "learning_rate": 1.0442906325254747e-05, "loss": 0.8491, "step": 3390 }, { "epoch": 0.5, "learning_rate": 1.0438121231527928e-05, "loss": 0.7988, "step": 3391 }, { "epoch": 0.5, "learning_rate": 1.0433336037288915e-05, "loss": 0.8301, "step": 3392 }, { "epoch": 0.5, "learning_rate": 1.0428550743635502e-05, "loss": 0.7725, "step": 3393 }, { "epoch": 0.5, "learning_rate": 1.042376535166552e-05, "loss": 0.8853, "step": 3394 }, { "epoch": 0.5, "learning_rate": 1.041897986247681e-05, "loss": 0.8447, "step": 3395 }, { "epoch": 0.5, "learning_rate": 1.0414194277167244e-05, "loss": 0.8262, "step": 3396 }, { "epoch": 0.5, "learning_rate": 1.0409408596834718e-05, "loss": 0.873, "step": 3397 }, { "epoch": 0.5, "learning_rate": 1.040462282257714e-05, "loss": 0.8696, "step": 3398 }, { "epoch": 0.5, "learning_rate": 1.039983695549245e-05, "loss": 0.3159, "step": 3399 }, { "epoch": 0.5, "learning_rate": 1.0395050996678602e-05, "loss": 0.8242, "step": 3400 }, { "epoch": 0.5, "learning_rate": 1.0390264947233573e-05, "loss": 0.811, "step": 3401 }, { "epoch": 0.5, "learning_rate": 1.0385478808255358e-05, "loss": 0.7896, "step": 3402 }, { "epoch": 0.5, "learning_rate": 1.0380692580841985e-05, "loss": 0.8594, "step": 3403 }, { "epoch": 0.5, "learning_rate": 1.0375906266091485e-05, "loss": 0.7524, "step": 3404 }, { "epoch": 0.5, "learning_rate": 1.037111986510192e-05, "loss": 0.8467, "step": 3405 }, { "epoch": 0.5, "learning_rate": 1.036633337897137e-05, "loss": 0.813, "step": 3406 }, { "epoch": 0.5, "learning_rate": 1.0361546808797929e-05, "loss": 0.8271, "step": 3407 }, { "epoch": 0.5, "learning_rate": 1.0356760155679719e-05, "loss": 0.8081, "step": 3408 }, { "epoch": 0.5, "learning_rate": 1.0351973420714878e-05, "loss": 0.8149, "step": 3409 }, { "epoch": 0.5, "learning_rate": 1.0347186605001556e-05, "loss": 0.8359, "step": 3410 }, { "epoch": 0.5, "learning_rate": 1.0342399709637932e-05, "loss": 0.8354, "step": 3411 }, { "epoch": 0.5, "learning_rate": 1.0337612735722195e-05, "loss": 0.8179, "step": 3412 }, { "epoch": 0.5, "learning_rate": 1.0332825684352559e-05, "loss": 0.8472, "step": 3413 }, { "epoch": 0.5, "learning_rate": 1.032803855662725e-05, "loss": 0.3101, "step": 3414 }, { "epoch": 0.51, "learning_rate": 1.0323251353644512e-05, "loss": 0.8862, "step": 3415 }, { "epoch": 0.51, "learning_rate": 1.031846407650261e-05, "loss": 0.8213, "step": 3416 }, { "epoch": 0.51, "learning_rate": 1.0313676726299824e-05, "loss": 0.7944, "step": 3417 }, { "epoch": 0.51, "learning_rate": 1.0308889304134453e-05, "loss": 0.8545, "step": 3418 }, { "epoch": 0.51, "learning_rate": 1.0304101811104803e-05, "loss": 0.8198, "step": 3419 }, { "epoch": 0.51, "learning_rate": 1.029931424830921e-05, "loss": 0.8438, "step": 3420 }, { "epoch": 0.51, "learning_rate": 1.0294526616846017e-05, "loss": 0.3047, "step": 3421 }, { "epoch": 0.51, "learning_rate": 1.0289738917813585e-05, "loss": 0.8115, "step": 3422 }, { "epoch": 0.51, "learning_rate": 1.0284951152310292e-05, "loss": 0.8267, "step": 3423 }, { "epoch": 0.51, "learning_rate": 1.0280163321434528e-05, "loss": 0.7974, "step": 3424 }, { "epoch": 0.51, "learning_rate": 1.0275375426284704e-05, "loss": 0.293, "step": 3425 }, { "epoch": 0.51, "learning_rate": 1.027058746795924e-05, "loss": 0.7896, "step": 3426 }, { "epoch": 0.51, "learning_rate": 1.0265799447556566e-05, "loss": 0.7905, "step": 3427 }, { "epoch": 0.51, "learning_rate": 1.026101136617514e-05, "loss": 0.855, "step": 3428 }, { "epoch": 0.51, "learning_rate": 1.0256223224913422e-05, "loss": 0.8281, "step": 3429 }, { "epoch": 0.51, "learning_rate": 1.0251435024869894e-05, "loss": 0.8276, "step": 3430 }, { "epoch": 0.51, "learning_rate": 1.0246646767143046e-05, "loss": 0.8018, "step": 3431 }, { "epoch": 0.51, "learning_rate": 1.0241858452831384e-05, "loss": 0.7559, "step": 3432 }, { "epoch": 0.51, "learning_rate": 1.0237070083033422e-05, "loss": 0.7969, "step": 3433 }, { "epoch": 0.51, "learning_rate": 1.0232281658847693e-05, "loss": 0.752, "step": 3434 }, { "epoch": 0.51, "learning_rate": 1.0227493181372745e-05, "loss": 0.8462, "step": 3435 }, { "epoch": 0.51, "learning_rate": 1.0222704651707125e-05, "loss": 0.8486, "step": 3436 }, { "epoch": 0.51, "learning_rate": 1.0217916070949405e-05, "loss": 0.8101, "step": 3437 }, { "epoch": 0.51, "learning_rate": 1.0213127440198158e-05, "loss": 0.7769, "step": 3438 }, { "epoch": 0.51, "learning_rate": 1.0208338760551985e-05, "loss": 0.835, "step": 3439 }, { "epoch": 0.51, "learning_rate": 1.020355003310948e-05, "loss": 0.8076, "step": 3440 }, { "epoch": 0.51, "learning_rate": 1.019876125896926e-05, "loss": 0.8237, "step": 3441 }, { "epoch": 0.51, "learning_rate": 1.019397243922994e-05, "loss": 0.8159, "step": 3442 }, { "epoch": 0.51, "learning_rate": 1.0189183574990162e-05, "loss": 0.7339, "step": 3443 }, { "epoch": 0.51, "learning_rate": 1.0184394667348572e-05, "loss": 0.8301, "step": 3444 }, { "epoch": 0.51, "learning_rate": 1.0179605717403815e-05, "loss": 0.7583, "step": 3445 }, { "epoch": 0.51, "learning_rate": 1.0174816726254563e-05, "loss": 0.7266, "step": 3446 }, { "epoch": 0.51, "learning_rate": 1.0170027694999481e-05, "loss": 0.8477, "step": 3447 }, { "epoch": 0.51, "learning_rate": 1.0165238624737261e-05, "loss": 0.8013, "step": 3448 }, { "epoch": 0.51, "learning_rate": 1.0160449516566586e-05, "loss": 0.8423, "step": 3449 }, { "epoch": 0.51, "learning_rate": 1.0155660371586162e-05, "loss": 0.7988, "step": 3450 }, { "epoch": 0.51, "learning_rate": 1.0150871190894693e-05, "loss": 0.8574, "step": 3451 }, { "epoch": 0.51, "learning_rate": 1.0146081975590897e-05, "loss": 0.8149, "step": 3452 }, { "epoch": 0.51, "learning_rate": 1.01412927267735e-05, "loss": 0.8433, "step": 3453 }, { "epoch": 0.51, "learning_rate": 1.0136503445541235e-05, "loss": 0.7751, "step": 3454 }, { "epoch": 0.51, "learning_rate": 1.0131714132992836e-05, "loss": 0.812, "step": 3455 }, { "epoch": 0.51, "learning_rate": 1.0126924790227056e-05, "loss": 0.8066, "step": 3456 }, { "epoch": 0.51, "learning_rate": 1.0122135418342644e-05, "loss": 0.7905, "step": 3457 }, { "epoch": 0.51, "learning_rate": 1.0117346018438367e-05, "loss": 0.7905, "step": 3458 }, { "epoch": 0.51, "learning_rate": 1.0112556591612986e-05, "loss": 0.8325, "step": 3459 }, { "epoch": 0.51, "learning_rate": 1.0107767138965274e-05, "loss": 0.8179, "step": 3460 }, { "epoch": 0.51, "learning_rate": 1.0102977661594013e-05, "loss": 0.8291, "step": 3461 }, { "epoch": 0.51, "learning_rate": 1.0098188160597989e-05, "loss": 0.8618, "step": 3462 }, { "epoch": 0.51, "learning_rate": 1.009339863707599e-05, "loss": 0.8174, "step": 3463 }, { "epoch": 0.51, "learning_rate": 1.0088609092126808e-05, "loss": 0.7656, "step": 3464 }, { "epoch": 0.51, "learning_rate": 1.008381952684925e-05, "loss": 0.8457, "step": 3465 }, { "epoch": 0.51, "learning_rate": 1.0079029942342113e-05, "loss": 0.7661, "step": 3466 }, { "epoch": 0.51, "learning_rate": 1.0074240339704216e-05, "loss": 0.7739, "step": 3467 }, { "epoch": 0.51, "learning_rate": 1.0069450720034364e-05, "loss": 0.8623, "step": 3468 }, { "epoch": 0.51, "learning_rate": 1.0064661084431377e-05, "loss": 0.3267, "step": 3469 }, { "epoch": 0.51, "learning_rate": 1.0059871433994075e-05, "loss": 0.748, "step": 3470 }, { "epoch": 0.51, "learning_rate": 1.0055081769821287e-05, "loss": 0.8359, "step": 3471 }, { "epoch": 0.51, "learning_rate": 1.0050292093011835e-05, "loss": 0.8149, "step": 3472 }, { "epoch": 0.51, "learning_rate": 1.0045502404664555e-05, "loss": 0.8364, "step": 3473 }, { "epoch": 0.51, "learning_rate": 1.004071270587827e-05, "loss": 0.7925, "step": 3474 }, { "epoch": 0.51, "learning_rate": 1.0035922997751825e-05, "loss": 0.793, "step": 3475 }, { "epoch": 0.51, "learning_rate": 1.003113328138406e-05, "loss": 0.7683, "step": 3476 }, { "epoch": 0.51, "learning_rate": 1.0026343557873806e-05, "loss": 0.8486, "step": 3477 }, { "epoch": 0.51, "learning_rate": 1.0021553828319906e-05, "loss": 0.8359, "step": 3478 }, { "epoch": 0.51, "learning_rate": 1.0016764093821203e-05, "loss": 0.7866, "step": 3479 }, { "epoch": 0.51, "learning_rate": 1.0011974355476545e-05, "loss": 0.835, "step": 3480 }, { "epoch": 0.51, "learning_rate": 1.0007184614384773e-05, "loss": 0.8169, "step": 3481 }, { "epoch": 0.51, "learning_rate": 1.0002394871644734e-05, "loss": 0.8818, "step": 3482 }, { "epoch": 0.52, "learning_rate": 9.997605128355267e-06, "loss": 0.8213, "step": 3483 }, { "epoch": 0.52, "learning_rate": 9.992815385615227e-06, "loss": 0.8188, "step": 3484 }, { "epoch": 0.52, "learning_rate": 9.988025644523458e-06, "loss": 0.7959, "step": 3485 }, { "epoch": 0.52, "learning_rate": 9.983235906178798e-06, "loss": 0.8296, "step": 3486 }, { "epoch": 0.52, "learning_rate": 9.978446171680097e-06, "loss": 0.7812, "step": 3487 }, { "epoch": 0.52, "learning_rate": 9.973656442126196e-06, "loss": 0.8018, "step": 3488 }, { "epoch": 0.52, "learning_rate": 9.968866718615946e-06, "loss": 0.854, "step": 3489 }, { "epoch": 0.52, "learning_rate": 9.964077002248177e-06, "loss": 0.8726, "step": 3490 }, { "epoch": 0.52, "learning_rate": 9.959287294121733e-06, "loss": 0.7798, "step": 3491 }, { "epoch": 0.52, "learning_rate": 9.954497595335448e-06, "loss": 0.8203, "step": 3492 }, { "epoch": 0.52, "learning_rate": 9.949707906988165e-06, "loss": 0.7549, "step": 3493 }, { "epoch": 0.52, "learning_rate": 9.944918230178718e-06, "loss": 0.8306, "step": 3494 }, { "epoch": 0.52, "learning_rate": 9.940128566005928e-06, "loss": 0.8364, "step": 3495 }, { "epoch": 0.52, "learning_rate": 9.935338915568626e-06, "loss": 0.8325, "step": 3496 }, { "epoch": 0.52, "learning_rate": 9.930549279965636e-06, "loss": 0.8452, "step": 3497 }, { "epoch": 0.52, "learning_rate": 9.92575966029579e-06, "loss": 0.8188, "step": 3498 }, { "epoch": 0.52, "learning_rate": 9.920970057657888e-06, "loss": 0.7876, "step": 3499 }, { "epoch": 0.52, "learning_rate": 9.916180473150753e-06, "loss": 0.7891, "step": 3500 }, { "epoch": 0.52, "learning_rate": 9.911390907873193e-06, "loss": 0.8315, "step": 3501 }, { "epoch": 0.52, "learning_rate": 9.906601362924016e-06, "loss": 0.8169, "step": 3502 }, { "epoch": 0.52, "learning_rate": 9.901811839402015e-06, "loss": 0.7559, "step": 3503 }, { "epoch": 0.52, "learning_rate": 9.897022338405989e-06, "loss": 0.8223, "step": 3504 }, { "epoch": 0.52, "learning_rate": 9.892232861034728e-06, "loss": 0.8687, "step": 3505 }, { "epoch": 0.52, "learning_rate": 9.887443408387019e-06, "loss": 0.8389, "step": 3506 }, { "epoch": 0.52, "learning_rate": 9.882653981561638e-06, "loss": 0.3694, "step": 3507 }, { "epoch": 0.52, "learning_rate": 9.87786458165736e-06, "loss": 0.8833, "step": 3508 }, { "epoch": 0.52, "learning_rate": 9.87307520977295e-06, "loss": 0.8569, "step": 3509 }, { "epoch": 0.52, "learning_rate": 9.868285867007167e-06, "loss": 0.8291, "step": 3510 }, { "epoch": 0.52, "learning_rate": 9.86349655445877e-06, "loss": 0.8496, "step": 3511 }, { "epoch": 0.52, "learning_rate": 9.858707273226503e-06, "loss": 0.7998, "step": 3512 }, { "epoch": 0.52, "learning_rate": 9.853918024409104e-06, "loss": 0.8335, "step": 3513 }, { "epoch": 0.52, "learning_rate": 9.849128809105309e-06, "loss": 0.8237, "step": 3514 }, { "epoch": 0.52, "learning_rate": 9.844339628413842e-06, "loss": 0.791, "step": 3515 }, { "epoch": 0.52, "learning_rate": 9.839550483433417e-06, "loss": 0.8003, "step": 3516 }, { "epoch": 0.52, "learning_rate": 9.834761375262742e-06, "loss": 0.8604, "step": 3517 }, { "epoch": 0.52, "learning_rate": 9.829972305000519e-06, "loss": 0.792, "step": 3518 }, { "epoch": 0.52, "learning_rate": 9.825183273745442e-06, "loss": 0.8276, "step": 3519 }, { "epoch": 0.52, "learning_rate": 9.820394282596187e-06, "loss": 0.7185, "step": 3520 }, { "epoch": 0.52, "learning_rate": 9.815605332651433e-06, "loss": 0.8159, "step": 3521 }, { "epoch": 0.52, "learning_rate": 9.810816425009838e-06, "loss": 0.814, "step": 3522 }, { "epoch": 0.52, "learning_rate": 9.806027560770061e-06, "loss": 0.8218, "step": 3523 }, { "epoch": 0.52, "learning_rate": 9.801238741030746e-06, "loss": 0.3193, "step": 3524 }, { "epoch": 0.52, "learning_rate": 9.796449966890524e-06, "loss": 0.8267, "step": 3525 }, { "epoch": 0.52, "learning_rate": 9.791661239448018e-06, "loss": 0.7998, "step": 3526 }, { "epoch": 0.52, "learning_rate": 9.786872559801842e-06, "loss": 0.8071, "step": 3527 }, { "epoch": 0.52, "learning_rate": 9.782083929050601e-06, "loss": 0.8755, "step": 3528 }, { "epoch": 0.52, "learning_rate": 9.777295348292879e-06, "loss": 0.8647, "step": 3529 }, { "epoch": 0.52, "learning_rate": 9.772506818627258e-06, "loss": 0.8242, "step": 3530 }, { "epoch": 0.52, "learning_rate": 9.767718341152305e-06, "loss": 0.7915, "step": 3531 }, { "epoch": 0.52, "learning_rate": 9.76292991696658e-06, "loss": 0.8379, "step": 3532 }, { "epoch": 0.52, "learning_rate": 9.75814154716862e-06, "loss": 0.8354, "step": 3533 }, { "epoch": 0.52, "learning_rate": 9.753353232856955e-06, "loss": 0.7393, "step": 3534 }, { "epoch": 0.52, "learning_rate": 9.748564975130106e-06, "loss": 0.8335, "step": 3535 }, { "epoch": 0.52, "learning_rate": 9.74377677508658e-06, "loss": 0.7588, "step": 3536 }, { "epoch": 0.52, "learning_rate": 9.738988633824863e-06, "loss": 0.8657, "step": 3537 }, { "epoch": 0.52, "learning_rate": 9.734200552443437e-06, "loss": 0.771, "step": 3538 }, { "epoch": 0.52, "learning_rate": 9.729412532040766e-06, "loss": 0.8862, "step": 3539 }, { "epoch": 0.52, "learning_rate": 9.724624573715297e-06, "loss": 0.7842, "step": 3540 }, { "epoch": 0.52, "learning_rate": 9.719836678565473e-06, "loss": 0.8398, "step": 3541 }, { "epoch": 0.52, "learning_rate": 9.71504884768971e-06, "loss": 0.8262, "step": 3542 }, { "epoch": 0.52, "learning_rate": 9.710261082186417e-06, "loss": 0.7822, "step": 3543 }, { "epoch": 0.52, "learning_rate": 9.705473383153985e-06, "loss": 0.8413, "step": 3544 }, { "epoch": 0.52, "learning_rate": 9.700685751690794e-06, "loss": 0.8662, "step": 3545 }, { "epoch": 0.52, "learning_rate": 9.695898188895199e-06, "loss": 0.8667, "step": 3546 }, { "epoch": 0.52, "learning_rate": 9.69111069586555e-06, "loss": 0.7832, "step": 3547 }, { "epoch": 0.52, "learning_rate": 9.686323273700176e-06, "loss": 0.8271, "step": 3548 }, { "epoch": 0.52, "learning_rate": 9.681535923497394e-06, "loss": 0.8042, "step": 3549 }, { "epoch": 0.52, "learning_rate": 9.67674864635549e-06, "loss": 0.8135, "step": 3550 }, { "epoch": 0.53, "learning_rate": 9.671961443372752e-06, "loss": 0.8413, "step": 3551 }, { "epoch": 0.53, "learning_rate": 9.667174315647443e-06, "loss": 0.8599, "step": 3552 }, { "epoch": 0.53, "learning_rate": 9.66238726427781e-06, "loss": 0.8315, "step": 3553 }, { "epoch": 0.53, "learning_rate": 9.657600290362073e-06, "loss": 0.8052, "step": 3554 }, { "epoch": 0.53, "learning_rate": 9.652813394998447e-06, "loss": 0.8286, "step": 3555 }, { "epoch": 0.53, "learning_rate": 9.648026579285125e-06, "loss": 0.8081, "step": 3556 }, { "epoch": 0.53, "learning_rate": 9.643239844320283e-06, "loss": 0.813, "step": 3557 }, { "epoch": 0.53, "learning_rate": 9.638453191202076e-06, "loss": 0.8013, "step": 3558 }, { "epoch": 0.53, "learning_rate": 9.633666621028634e-06, "loss": 0.8032, "step": 3559 }, { "epoch": 0.53, "learning_rate": 9.628880134898081e-06, "loss": 0.853, "step": 3560 }, { "epoch": 0.53, "learning_rate": 9.624093733908516e-06, "loss": 0.7432, "step": 3561 }, { "epoch": 0.53, "learning_rate": 9.61930741915802e-06, "loss": 0.8359, "step": 3562 }, { "epoch": 0.53, "learning_rate": 9.614521191744644e-06, "loss": 0.8389, "step": 3563 }, { "epoch": 0.53, "learning_rate": 9.60973505276643e-06, "loss": 0.7998, "step": 3564 }, { "epoch": 0.53, "learning_rate": 9.6049490033214e-06, "loss": 0.3169, "step": 3565 }, { "epoch": 0.53, "learning_rate": 9.600163044507555e-06, "loss": 0.8325, "step": 3566 }, { "epoch": 0.53, "learning_rate": 9.595377177422862e-06, "loss": 0.7852, "step": 3567 }, { "epoch": 0.53, "learning_rate": 9.590591403165285e-06, "loss": 0.7817, "step": 3568 }, { "epoch": 0.53, "learning_rate": 9.585805722832754e-06, "loss": 0.8413, "step": 3569 }, { "epoch": 0.53, "learning_rate": 9.581020137523192e-06, "loss": 0.8657, "step": 3570 }, { "epoch": 0.53, "learning_rate": 9.576234648334486e-06, "loss": 0.8267, "step": 3571 }, { "epoch": 0.53, "learning_rate": 9.571449256364501e-06, "loss": 0.7642, "step": 3572 }, { "epoch": 0.53, "learning_rate": 9.56666396271109e-06, "loss": 0.718, "step": 3573 }, { "epoch": 0.53, "learning_rate": 9.56187876847207e-06, "loss": 0.8076, "step": 3574 }, { "epoch": 0.53, "learning_rate": 9.55709367474526e-06, "loss": 0.8389, "step": 3575 }, { "epoch": 0.53, "learning_rate": 9.552308682628421e-06, "loss": 0.8276, "step": 3576 }, { "epoch": 0.53, "learning_rate": 9.547523793219315e-06, "loss": 0.3435, "step": 3577 }, { "epoch": 0.53, "learning_rate": 9.542739007615676e-06, "loss": 0.8037, "step": 3578 }, { "epoch": 0.53, "learning_rate": 9.537954326915215e-06, "loss": 0.8438, "step": 3579 }, { "epoch": 0.53, "learning_rate": 9.533169752215609e-06, "loss": 0.8408, "step": 3580 }, { "epoch": 0.53, "learning_rate": 9.528385284614523e-06, "loss": 0.3149, "step": 3581 }, { "epoch": 0.53, "learning_rate": 9.52360092520959e-06, "loss": 0.8374, "step": 3582 }, { "epoch": 0.53, "learning_rate": 9.518816675098422e-06, "loss": 0.8354, "step": 3583 }, { "epoch": 0.53, "learning_rate": 9.514032535378604e-06, "loss": 0.8423, "step": 3584 }, { "epoch": 0.53, "learning_rate": 9.509248507147694e-06, "loss": 0.8384, "step": 3585 }, { "epoch": 0.53, "learning_rate": 9.504464591503231e-06, "loss": 0.8525, "step": 3586 }, { "epoch": 0.53, "learning_rate": 9.499680789542719e-06, "loss": 0.8799, "step": 3587 }, { "epoch": 0.53, "learning_rate": 9.494897102363647e-06, "loss": 0.3184, "step": 3588 }, { "epoch": 0.53, "learning_rate": 9.490113531063464e-06, "loss": 0.7788, "step": 3589 }, { "epoch": 0.53, "learning_rate": 9.485330076739604e-06, "loss": 0.6995, "step": 3590 }, { "epoch": 0.53, "learning_rate": 9.480546740489468e-06, "loss": 0.8188, "step": 3591 }, { "epoch": 0.53, "learning_rate": 9.475763523410436e-06, "loss": 0.7559, "step": 3592 }, { "epoch": 0.53, "learning_rate": 9.47098042659985e-06, "loss": 0.8291, "step": 3593 }, { "epoch": 0.53, "learning_rate": 9.466197451155034e-06, "loss": 0.8369, "step": 3594 }, { "epoch": 0.53, "learning_rate": 9.461414598173282e-06, "loss": 0.8462, "step": 3595 }, { "epoch": 0.53, "learning_rate": 9.45663186875186e-06, "loss": 0.8242, "step": 3596 }, { "epoch": 0.53, "learning_rate": 9.451849263988002e-06, "loss": 0.8188, "step": 3597 }, { "epoch": 0.53, "learning_rate": 9.447066784978914e-06, "loss": 0.3431, "step": 3598 }, { "epoch": 0.53, "learning_rate": 9.44228443282178e-06, "loss": 0.835, "step": 3599 }, { "epoch": 0.53, "learning_rate": 9.43750220861375e-06, "loss": 0.8721, "step": 3600 }, { "epoch": 0.53, "learning_rate": 9.43272011345194e-06, "loss": 0.8682, "step": 3601 }, { "epoch": 0.53, "learning_rate": 9.427938148433444e-06, "loss": 0.2971, "step": 3602 }, { "epoch": 0.53, "learning_rate": 9.423156314655324e-06, "loss": 0.8237, "step": 3603 }, { "epoch": 0.53, "learning_rate": 9.41837461321461e-06, "loss": 0.71, "step": 3604 }, { "epoch": 0.53, "learning_rate": 9.413593045208303e-06, "loss": 0.7861, "step": 3605 }, { "epoch": 0.53, "learning_rate": 9.408811611733373e-06, "loss": 0.7671, "step": 3606 }, { "epoch": 0.53, "learning_rate": 9.404030313886758e-06, "loss": 0.749, "step": 3607 }, { "epoch": 0.53, "learning_rate": 9.399249152765369e-06, "loss": 0.856, "step": 3608 }, { "epoch": 0.53, "learning_rate": 9.394468129466083e-06, "loss": 0.8345, "step": 3609 }, { "epoch": 0.53, "learning_rate": 9.38968724508574e-06, "loss": 0.8208, "step": 3610 }, { "epoch": 0.53, "learning_rate": 9.384906500721159e-06, "loss": 0.7944, "step": 3611 }, { "epoch": 0.53, "learning_rate": 9.380125897469116e-06, "loss": 0.8696, "step": 3612 }, { "epoch": 0.53, "learning_rate": 9.375345436426367e-06, "loss": 0.8115, "step": 3613 }, { "epoch": 0.53, "learning_rate": 9.370565118689623e-06, "loss": 0.801, "step": 3614 }, { "epoch": 0.53, "learning_rate": 9.365784945355567e-06, "loss": 0.8037, "step": 3615 }, { "epoch": 0.53, "learning_rate": 9.36100491752085e-06, "loss": 0.8091, "step": 3616 }, { "epoch": 0.53, "learning_rate": 9.35622503628209e-06, "loss": 0.814, "step": 3617 }, { "epoch": 0.54, "learning_rate": 9.351445302735874e-06, "loss": 0.752, "step": 3618 }, { "epoch": 0.54, "learning_rate": 9.346665717978742e-06, "loss": 0.7861, "step": 3619 }, { "epoch": 0.54, "learning_rate": 9.341886283107215e-06, "loss": 0.7886, "step": 3620 }, { "epoch": 0.54, "learning_rate": 9.337106999217772e-06, "loss": 0.8774, "step": 3621 }, { "epoch": 0.54, "learning_rate": 9.332327867406862e-06, "loss": 0.8394, "step": 3622 }, { "epoch": 0.54, "learning_rate": 9.327548888770894e-06, "loss": 0.8213, "step": 3623 }, { "epoch": 0.54, "learning_rate": 9.322770064406244e-06, "loss": 0.8579, "step": 3624 }, { "epoch": 0.54, "learning_rate": 9.317991395409252e-06, "loss": 0.8477, "step": 3625 }, { "epoch": 0.54, "learning_rate": 9.313212882876228e-06, "loss": 0.7646, "step": 3626 }, { "epoch": 0.54, "learning_rate": 9.308434527903432e-06, "loss": 0.7686, "step": 3627 }, { "epoch": 0.54, "learning_rate": 9.303656331587105e-06, "loss": 0.8188, "step": 3628 }, { "epoch": 0.54, "learning_rate": 9.29887829502344e-06, "loss": 0.7886, "step": 3629 }, { "epoch": 0.54, "learning_rate": 9.294100419308603e-06, "loss": 0.7783, "step": 3630 }, { "epoch": 0.54, "learning_rate": 9.289322705538705e-06, "loss": 0.8271, "step": 3631 }, { "epoch": 0.54, "learning_rate": 9.28454515480984e-06, "loss": 0.7854, "step": 3632 }, { "epoch": 0.54, "learning_rate": 9.279767768218058e-06, "loss": 0.3171, "step": 3633 }, { "epoch": 0.54, "learning_rate": 9.274990546859366e-06, "loss": 0.8032, "step": 3634 }, { "epoch": 0.54, "learning_rate": 9.270213491829742e-06, "loss": 0.7432, "step": 3635 }, { "epoch": 0.54, "learning_rate": 9.265436604225111e-06, "loss": 0.791, "step": 3636 }, { "epoch": 0.54, "learning_rate": 9.26065988514138e-06, "loss": 0.8198, "step": 3637 }, { "epoch": 0.54, "learning_rate": 9.2558833356744e-06, "loss": 0.8164, "step": 3638 }, { "epoch": 0.54, "learning_rate": 9.251106956919997e-06, "loss": 0.8711, "step": 3639 }, { "epoch": 0.54, "learning_rate": 9.246330749973943e-06, "loss": 0.8003, "step": 3640 }, { "epoch": 0.54, "learning_rate": 9.241554715931975e-06, "loss": 0.8228, "step": 3641 }, { "epoch": 0.54, "learning_rate": 9.236778855889804e-06, "loss": 0.8115, "step": 3642 }, { "epoch": 0.54, "learning_rate": 9.232003170943091e-06, "loss": 0.7944, "step": 3643 }, { "epoch": 0.54, "learning_rate": 9.227227662187447e-06, "loss": 0.8066, "step": 3644 }, { "epoch": 0.54, "learning_rate": 9.222452330718455e-06, "loss": 0.8403, "step": 3645 }, { "epoch": 0.54, "learning_rate": 9.217677177631657e-06, "loss": 0.8291, "step": 3646 }, { "epoch": 0.54, "learning_rate": 9.212902204022556e-06, "loss": 0.7822, "step": 3647 }, { "epoch": 0.54, "learning_rate": 9.2081274109866e-06, "loss": 0.7705, "step": 3648 }, { "epoch": 0.54, "learning_rate": 9.203352799619207e-06, "loss": 0.8203, "step": 3649 }, { "epoch": 0.54, "learning_rate": 9.198578371015752e-06, "loss": 0.8208, "step": 3650 }, { "epoch": 0.54, "learning_rate": 9.193804126271571e-06, "loss": 0.8066, "step": 3651 }, { "epoch": 0.54, "learning_rate": 9.189030066481956e-06, "loss": 0.7969, "step": 3652 }, { "epoch": 0.54, "learning_rate": 9.184256192742145e-06, "loss": 0.8203, "step": 3653 }, { "epoch": 0.54, "learning_rate": 9.179482506147346e-06, "loss": 0.8149, "step": 3654 }, { "epoch": 0.54, "learning_rate": 9.174709007792723e-06, "loss": 0.8105, "step": 3655 }, { "epoch": 0.54, "learning_rate": 9.169935698773402e-06, "loss": 0.8286, "step": 3656 }, { "epoch": 0.54, "learning_rate": 9.165162580184444e-06, "loss": 0.7896, "step": 3657 }, { "epoch": 0.54, "learning_rate": 9.16038965312089e-06, "loss": 0.8389, "step": 3658 }, { "epoch": 0.54, "learning_rate": 9.155616918677723e-06, "loss": 0.7817, "step": 3659 }, { "epoch": 0.54, "learning_rate": 9.150844377949897e-06, "loss": 0.8296, "step": 3660 }, { "epoch": 0.54, "learning_rate": 9.146072032032298e-06, "loss": 0.8057, "step": 3661 }, { "epoch": 0.54, "learning_rate": 9.141299882019786e-06, "loss": 0.7998, "step": 3662 }, { "epoch": 0.54, "learning_rate": 9.136527929007171e-06, "loss": 0.8242, "step": 3663 }, { "epoch": 0.54, "learning_rate": 9.131756174089217e-06, "loss": 0.7512, "step": 3664 }, { "epoch": 0.54, "learning_rate": 9.126984618360642e-06, "loss": 0.8081, "step": 3665 }, { "epoch": 0.54, "learning_rate": 9.122213262916121e-06, "loss": 0.8267, "step": 3666 }, { "epoch": 0.54, "learning_rate": 9.117442108850278e-06, "loss": 0.8481, "step": 3667 }, { "epoch": 0.54, "learning_rate": 9.112671157257698e-06, "loss": 0.7661, "step": 3668 }, { "epoch": 0.54, "learning_rate": 9.107900409232914e-06, "loss": 0.7891, "step": 3669 }, { "epoch": 0.54, "learning_rate": 9.103129865870411e-06, "loss": 0.8667, "step": 3670 }, { "epoch": 0.54, "learning_rate": 9.098359528264634e-06, "loss": 0.895, "step": 3671 }, { "epoch": 0.54, "learning_rate": 9.093589397509974e-06, "loss": 0.7676, "step": 3672 }, { "epoch": 0.54, "learning_rate": 9.088819474700781e-06, "loss": 0.8062, "step": 3673 }, { "epoch": 0.54, "learning_rate": 9.084049760931349e-06, "loss": 0.8384, "step": 3674 }, { "epoch": 0.54, "learning_rate": 9.07928025729593e-06, "loss": 0.8188, "step": 3675 }, { "epoch": 0.54, "learning_rate": 9.074510964888727e-06, "loss": 0.7944, "step": 3676 }, { "epoch": 0.54, "learning_rate": 9.069741884803897e-06, "loss": 0.3394, "step": 3677 }, { "epoch": 0.54, "learning_rate": 9.06497301813554e-06, "loss": 0.8086, "step": 3678 }, { "epoch": 0.54, "learning_rate": 9.060204365977715e-06, "loss": 0.8789, "step": 3679 }, { "epoch": 0.54, "learning_rate": 9.055435929424425e-06, "loss": 0.7842, "step": 3680 }, { "epoch": 0.54, "learning_rate": 9.050667709569633e-06, "loss": 0.7964, "step": 3681 }, { "epoch": 0.54, "learning_rate": 9.045899707507247e-06, "loss": 0.814, "step": 3682 }, { "epoch": 0.54, "learning_rate": 9.041131924331121e-06, "loss": 0.8105, "step": 3683 }, { "epoch": 0.54, "learning_rate": 9.036364361135066e-06, "loss": 0.8086, "step": 3684 }, { "epoch": 0.54, "learning_rate": 9.031597019012834e-06, "loss": 0.8706, "step": 3685 }, { "epoch": 0.55, "learning_rate": 9.02682989905814e-06, "loss": 0.7715, "step": 3686 }, { "epoch": 0.55, "learning_rate": 9.022063002364631e-06, "loss": 0.7217, "step": 3687 }, { "epoch": 0.55, "learning_rate": 9.017296330025917e-06, "loss": 0.3081, "step": 3688 }, { "epoch": 0.55, "learning_rate": 9.012529883135548e-06, "loss": 0.8506, "step": 3689 }, { "epoch": 0.55, "learning_rate": 9.007763662787028e-06, "loss": 0.7705, "step": 3690 }, { "epoch": 0.55, "learning_rate": 9.0029976700738e-06, "loss": 0.8188, "step": 3691 }, { "epoch": 0.55, "learning_rate": 8.998231906089268e-06, "loss": 0.3029, "step": 3692 }, { "epoch": 0.55, "learning_rate": 8.993466371926773e-06, "loss": 0.8413, "step": 3693 }, { "epoch": 0.55, "learning_rate": 8.98870106867961e-06, "loss": 0.8413, "step": 3694 }, { "epoch": 0.55, "learning_rate": 8.98393599744101e-06, "loss": 0.8652, "step": 3695 }, { "epoch": 0.55, "learning_rate": 8.979171159304166e-06, "loss": 0.7505, "step": 3696 }, { "epoch": 0.55, "learning_rate": 8.974406555362207e-06, "loss": 0.8179, "step": 3697 }, { "epoch": 0.55, "learning_rate": 8.969642186708211e-06, "loss": 0.853, "step": 3698 }, { "epoch": 0.55, "learning_rate": 8.964878054435207e-06, "loss": 0.811, "step": 3699 }, { "epoch": 0.55, "learning_rate": 8.96011415963616e-06, "loss": 0.8174, "step": 3700 }, { "epoch": 0.55, "learning_rate": 8.955350503403985e-06, "loss": 0.8066, "step": 3701 }, { "epoch": 0.55, "learning_rate": 8.950587086831544e-06, "loss": 0.7883, "step": 3702 }, { "epoch": 0.55, "learning_rate": 8.94582391101165e-06, "loss": 0.8364, "step": 3703 }, { "epoch": 0.55, "learning_rate": 8.941060977037042e-06, "loss": 0.793, "step": 3704 }, { "epoch": 0.55, "learning_rate": 8.936298286000421e-06, "loss": 0.8218, "step": 3705 }, { "epoch": 0.55, "learning_rate": 8.931535838994427e-06, "loss": 0.8652, "step": 3706 }, { "epoch": 0.55, "learning_rate": 8.926773637111647e-06, "loss": 0.813, "step": 3707 }, { "epoch": 0.55, "learning_rate": 8.922011681444596e-06, "loss": 0.8271, "step": 3708 }, { "epoch": 0.55, "learning_rate": 8.917249973085757e-06, "loss": 0.8428, "step": 3709 }, { "epoch": 0.55, "learning_rate": 8.912488513127539e-06, "loss": 0.8267, "step": 3710 }, { "epoch": 0.55, "learning_rate": 8.907727302662304e-06, "loss": 0.8247, "step": 3711 }, { "epoch": 0.55, "learning_rate": 8.90296634278234e-06, "loss": 0.8457, "step": 3712 }, { "epoch": 0.55, "learning_rate": 8.898205634579899e-06, "loss": 0.8276, "step": 3713 }, { "epoch": 0.55, "learning_rate": 8.893445179147165e-06, "loss": 0.8091, "step": 3714 }, { "epoch": 0.55, "learning_rate": 8.888684977576262e-06, "loss": 0.8501, "step": 3715 }, { "epoch": 0.55, "learning_rate": 8.883925030959266e-06, "loss": 0.7715, "step": 3716 }, { "epoch": 0.55, "learning_rate": 8.879165340388171e-06, "loss": 0.771, "step": 3717 }, { "epoch": 0.55, "learning_rate": 8.87440590695494e-06, "loss": 0.3491, "step": 3718 }, { "epoch": 0.55, "learning_rate": 8.869646731751463e-06, "loss": 0.853, "step": 3719 }, { "epoch": 0.55, "learning_rate": 8.864887815869577e-06, "loss": 0.3014, "step": 3720 }, { "epoch": 0.55, "learning_rate": 8.860129160401045e-06, "loss": 0.7656, "step": 3721 }, { "epoch": 0.55, "learning_rate": 8.855370766437584e-06, "loss": 0.7744, "step": 3722 }, { "epoch": 0.55, "learning_rate": 8.850612635070852e-06, "loss": 0.7793, "step": 3723 }, { "epoch": 0.55, "learning_rate": 8.845854767392448e-06, "loss": 0.7866, "step": 3724 }, { "epoch": 0.55, "learning_rate": 8.84109716449389e-06, "loss": 0.7856, "step": 3725 }, { "epoch": 0.55, "learning_rate": 8.836339827466656e-06, "loss": 0.7666, "step": 3726 }, { "epoch": 0.55, "learning_rate": 8.831582757402161e-06, "loss": 0.8345, "step": 3727 }, { "epoch": 0.55, "learning_rate": 8.826825955391753e-06, "loss": 0.7412, "step": 3728 }, { "epoch": 0.55, "learning_rate": 8.822069422526724e-06, "loss": 0.8232, "step": 3729 }, { "epoch": 0.55, "learning_rate": 8.817313159898292e-06, "loss": 0.7939, "step": 3730 }, { "epoch": 0.55, "learning_rate": 8.812557168597626e-06, "loss": 0.8462, "step": 3731 }, { "epoch": 0.55, "learning_rate": 8.80780144971583e-06, "loss": 0.7539, "step": 3732 }, { "epoch": 0.55, "learning_rate": 8.803046004343945e-06, "loss": 0.8281, "step": 3733 }, { "epoch": 0.55, "learning_rate": 8.798290833572944e-06, "loss": 0.9131, "step": 3734 }, { "epoch": 0.55, "learning_rate": 8.793535938493743e-06, "loss": 0.8057, "step": 3735 }, { "epoch": 0.55, "learning_rate": 8.78878132019719e-06, "loss": 0.8008, "step": 3736 }, { "epoch": 0.55, "learning_rate": 8.784026979774083e-06, "loss": 0.8101, "step": 3737 }, { "epoch": 0.55, "learning_rate": 8.779272918315135e-06, "loss": 0.7686, "step": 3738 }, { "epoch": 0.55, "learning_rate": 8.774519136911006e-06, "loss": 0.7734, "step": 3739 }, { "epoch": 0.55, "learning_rate": 8.769765636652296e-06, "loss": 0.8872, "step": 3740 }, { "epoch": 0.55, "learning_rate": 8.765012418629537e-06, "loss": 0.791, "step": 3741 }, { "epoch": 0.55, "learning_rate": 8.760259483933188e-06, "loss": 0.8398, "step": 3742 }, { "epoch": 0.55, "learning_rate": 8.755506833653658e-06, "loss": 0.8481, "step": 3743 }, { "epoch": 0.55, "learning_rate": 8.750754468881278e-06, "loss": 0.7866, "step": 3744 }, { "epoch": 0.55, "learning_rate": 8.746002390706318e-06, "loss": 0.77, "step": 3745 }, { "epoch": 0.55, "learning_rate": 8.741250600218992e-06, "loss": 0.7515, "step": 3746 }, { "epoch": 0.55, "learning_rate": 8.736499098509428e-06, "loss": 0.7505, "step": 3747 }, { "epoch": 0.55, "learning_rate": 8.731747886667701e-06, "loss": 0.3123, "step": 3748 }, { "epoch": 0.55, "learning_rate": 8.726996965783818e-06, "loss": 0.7861, "step": 3749 }, { "epoch": 0.55, "learning_rate": 8.722246336947721e-06, "loss": 0.7891, "step": 3750 }, { "epoch": 0.55, "learning_rate": 8.717496001249275e-06, "loss": 0.752, "step": 3751 }, { "epoch": 0.55, "learning_rate": 8.712745959778293e-06, "loss": 0.7751, "step": 3752 }, { "epoch": 0.56, "learning_rate": 8.707996213624507e-06, "loss": 0.8237, "step": 3753 }, { "epoch": 0.56, "learning_rate": 8.70324676387759e-06, "loss": 0.8096, "step": 3754 }, { "epoch": 0.56, "learning_rate": 8.698497611627141e-06, "loss": 0.3394, "step": 3755 }, { "epoch": 0.56, "learning_rate": 8.693748757962694e-06, "loss": 0.8232, "step": 3756 }, { "epoch": 0.56, "learning_rate": 8.689000203973715e-06, "loss": 0.7861, "step": 3757 }, { "epoch": 0.56, "learning_rate": 8.684251950749603e-06, "loss": 0.8096, "step": 3758 }, { "epoch": 0.56, "learning_rate": 8.679503999379679e-06, "loss": 0.7773, "step": 3759 }, { "epoch": 0.56, "learning_rate": 8.674756350953202e-06, "loss": 0.7788, "step": 3760 }, { "epoch": 0.56, "learning_rate": 8.670009006559365e-06, "loss": 0.7788, "step": 3761 }, { "epoch": 0.56, "learning_rate": 8.665261967287284e-06, "loss": 0.7578, "step": 3762 }, { "epoch": 0.56, "learning_rate": 8.660515234226008e-06, "loss": 0.8516, "step": 3763 }, { "epoch": 0.56, "learning_rate": 8.655768808464515e-06, "loss": 0.8071, "step": 3764 }, { "epoch": 0.56, "learning_rate": 8.651022691091713e-06, "loss": 0.7715, "step": 3765 }, { "epoch": 0.56, "learning_rate": 8.646276883196438e-06, "loss": 0.7891, "step": 3766 }, { "epoch": 0.56, "learning_rate": 8.641531385867462e-06, "loss": 0.7769, "step": 3767 }, { "epoch": 0.56, "learning_rate": 8.636786200193472e-06, "loss": 0.8433, "step": 3768 }, { "epoch": 0.56, "learning_rate": 8.632041327263094e-06, "loss": 0.8389, "step": 3769 }, { "epoch": 0.56, "learning_rate": 8.627296768164883e-06, "loss": 0.812, "step": 3770 }, { "epoch": 0.56, "learning_rate": 8.622552523987317e-06, "loss": 0.7725, "step": 3771 }, { "epoch": 0.56, "learning_rate": 8.617808595818802e-06, "loss": 0.7739, "step": 3772 }, { "epoch": 0.56, "learning_rate": 8.613064984747672e-06, "loss": 0.8184, "step": 3773 }, { "epoch": 0.56, "learning_rate": 8.608321691862193e-06, "loss": 0.8057, "step": 3774 }, { "epoch": 0.56, "learning_rate": 8.60357871825055e-06, "loss": 0.8555, "step": 3775 }, { "epoch": 0.56, "learning_rate": 8.598836065000864e-06, "loss": 0.7915, "step": 3776 }, { "epoch": 0.56, "learning_rate": 8.594093733201174e-06, "loss": 0.7661, "step": 3777 }, { "epoch": 0.56, "learning_rate": 8.589351723939448e-06, "loss": 0.855, "step": 3778 }, { "epoch": 0.56, "learning_rate": 8.584610038303584e-06, "loss": 0.7915, "step": 3779 }, { "epoch": 0.56, "learning_rate": 8.5798686773814e-06, "loss": 0.8076, "step": 3780 }, { "epoch": 0.56, "learning_rate": 8.575127642260643e-06, "loss": 0.6558, "step": 3781 }, { "epoch": 0.56, "learning_rate": 8.570386934028982e-06, "loss": 0.8389, "step": 3782 }, { "epoch": 0.56, "learning_rate": 8.565646553774016e-06, "loss": 0.8179, "step": 3783 }, { "epoch": 0.56, "learning_rate": 8.560906502583268e-06, "loss": 0.8677, "step": 3784 }, { "epoch": 0.56, "learning_rate": 8.556166781544178e-06, "loss": 0.8296, "step": 3785 }, { "epoch": 0.56, "learning_rate": 8.551427391744118e-06, "loss": 0.8042, "step": 3786 }, { "epoch": 0.56, "learning_rate": 8.546688334270381e-06, "loss": 0.8091, "step": 3787 }, { "epoch": 0.56, "learning_rate": 8.541949610210193e-06, "loss": 0.8159, "step": 3788 }, { "epoch": 0.56, "learning_rate": 8.53721122065068e-06, "loss": 0.8174, "step": 3789 }, { "epoch": 0.56, "learning_rate": 8.532473166678916e-06, "loss": 0.8237, "step": 3790 }, { "epoch": 0.56, "learning_rate": 8.527735449381887e-06, "loss": 0.3496, "step": 3791 }, { "epoch": 0.56, "learning_rate": 8.5229980698465e-06, "loss": 0.8262, "step": 3792 }, { "epoch": 0.56, "learning_rate": 8.518261029159596e-06, "loss": 0.854, "step": 3793 }, { "epoch": 0.56, "learning_rate": 8.51352432840792e-06, "loss": 0.3113, "step": 3794 }, { "epoch": 0.56, "learning_rate": 8.508787968678153e-06, "loss": 0.7935, "step": 3795 }, { "epoch": 0.56, "learning_rate": 8.504051951056894e-06, "loss": 0.8599, "step": 3796 }, { "epoch": 0.56, "learning_rate": 8.499316276630666e-06, "loss": 0.8647, "step": 3797 }, { "epoch": 0.56, "learning_rate": 8.4945809464859e-06, "loss": 0.8643, "step": 3798 }, { "epoch": 0.56, "learning_rate": 8.48984596170897e-06, "loss": 0.8071, "step": 3799 }, { "epoch": 0.56, "learning_rate": 8.485111323386152e-06, "loss": 0.8755, "step": 3800 }, { "epoch": 0.56, "learning_rate": 8.480377032603658e-06, "loss": 0.8101, "step": 3801 }, { "epoch": 0.56, "learning_rate": 8.475643090447603e-06, "loss": 0.8247, "step": 3802 }, { "epoch": 0.56, "learning_rate": 8.470909498004031e-06, "loss": 0.8398, "step": 3803 }, { "epoch": 0.56, "learning_rate": 8.46617625635891e-06, "loss": 0.854, "step": 3804 }, { "epoch": 0.56, "learning_rate": 8.46144336659813e-06, "loss": 0.769, "step": 3805 }, { "epoch": 0.56, "learning_rate": 8.456710829807479e-06, "loss": 0.7974, "step": 3806 }, { "epoch": 0.56, "learning_rate": 8.451978647072688e-06, "loss": 0.8115, "step": 3807 }, { "epoch": 0.56, "learning_rate": 8.44724681947939e-06, "loss": 0.8408, "step": 3808 }, { "epoch": 0.56, "learning_rate": 8.442515348113152e-06, "loss": 0.7407, "step": 3809 }, { "epoch": 0.56, "learning_rate": 8.437784234059454e-06, "loss": 0.7832, "step": 3810 }, { "epoch": 0.56, "learning_rate": 8.433053478403679e-06, "loss": 0.8516, "step": 3811 }, { "epoch": 0.56, "learning_rate": 8.428323082231144e-06, "loss": 0.8574, "step": 3812 }, { "epoch": 0.56, "learning_rate": 8.423593046627086e-06, "loss": 0.8472, "step": 3813 }, { "epoch": 0.56, "learning_rate": 8.418863372676652e-06, "loss": 0.8193, "step": 3814 }, { "epoch": 0.56, "learning_rate": 8.414134061464898e-06, "loss": 0.8555, "step": 3815 }, { "epoch": 0.56, "learning_rate": 8.40940511407681e-06, "loss": 0.8042, "step": 3816 }, { "epoch": 0.56, "learning_rate": 8.404676531597285e-06, "loss": 0.7905, "step": 3817 }, { "epoch": 0.56, "learning_rate": 8.399948315111148e-06, "loss": 0.811, "step": 3818 }, { "epoch": 0.56, "learning_rate": 8.395220465703116e-06, "loss": 0.8081, "step": 3819 }, { "epoch": 0.56, "learning_rate": 8.39049298445784e-06, "loss": 0.7944, "step": 3820 }, { "epoch": 0.57, "learning_rate": 8.38576587245988e-06, "loss": 0.8027, "step": 3821 }, { "epoch": 0.57, "learning_rate": 8.381039130793718e-06, "loss": 0.8159, "step": 3822 }, { "epoch": 0.57, "learning_rate": 8.37631276054374e-06, "loss": 0.8096, "step": 3823 }, { "epoch": 0.57, "learning_rate": 8.371586762794257e-06, "loss": 0.3267, "step": 3824 }, { "epoch": 0.57, "learning_rate": 8.36686113862949e-06, "loss": 0.8765, "step": 3825 }, { "epoch": 0.57, "learning_rate": 8.362135889133571e-06, "loss": 0.7817, "step": 3826 }, { "epoch": 0.57, "learning_rate": 8.357411015390559e-06, "loss": 0.812, "step": 3827 }, { "epoch": 0.57, "learning_rate": 8.352686518484407e-06, "loss": 0.8013, "step": 3828 }, { "epoch": 0.57, "learning_rate": 8.347962399498996e-06, "loss": 0.7783, "step": 3829 }, { "epoch": 0.57, "learning_rate": 8.343238659518119e-06, "loss": 0.7922, "step": 3830 }, { "epoch": 0.57, "learning_rate": 8.338515299625477e-06, "loss": 0.8281, "step": 3831 }, { "epoch": 0.57, "learning_rate": 8.333792320904686e-06, "loss": 0.7485, "step": 3832 }, { "epoch": 0.57, "learning_rate": 8.329069724439273e-06, "loss": 0.8389, "step": 3833 }, { "epoch": 0.57, "learning_rate": 8.324347511312685e-06, "loss": 0.8237, "step": 3834 }, { "epoch": 0.57, "learning_rate": 8.319625682608273e-06, "loss": 0.8652, "step": 3835 }, { "epoch": 0.57, "learning_rate": 8.314904239409295e-06, "loss": 0.7939, "step": 3836 }, { "epoch": 0.57, "learning_rate": 8.310183182798937e-06, "loss": 0.8359, "step": 3837 }, { "epoch": 0.57, "learning_rate": 8.305462513860282e-06, "loss": 0.3152, "step": 3838 }, { "epoch": 0.57, "learning_rate": 8.300742233676329e-06, "loss": 0.8218, "step": 3839 }, { "epoch": 0.57, "learning_rate": 8.29602234332999e-06, "loss": 0.7896, "step": 3840 }, { "epoch": 0.57, "learning_rate": 8.291302843904086e-06, "loss": 0.7861, "step": 3841 }, { "epoch": 0.57, "learning_rate": 8.286583736481342e-06, "loss": 0.7651, "step": 3842 }, { "epoch": 0.57, "learning_rate": 8.281865022144403e-06, "loss": 0.8096, "step": 3843 }, { "epoch": 0.57, "learning_rate": 8.27714670197582e-06, "loss": 0.8501, "step": 3844 }, { "epoch": 0.57, "learning_rate": 8.272428777058052e-06, "loss": 0.8511, "step": 3845 }, { "epoch": 0.57, "learning_rate": 8.267711248473465e-06, "loss": 0.8076, "step": 3846 }, { "epoch": 0.57, "learning_rate": 8.262994117304343e-06, "loss": 0.7808, "step": 3847 }, { "epoch": 0.57, "learning_rate": 8.258277384632873e-06, "loss": 0.8447, "step": 3848 }, { "epoch": 0.57, "learning_rate": 8.253561051541145e-06, "loss": 0.8193, "step": 3849 }, { "epoch": 0.57, "learning_rate": 8.248845119111168e-06, "loss": 0.8623, "step": 3850 }, { "epoch": 0.57, "learning_rate": 8.244129588424855e-06, "loss": 0.8223, "step": 3851 }, { "epoch": 0.57, "learning_rate": 8.239414460564024e-06, "loss": 0.8125, "step": 3852 }, { "epoch": 0.57, "learning_rate": 8.234699736610402e-06, "loss": 0.7852, "step": 3853 }, { "epoch": 0.57, "learning_rate": 8.229985417645624e-06, "loss": 0.8779, "step": 3854 }, { "epoch": 0.57, "learning_rate": 8.225271504751232e-06, "loss": 0.814, "step": 3855 }, { "epoch": 0.57, "learning_rate": 8.22055799900868e-06, "loss": 0.7993, "step": 3856 }, { "epoch": 0.57, "learning_rate": 8.21584490149932e-06, "loss": 0.8188, "step": 3857 }, { "epoch": 0.57, "learning_rate": 8.211132213304412e-06, "loss": 0.8198, "step": 3858 }, { "epoch": 0.57, "learning_rate": 8.206419935505125e-06, "loss": 0.8013, "step": 3859 }, { "epoch": 0.57, "learning_rate": 8.201708069182534e-06, "loss": 0.8047, "step": 3860 }, { "epoch": 0.57, "learning_rate": 8.19699661541762e-06, "loss": 0.7544, "step": 3861 }, { "epoch": 0.57, "learning_rate": 8.192285575291263e-06, "loss": 0.7983, "step": 3862 }, { "epoch": 0.57, "learning_rate": 8.187574949884256e-06, "loss": 0.8247, "step": 3863 }, { "epoch": 0.57, "learning_rate": 8.182864740277293e-06, "loss": 0.9131, "step": 3864 }, { "epoch": 0.57, "learning_rate": 8.178154947550976e-06, "loss": 0.8354, "step": 3865 }, { "epoch": 0.57, "learning_rate": 8.173445572785805e-06, "loss": 0.7305, "step": 3866 }, { "epoch": 0.57, "learning_rate": 8.168736617062188e-06, "loss": 0.8066, "step": 3867 }, { "epoch": 0.57, "learning_rate": 8.16402808146044e-06, "loss": 0.7456, "step": 3868 }, { "epoch": 0.57, "learning_rate": 8.159319967060777e-06, "loss": 0.8174, "step": 3869 }, { "epoch": 0.57, "learning_rate": 8.15461227494331e-06, "loss": 0.8467, "step": 3870 }, { "epoch": 0.57, "learning_rate": 8.149905006188067e-06, "loss": 0.8335, "step": 3871 }, { "epoch": 0.57, "learning_rate": 8.145198161874971e-06, "loss": 0.8267, "step": 3872 }, { "epoch": 0.57, "learning_rate": 8.14049174308385e-06, "loss": 0.8345, "step": 3873 }, { "epoch": 0.57, "learning_rate": 8.13578575089444e-06, "loss": 0.8193, "step": 3874 }, { "epoch": 0.57, "learning_rate": 8.131080186386357e-06, "loss": 0.8936, "step": 3875 }, { "epoch": 0.57, "learning_rate": 8.126375050639149e-06, "loss": 0.7944, "step": 3876 }, { "epoch": 0.57, "learning_rate": 8.121670344732244e-06, "loss": 0.7881, "step": 3877 }, { "epoch": 0.57, "learning_rate": 8.116966069744987e-06, "loss": 0.8516, "step": 3878 }, { "epoch": 0.57, "learning_rate": 8.112262226756603e-06, "loss": 0.8047, "step": 3879 }, { "epoch": 0.57, "learning_rate": 8.10755881684624e-06, "loss": 0.8242, "step": 3880 }, { "epoch": 0.57, "learning_rate": 8.102855841092934e-06, "loss": 0.7651, "step": 3881 }, { "epoch": 0.57, "learning_rate": 8.098153300575633e-06, "loss": 0.8281, "step": 3882 }, { "epoch": 0.57, "learning_rate": 8.093451196373163e-06, "loss": 0.7451, "step": 3883 }, { "epoch": 0.57, "learning_rate": 8.08874952956427e-06, "loss": 0.7578, "step": 3884 }, { "epoch": 0.57, "learning_rate": 8.084048301227597e-06, "loss": 0.7578, "step": 3885 }, { "epoch": 0.57, "learning_rate": 8.07934751244168e-06, "loss": 0.7798, "step": 3886 }, { "epoch": 0.57, "learning_rate": 8.074647164284962e-06, "loss": 0.79, "step": 3887 }, { "epoch": 0.57, "learning_rate": 8.069947257835768e-06, "loss": 0.9038, "step": 3888 }, { "epoch": 0.58, "learning_rate": 8.065247794172342e-06, "loss": 0.7568, "step": 3889 }, { "epoch": 0.58, "learning_rate": 8.060548774372818e-06, "loss": 0.7397, "step": 3890 }, { "epoch": 0.58, "learning_rate": 8.055850199515231e-06, "loss": 0.834, "step": 3891 }, { "epoch": 0.58, "learning_rate": 8.051152070677504e-06, "loss": 0.7773, "step": 3892 }, { "epoch": 0.58, "learning_rate": 8.046454388937466e-06, "loss": 0.7866, "step": 3893 }, { "epoch": 0.58, "learning_rate": 8.041757155372846e-06, "loss": 0.8027, "step": 3894 }, { "epoch": 0.58, "learning_rate": 8.03706037106127e-06, "loss": 0.897, "step": 3895 }, { "epoch": 0.58, "learning_rate": 8.032364037080247e-06, "loss": 0.771, "step": 3896 }, { "epoch": 0.58, "learning_rate": 8.0276681545072e-06, "loss": 0.7876, "step": 3897 }, { "epoch": 0.58, "learning_rate": 8.022972724419437e-06, "loss": 0.8555, "step": 3898 }, { "epoch": 0.58, "learning_rate": 8.018277747894178e-06, "loss": 0.3267, "step": 3899 }, { "epoch": 0.58, "learning_rate": 8.013583226008512e-06, "loss": 0.7886, "step": 3900 }, { "epoch": 0.58, "learning_rate": 8.00888915983945e-06, "loss": 0.8086, "step": 3901 }, { "epoch": 0.58, "learning_rate": 8.004195550463882e-06, "loss": 0.8496, "step": 3902 }, { "epoch": 0.58, "learning_rate": 7.999502398958599e-06, "loss": 0.8638, "step": 3903 }, { "epoch": 0.58, "learning_rate": 7.994809706400297e-06, "loss": 0.8472, "step": 3904 }, { "epoch": 0.58, "learning_rate": 7.990117473865543e-06, "loss": 0.8276, "step": 3905 }, { "epoch": 0.58, "learning_rate": 7.985425702430821e-06, "loss": 0.793, "step": 3906 }, { "epoch": 0.58, "learning_rate": 7.980734393172495e-06, "loss": 0.8086, "step": 3907 }, { "epoch": 0.58, "learning_rate": 7.976043547166835e-06, "loss": 0.8057, "step": 3908 }, { "epoch": 0.58, "learning_rate": 7.971353165489991e-06, "loss": 0.3077, "step": 3909 }, { "epoch": 0.58, "learning_rate": 7.966663249218018e-06, "loss": 0.8101, "step": 3910 }, { "epoch": 0.58, "learning_rate": 7.961973799426857e-06, "loss": 0.7837, "step": 3911 }, { "epoch": 0.58, "learning_rate": 7.957284817192346e-06, "loss": 0.8291, "step": 3912 }, { "epoch": 0.58, "learning_rate": 7.952596303590215e-06, "loss": 0.7769, "step": 3913 }, { "epoch": 0.58, "learning_rate": 7.947908259696084e-06, "loss": 0.7949, "step": 3914 }, { "epoch": 0.58, "learning_rate": 7.94322068658547e-06, "loss": 0.8511, "step": 3915 }, { "epoch": 0.58, "learning_rate": 7.938533585333777e-06, "loss": 0.7798, "step": 3916 }, { "epoch": 0.58, "learning_rate": 7.933846957016304e-06, "loss": 0.7334, "step": 3917 }, { "epoch": 0.58, "learning_rate": 7.929160802708239e-06, "loss": 0.8242, "step": 3918 }, { "epoch": 0.58, "learning_rate": 7.924475123484664e-06, "loss": 0.8442, "step": 3919 }, { "epoch": 0.58, "learning_rate": 7.91978992042055e-06, "loss": 0.7734, "step": 3920 }, { "epoch": 0.58, "learning_rate": 7.915105194590763e-06, "loss": 0.8535, "step": 3921 }, { "epoch": 0.58, "learning_rate": 7.91042094707005e-06, "loss": 0.8638, "step": 3922 }, { "epoch": 0.58, "learning_rate": 7.905737178933058e-06, "loss": 0.812, "step": 3923 }, { "epoch": 0.58, "learning_rate": 7.901053891254318e-06, "loss": 0.8335, "step": 3924 }, { "epoch": 0.58, "learning_rate": 7.896371085108258e-06, "loss": 0.7759, "step": 3925 }, { "epoch": 0.58, "learning_rate": 7.891688761569185e-06, "loss": 0.8584, "step": 3926 }, { "epoch": 0.58, "learning_rate": 7.887006921711301e-06, "loss": 0.8584, "step": 3927 }, { "epoch": 0.58, "learning_rate": 7.8823255666087e-06, "loss": 0.8237, "step": 3928 }, { "epoch": 0.58, "learning_rate": 7.877644697335362e-06, "loss": 0.7598, "step": 3929 }, { "epoch": 0.58, "learning_rate": 7.872964314965151e-06, "loss": 0.8389, "step": 3930 }, { "epoch": 0.58, "learning_rate": 7.868284420571826e-06, "loss": 0.8262, "step": 3931 }, { "epoch": 0.58, "learning_rate": 7.863605015229033e-06, "loss": 0.7905, "step": 3932 }, { "epoch": 0.58, "learning_rate": 7.858926100010303e-06, "loss": 0.8691, "step": 3933 }, { "epoch": 0.58, "learning_rate": 7.854247675989057e-06, "loss": 0.7808, "step": 3934 }, { "epoch": 0.58, "learning_rate": 7.8495697442386e-06, "loss": 0.7856, "step": 3935 }, { "epoch": 0.58, "learning_rate": 7.844892305832126e-06, "loss": 0.8079, "step": 3936 }, { "epoch": 0.58, "learning_rate": 7.840215361842718e-06, "loss": 0.9312, "step": 3937 }, { "epoch": 0.58, "learning_rate": 7.835538913343347e-06, "loss": 0.8457, "step": 3938 }, { "epoch": 0.58, "learning_rate": 7.83086296140686e-06, "loss": 0.8057, "step": 3939 }, { "epoch": 0.58, "learning_rate": 7.826187507106e-06, "loss": 0.7964, "step": 3940 }, { "epoch": 0.58, "learning_rate": 7.821512551513395e-06, "loss": 0.8242, "step": 3941 }, { "epoch": 0.58, "learning_rate": 7.816838095701555e-06, "loss": 0.8066, "step": 3942 }, { "epoch": 0.58, "learning_rate": 7.812164140742875e-06, "loss": 0.769, "step": 3943 }, { "epoch": 0.58, "learning_rate": 7.80749068770964e-06, "loss": 0.8057, "step": 3944 }, { "epoch": 0.58, "learning_rate": 7.802817737674015e-06, "loss": 0.7188, "step": 3945 }, { "epoch": 0.58, "learning_rate": 7.798145291708054e-06, "loss": 0.8223, "step": 3946 }, { "epoch": 0.58, "learning_rate": 7.793473350883688e-06, "loss": 0.8271, "step": 3947 }, { "epoch": 0.58, "learning_rate": 7.788801916272739e-06, "loss": 0.7788, "step": 3948 }, { "epoch": 0.58, "learning_rate": 7.784130988946912e-06, "loss": 0.8096, "step": 3949 }, { "epoch": 0.58, "learning_rate": 7.779460569977796e-06, "loss": 0.8198, "step": 3950 }, { "epoch": 0.58, "learning_rate": 7.774790660436857e-06, "loss": 0.8115, "step": 3951 }, { "epoch": 0.58, "learning_rate": 7.770121261395451e-06, "loss": 0.7693, "step": 3952 }, { "epoch": 0.58, "learning_rate": 7.765452373924816e-06, "loss": 0.8291, "step": 3953 }, { "epoch": 0.58, "learning_rate": 7.76078399909607e-06, "loss": 0.7827, "step": 3954 }, { "epoch": 0.58, "learning_rate": 7.75611613798022e-06, "loss": 0.7676, "step": 3955 }, { "epoch": 0.59, "learning_rate": 7.751448791648136e-06, "loss": 0.8208, "step": 3956 }, { "epoch": 0.59, "learning_rate": 7.746781961170598e-06, "loss": 0.7583, "step": 3957 }, { "epoch": 0.59, "learning_rate": 7.742115647618246e-06, "loss": 0.853, "step": 3958 }, { "epoch": 0.59, "learning_rate": 7.737449852061618e-06, "loss": 0.877, "step": 3959 }, { "epoch": 0.59, "learning_rate": 7.73278457557111e-06, "loss": 0.7881, "step": 3960 }, { "epoch": 0.59, "learning_rate": 7.728119819217022e-06, "loss": 0.7998, "step": 3961 }, { "epoch": 0.59, "learning_rate": 7.723455584069524e-06, "loss": 0.8457, "step": 3962 }, { "epoch": 0.59, "learning_rate": 7.718791871198674e-06, "loss": 0.8242, "step": 3963 }, { "epoch": 0.59, "learning_rate": 7.714128681674392e-06, "loss": 0.8101, "step": 3964 }, { "epoch": 0.59, "learning_rate": 7.709466016566496e-06, "loss": 0.8228, "step": 3965 }, { "epoch": 0.59, "learning_rate": 7.70480387694468e-06, "loss": 0.8369, "step": 3966 }, { "epoch": 0.59, "learning_rate": 7.700142263878513e-06, "loss": 0.8457, "step": 3967 }, { "epoch": 0.59, "learning_rate": 7.69548117843745e-06, "loss": 0.3354, "step": 3968 }, { "epoch": 0.59, "learning_rate": 7.690820621690815e-06, "loss": 0.7959, "step": 3969 }, { "epoch": 0.59, "learning_rate": 7.686160594707814e-06, "loss": 0.8037, "step": 3970 }, { "epoch": 0.59, "learning_rate": 7.68150109855754e-06, "loss": 0.8037, "step": 3971 }, { "epoch": 0.59, "learning_rate": 7.67684213430896e-06, "loss": 0.8203, "step": 3972 }, { "epoch": 0.59, "learning_rate": 7.672183703030906e-06, "loss": 0.7852, "step": 3973 }, { "epoch": 0.59, "learning_rate": 7.667525805792108e-06, "loss": 0.7656, "step": 3974 }, { "epoch": 0.59, "learning_rate": 7.662868443661157e-06, "loss": 0.7268, "step": 3975 }, { "epoch": 0.59, "learning_rate": 7.65821161770654e-06, "loss": 0.8223, "step": 3976 }, { "epoch": 0.59, "learning_rate": 7.653555328996595e-06, "loss": 0.8467, "step": 3977 }, { "epoch": 0.59, "learning_rate": 7.648899578599558e-06, "loss": 0.8159, "step": 3978 }, { "epoch": 0.59, "learning_rate": 7.644244367583533e-06, "loss": 0.7886, "step": 3979 }, { "epoch": 0.59, "learning_rate": 7.639589697016508e-06, "loss": 0.7583, "step": 3980 }, { "epoch": 0.59, "learning_rate": 7.634935567966333e-06, "loss": 0.8208, "step": 3981 }, { "epoch": 0.59, "learning_rate": 7.630281981500742e-06, "loss": 0.7612, "step": 3982 }, { "epoch": 0.59, "learning_rate": 7.625628938687349e-06, "loss": 0.8052, "step": 3983 }, { "epoch": 0.59, "learning_rate": 7.620976440593632e-06, "loss": 0.834, "step": 3984 }, { "epoch": 0.59, "learning_rate": 7.616324488286959e-06, "loss": 0.8081, "step": 3985 }, { "epoch": 0.59, "learning_rate": 7.611673082834556e-06, "loss": 0.7622, "step": 3986 }, { "epoch": 0.59, "learning_rate": 7.607022225303537e-06, "loss": 0.8237, "step": 3987 }, { "epoch": 0.59, "learning_rate": 7.602371916760881e-06, "loss": 0.834, "step": 3988 }, { "epoch": 0.59, "learning_rate": 7.59772215827345e-06, "loss": 0.7148, "step": 3989 }, { "epoch": 0.59, "learning_rate": 7.593072950907969e-06, "loss": 0.8174, "step": 3990 }, { "epoch": 0.59, "learning_rate": 7.588424295731045e-06, "loss": 0.771, "step": 3991 }, { "epoch": 0.59, "learning_rate": 7.583776193809158e-06, "loss": 0.7412, "step": 3992 }, { "epoch": 0.59, "learning_rate": 7.579128646208658e-06, "loss": 0.7583, "step": 3993 }, { "epoch": 0.59, "learning_rate": 7.5744816539957655e-06, "loss": 0.7954, "step": 3994 }, { "epoch": 0.59, "learning_rate": 7.569835218236578e-06, "loss": 0.8247, "step": 3995 }, { "epoch": 0.59, "learning_rate": 7.5651893399970674e-06, "loss": 0.8691, "step": 3996 }, { "epoch": 0.59, "learning_rate": 7.560544020343071e-06, "loss": 0.7529, "step": 3997 }, { "epoch": 0.59, "learning_rate": 7.555899260340305e-06, "loss": 0.8057, "step": 3998 }, { "epoch": 0.59, "learning_rate": 7.551255061054348e-06, "loss": 0.7549, "step": 3999 }, { "epoch": 0.59, "learning_rate": 7.546611423550658e-06, "loss": 0.8354, "step": 4000 }, { "epoch": 0.59, "learning_rate": 7.541968348894564e-06, "loss": 0.7759, "step": 4001 }, { "epoch": 0.59, "learning_rate": 7.537325838151263e-06, "loss": 0.8228, "step": 4002 }, { "epoch": 0.59, "learning_rate": 7.53268389238582e-06, "loss": 0.8057, "step": 4003 }, { "epoch": 0.59, "learning_rate": 7.528042512663174e-06, "loss": 0.7495, "step": 4004 }, { "epoch": 0.59, "learning_rate": 7.523401700048135e-06, "loss": 0.8525, "step": 4005 }, { "epoch": 0.59, "learning_rate": 7.518761455605384e-06, "loss": 0.8071, "step": 4006 }, { "epoch": 0.59, "learning_rate": 7.5141217803994645e-06, "loss": 0.8013, "step": 4007 }, { "epoch": 0.59, "learning_rate": 7.509482675494797e-06, "loss": 0.8203, "step": 4008 }, { "epoch": 0.59, "learning_rate": 7.504844141955667e-06, "loss": 0.8188, "step": 4009 }, { "epoch": 0.59, "learning_rate": 7.500206180846235e-06, "loss": 0.8091, "step": 4010 }, { "epoch": 0.59, "learning_rate": 7.495568793230516e-06, "loss": 0.7524, "step": 4011 }, { "epoch": 0.59, "learning_rate": 7.490931980172409e-06, "loss": 0.793, "step": 4012 }, { "epoch": 0.59, "learning_rate": 7.486295742735674e-06, "loss": 0.7866, "step": 4013 }, { "epoch": 0.59, "learning_rate": 7.481660081983942e-06, "loss": 0.8306, "step": 4014 }, { "epoch": 0.59, "learning_rate": 7.477024998980707e-06, "loss": 0.2963, "step": 4015 }, { "epoch": 0.59, "learning_rate": 7.4723904947893325e-06, "loss": 0.7969, "step": 4016 }, { "epoch": 0.59, "learning_rate": 7.4677565704730515e-06, "loss": 0.8594, "step": 4017 }, { "epoch": 0.59, "learning_rate": 7.463123227094962e-06, "loss": 0.8198, "step": 4018 }, { "epoch": 0.59, "learning_rate": 7.458490465718029e-06, "loss": 0.8296, "step": 4019 }, { "epoch": 0.59, "learning_rate": 7.453858287405082e-06, "loss": 0.8223, "step": 4020 }, { "epoch": 0.59, "learning_rate": 7.449226693218823e-06, "loss": 0.7983, "step": 4021 }, { "epoch": 0.59, "learning_rate": 7.444595684221811e-06, "loss": 0.7217, "step": 4022 }, { "epoch": 0.59, "learning_rate": 7.43996526147648e-06, "loss": 0.8237, "step": 4023 }, { "epoch": 0.6, "learning_rate": 7.435335426045121e-06, "loss": 0.8047, "step": 4024 }, { "epoch": 0.6, "learning_rate": 7.430706178989895e-06, "loss": 0.8438, "step": 4025 }, { "epoch": 0.6, "learning_rate": 7.4260775213728275e-06, "loss": 0.8047, "step": 4026 }, { "epoch": 0.6, "learning_rate": 7.421449454255814e-06, "loss": 0.8247, "step": 4027 }, { "epoch": 0.6, "learning_rate": 7.416821978700597e-06, "loss": 0.8237, "step": 4028 }, { "epoch": 0.6, "learning_rate": 7.412195095768805e-06, "loss": 0.3191, "step": 4029 }, { "epoch": 0.6, "learning_rate": 7.4075688065219186e-06, "loss": 0.7954, "step": 4030 }, { "epoch": 0.6, "learning_rate": 7.402943112021284e-06, "loss": 0.3041, "step": 4031 }, { "epoch": 0.6, "learning_rate": 7.398318013328112e-06, "loss": 0.7368, "step": 4032 }, { "epoch": 0.6, "learning_rate": 7.393693511503475e-06, "loss": 0.8652, "step": 4033 }, { "epoch": 0.6, "learning_rate": 7.38906960760831e-06, "loss": 0.7954, "step": 4034 }, { "epoch": 0.6, "learning_rate": 7.384446302703416e-06, "loss": 0.7974, "step": 4035 }, { "epoch": 0.6, "learning_rate": 7.3798235978494605e-06, "loss": 0.8389, "step": 4036 }, { "epoch": 0.6, "learning_rate": 7.375201494106956e-06, "loss": 0.7856, "step": 4037 }, { "epoch": 0.6, "learning_rate": 7.3705799925362985e-06, "loss": 0.7759, "step": 4038 }, { "epoch": 0.6, "learning_rate": 7.365959094197734e-06, "loss": 0.791, "step": 4039 }, { "epoch": 0.6, "learning_rate": 7.361338800151376e-06, "loss": 0.7783, "step": 4040 }, { "epoch": 0.6, "learning_rate": 7.356719111457187e-06, "loss": 0.8618, "step": 4041 }, { "epoch": 0.6, "learning_rate": 7.352100029175002e-06, "loss": 0.8281, "step": 4042 }, { "epoch": 0.6, "learning_rate": 7.347481554364519e-06, "loss": 0.8252, "step": 4043 }, { "epoch": 0.6, "learning_rate": 7.342863688085291e-06, "loss": 0.8184, "step": 4044 }, { "epoch": 0.6, "learning_rate": 7.338246431396734e-06, "loss": 0.7925, "step": 4045 }, { "epoch": 0.6, "learning_rate": 7.3336297853581115e-06, "loss": 0.2821, "step": 4046 }, { "epoch": 0.6, "learning_rate": 7.329013751028569e-06, "loss": 0.8423, "step": 4047 }, { "epoch": 0.6, "learning_rate": 7.324398329467096e-06, "loss": 0.7944, "step": 4048 }, { "epoch": 0.6, "learning_rate": 7.319783521732551e-06, "loss": 0.8037, "step": 4049 }, { "epoch": 0.6, "learning_rate": 7.31516932888364e-06, "loss": 0.8364, "step": 4050 }, { "epoch": 0.6, "learning_rate": 7.310555751978934e-06, "loss": 0.79, "step": 4051 }, { "epoch": 0.6, "learning_rate": 7.305942792076867e-06, "loss": 0.7832, "step": 4052 }, { "epoch": 0.6, "learning_rate": 7.301330450235733e-06, "loss": 0.8027, "step": 4053 }, { "epoch": 0.6, "learning_rate": 7.2967187275136685e-06, "loss": 0.8193, "step": 4054 }, { "epoch": 0.6, "learning_rate": 7.292107624968683e-06, "loss": 0.7744, "step": 4055 }, { "epoch": 0.6, "learning_rate": 7.287497143658635e-06, "loss": 0.8389, "step": 4056 }, { "epoch": 0.6, "learning_rate": 7.282887284641257e-06, "loss": 0.8184, "step": 4057 }, { "epoch": 0.6, "learning_rate": 7.278278048974112e-06, "loss": 0.7676, "step": 4058 }, { "epoch": 0.6, "learning_rate": 7.273669437714641e-06, "loss": 0.8013, "step": 4059 }, { "epoch": 0.6, "learning_rate": 7.2690614519201315e-06, "loss": 0.7695, "step": 4060 }, { "epoch": 0.6, "learning_rate": 7.264454092647735e-06, "loss": 0.79, "step": 4061 }, { "epoch": 0.6, "learning_rate": 7.25984736095446e-06, "loss": 0.8027, "step": 4062 }, { "epoch": 0.6, "learning_rate": 7.255241257897154e-06, "loss": 0.3135, "step": 4063 }, { "epoch": 0.6, "learning_rate": 7.2506357845325405e-06, "loss": 0.7925, "step": 4064 }, { "epoch": 0.6, "learning_rate": 7.246030941917186e-06, "loss": 0.8081, "step": 4065 }, { "epoch": 0.6, "learning_rate": 7.241426731107526e-06, "loss": 0.8071, "step": 4066 }, { "epoch": 0.6, "learning_rate": 7.236823153159832e-06, "loss": 0.7803, "step": 4067 }, { "epoch": 0.6, "learning_rate": 7.232220209130245e-06, "loss": 0.7236, "step": 4068 }, { "epoch": 0.6, "learning_rate": 7.227617900074755e-06, "loss": 0.8428, "step": 4069 }, { "epoch": 0.6, "learning_rate": 7.223016227049209e-06, "loss": 0.7998, "step": 4070 }, { "epoch": 0.6, "learning_rate": 7.218415191109303e-06, "loss": 0.7686, "step": 4071 }, { "epoch": 0.6, "learning_rate": 7.213814793310589e-06, "loss": 0.7769, "step": 4072 }, { "epoch": 0.6, "learning_rate": 7.209215034708481e-06, "loss": 0.856, "step": 4073 }, { "epoch": 0.6, "learning_rate": 7.204615916358234e-06, "loss": 0.8018, "step": 4074 }, { "epoch": 0.6, "learning_rate": 7.2000174393149615e-06, "loss": 0.8354, "step": 4075 }, { "epoch": 0.6, "learning_rate": 7.195419604633629e-06, "loss": 0.833, "step": 4076 }, { "epoch": 0.6, "learning_rate": 7.190822413369055e-06, "loss": 0.7788, "step": 4077 }, { "epoch": 0.6, "learning_rate": 7.186225866575913e-06, "loss": 0.8657, "step": 4078 }, { "epoch": 0.6, "learning_rate": 7.1816299653087276e-06, "loss": 0.2877, "step": 4079 }, { "epoch": 0.6, "learning_rate": 7.177034710621869e-06, "loss": 0.7998, "step": 4080 }, { "epoch": 0.6, "learning_rate": 7.172440103569566e-06, "loss": 0.7668, "step": 4081 }, { "epoch": 0.6, "learning_rate": 7.167846145205897e-06, "loss": 0.7871, "step": 4082 }, { "epoch": 0.6, "learning_rate": 7.163252836584794e-06, "loss": 0.7686, "step": 4083 }, { "epoch": 0.6, "learning_rate": 7.158660178760033e-06, "loss": 0.834, "step": 4084 }, { "epoch": 0.6, "learning_rate": 7.154068172785248e-06, "loss": 0.8193, "step": 4085 }, { "epoch": 0.6, "learning_rate": 7.149476819713919e-06, "loss": 0.7695, "step": 4086 }, { "epoch": 0.6, "learning_rate": 7.144886120599379e-06, "loss": 0.8208, "step": 4087 }, { "epoch": 0.6, "learning_rate": 7.140296076494809e-06, "loss": 0.7979, "step": 4088 }, { "epoch": 0.6, "learning_rate": 7.135706688453239e-06, "loss": 0.7676, "step": 4089 }, { "epoch": 0.6, "learning_rate": 7.131117957527553e-06, "loss": 0.8413, "step": 4090 }, { "epoch": 0.6, "learning_rate": 7.126529884770479e-06, "loss": 0.8164, "step": 4091 }, { "epoch": 0.61, "learning_rate": 7.121942471234598e-06, "loss": 0.3176, "step": 4092 }, { "epoch": 0.61, "learning_rate": 7.117355717972334e-06, "loss": 0.8008, "step": 4093 }, { "epoch": 0.61, "learning_rate": 7.112769626035968e-06, "loss": 0.8413, "step": 4094 }, { "epoch": 0.61, "learning_rate": 7.108184196477622e-06, "loss": 0.7092, "step": 4095 }, { "epoch": 0.61, "learning_rate": 7.103599430349271e-06, "loss": 0.7915, "step": 4096 }, { "epoch": 0.61, "learning_rate": 7.099015328702734e-06, "loss": 0.7749, "step": 4097 }, { "epoch": 0.61, "learning_rate": 7.0944318925896775e-06, "loss": 0.7847, "step": 4098 }, { "epoch": 0.61, "learning_rate": 7.08984912306162e-06, "loss": 0.9067, "step": 4099 }, { "epoch": 0.61, "learning_rate": 7.0852670211699236e-06, "loss": 0.8457, "step": 4100 }, { "epoch": 0.61, "learning_rate": 7.0806855879657964e-06, "loss": 0.7925, "step": 4101 }, { "epoch": 0.61, "learning_rate": 7.076104824500294e-06, "loss": 0.7856, "step": 4102 }, { "epoch": 0.61, "learning_rate": 7.07152473182432e-06, "loss": 0.78, "step": 4103 }, { "epoch": 0.61, "learning_rate": 7.066945310988623e-06, "loss": 0.7583, "step": 4104 }, { "epoch": 0.61, "learning_rate": 7.062366563043795e-06, "loss": 0.8271, "step": 4105 }, { "epoch": 0.61, "learning_rate": 7.057788489040278e-06, "loss": 0.7729, "step": 4106 }, { "epoch": 0.61, "learning_rate": 7.053211090028357e-06, "loss": 0.8232, "step": 4107 }, { "epoch": 0.61, "learning_rate": 7.048634367058163e-06, "loss": 0.8047, "step": 4108 }, { "epoch": 0.61, "learning_rate": 7.044058321179671e-06, "loss": 0.7217, "step": 4109 }, { "epoch": 0.61, "learning_rate": 7.039482953442699e-06, "loss": 0.8296, "step": 4110 }, { "epoch": 0.61, "learning_rate": 7.0349082648969135e-06, "loss": 0.7705, "step": 4111 }, { "epoch": 0.61, "learning_rate": 7.030334256591822e-06, "loss": 0.8608, "step": 4112 }, { "epoch": 0.61, "learning_rate": 7.025760929576779e-06, "loss": 0.8369, "step": 4113 }, { "epoch": 0.61, "learning_rate": 7.02118828490098e-06, "loss": 0.7749, "step": 4114 }, { "epoch": 0.61, "learning_rate": 7.016616323613462e-06, "loss": 0.7451, "step": 4115 }, { "epoch": 0.61, "learning_rate": 7.012045046763111e-06, "loss": 0.8042, "step": 4116 }, { "epoch": 0.61, "learning_rate": 7.007474455398655e-06, "loss": 0.791, "step": 4117 }, { "epoch": 0.61, "learning_rate": 7.002904550568654e-06, "loss": 0.7515, "step": 4118 }, { "epoch": 0.61, "learning_rate": 6.9983353333215275e-06, "loss": 0.7964, "step": 4119 }, { "epoch": 0.61, "learning_rate": 6.993766804705526e-06, "loss": 0.8184, "step": 4120 }, { "epoch": 0.61, "learning_rate": 6.989198965768749e-06, "loss": 0.8877, "step": 4121 }, { "epoch": 0.61, "learning_rate": 6.984631817559126e-06, "loss": 0.8638, "step": 4122 }, { "epoch": 0.61, "learning_rate": 6.980065361124437e-06, "loss": 0.7866, "step": 4123 }, { "epoch": 0.61, "learning_rate": 6.975499597512307e-06, "loss": 0.8262, "step": 4124 }, { "epoch": 0.61, "learning_rate": 6.970934527770195e-06, "loss": 0.8115, "step": 4125 }, { "epoch": 0.61, "learning_rate": 6.966370152945407e-06, "loss": 0.8149, "step": 4126 }, { "epoch": 0.61, "learning_rate": 6.961806474085075e-06, "loss": 0.8257, "step": 4127 }, { "epoch": 0.61, "learning_rate": 6.957243492236189e-06, "loss": 0.7871, "step": 4128 }, { "epoch": 0.61, "learning_rate": 6.952681208445571e-06, "loss": 0.7852, "step": 4129 }, { "epoch": 0.61, "learning_rate": 6.948119623759888e-06, "loss": 0.8433, "step": 4130 }, { "epoch": 0.61, "learning_rate": 6.943558739225633e-06, "loss": 0.7241, "step": 4131 }, { "epoch": 0.61, "learning_rate": 6.938998555889152e-06, "loss": 0.8218, "step": 4132 }, { "epoch": 0.61, "learning_rate": 6.934439074796627e-06, "loss": 0.7708, "step": 4133 }, { "epoch": 0.61, "learning_rate": 6.929880296994083e-06, "loss": 0.7705, "step": 4134 }, { "epoch": 0.61, "learning_rate": 6.925322223527366e-06, "loss": 0.8013, "step": 4135 }, { "epoch": 0.61, "learning_rate": 6.9207648554421825e-06, "loss": 0.7842, "step": 4136 }, { "epoch": 0.61, "learning_rate": 6.916208193784062e-06, "loss": 0.833, "step": 4137 }, { "epoch": 0.61, "learning_rate": 6.911652239598385e-06, "loss": 0.8062, "step": 4138 }, { "epoch": 0.61, "learning_rate": 6.907096993930355e-06, "loss": 0.8477, "step": 4139 }, { "epoch": 0.61, "learning_rate": 6.902542457825021e-06, "loss": 0.7729, "step": 4140 }, { "epoch": 0.61, "learning_rate": 6.897988632327268e-06, "loss": 0.7881, "step": 4141 }, { "epoch": 0.61, "learning_rate": 6.8934355184818205e-06, "loss": 0.7773, "step": 4142 }, { "epoch": 0.61, "learning_rate": 6.888883117333243e-06, "loss": 0.854, "step": 4143 }, { "epoch": 0.61, "learning_rate": 6.884331429925919e-06, "loss": 0.8369, "step": 4144 }, { "epoch": 0.61, "learning_rate": 6.879780457304087e-06, "loss": 0.7788, "step": 4145 }, { "epoch": 0.61, "learning_rate": 6.875230200511812e-06, "loss": 0.8125, "step": 4146 }, { "epoch": 0.61, "learning_rate": 6.870680660593007e-06, "loss": 0.8018, "step": 4147 }, { "epoch": 0.61, "learning_rate": 6.8661318385913974e-06, "loss": 0.8145, "step": 4148 }, { "epoch": 0.61, "learning_rate": 6.861583735550565e-06, "loss": 0.7808, "step": 4149 }, { "epoch": 0.61, "learning_rate": 6.857036352513918e-06, "loss": 0.7905, "step": 4150 }, { "epoch": 0.61, "learning_rate": 6.852489690524703e-06, "loss": 0.8149, "step": 4151 }, { "epoch": 0.61, "learning_rate": 6.8479437506259936e-06, "loss": 0.7842, "step": 4152 }, { "epoch": 0.61, "learning_rate": 6.8433985338607075e-06, "loss": 0.7251, "step": 4153 }, { "epoch": 0.61, "learning_rate": 6.83885404127159e-06, "loss": 0.7549, "step": 4154 }, { "epoch": 0.61, "learning_rate": 6.834310273901224e-06, "loss": 0.7471, "step": 4155 }, { "epoch": 0.61, "learning_rate": 6.829767232792024e-06, "loss": 0.7192, "step": 4156 }, { "epoch": 0.61, "learning_rate": 6.8252249189862355e-06, "loss": 0.7769, "step": 4157 }, { "epoch": 0.61, "learning_rate": 6.820683333525942e-06, "loss": 0.8291, "step": 4158 }, { "epoch": 0.62, "learning_rate": 6.816142477453056e-06, "loss": 0.7446, "step": 4159 }, { "epoch": 0.62, "learning_rate": 6.811602351809328e-06, "loss": 0.8135, "step": 4160 }, { "epoch": 0.62, "learning_rate": 6.807062957636334e-06, "loss": 0.7573, "step": 4161 }, { "epoch": 0.62, "learning_rate": 6.802524295975486e-06, "loss": 0.812, "step": 4162 }, { "epoch": 0.62, "learning_rate": 6.797986367868028e-06, "loss": 0.7529, "step": 4163 }, { "epoch": 0.62, "learning_rate": 6.793449174355038e-06, "loss": 0.7334, "step": 4164 }, { "epoch": 0.62, "learning_rate": 6.788912716477417e-06, "loss": 0.7598, "step": 4165 }, { "epoch": 0.62, "learning_rate": 6.784376995275905e-06, "loss": 0.7349, "step": 4166 }, { "epoch": 0.62, "learning_rate": 6.779842011791074e-06, "loss": 0.7778, "step": 4167 }, { "epoch": 0.62, "learning_rate": 6.775307767063322e-06, "loss": 0.749, "step": 4168 }, { "epoch": 0.62, "learning_rate": 6.770774262132875e-06, "loss": 0.7266, "step": 4169 }, { "epoch": 0.62, "learning_rate": 6.766241498039798e-06, "loss": 0.7798, "step": 4170 }, { "epoch": 0.62, "learning_rate": 6.76170947582398e-06, "loss": 0.813, "step": 4171 }, { "epoch": 0.62, "learning_rate": 6.7571781965251405e-06, "loss": 0.8599, "step": 4172 }, { "epoch": 0.62, "learning_rate": 6.752647661182834e-06, "loss": 0.8115, "step": 4173 }, { "epoch": 0.62, "learning_rate": 6.748117870836433e-06, "loss": 0.8008, "step": 4174 }, { "epoch": 0.62, "learning_rate": 6.743588826525148e-06, "loss": 0.7871, "step": 4175 }, { "epoch": 0.62, "learning_rate": 6.739060529288017e-06, "loss": 0.813, "step": 4176 }, { "epoch": 0.62, "learning_rate": 6.734532980163908e-06, "loss": 0.8281, "step": 4177 }, { "epoch": 0.62, "learning_rate": 6.730006180191511e-06, "loss": 0.7939, "step": 4178 }, { "epoch": 0.62, "learning_rate": 6.725480130409347e-06, "loss": 0.8135, "step": 4179 }, { "epoch": 0.62, "learning_rate": 6.720954831855771e-06, "loss": 0.3186, "step": 4180 }, { "epoch": 0.62, "learning_rate": 6.71643028556896e-06, "loss": 0.8052, "step": 4181 }, { "epoch": 0.62, "learning_rate": 6.7119064925869145e-06, "loss": 0.772, "step": 4182 }, { "epoch": 0.62, "learning_rate": 6.707383453947471e-06, "loss": 0.7627, "step": 4183 }, { "epoch": 0.62, "learning_rate": 6.702861170688288e-06, "loss": 0.8105, "step": 4184 }, { "epoch": 0.62, "learning_rate": 6.698339643846854e-06, "loss": 0.8389, "step": 4185 }, { "epoch": 0.62, "learning_rate": 6.693818874460475e-06, "loss": 0.7583, "step": 4186 }, { "epoch": 0.62, "learning_rate": 6.6892988635662935e-06, "loss": 0.792, "step": 4187 }, { "epoch": 0.62, "learning_rate": 6.6847796122012754e-06, "loss": 0.835, "step": 4188 }, { "epoch": 0.62, "learning_rate": 6.680261121402207e-06, "loss": 0.7212, "step": 4189 }, { "epoch": 0.62, "learning_rate": 6.6757433922057114e-06, "loss": 0.8994, "step": 4190 }, { "epoch": 0.62, "learning_rate": 6.671226425648223e-06, "loss": 0.8218, "step": 4191 }, { "epoch": 0.62, "learning_rate": 6.666710222766009e-06, "loss": 0.8965, "step": 4192 }, { "epoch": 0.62, "learning_rate": 6.662194784595164e-06, "loss": 0.7524, "step": 4193 }, { "epoch": 0.62, "learning_rate": 6.657680112171603e-06, "loss": 0.7729, "step": 4194 }, { "epoch": 0.62, "learning_rate": 6.65316620653106e-06, "loss": 0.8315, "step": 4195 }, { "epoch": 0.62, "learning_rate": 6.648653068709105e-06, "loss": 0.8706, "step": 4196 }, { "epoch": 0.62, "learning_rate": 6.644140699741124e-06, "loss": 0.8413, "step": 4197 }, { "epoch": 0.62, "learning_rate": 6.639629100662332e-06, "loss": 0.7622, "step": 4198 }, { "epoch": 0.62, "learning_rate": 6.635118272507756e-06, "loss": 0.7925, "step": 4199 }, { "epoch": 0.62, "learning_rate": 6.63060821631226e-06, "loss": 0.8281, "step": 4200 }, { "epoch": 0.62, "learning_rate": 6.626098933110523e-06, "loss": 0.8306, "step": 4201 }, { "epoch": 0.62, "learning_rate": 6.62159042393705e-06, "loss": 0.8809, "step": 4202 }, { "epoch": 0.62, "learning_rate": 6.617082689826169e-06, "loss": 0.7139, "step": 4203 }, { "epoch": 0.62, "learning_rate": 6.6125757318120185e-06, "loss": 0.8232, "step": 4204 }, { "epoch": 0.62, "learning_rate": 6.6080695509285795e-06, "loss": 0.8037, "step": 4205 }, { "epoch": 0.62, "learning_rate": 6.603564148209638e-06, "loss": 0.8105, "step": 4206 }, { "epoch": 0.62, "learning_rate": 6.599059524688813e-06, "loss": 0.8105, "step": 4207 }, { "epoch": 0.62, "learning_rate": 6.594555681399532e-06, "loss": 0.7778, "step": 4208 }, { "epoch": 0.62, "learning_rate": 6.59005261937505e-06, "loss": 0.7954, "step": 4209 }, { "epoch": 0.62, "learning_rate": 6.58555033964845e-06, "loss": 0.8032, "step": 4210 }, { "epoch": 0.62, "learning_rate": 6.581048843252629e-06, "loss": 0.79, "step": 4211 }, { "epoch": 0.62, "learning_rate": 6.576548131220297e-06, "loss": 0.8057, "step": 4212 }, { "epoch": 0.62, "learning_rate": 6.572048204583993e-06, "loss": 0.833, "step": 4213 }, { "epoch": 0.62, "learning_rate": 6.567549064376078e-06, "loss": 0.8252, "step": 4214 }, { "epoch": 0.62, "learning_rate": 6.56305071162873e-06, "loss": 0.855, "step": 4215 }, { "epoch": 0.62, "learning_rate": 6.5585531473739385e-06, "loss": 0.7939, "step": 4216 }, { "epoch": 0.62, "learning_rate": 6.55405637264352e-06, "loss": 0.7876, "step": 4217 }, { "epoch": 0.62, "learning_rate": 6.5495603884691095e-06, "loss": 0.79, "step": 4218 }, { "epoch": 0.62, "learning_rate": 6.5450651958821605e-06, "loss": 0.8276, "step": 4219 }, { "epoch": 0.62, "learning_rate": 6.54057079591395e-06, "loss": 0.7939, "step": 4220 }, { "epoch": 0.62, "learning_rate": 6.536077189595554e-06, "loss": 0.8384, "step": 4221 }, { "epoch": 0.62, "learning_rate": 6.5315843779578865e-06, "loss": 0.7334, "step": 4222 }, { "epoch": 0.62, "learning_rate": 6.52709236203167e-06, "loss": 0.7449, "step": 4223 }, { "epoch": 0.62, "learning_rate": 6.522601142847456e-06, "loss": 0.7656, "step": 4224 }, { "epoch": 0.62, "learning_rate": 6.51811072143559e-06, "loss": 0.77, "step": 4225 }, { "epoch": 0.62, "learning_rate": 6.513621098826255e-06, "loss": 0.7861, "step": 4226 }, { "epoch": 0.63, "learning_rate": 6.509132276049441e-06, "loss": 0.7083, "step": 4227 }, { "epoch": 0.63, "learning_rate": 6.504644254134969e-06, "loss": 0.8198, "step": 4228 }, { "epoch": 0.63, "learning_rate": 6.50015703411245e-06, "loss": 0.8325, "step": 4229 }, { "epoch": 0.63, "learning_rate": 6.495670617011332e-06, "loss": 0.8428, "step": 4230 }, { "epoch": 0.63, "learning_rate": 6.491185003860874e-06, "loss": 0.8911, "step": 4231 }, { "epoch": 0.63, "learning_rate": 6.48670019569015e-06, "loss": 0.8301, "step": 4232 }, { "epoch": 0.63, "learning_rate": 6.482216193528044e-06, "loss": 0.811, "step": 4233 }, { "epoch": 0.63, "learning_rate": 6.477732998403261e-06, "loss": 0.8149, "step": 4234 }, { "epoch": 0.63, "learning_rate": 6.4732506113443215e-06, "loss": 0.8481, "step": 4235 }, { "epoch": 0.63, "learning_rate": 6.468769033379559e-06, "loss": 0.7993, "step": 4236 }, { "epoch": 0.63, "learning_rate": 6.4642882655371216e-06, "loss": 0.7666, "step": 4237 }, { "epoch": 0.63, "learning_rate": 6.459808308844967e-06, "loss": 0.7832, "step": 4238 }, { "epoch": 0.63, "learning_rate": 6.455329164330872e-06, "loss": 0.9014, "step": 4239 }, { "epoch": 0.63, "learning_rate": 6.450850833022429e-06, "loss": 0.855, "step": 4240 }, { "epoch": 0.63, "learning_rate": 6.44637331594704e-06, "loss": 0.3093, "step": 4241 }, { "epoch": 0.63, "learning_rate": 6.441896614131918e-06, "loss": 0.8115, "step": 4242 }, { "epoch": 0.63, "learning_rate": 6.437420728604091e-06, "loss": 0.8066, "step": 4243 }, { "epoch": 0.63, "learning_rate": 6.432945660390404e-06, "loss": 0.3104, "step": 4244 }, { "epoch": 0.63, "learning_rate": 6.428471410517513e-06, "loss": 0.7754, "step": 4245 }, { "epoch": 0.63, "learning_rate": 6.423997980011878e-06, "loss": 0.7437, "step": 4246 }, { "epoch": 0.63, "learning_rate": 6.419525369899782e-06, "loss": 0.7563, "step": 4247 }, { "epoch": 0.63, "learning_rate": 6.415053581207314e-06, "loss": 0.8521, "step": 4248 }, { "epoch": 0.63, "learning_rate": 6.410582614960375e-06, "loss": 0.8315, "step": 4249 }, { "epoch": 0.63, "learning_rate": 6.406112472184678e-06, "loss": 0.792, "step": 4250 }, { "epoch": 0.63, "learning_rate": 6.4016431539057476e-06, "loss": 0.709, "step": 4251 }, { "epoch": 0.63, "learning_rate": 6.397174661148919e-06, "loss": 0.7949, "step": 4252 }, { "epoch": 0.63, "learning_rate": 6.392706994939338e-06, "loss": 0.811, "step": 4253 }, { "epoch": 0.63, "learning_rate": 6.388240156301961e-06, "loss": 0.7993, "step": 4254 }, { "epoch": 0.63, "learning_rate": 6.383774146261552e-06, "loss": 0.8101, "step": 4255 }, { "epoch": 0.63, "learning_rate": 6.379308965842689e-06, "loss": 0.7847, "step": 4256 }, { "epoch": 0.63, "learning_rate": 6.374844616069755e-06, "loss": 0.7983, "step": 4257 }, { "epoch": 0.63, "learning_rate": 6.370381097966949e-06, "loss": 0.8198, "step": 4258 }, { "epoch": 0.63, "learning_rate": 6.3659184125582716e-06, "loss": 0.769, "step": 4259 }, { "epoch": 0.63, "learning_rate": 6.361456560867538e-06, "loss": 0.7871, "step": 4260 }, { "epoch": 0.63, "learning_rate": 6.356995543918369e-06, "loss": 0.7505, "step": 4261 }, { "epoch": 0.63, "learning_rate": 6.352535362734199e-06, "loss": 0.8574, "step": 4262 }, { "epoch": 0.63, "learning_rate": 6.34807601833826e-06, "loss": 0.7773, "step": 4263 }, { "epoch": 0.63, "learning_rate": 6.343617511753604e-06, "loss": 0.7759, "step": 4264 }, { "epoch": 0.63, "learning_rate": 6.339159844003085e-06, "loss": 0.8237, "step": 4265 }, { "epoch": 0.63, "learning_rate": 6.33470301610936e-06, "loss": 0.8433, "step": 4266 }, { "epoch": 0.63, "learning_rate": 6.330247029094908e-06, "loss": 0.7681, "step": 4267 }, { "epoch": 0.63, "learning_rate": 6.325791883981997e-06, "loss": 0.7979, "step": 4268 }, { "epoch": 0.63, "learning_rate": 6.3213375817927125e-06, "loss": 0.7734, "step": 4269 }, { "epoch": 0.63, "learning_rate": 6.316884123548947e-06, "loss": 0.8125, "step": 4270 }, { "epoch": 0.63, "learning_rate": 6.3124315102723965e-06, "loss": 0.7661, "step": 4271 }, { "epoch": 0.63, "learning_rate": 6.30797974298456e-06, "loss": 0.7891, "step": 4272 }, { "epoch": 0.63, "learning_rate": 6.30352882270675e-06, "loss": 0.7646, "step": 4273 }, { "epoch": 0.63, "learning_rate": 6.299078750460077e-06, "loss": 0.769, "step": 4274 }, { "epoch": 0.63, "learning_rate": 6.294629527265468e-06, "loss": 0.7935, "step": 4275 }, { "epoch": 0.63, "learning_rate": 6.290181154143635e-06, "loss": 0.8311, "step": 4276 }, { "epoch": 0.63, "learning_rate": 6.285733632115118e-06, "loss": 0.8618, "step": 4277 }, { "epoch": 0.63, "learning_rate": 6.281286962200251e-06, "loss": 0.8188, "step": 4278 }, { "epoch": 0.63, "learning_rate": 6.2768411454191715e-06, "loss": 0.7715, "step": 4279 }, { "epoch": 0.63, "learning_rate": 6.272396182791817e-06, "loss": 0.8076, "step": 4280 }, { "epoch": 0.63, "learning_rate": 6.267952075337942e-06, "loss": 0.8301, "step": 4281 }, { "epoch": 0.63, "learning_rate": 6.263508824077096e-06, "loss": 0.8076, "step": 4282 }, { "epoch": 0.63, "learning_rate": 6.259066430028632e-06, "loss": 0.8584, "step": 4283 }, { "epoch": 0.63, "learning_rate": 6.2546248942117134e-06, "loss": 0.7891, "step": 4284 }, { "epoch": 0.63, "learning_rate": 6.250184217645293e-06, "loss": 0.8193, "step": 4285 }, { "epoch": 0.63, "learning_rate": 6.2457444013481405e-06, "loss": 0.8525, "step": 4286 }, { "epoch": 0.63, "learning_rate": 6.241305446338821e-06, "loss": 0.8081, "step": 4287 }, { "epoch": 0.63, "learning_rate": 6.236867353635709e-06, "loss": 0.7852, "step": 4288 }, { "epoch": 0.63, "learning_rate": 6.232430124256966e-06, "loss": 0.7754, "step": 4289 }, { "epoch": 0.63, "learning_rate": 6.227993759220568e-06, "loss": 0.8267, "step": 4290 }, { "epoch": 0.63, "learning_rate": 6.2235582595442935e-06, "loss": 0.8394, "step": 4291 }, { "epoch": 0.63, "learning_rate": 6.219123626245722e-06, "loss": 0.7588, "step": 4292 }, { "epoch": 0.63, "learning_rate": 6.2146898603422225e-06, "loss": 0.812, "step": 4293 }, { "epoch": 0.64, "learning_rate": 6.210256962850976e-06, "loss": 0.7983, "step": 4294 }, { "epoch": 0.64, "learning_rate": 6.2058249347889646e-06, "loss": 0.7668, "step": 4295 }, { "epoch": 0.64, "learning_rate": 6.201393777172971e-06, "loss": 0.7979, "step": 4296 }, { "epoch": 0.64, "learning_rate": 6.196963491019569e-06, "loss": 0.8223, "step": 4297 }, { "epoch": 0.64, "learning_rate": 6.19253407734514e-06, "loss": 0.8115, "step": 4298 }, { "epoch": 0.64, "learning_rate": 6.188105537165865e-06, "loss": 0.3074, "step": 4299 }, { "epoch": 0.64, "learning_rate": 6.183677871497726e-06, "loss": 0.8599, "step": 4300 }, { "epoch": 0.64, "learning_rate": 6.179251081356505e-06, "loss": 0.8076, "step": 4301 }, { "epoch": 0.64, "learning_rate": 6.174825167757772e-06, "loss": 0.8232, "step": 4302 }, { "epoch": 0.64, "learning_rate": 6.1704001317169075e-06, "loss": 0.8232, "step": 4303 }, { "epoch": 0.64, "learning_rate": 6.165975974249086e-06, "loss": 0.7817, "step": 4304 }, { "epoch": 0.64, "learning_rate": 6.161552696369291e-06, "loss": 0.8394, "step": 4305 }, { "epoch": 0.64, "learning_rate": 6.157130299092282e-06, "loss": 0.7329, "step": 4306 }, { "epoch": 0.64, "learning_rate": 6.152708783432637e-06, "loss": 0.79, "step": 4307 }, { "epoch": 0.64, "learning_rate": 6.148288150404722e-06, "loss": 0.302, "step": 4308 }, { "epoch": 0.64, "learning_rate": 6.143868401022705e-06, "loss": 0.8057, "step": 4309 }, { "epoch": 0.64, "learning_rate": 6.139449536300548e-06, "loss": 0.7734, "step": 4310 }, { "epoch": 0.64, "learning_rate": 6.135031557252008e-06, "loss": 0.7983, "step": 4311 }, { "epoch": 0.64, "learning_rate": 6.130614464890645e-06, "loss": 0.7896, "step": 4312 }, { "epoch": 0.64, "learning_rate": 6.12619826022981e-06, "loss": 0.7832, "step": 4313 }, { "epoch": 0.64, "learning_rate": 6.1217829442826595e-06, "loss": 0.8101, "step": 4314 }, { "epoch": 0.64, "learning_rate": 6.117368518062131e-06, "loss": 0.8638, "step": 4315 }, { "epoch": 0.64, "learning_rate": 6.11295498258097e-06, "loss": 0.8516, "step": 4316 }, { "epoch": 0.64, "learning_rate": 6.1085423388517154e-06, "loss": 0.7861, "step": 4317 }, { "epoch": 0.64, "learning_rate": 6.1041305878866985e-06, "loss": 0.8257, "step": 4318 }, { "epoch": 0.64, "learning_rate": 6.099719730698046e-06, "loss": 0.7495, "step": 4319 }, { "epoch": 0.64, "learning_rate": 6.095309768297681e-06, "loss": 0.3164, "step": 4320 }, { "epoch": 0.64, "learning_rate": 6.090900701697324e-06, "loss": 0.7549, "step": 4321 }, { "epoch": 0.64, "learning_rate": 6.086492531908488e-06, "loss": 0.8799, "step": 4322 }, { "epoch": 0.64, "learning_rate": 6.082085259942474e-06, "loss": 0.8188, "step": 4323 }, { "epoch": 0.64, "learning_rate": 6.077678886810386e-06, "loss": 0.7612, "step": 4324 }, { "epoch": 0.64, "learning_rate": 6.073273413523119e-06, "loss": 0.8027, "step": 4325 }, { "epoch": 0.64, "learning_rate": 6.068868841091361e-06, "loss": 0.811, "step": 4326 }, { "epoch": 0.64, "learning_rate": 6.0644651705255905e-06, "loss": 0.8218, "step": 4327 }, { "epoch": 0.64, "learning_rate": 6.060062402836082e-06, "loss": 0.811, "step": 4328 }, { "epoch": 0.64, "learning_rate": 6.055660539032906e-06, "loss": 0.793, "step": 4329 }, { "epoch": 0.64, "learning_rate": 6.0512595801259185e-06, "loss": 0.7778, "step": 4330 }, { "epoch": 0.64, "learning_rate": 6.0468595271247755e-06, "loss": 0.8613, "step": 4331 }, { "epoch": 0.64, "learning_rate": 6.042460381038918e-06, "loss": 0.7661, "step": 4332 }, { "epoch": 0.64, "learning_rate": 6.038062142877583e-06, "loss": 0.8462, "step": 4333 }, { "epoch": 0.64, "learning_rate": 6.0336648136498e-06, "loss": 0.8057, "step": 4334 }, { "epoch": 0.64, "learning_rate": 6.029268394364389e-06, "loss": 0.7456, "step": 4335 }, { "epoch": 0.64, "learning_rate": 6.024872886029958e-06, "loss": 0.7954, "step": 4336 }, { "epoch": 0.64, "learning_rate": 6.020478289654909e-06, "loss": 0.7705, "step": 4337 }, { "epoch": 0.64, "learning_rate": 6.016084606247435e-06, "loss": 0.7334, "step": 4338 }, { "epoch": 0.64, "learning_rate": 6.011691836815523e-06, "loss": 0.8066, "step": 4339 }, { "epoch": 0.64, "learning_rate": 6.00729998236694e-06, "loss": 0.8638, "step": 4340 }, { "epoch": 0.64, "learning_rate": 6.002909043909253e-06, "loss": 0.772, "step": 4341 }, { "epoch": 0.64, "learning_rate": 5.9985190224498135e-06, "loss": 0.7856, "step": 4342 }, { "epoch": 0.64, "learning_rate": 5.994129918995769e-06, "loss": 0.8184, "step": 4343 }, { "epoch": 0.64, "learning_rate": 5.989741734554046e-06, "loss": 0.7744, "step": 4344 }, { "epoch": 0.64, "learning_rate": 5.9853544701313694e-06, "loss": 0.8022, "step": 4345 }, { "epoch": 0.64, "learning_rate": 5.980968126734248e-06, "loss": 0.7837, "step": 4346 }, { "epoch": 0.64, "learning_rate": 5.976582705368982e-06, "loss": 0.8511, "step": 4347 }, { "epoch": 0.64, "learning_rate": 5.972198207041661e-06, "loss": 0.7004, "step": 4348 }, { "epoch": 0.64, "learning_rate": 5.967814632758155e-06, "loss": 0.8286, "step": 4349 }, { "epoch": 0.64, "learning_rate": 5.963431983524134e-06, "loss": 0.8325, "step": 4350 }, { "epoch": 0.64, "learning_rate": 5.9590502603450455e-06, "loss": 0.7681, "step": 4351 }, { "epoch": 0.64, "learning_rate": 5.9546694642261325e-06, "loss": 0.7361, "step": 4352 }, { "epoch": 0.64, "learning_rate": 5.950289596172418e-06, "loss": 0.8735, "step": 4353 }, { "epoch": 0.64, "learning_rate": 5.945910657188717e-06, "loss": 0.8223, "step": 4354 }, { "epoch": 0.64, "learning_rate": 5.941532648279629e-06, "loss": 0.8418, "step": 4355 }, { "epoch": 0.64, "learning_rate": 5.937155570449547e-06, "loss": 0.8047, "step": 4356 }, { "epoch": 0.64, "learning_rate": 5.9327794247026325e-06, "loss": 0.769, "step": 4357 }, { "epoch": 0.64, "learning_rate": 5.928404212042855e-06, "loss": 0.7778, "step": 4358 }, { "epoch": 0.64, "learning_rate": 5.924029933473956e-06, "loss": 0.8218, "step": 4359 }, { "epoch": 0.64, "learning_rate": 5.9196565899994695e-06, "loss": 0.7939, "step": 4360 }, { "epoch": 0.64, "learning_rate": 5.9152841826227136e-06, "loss": 0.7485, "step": 4361 }, { "epoch": 0.65, "learning_rate": 5.910912712346781e-06, "loss": 0.8364, "step": 4362 }, { "epoch": 0.65, "learning_rate": 5.906542180174568e-06, "loss": 0.8711, "step": 4363 }, { "epoch": 0.65, "learning_rate": 5.902172587108742e-06, "loss": 0.7637, "step": 4364 }, { "epoch": 0.65, "learning_rate": 5.897803934151767e-06, "loss": 0.8057, "step": 4365 }, { "epoch": 0.65, "learning_rate": 5.893436222305869e-06, "loss": 0.7239, "step": 4366 }, { "epoch": 0.65, "learning_rate": 5.889069452573085e-06, "loss": 0.8623, "step": 4367 }, { "epoch": 0.65, "learning_rate": 5.884703625955219e-06, "loss": 0.8442, "step": 4368 }, { "epoch": 0.65, "learning_rate": 5.880338743453868e-06, "loss": 0.7256, "step": 4369 }, { "epoch": 0.65, "learning_rate": 5.875974806070402e-06, "loss": 0.873, "step": 4370 }, { "epoch": 0.65, "learning_rate": 5.871611814805978e-06, "loss": 0.813, "step": 4371 }, { "epoch": 0.65, "learning_rate": 5.867249770661543e-06, "loss": 0.8477, "step": 4372 }, { "epoch": 0.65, "learning_rate": 5.862888674637823e-06, "loss": 0.7964, "step": 4373 }, { "epoch": 0.65, "learning_rate": 5.85852852773532e-06, "loss": 0.792, "step": 4374 }, { "epoch": 0.65, "learning_rate": 5.854169330954324e-06, "loss": 0.8511, "step": 4375 }, { "epoch": 0.65, "learning_rate": 5.849811085294905e-06, "loss": 0.3257, "step": 4376 }, { "epoch": 0.65, "learning_rate": 5.845453791756921e-06, "loss": 0.7974, "step": 4377 }, { "epoch": 0.65, "learning_rate": 5.841097451340008e-06, "loss": 0.7896, "step": 4378 }, { "epoch": 0.65, "learning_rate": 5.836742065043575e-06, "loss": 0.8252, "step": 4379 }, { "epoch": 0.65, "learning_rate": 5.832387633866819e-06, "loss": 0.814, "step": 4380 }, { "epoch": 0.65, "learning_rate": 5.828034158808726e-06, "loss": 0.8662, "step": 4381 }, { "epoch": 0.65, "learning_rate": 5.823681640868049e-06, "loss": 0.7603, "step": 4382 }, { "epoch": 0.65, "learning_rate": 5.819330081043324e-06, "loss": 0.7817, "step": 4383 }, { "epoch": 0.65, "learning_rate": 5.814979480332878e-06, "loss": 0.751, "step": 4384 }, { "epoch": 0.65, "learning_rate": 5.810629839734803e-06, "loss": 0.7993, "step": 4385 }, { "epoch": 0.65, "learning_rate": 5.806281160246983e-06, "loss": 0.731, "step": 4386 }, { "epoch": 0.65, "learning_rate": 5.801933442867076e-06, "loss": 0.8452, "step": 4387 }, { "epoch": 0.65, "learning_rate": 5.797586688592513e-06, "loss": 0.7319, "step": 4388 }, { "epoch": 0.65, "learning_rate": 5.793240898420521e-06, "loss": 0.8506, "step": 4389 }, { "epoch": 0.65, "learning_rate": 5.78889607334809e-06, "loss": 0.2996, "step": 4390 }, { "epoch": 0.65, "learning_rate": 5.784552214371992e-06, "loss": 0.8022, "step": 4391 }, { "epoch": 0.65, "learning_rate": 5.7802093224887845e-06, "loss": 0.8315, "step": 4392 }, { "epoch": 0.65, "learning_rate": 5.775867398694792e-06, "loss": 0.792, "step": 4393 }, { "epoch": 0.65, "learning_rate": 5.771526443986132e-06, "loss": 0.7612, "step": 4394 }, { "epoch": 0.65, "learning_rate": 5.767186459358684e-06, "loss": 0.8286, "step": 4395 }, { "epoch": 0.65, "learning_rate": 5.762847445808111e-06, "loss": 0.8589, "step": 4396 }, { "epoch": 0.65, "learning_rate": 5.75850940432986e-06, "loss": 0.7598, "step": 4397 }, { "epoch": 0.65, "learning_rate": 5.754172335919142e-06, "loss": 0.8511, "step": 4398 }, { "epoch": 0.65, "learning_rate": 5.74983624157096e-06, "loss": 0.2893, "step": 4399 }, { "epoch": 0.65, "learning_rate": 5.745501122280075e-06, "loss": 0.8135, "step": 4400 }, { "epoch": 0.65, "learning_rate": 5.741166979041037e-06, "loss": 0.79, "step": 4401 }, { "epoch": 0.65, "learning_rate": 5.736833812848177e-06, "loss": 0.8628, "step": 4402 }, { "epoch": 0.65, "learning_rate": 5.73250162469559e-06, "loss": 0.7896, "step": 4403 }, { "epoch": 0.65, "learning_rate": 5.728170415577146e-06, "loss": 0.8159, "step": 4404 }, { "epoch": 0.65, "learning_rate": 5.723840186486504e-06, "loss": 0.312, "step": 4405 }, { "epoch": 0.65, "learning_rate": 5.719510938417081e-06, "loss": 0.7119, "step": 4406 }, { "epoch": 0.65, "learning_rate": 5.715182672362092e-06, "loss": 0.7944, "step": 4407 }, { "epoch": 0.65, "learning_rate": 5.710855389314491e-06, "loss": 0.8359, "step": 4408 }, { "epoch": 0.65, "learning_rate": 5.706529090267042e-06, "loss": 0.791, "step": 4409 }, { "epoch": 0.65, "learning_rate": 5.702203776212269e-06, "loss": 0.7852, "step": 4410 }, { "epoch": 0.65, "learning_rate": 5.6978794481424626e-06, "loss": 0.7896, "step": 4411 }, { "epoch": 0.65, "learning_rate": 5.693556107049709e-06, "loss": 0.7842, "step": 4412 }, { "epoch": 0.65, "learning_rate": 5.689233753925834e-06, "loss": 0.7842, "step": 4413 }, { "epoch": 0.65, "learning_rate": 5.684912389762467e-06, "loss": 0.7988, "step": 4414 }, { "epoch": 0.65, "learning_rate": 5.680592015551001e-06, "loss": 0.7896, "step": 4415 }, { "epoch": 0.65, "learning_rate": 5.6762726322826e-06, "loss": 0.8726, "step": 4416 }, { "epoch": 0.65, "learning_rate": 5.6719542409482e-06, "loss": 0.8804, "step": 4417 }, { "epoch": 0.65, "learning_rate": 5.6676368425385065e-06, "loss": 0.8311, "step": 4418 }, { "epoch": 0.65, "learning_rate": 5.663320438044005e-06, "loss": 0.8105, "step": 4419 }, { "epoch": 0.65, "learning_rate": 5.6590050284549604e-06, "loss": 0.7754, "step": 4420 }, { "epoch": 0.65, "learning_rate": 5.654690614761378e-06, "loss": 0.8101, "step": 4421 }, { "epoch": 0.65, "learning_rate": 5.650377197953072e-06, "loss": 0.7886, "step": 4422 }, { "epoch": 0.65, "learning_rate": 5.646064779019598e-06, "loss": 0.7993, "step": 4423 }, { "epoch": 0.65, "learning_rate": 5.6417533589503036e-06, "loss": 0.7876, "step": 4424 }, { "epoch": 0.65, "learning_rate": 5.637442938734306e-06, "loss": 0.8271, "step": 4425 }, { "epoch": 0.65, "learning_rate": 5.63313351936047e-06, "loss": 0.8003, "step": 4426 }, { "epoch": 0.65, "learning_rate": 5.6288251018174585e-06, "loss": 0.7529, "step": 4427 }, { "epoch": 0.65, "learning_rate": 5.6245176870936865e-06, "loss": 0.8149, "step": 4428 }, { "epoch": 0.65, "learning_rate": 5.620211276177353e-06, "loss": 0.7578, "step": 4429 }, { "epoch": 0.66, "learning_rate": 5.615905870056415e-06, "loss": 0.793, "step": 4430 }, { "epoch": 0.66, "learning_rate": 5.611601469718601e-06, "loss": 0.8613, "step": 4431 }, { "epoch": 0.66, "learning_rate": 5.607298076151416e-06, "loss": 0.8027, "step": 4432 }, { "epoch": 0.66, "learning_rate": 5.602995690342128e-06, "loss": 0.856, "step": 4433 }, { "epoch": 0.66, "learning_rate": 5.59869431327777e-06, "loss": 0.8071, "step": 4434 }, { "epoch": 0.66, "learning_rate": 5.5943939459451555e-06, "loss": 0.7554, "step": 4435 }, { "epoch": 0.66, "learning_rate": 5.5900945893308526e-06, "loss": 0.7988, "step": 4436 }, { "epoch": 0.66, "learning_rate": 5.585796244421211e-06, "loss": 0.8169, "step": 4437 }, { "epoch": 0.66, "learning_rate": 5.5814989122023385e-06, "loss": 0.7891, "step": 4438 }, { "epoch": 0.66, "learning_rate": 5.577202593660109e-06, "loss": 0.7485, "step": 4439 }, { "epoch": 0.66, "learning_rate": 5.5729072897801784e-06, "loss": 0.7661, "step": 4440 }, { "epoch": 0.66, "learning_rate": 5.5686130015479486e-06, "loss": 0.7803, "step": 4441 }, { "epoch": 0.66, "learning_rate": 5.564319729948611e-06, "loss": 0.772, "step": 4442 }, { "epoch": 0.66, "learning_rate": 5.560027475967107e-06, "loss": 0.8438, "step": 4443 }, { "epoch": 0.66, "learning_rate": 5.555736240588145e-06, "loss": 0.7886, "step": 4444 }, { "epoch": 0.66, "learning_rate": 5.551446024796214e-06, "loss": 0.8047, "step": 4445 }, { "epoch": 0.66, "learning_rate": 5.547156829575556e-06, "loss": 0.8682, "step": 4446 }, { "epoch": 0.66, "learning_rate": 5.542868655910179e-06, "loss": 0.7959, "step": 4447 }, { "epoch": 0.66, "learning_rate": 5.5385815047838685e-06, "loss": 0.811, "step": 4448 }, { "epoch": 0.66, "learning_rate": 5.534295377180158e-06, "loss": 0.8008, "step": 4449 }, { "epoch": 0.66, "learning_rate": 5.530010274082365e-06, "loss": 0.7632, "step": 4450 }, { "epoch": 0.66, "learning_rate": 5.525726196473557e-06, "loss": 0.7944, "step": 4451 }, { "epoch": 0.66, "learning_rate": 5.521443145336568e-06, "loss": 0.7759, "step": 4452 }, { "epoch": 0.66, "learning_rate": 5.51716112165401e-06, "loss": 0.7681, "step": 4453 }, { "epoch": 0.66, "learning_rate": 5.512880126408243e-06, "loss": 0.8242, "step": 4454 }, { "epoch": 0.66, "learning_rate": 5.508600160581396e-06, "loss": 0.7798, "step": 4455 }, { "epoch": 0.66, "learning_rate": 5.504321225155369e-06, "loss": 0.8149, "step": 4456 }, { "epoch": 0.66, "learning_rate": 5.500043321111814e-06, "loss": 0.8203, "step": 4457 }, { "epoch": 0.66, "learning_rate": 5.4957664494321585e-06, "loss": 0.8496, "step": 4458 }, { "epoch": 0.66, "learning_rate": 5.491490611097586e-06, "loss": 0.7778, "step": 4459 }, { "epoch": 0.66, "learning_rate": 5.487215807089035e-06, "loss": 0.7573, "step": 4460 }, { "epoch": 0.66, "learning_rate": 5.482942038387229e-06, "loss": 0.7612, "step": 4461 }, { "epoch": 0.66, "learning_rate": 5.478669305972628e-06, "loss": 0.7832, "step": 4462 }, { "epoch": 0.66, "learning_rate": 5.474397610825479e-06, "loss": 0.7852, "step": 4463 }, { "epoch": 0.66, "learning_rate": 5.470126953925774e-06, "loss": 0.894, "step": 4464 }, { "epoch": 0.66, "learning_rate": 5.465857336253266e-06, "loss": 0.9023, "step": 4465 }, { "epoch": 0.66, "learning_rate": 5.461588758787484e-06, "loss": 0.3379, "step": 4466 }, { "epoch": 0.66, "learning_rate": 5.457321222507705e-06, "loss": 0.8003, "step": 4467 }, { "epoch": 0.66, "learning_rate": 5.453054728392971e-06, "loss": 0.7471, "step": 4468 }, { "epoch": 0.66, "learning_rate": 5.448789277422091e-06, "loss": 0.8496, "step": 4469 }, { "epoch": 0.66, "learning_rate": 5.444524870573622e-06, "loss": 0.7915, "step": 4470 }, { "epoch": 0.66, "learning_rate": 5.440261508825897e-06, "loss": 0.8545, "step": 4471 }, { "epoch": 0.66, "learning_rate": 5.435999193156997e-06, "loss": 0.8896, "step": 4472 }, { "epoch": 0.66, "learning_rate": 5.431737924544763e-06, "loss": 0.855, "step": 4473 }, { "epoch": 0.66, "learning_rate": 5.427477703966808e-06, "loss": 0.7554, "step": 4474 }, { "epoch": 0.66, "learning_rate": 5.423218532400489e-06, "loss": 0.7515, "step": 4475 }, { "epoch": 0.66, "learning_rate": 5.418960410822938e-06, "loss": 0.8037, "step": 4476 }, { "epoch": 0.66, "learning_rate": 5.414703340211034e-06, "loss": 0.7197, "step": 4477 }, { "epoch": 0.66, "learning_rate": 5.410447321541412e-06, "loss": 0.77, "step": 4478 }, { "epoch": 0.66, "learning_rate": 5.406192355790485e-06, "loss": 0.7769, "step": 4479 }, { "epoch": 0.66, "learning_rate": 5.401938443934405e-06, "loss": 0.7476, "step": 4480 }, { "epoch": 0.66, "learning_rate": 5.397685586949086e-06, "loss": 0.8149, "step": 4481 }, { "epoch": 0.66, "learning_rate": 5.3934337858102115e-06, "loss": 0.7969, "step": 4482 }, { "epoch": 0.66, "learning_rate": 5.389183041493206e-06, "loss": 0.7622, "step": 4483 }, { "epoch": 0.66, "learning_rate": 5.384933354973272e-06, "loss": 0.8003, "step": 4484 }, { "epoch": 0.66, "learning_rate": 5.380684727225341e-06, "loss": 0.8184, "step": 4485 }, { "epoch": 0.66, "learning_rate": 5.376437159224126e-06, "loss": 0.7822, "step": 4486 }, { "epoch": 0.66, "learning_rate": 5.3721906519440945e-06, "loss": 0.8188, "step": 4487 }, { "epoch": 0.66, "learning_rate": 5.367945206359455e-06, "loss": 0.8281, "step": 4488 }, { "epoch": 0.66, "learning_rate": 5.363700823444195e-06, "loss": 0.7598, "step": 4489 }, { "epoch": 0.66, "learning_rate": 5.35945750417203e-06, "loss": 0.8062, "step": 4490 }, { "epoch": 0.66, "learning_rate": 5.3552152495164535e-06, "loss": 0.8242, "step": 4491 }, { "epoch": 0.66, "learning_rate": 5.3509740604507135e-06, "loss": 0.7295, "step": 4492 }, { "epoch": 0.66, "learning_rate": 5.346733937947804e-06, "loss": 0.8237, "step": 4493 }, { "epoch": 0.66, "learning_rate": 5.34249488298048e-06, "loss": 0.7778, "step": 4494 }, { "epoch": 0.66, "learning_rate": 5.338256896521246e-06, "loss": 0.8301, "step": 4495 }, { "epoch": 0.66, "learning_rate": 5.334019979542366e-06, "loss": 0.7866, "step": 4496 }, { "epoch": 0.67, "learning_rate": 5.329784133015872e-06, "loss": 0.7852, "step": 4497 }, { "epoch": 0.67, "learning_rate": 5.325549357913515e-06, "loss": 0.8135, "step": 4498 }, { "epoch": 0.67, "learning_rate": 5.3213156552068394e-06, "loss": 0.8555, "step": 4499 }, { "epoch": 0.67, "learning_rate": 5.317083025867114e-06, "loss": 0.7744, "step": 4500 }, { "epoch": 0.67, "learning_rate": 5.312851470865383e-06, "loss": 0.77, "step": 4501 }, { "epoch": 0.67, "learning_rate": 5.30862099117243e-06, "loss": 0.8228, "step": 4502 }, { "epoch": 0.67, "learning_rate": 5.304391587758793e-06, "loss": 0.77, "step": 4503 }, { "epoch": 0.67, "learning_rate": 5.3001632615947706e-06, "loss": 0.752, "step": 4504 }, { "epoch": 0.67, "learning_rate": 5.295936013650413e-06, "loss": 0.7842, "step": 4505 }, { "epoch": 0.67, "learning_rate": 5.291709844895518e-06, "loss": 0.7427, "step": 4506 }, { "epoch": 0.67, "learning_rate": 5.287484756299638e-06, "loss": 0.7944, "step": 4507 }, { "epoch": 0.67, "learning_rate": 5.283260748832072e-06, "loss": 0.7832, "step": 4508 }, { "epoch": 0.67, "learning_rate": 5.279037823461881e-06, "loss": 0.3065, "step": 4509 }, { "epoch": 0.67, "learning_rate": 5.274815981157883e-06, "loss": 0.791, "step": 4510 }, { "epoch": 0.67, "learning_rate": 5.2705952228886195e-06, "loss": 0.8345, "step": 4511 }, { "epoch": 0.67, "learning_rate": 5.266375549622415e-06, "loss": 0.8398, "step": 4512 }, { "epoch": 0.67, "learning_rate": 5.262156962327325e-06, "loss": 0.7744, "step": 4513 }, { "epoch": 0.67, "learning_rate": 5.257939461971169e-06, "loss": 0.7014, "step": 4514 }, { "epoch": 0.67, "learning_rate": 5.253723049521507e-06, "loss": 0.835, "step": 4515 }, { "epoch": 0.67, "learning_rate": 5.249507725945648e-06, "loss": 0.7891, "step": 4516 }, { "epoch": 0.67, "learning_rate": 5.245293492210668e-06, "loss": 0.8164, "step": 4517 }, { "epoch": 0.67, "learning_rate": 5.241080349283369e-06, "loss": 0.7515, "step": 4518 }, { "epoch": 0.67, "learning_rate": 5.236868298130327e-06, "loss": 0.7651, "step": 4519 }, { "epoch": 0.67, "learning_rate": 5.232657339717848e-06, "loss": 0.8481, "step": 4520 }, { "epoch": 0.67, "learning_rate": 5.228447475011995e-06, "loss": 0.7803, "step": 4521 }, { "epoch": 0.67, "learning_rate": 5.224238704978584e-06, "loss": 0.7676, "step": 4522 }, { "epoch": 0.67, "learning_rate": 5.220031030583175e-06, "loss": 0.8228, "step": 4523 }, { "epoch": 0.67, "learning_rate": 5.2158244527910715e-06, "loss": 0.8188, "step": 4524 }, { "epoch": 0.67, "learning_rate": 5.21161897256734e-06, "loss": 0.7485, "step": 4525 }, { "epoch": 0.67, "learning_rate": 5.20741459087678e-06, "loss": 0.7119, "step": 4526 }, { "epoch": 0.67, "learning_rate": 5.203211308683953e-06, "loss": 0.291, "step": 4527 }, { "epoch": 0.67, "learning_rate": 5.199009126953155e-06, "loss": 0.8188, "step": 4528 }, { "epoch": 0.67, "learning_rate": 5.194808046648434e-06, "loss": 0.7515, "step": 4529 }, { "epoch": 0.67, "learning_rate": 5.190608068733593e-06, "loss": 0.7539, "step": 4530 }, { "epoch": 0.67, "learning_rate": 5.186409194172173e-06, "loss": 0.7588, "step": 4531 }, { "epoch": 0.67, "learning_rate": 5.182211423927459e-06, "loss": 0.8667, "step": 4532 }, { "epoch": 0.67, "learning_rate": 5.178014758962499e-06, "loss": 0.8218, "step": 4533 }, { "epoch": 0.67, "learning_rate": 5.173819200240067e-06, "loss": 0.7671, "step": 4534 }, { "epoch": 0.67, "learning_rate": 5.169624748722701e-06, "loss": 0.7593, "step": 4535 }, { "epoch": 0.67, "learning_rate": 5.165431405372674e-06, "loss": 0.7756, "step": 4536 }, { "epoch": 0.67, "learning_rate": 5.161239171152004e-06, "loss": 0.8091, "step": 4537 }, { "epoch": 0.67, "learning_rate": 5.1570480470224636e-06, "loss": 0.2808, "step": 4538 }, { "epoch": 0.67, "learning_rate": 5.1528580339455615e-06, "loss": 0.7705, "step": 4539 }, { "epoch": 0.67, "learning_rate": 5.14866913288256e-06, "loss": 0.8047, "step": 4540 }, { "epoch": 0.67, "learning_rate": 5.144481344794459e-06, "loss": 0.7661, "step": 4541 }, { "epoch": 0.67, "learning_rate": 5.140294670642002e-06, "loss": 0.8057, "step": 4542 }, { "epoch": 0.67, "learning_rate": 5.1361091113856875e-06, "loss": 0.7612, "step": 4543 }, { "epoch": 0.67, "learning_rate": 5.1319246679857504e-06, "loss": 0.8354, "step": 4544 }, { "epoch": 0.67, "learning_rate": 5.127741341402164e-06, "loss": 0.8052, "step": 4545 }, { "epoch": 0.67, "learning_rate": 5.123559132594661e-06, "loss": 0.8203, "step": 4546 }, { "epoch": 0.67, "learning_rate": 5.1193780425227e-06, "loss": 0.7764, "step": 4547 }, { "epoch": 0.67, "learning_rate": 5.115198072145502e-06, "loss": 0.7749, "step": 4548 }, { "epoch": 0.67, "learning_rate": 5.111019222422013e-06, "loss": 0.7773, "step": 4549 }, { "epoch": 0.67, "learning_rate": 5.106841494310929e-06, "loss": 0.3215, "step": 4550 }, { "epoch": 0.67, "learning_rate": 5.102664888770695e-06, "loss": 0.8423, "step": 4551 }, { "epoch": 0.67, "learning_rate": 5.098489406759487e-06, "loss": 0.7588, "step": 4552 }, { "epoch": 0.67, "learning_rate": 5.094315049235236e-06, "loss": 0.7905, "step": 4553 }, { "epoch": 0.67, "learning_rate": 5.0901418171556035e-06, "loss": 0.8472, "step": 4554 }, { "epoch": 0.67, "learning_rate": 5.085969711477993e-06, "loss": 0.8062, "step": 4555 }, { "epoch": 0.67, "learning_rate": 5.081798733159566e-06, "loss": 0.8301, "step": 4556 }, { "epoch": 0.67, "learning_rate": 5.077628883157205e-06, "loss": 0.2653, "step": 4557 }, { "epoch": 0.67, "learning_rate": 5.073460162427539e-06, "loss": 0.7915, "step": 4558 }, { "epoch": 0.67, "learning_rate": 5.069292571926949e-06, "loss": 0.7754, "step": 4559 }, { "epoch": 0.67, "learning_rate": 5.065126112611542e-06, "loss": 0.8135, "step": 4560 }, { "epoch": 0.67, "learning_rate": 5.060960785437183e-06, "loss": 0.7988, "step": 4561 }, { "epoch": 0.67, "learning_rate": 5.056796591359451e-06, "loss": 0.7925, "step": 4562 }, { "epoch": 0.67, "learning_rate": 5.052633531333687e-06, "loss": 0.8086, "step": 4563 }, { "epoch": 0.67, "learning_rate": 5.048471606314971e-06, "loss": 0.7568, "step": 4564 }, { "epoch": 0.68, "learning_rate": 5.044310817258111e-06, "loss": 0.3557, "step": 4565 }, { "epoch": 0.68, "learning_rate": 5.0401511651176624e-06, "loss": 0.8555, "step": 4566 }, { "epoch": 0.68, "learning_rate": 5.035992650847913e-06, "loss": 0.8052, "step": 4567 }, { "epoch": 0.68, "learning_rate": 5.031835275402898e-06, "loss": 0.8018, "step": 4568 }, { "epoch": 0.68, "learning_rate": 5.027679039736391e-06, "loss": 0.8101, "step": 4569 }, { "epoch": 0.68, "learning_rate": 5.023523944801896e-06, "loss": 0.7466, "step": 4570 }, { "epoch": 0.68, "learning_rate": 5.019369991552658e-06, "loss": 0.8184, "step": 4571 }, { "epoch": 0.68, "learning_rate": 5.015217180941669e-06, "loss": 0.79, "step": 4572 }, { "epoch": 0.68, "learning_rate": 5.011065513921645e-06, "loss": 0.8003, "step": 4573 }, { "epoch": 0.68, "learning_rate": 5.006914991445056e-06, "loss": 0.7988, "step": 4574 }, { "epoch": 0.68, "learning_rate": 5.002765614464085e-06, "loss": 0.7949, "step": 4575 }, { "epoch": 0.68, "learning_rate": 4.9986173839306765e-06, "loss": 0.3162, "step": 4576 }, { "epoch": 0.68, "learning_rate": 4.994470300796505e-06, "loss": 0.7808, "step": 4577 }, { "epoch": 0.68, "learning_rate": 4.990324366012977e-06, "loss": 0.8242, "step": 4578 }, { "epoch": 0.68, "learning_rate": 4.986179580531238e-06, "loss": 0.7563, "step": 4579 }, { "epoch": 0.68, "learning_rate": 4.982035945302164e-06, "loss": 0.8145, "step": 4580 }, { "epoch": 0.68, "learning_rate": 4.977893461276378e-06, "loss": 0.7539, "step": 4581 }, { "epoch": 0.68, "learning_rate": 4.973752129404237e-06, "loss": 0.8481, "step": 4582 }, { "epoch": 0.68, "learning_rate": 4.969611950635827e-06, "loss": 0.8057, "step": 4583 }, { "epoch": 0.68, "learning_rate": 4.965472925920975e-06, "loss": 0.7837, "step": 4584 }, { "epoch": 0.68, "learning_rate": 4.961335056209234e-06, "loss": 0.7988, "step": 4585 }, { "epoch": 0.68, "learning_rate": 4.957198342449904e-06, "loss": 0.2999, "step": 4586 }, { "epoch": 0.68, "learning_rate": 4.9530627855920236e-06, "loss": 0.8105, "step": 4587 }, { "epoch": 0.68, "learning_rate": 4.948928386584342e-06, "loss": 0.8218, "step": 4588 }, { "epoch": 0.68, "learning_rate": 4.944795146375368e-06, "loss": 0.8062, "step": 4589 }, { "epoch": 0.68, "learning_rate": 4.94066306591333e-06, "loss": 0.7842, "step": 4590 }, { "epoch": 0.68, "learning_rate": 4.9365321461462e-06, "loss": 0.8066, "step": 4591 }, { "epoch": 0.68, "learning_rate": 4.932402388021677e-06, "loss": 0.8203, "step": 4592 }, { "epoch": 0.68, "learning_rate": 4.928273792487189e-06, "loss": 0.8013, "step": 4593 }, { "epoch": 0.68, "learning_rate": 4.924146360489914e-06, "loss": 0.7959, "step": 4594 }, { "epoch": 0.68, "learning_rate": 4.920020092976746e-06, "loss": 0.7759, "step": 4595 }, { "epoch": 0.68, "learning_rate": 4.915894990894317e-06, "loss": 0.8223, "step": 4596 }, { "epoch": 0.68, "learning_rate": 4.911771055189001e-06, "loss": 0.7998, "step": 4597 }, { "epoch": 0.68, "learning_rate": 4.9076482868068856e-06, "loss": 0.749, "step": 4598 }, { "epoch": 0.68, "learning_rate": 4.9035266866938125e-06, "loss": 0.7891, "step": 4599 }, { "epoch": 0.68, "learning_rate": 4.899406255795338e-06, "loss": 0.7769, "step": 4600 }, { "epoch": 0.68, "learning_rate": 4.895286995056756e-06, "loss": 0.7822, "step": 4601 }, { "epoch": 0.68, "learning_rate": 4.891168905423097e-06, "loss": 0.7612, "step": 4602 }, { "epoch": 0.68, "learning_rate": 4.887051987839112e-06, "loss": 0.7607, "step": 4603 }, { "epoch": 0.68, "learning_rate": 4.882936243249298e-06, "loss": 0.7681, "step": 4604 }, { "epoch": 0.68, "learning_rate": 4.878821672597868e-06, "loss": 0.7998, "step": 4605 }, { "epoch": 0.68, "learning_rate": 4.87470827682877e-06, "loss": 0.8169, "step": 4606 }, { "epoch": 0.68, "learning_rate": 4.870596056885693e-06, "loss": 0.8066, "step": 4607 }, { "epoch": 0.68, "learning_rate": 4.866485013712041e-06, "loss": 0.8027, "step": 4608 }, { "epoch": 0.68, "learning_rate": 4.862375148250954e-06, "loss": 0.8516, "step": 4609 }, { "epoch": 0.68, "learning_rate": 4.858266461445308e-06, "loss": 0.7192, "step": 4610 }, { "epoch": 0.68, "learning_rate": 4.854158954237697e-06, "loss": 0.8213, "step": 4611 }, { "epoch": 0.68, "learning_rate": 4.850052627570457e-06, "loss": 0.7593, "step": 4612 }, { "epoch": 0.68, "learning_rate": 4.8459474823856445e-06, "loss": 0.8354, "step": 4613 }, { "epoch": 0.68, "learning_rate": 4.841843519625042e-06, "loss": 0.7156, "step": 4614 }, { "epoch": 0.68, "learning_rate": 4.837740740230174e-06, "loss": 0.7793, "step": 4615 }, { "epoch": 0.68, "learning_rate": 4.833639145142277e-06, "loss": 0.8115, "step": 4616 }, { "epoch": 0.68, "learning_rate": 4.829538735302333e-06, "loss": 0.811, "step": 4617 }, { "epoch": 0.68, "learning_rate": 4.8254395116510374e-06, "loss": 0.7622, "step": 4618 }, { "epoch": 0.68, "learning_rate": 4.821341475128819e-06, "loss": 0.77, "step": 4619 }, { "epoch": 0.68, "learning_rate": 4.81724462667584e-06, "loss": 0.8511, "step": 4620 }, { "epoch": 0.68, "learning_rate": 4.813148967231981e-06, "loss": 0.812, "step": 4621 }, { "epoch": 0.68, "learning_rate": 4.809054497736849e-06, "loss": 0.7402, "step": 4622 }, { "epoch": 0.68, "learning_rate": 4.804961219129792e-06, "loss": 0.7896, "step": 4623 }, { "epoch": 0.68, "learning_rate": 4.8008691323498664e-06, "loss": 0.7598, "step": 4624 }, { "epoch": 0.68, "learning_rate": 4.796778238335872e-06, "loss": 0.8154, "step": 4625 }, { "epoch": 0.68, "learning_rate": 4.792688538026324e-06, "loss": 0.7905, "step": 4626 }, { "epoch": 0.68, "learning_rate": 4.788600032359461e-06, "loss": 0.8101, "step": 4627 }, { "epoch": 0.68, "learning_rate": 4.784512722273261e-06, "loss": 0.79, "step": 4628 }, { "epoch": 0.68, "learning_rate": 4.780426608705415e-06, "loss": 0.7822, "step": 4629 }, { "epoch": 0.68, "learning_rate": 4.7763416925933495e-06, "loss": 0.8154, "step": 4630 }, { "epoch": 0.68, "learning_rate": 4.7722579748742086e-06, "loss": 0.7881, "step": 4631 }, { "epoch": 0.69, "learning_rate": 4.7681754564848606e-06, "loss": 0.8159, "step": 4632 }, { "epoch": 0.69, "learning_rate": 4.764094138361909e-06, "loss": 0.8105, "step": 4633 }, { "epoch": 0.69, "learning_rate": 4.760014021441671e-06, "loss": 0.8643, "step": 4634 }, { "epoch": 0.69, "learning_rate": 4.75593510666019e-06, "loss": 0.7803, "step": 4635 }, { "epoch": 0.69, "learning_rate": 4.751857394953243e-06, "loss": 0.8174, "step": 4636 }, { "epoch": 0.69, "learning_rate": 4.747780887256316e-06, "loss": 0.7349, "step": 4637 }, { "epoch": 0.69, "learning_rate": 4.743705584504634e-06, "loss": 0.7725, "step": 4638 }, { "epoch": 0.69, "learning_rate": 4.739631487633136e-06, "loss": 0.7856, "step": 4639 }, { "epoch": 0.69, "learning_rate": 4.735558597576482e-06, "loss": 0.8188, "step": 4640 }, { "epoch": 0.69, "learning_rate": 4.731486915269066e-06, "loss": 0.3047, "step": 4641 }, { "epoch": 0.69, "learning_rate": 4.727416441644998e-06, "loss": 0.8257, "step": 4642 }, { "epoch": 0.69, "learning_rate": 4.723347177638106e-06, "loss": 0.7739, "step": 4643 }, { "epoch": 0.69, "learning_rate": 4.719279124181953e-06, "loss": 0.8232, "step": 4644 }, { "epoch": 0.69, "learning_rate": 4.715212282209809e-06, "loss": 0.2848, "step": 4645 }, { "epoch": 0.69, "learning_rate": 4.7111466526546835e-06, "loss": 0.8232, "step": 4646 }, { "epoch": 0.69, "learning_rate": 4.707082236449294e-06, "loss": 0.7974, "step": 4647 }, { "epoch": 0.69, "learning_rate": 4.703019034526082e-06, "loss": 0.8447, "step": 4648 }, { "epoch": 0.69, "learning_rate": 4.698957047817217e-06, "loss": 0.7642, "step": 4649 }, { "epoch": 0.69, "learning_rate": 4.694896277254581e-06, "loss": 0.7554, "step": 4650 }, { "epoch": 0.69, "learning_rate": 4.690836723769792e-06, "loss": 0.7422, "step": 4651 }, { "epoch": 0.69, "learning_rate": 4.686778388294163e-06, "loss": 0.8589, "step": 4652 }, { "epoch": 0.69, "learning_rate": 4.682721271758749e-06, "loss": 0.8101, "step": 4653 }, { "epoch": 0.69, "learning_rate": 4.678665375094325e-06, "loss": 0.8076, "step": 4654 }, { "epoch": 0.69, "learning_rate": 4.674610699231373e-06, "loss": 0.7583, "step": 4655 }, { "epoch": 0.69, "learning_rate": 4.670557245100105e-06, "loss": 0.7979, "step": 4656 }, { "epoch": 0.69, "learning_rate": 4.666505013630448e-06, "loss": 0.2937, "step": 4657 }, { "epoch": 0.69, "learning_rate": 4.66245400575205e-06, "loss": 0.7632, "step": 4658 }, { "epoch": 0.69, "learning_rate": 4.658404222394288e-06, "loss": 0.8052, "step": 4659 }, { "epoch": 0.69, "learning_rate": 4.654355664486233e-06, "loss": 0.7646, "step": 4660 }, { "epoch": 0.69, "learning_rate": 4.650308332956704e-06, "loss": 0.7793, "step": 4661 }, { "epoch": 0.69, "learning_rate": 4.6462622287342154e-06, "loss": 0.8091, "step": 4662 }, { "epoch": 0.69, "learning_rate": 4.642217352747015e-06, "loss": 0.7852, "step": 4663 }, { "epoch": 0.69, "learning_rate": 4.63817370592307e-06, "loss": 0.8042, "step": 4664 }, { "epoch": 0.69, "learning_rate": 4.634131289190046e-06, "loss": 0.8081, "step": 4665 }, { "epoch": 0.69, "learning_rate": 4.630090103475349e-06, "loss": 0.8003, "step": 4666 }, { "epoch": 0.69, "learning_rate": 4.6260501497060875e-06, "loss": 0.7793, "step": 4667 }, { "epoch": 0.69, "learning_rate": 4.6220114288091e-06, "loss": 0.7734, "step": 4668 }, { "epoch": 0.69, "learning_rate": 4.617973941710932e-06, "loss": 0.7925, "step": 4669 }, { "epoch": 0.69, "learning_rate": 4.613937689337844e-06, "loss": 0.7524, "step": 4670 }, { "epoch": 0.69, "learning_rate": 4.609902672615824e-06, "loss": 0.8052, "step": 4671 }, { "epoch": 0.69, "learning_rate": 4.605868892470579e-06, "loss": 0.7905, "step": 4672 }, { "epoch": 0.69, "learning_rate": 4.601836349827507e-06, "loss": 0.769, "step": 4673 }, { "epoch": 0.69, "learning_rate": 4.597805045611753e-06, "loss": 0.7666, "step": 4674 }, { "epoch": 0.69, "learning_rate": 4.593774980748155e-06, "loss": 0.8315, "step": 4675 }, { "epoch": 0.69, "learning_rate": 4.5897461561612814e-06, "loss": 0.7778, "step": 4676 }, { "epoch": 0.69, "learning_rate": 4.585718572775417e-06, "loss": 0.2926, "step": 4677 }, { "epoch": 0.69, "learning_rate": 4.58169223151454e-06, "loss": 0.8003, "step": 4678 }, { "epoch": 0.69, "learning_rate": 4.5776671333023725e-06, "loss": 0.7993, "step": 4679 }, { "epoch": 0.69, "learning_rate": 4.5736432790623295e-06, "loss": 0.8237, "step": 4680 }, { "epoch": 0.69, "learning_rate": 4.569620669717556e-06, "loss": 0.7739, "step": 4681 }, { "epoch": 0.69, "learning_rate": 4.565599306190902e-06, "loss": 0.8066, "step": 4682 }, { "epoch": 0.69, "learning_rate": 4.561579189404929e-06, "loss": 0.8418, "step": 4683 }, { "epoch": 0.69, "learning_rate": 4.557560320281927e-06, "loss": 0.7847, "step": 4684 }, { "epoch": 0.69, "learning_rate": 4.553542699743884e-06, "loss": 0.8047, "step": 4685 }, { "epoch": 0.69, "learning_rate": 4.549526328712509e-06, "loss": 0.748, "step": 4686 }, { "epoch": 0.69, "learning_rate": 4.5455112081092265e-06, "loss": 0.7061, "step": 4687 }, { "epoch": 0.69, "learning_rate": 4.541497338855166e-06, "loss": 0.7583, "step": 4688 }, { "epoch": 0.69, "learning_rate": 4.537484721871181e-06, "loss": 0.7681, "step": 4689 }, { "epoch": 0.69, "learning_rate": 4.5334733580778305e-06, "loss": 0.7954, "step": 4690 }, { "epoch": 0.69, "learning_rate": 4.529463248395379e-06, "loss": 0.7944, "step": 4691 }, { "epoch": 0.69, "learning_rate": 4.525454393743822e-06, "loss": 0.7939, "step": 4692 }, { "epoch": 0.69, "learning_rate": 4.52144679504285e-06, "loss": 0.72, "step": 4693 }, { "epoch": 0.69, "learning_rate": 4.517440453211876e-06, "loss": 0.7778, "step": 4694 }, { "epoch": 0.69, "learning_rate": 4.5134353691700196e-06, "loss": 0.7856, "step": 4695 }, { "epoch": 0.69, "learning_rate": 4.509431543836109e-06, "loss": 0.8062, "step": 4696 }, { "epoch": 0.69, "learning_rate": 4.505428978128693e-06, "loss": 0.7349, "step": 4697 }, { "epoch": 0.69, "learning_rate": 4.5014276729660235e-06, "loss": 0.772, "step": 4698 }, { "epoch": 0.69, "learning_rate": 4.497427629266061e-06, "loss": 0.772, "step": 4699 }, { "epoch": 0.7, "learning_rate": 4.493428847946489e-06, "loss": 0.7251, "step": 4700 }, { "epoch": 0.7, "learning_rate": 4.4894313299246874e-06, "loss": 0.7207, "step": 4701 }, { "epoch": 0.7, "learning_rate": 4.485435076117758e-06, "loss": 0.7549, "step": 4702 }, { "epoch": 0.7, "learning_rate": 4.481440087442505e-06, "loss": 0.7944, "step": 4703 }, { "epoch": 0.7, "learning_rate": 4.4774463648154375e-06, "loss": 0.8008, "step": 4704 }, { "epoch": 0.7, "learning_rate": 4.473453909152792e-06, "loss": 0.7949, "step": 4705 }, { "epoch": 0.7, "learning_rate": 4.469462721370498e-06, "loss": 0.748, "step": 4706 }, { "epoch": 0.7, "learning_rate": 4.465472802384196e-06, "loss": 0.7578, "step": 4707 }, { "epoch": 0.7, "learning_rate": 4.461484153109246e-06, "loss": 0.7852, "step": 4708 }, { "epoch": 0.7, "learning_rate": 4.4574967744607025e-06, "loss": 0.7129, "step": 4709 }, { "epoch": 0.7, "learning_rate": 4.453510667353342e-06, "loss": 0.8174, "step": 4710 }, { "epoch": 0.7, "learning_rate": 4.4495258327016415e-06, "loss": 0.77, "step": 4711 }, { "epoch": 0.7, "learning_rate": 4.445542271419781e-06, "loss": 0.8232, "step": 4712 }, { "epoch": 0.7, "learning_rate": 4.441559984421664e-06, "loss": 0.8208, "step": 4713 }, { "epoch": 0.7, "learning_rate": 4.437578972620884e-06, "loss": 0.7217, "step": 4714 }, { "epoch": 0.7, "learning_rate": 4.433599236930759e-06, "loss": 0.8506, "step": 4715 }, { "epoch": 0.7, "learning_rate": 4.429620778264302e-06, "loss": 0.8389, "step": 4716 }, { "epoch": 0.7, "learning_rate": 4.425643597534231e-06, "loss": 0.8096, "step": 4717 }, { "epoch": 0.7, "learning_rate": 4.421667695652987e-06, "loss": 0.8413, "step": 4718 }, { "epoch": 0.7, "learning_rate": 4.4176930735327005e-06, "loss": 0.3108, "step": 4719 }, { "epoch": 0.7, "learning_rate": 4.413719732085212e-06, "loss": 0.835, "step": 4720 }, { "epoch": 0.7, "learning_rate": 4.40974767222208e-06, "loss": 0.7915, "step": 4721 }, { "epoch": 0.7, "learning_rate": 4.405776894854552e-06, "loss": 0.7983, "step": 4722 }, { "epoch": 0.7, "learning_rate": 4.401807400893601e-06, "loss": 0.7231, "step": 4723 }, { "epoch": 0.7, "learning_rate": 4.397839191249879e-06, "loss": 0.771, "step": 4724 }, { "epoch": 0.7, "learning_rate": 4.393872266833764e-06, "loss": 0.8501, "step": 4725 }, { "epoch": 0.7, "learning_rate": 4.38990662855534e-06, "loss": 0.8096, "step": 4726 }, { "epoch": 0.7, "learning_rate": 4.385942277324381e-06, "loss": 0.8389, "step": 4727 }, { "epoch": 0.7, "learning_rate": 4.381979214050385e-06, "loss": 0.7852, "step": 4728 }, { "epoch": 0.7, "learning_rate": 4.378017439642529e-06, "loss": 0.791, "step": 4729 }, { "epoch": 0.7, "learning_rate": 4.374056955009716e-06, "loss": 0.8027, "step": 4730 }, { "epoch": 0.7, "learning_rate": 4.370097761060552e-06, "loss": 0.8687, "step": 4731 }, { "epoch": 0.7, "learning_rate": 4.3661398587033355e-06, "loss": 0.7705, "step": 4732 }, { "epoch": 0.7, "learning_rate": 4.362183248846074e-06, "loss": 0.8271, "step": 4733 }, { "epoch": 0.7, "learning_rate": 4.358227932396476e-06, "loss": 0.7944, "step": 4734 }, { "epoch": 0.7, "learning_rate": 4.354273910261959e-06, "loss": 0.7783, "step": 4735 }, { "epoch": 0.7, "learning_rate": 4.350321183349648e-06, "loss": 0.7715, "step": 4736 }, { "epoch": 0.7, "learning_rate": 4.34636975256635e-06, "loss": 0.7373, "step": 4737 }, { "epoch": 0.7, "learning_rate": 4.342419618818594e-06, "loss": 0.8774, "step": 4738 }, { "epoch": 0.7, "learning_rate": 4.338470783012609e-06, "loss": 0.2917, "step": 4739 }, { "epoch": 0.7, "learning_rate": 4.334523246054316e-06, "loss": 0.3115, "step": 4740 }, { "epoch": 0.7, "learning_rate": 4.330577008849356e-06, "loss": 0.8574, "step": 4741 }, { "epoch": 0.7, "learning_rate": 4.326632072303045e-06, "loss": 0.8188, "step": 4742 }, { "epoch": 0.7, "learning_rate": 4.3226884373204245e-06, "loss": 0.8232, "step": 4743 }, { "epoch": 0.7, "learning_rate": 4.318746104806233e-06, "loss": 0.7969, "step": 4744 }, { "epoch": 0.7, "learning_rate": 4.314805075664901e-06, "loss": 0.8042, "step": 4745 }, { "epoch": 0.7, "learning_rate": 4.310865350800566e-06, "loss": 0.8174, "step": 4746 }, { "epoch": 0.7, "learning_rate": 4.306926931117061e-06, "loss": 0.7666, "step": 4747 }, { "epoch": 0.7, "learning_rate": 4.302989817517931e-06, "loss": 0.8457, "step": 4748 }, { "epoch": 0.7, "learning_rate": 4.299054010906418e-06, "loss": 0.7935, "step": 4749 }, { "epoch": 0.7, "learning_rate": 4.295119512185448e-06, "loss": 0.7993, "step": 4750 }, { "epoch": 0.7, "learning_rate": 4.29118632225767e-06, "loss": 0.7578, "step": 4751 }, { "epoch": 0.7, "learning_rate": 4.2872544420254145e-06, "loss": 0.8071, "step": 4752 }, { "epoch": 0.7, "learning_rate": 4.283323872390728e-06, "loss": 0.7607, "step": 4753 }, { "epoch": 0.7, "learning_rate": 4.279394614255343e-06, "loss": 0.7617, "step": 4754 }, { "epoch": 0.7, "learning_rate": 4.275466668520692e-06, "loss": 0.7163, "step": 4755 }, { "epoch": 0.7, "learning_rate": 4.2715400360879174e-06, "loss": 0.3066, "step": 4756 }, { "epoch": 0.7, "learning_rate": 4.267614717857847e-06, "loss": 0.8062, "step": 4757 }, { "epoch": 0.7, "learning_rate": 4.26369071473102e-06, "loss": 0.7871, "step": 4758 }, { "epoch": 0.7, "learning_rate": 4.259768027607663e-06, "loss": 0.7627, "step": 4759 }, { "epoch": 0.7, "learning_rate": 4.255846657387701e-06, "loss": 0.8174, "step": 4760 }, { "epoch": 0.7, "learning_rate": 4.251926604970769e-06, "loss": 0.8169, "step": 4761 }, { "epoch": 0.7, "learning_rate": 4.248007871256185e-06, "loss": 0.7529, "step": 4762 }, { "epoch": 0.7, "learning_rate": 4.244090457142972e-06, "loss": 0.8384, "step": 4763 }, { "epoch": 0.7, "learning_rate": 4.240174363529852e-06, "loss": 0.8145, "step": 4764 }, { "epoch": 0.7, "learning_rate": 4.236259591315235e-06, "loss": 0.8364, "step": 4765 }, { "epoch": 0.7, "learning_rate": 4.232346141397242e-06, "loss": 0.8457, "step": 4766 }, { "epoch": 0.7, "learning_rate": 4.228434014673679e-06, "loss": 0.3204, "step": 4767 }, { "epoch": 0.71, "learning_rate": 4.2245232120420474e-06, "loss": 0.7886, "step": 4768 }, { "epoch": 0.71, "learning_rate": 4.220613734399558e-06, "loss": 0.7998, "step": 4769 }, { "epoch": 0.71, "learning_rate": 4.216705582643105e-06, "loss": 0.3048, "step": 4770 }, { "epoch": 0.71, "learning_rate": 4.212798757669278e-06, "loss": 0.8506, "step": 4771 }, { "epoch": 0.71, "learning_rate": 4.208893260374376e-06, "loss": 0.2631, "step": 4772 }, { "epoch": 0.71, "learning_rate": 4.204989091654374e-06, "loss": 0.7832, "step": 4773 }, { "epoch": 0.71, "learning_rate": 4.201086252404962e-06, "loss": 0.7861, "step": 4774 }, { "epoch": 0.71, "learning_rate": 4.197184743521512e-06, "loss": 0.7881, "step": 4775 }, { "epoch": 0.71, "learning_rate": 4.19328456589909e-06, "loss": 0.792, "step": 4776 }, { "epoch": 0.71, "learning_rate": 4.189385720432465e-06, "loss": 0.7563, "step": 4777 }, { "epoch": 0.71, "learning_rate": 4.185488208016093e-06, "loss": 0.8213, "step": 4778 }, { "epoch": 0.71, "learning_rate": 4.181592029544134e-06, "loss": 0.7871, "step": 4779 }, { "epoch": 0.71, "learning_rate": 4.177697185910428e-06, "loss": 0.8057, "step": 4780 }, { "epoch": 0.71, "learning_rate": 4.1738036780085175e-06, "loss": 0.6899, "step": 4781 }, { "epoch": 0.71, "learning_rate": 4.1699115067316405e-06, "loss": 0.8247, "step": 4782 }, { "epoch": 0.71, "learning_rate": 4.166020672972723e-06, "loss": 0.8042, "step": 4783 }, { "epoch": 0.71, "learning_rate": 4.162131177624383e-06, "loss": 0.7979, "step": 4784 }, { "epoch": 0.71, "learning_rate": 4.158243021578941e-06, "loss": 0.8438, "step": 4785 }, { "epoch": 0.71, "learning_rate": 4.154356205728396e-06, "loss": 0.7534, "step": 4786 }, { "epoch": 0.71, "learning_rate": 4.150470730964454e-06, "loss": 0.8193, "step": 4787 }, { "epoch": 0.71, "learning_rate": 4.1465865981785055e-06, "loss": 0.7705, "step": 4788 }, { "epoch": 0.71, "learning_rate": 4.14270380826163e-06, "loss": 0.7053, "step": 4789 }, { "epoch": 0.71, "learning_rate": 4.138822362104608e-06, "loss": 0.8179, "step": 4790 }, { "epoch": 0.71, "learning_rate": 4.134942260597903e-06, "loss": 0.7285, "step": 4791 }, { "epoch": 0.71, "learning_rate": 4.131063504631678e-06, "loss": 0.7891, "step": 4792 }, { "epoch": 0.71, "learning_rate": 4.1271860950957835e-06, "loss": 0.8105, "step": 4793 }, { "epoch": 0.71, "learning_rate": 4.123310032879754e-06, "loss": 0.8135, "step": 4794 }, { "epoch": 0.71, "learning_rate": 4.11943531887283e-06, "loss": 0.7676, "step": 4795 }, { "epoch": 0.71, "learning_rate": 4.11556195396393e-06, "loss": 0.7671, "step": 4796 }, { "epoch": 0.71, "learning_rate": 4.111689939041666e-06, "loss": 0.7344, "step": 4797 }, { "epoch": 0.71, "learning_rate": 4.107819274994348e-06, "loss": 0.8545, "step": 4798 }, { "epoch": 0.71, "learning_rate": 4.103949962709961e-06, "loss": 0.8115, "step": 4799 }, { "epoch": 0.71, "learning_rate": 4.1000820030762024e-06, "loss": 0.7764, "step": 4800 }, { "epoch": 0.71, "learning_rate": 4.096215396980429e-06, "loss": 0.7539, "step": 4801 }, { "epoch": 0.71, "learning_rate": 4.0923501453097115e-06, "loss": 0.7998, "step": 4802 }, { "epoch": 0.71, "learning_rate": 4.088486248950806e-06, "loss": 0.7979, "step": 4803 }, { "epoch": 0.71, "learning_rate": 4.084623708790145e-06, "loss": 0.7808, "step": 4804 }, { "epoch": 0.71, "learning_rate": 4.080762525713873e-06, "loss": 0.7612, "step": 4805 }, { "epoch": 0.71, "learning_rate": 4.07690270060779e-06, "loss": 0.7715, "step": 4806 }, { "epoch": 0.71, "learning_rate": 4.073044234357413e-06, "loss": 0.7783, "step": 4807 }, { "epoch": 0.71, "learning_rate": 4.069187127847941e-06, "loss": 0.7637, "step": 4808 }, { "epoch": 0.71, "learning_rate": 4.065331381964252e-06, "loss": 0.8325, "step": 4809 }, { "epoch": 0.71, "learning_rate": 4.061476997590918e-06, "loss": 0.7329, "step": 4810 }, { "epoch": 0.71, "learning_rate": 4.057623975612199e-06, "loss": 0.8418, "step": 4811 }, { "epoch": 0.71, "learning_rate": 4.05377231691204e-06, "loss": 0.3296, "step": 4812 }, { "epoch": 0.71, "learning_rate": 4.049922022374082e-06, "loss": 0.8232, "step": 4813 }, { "epoch": 0.71, "learning_rate": 4.046073092881634e-06, "loss": 0.7578, "step": 4814 }, { "epoch": 0.71, "learning_rate": 4.0422255293177074e-06, "loss": 0.8237, "step": 4815 }, { "epoch": 0.71, "learning_rate": 4.0383793325650025e-06, "loss": 0.8491, "step": 4816 }, { "epoch": 0.71, "learning_rate": 4.034534503505896e-06, "loss": 0.2971, "step": 4817 }, { "epoch": 0.71, "learning_rate": 4.030691043022454e-06, "loss": 0.834, "step": 4818 }, { "epoch": 0.71, "learning_rate": 4.026848951996427e-06, "loss": 0.7837, "step": 4819 }, { "epoch": 0.71, "learning_rate": 4.023008231309257e-06, "loss": 0.7886, "step": 4820 }, { "epoch": 0.71, "learning_rate": 4.019168881842071e-06, "loss": 0.7847, "step": 4821 }, { "epoch": 0.71, "learning_rate": 4.015330904475675e-06, "loss": 0.877, "step": 4822 }, { "epoch": 0.71, "learning_rate": 4.0114943000905645e-06, "loss": 0.7969, "step": 4823 }, { "epoch": 0.71, "learning_rate": 4.0076590695669186e-06, "loss": 0.7529, "step": 4824 }, { "epoch": 0.71, "learning_rate": 4.0038252137846e-06, "loss": 0.8066, "step": 4825 }, { "epoch": 0.71, "learning_rate": 3.99999273362317e-06, "loss": 0.7876, "step": 4826 }, { "epoch": 0.71, "learning_rate": 3.996161629961848e-06, "loss": 0.8052, "step": 4827 }, { "epoch": 0.71, "learning_rate": 3.992331903679559e-06, "loss": 0.7715, "step": 4828 }, { "epoch": 0.71, "learning_rate": 3.988503555654902e-06, "loss": 0.7695, "step": 4829 }, { "epoch": 0.71, "learning_rate": 3.984676586766167e-06, "loss": 0.7417, "step": 4830 }, { "epoch": 0.71, "learning_rate": 3.980850997891321e-06, "loss": 0.8101, "step": 4831 }, { "epoch": 0.71, "learning_rate": 3.977026789908015e-06, "loss": 0.7712, "step": 4832 }, { "epoch": 0.71, "learning_rate": 3.973203963693589e-06, "loss": 0.8442, "step": 4833 }, { "epoch": 0.71, "learning_rate": 3.969382520125058e-06, "loss": 0.8062, "step": 4834 }, { "epoch": 0.72, "learning_rate": 3.9655624600791285e-06, "loss": 0.769, "step": 4835 }, { "epoch": 0.72, "learning_rate": 3.9617437844321834e-06, "loss": 0.77, "step": 4836 }, { "epoch": 0.72, "learning_rate": 3.957926494060285e-06, "loss": 0.8794, "step": 4837 }, { "epoch": 0.72, "learning_rate": 3.954110589839185e-06, "loss": 0.7939, "step": 4838 }, { "epoch": 0.72, "learning_rate": 3.950296072644323e-06, "loss": 0.3143, "step": 4839 }, { "epoch": 0.72, "learning_rate": 3.946482943350797e-06, "loss": 0.2867, "step": 4840 }, { "epoch": 0.72, "learning_rate": 3.942671202833412e-06, "loss": 0.8433, "step": 4841 }, { "epoch": 0.72, "learning_rate": 3.9388608519666375e-06, "loss": 0.8135, "step": 4842 }, { "epoch": 0.72, "learning_rate": 3.935051891624636e-06, "loss": 0.7412, "step": 4843 }, { "epoch": 0.72, "learning_rate": 3.931244322681243e-06, "loss": 0.7769, "step": 4844 }, { "epoch": 0.72, "learning_rate": 3.927438146009974e-06, "loss": 0.7695, "step": 4845 }, { "epoch": 0.72, "learning_rate": 3.923633362484036e-06, "loss": 0.8428, "step": 4846 }, { "epoch": 0.72, "learning_rate": 3.919829972976304e-06, "loss": 0.8311, "step": 4847 }, { "epoch": 0.72, "learning_rate": 3.916027978359335e-06, "loss": 0.7734, "step": 4848 }, { "epoch": 0.72, "learning_rate": 3.9122273795053745e-06, "loss": 0.812, "step": 4849 }, { "epoch": 0.72, "learning_rate": 3.9084281772863375e-06, "loss": 0.7866, "step": 4850 }, { "epoch": 0.72, "learning_rate": 3.90463037257383e-06, "loss": 0.7871, "step": 4851 }, { "epoch": 0.72, "learning_rate": 3.900833966239126e-06, "loss": 0.8042, "step": 4852 }, { "epoch": 0.72, "learning_rate": 3.89703895915318e-06, "loss": 0.791, "step": 4853 }, { "epoch": 0.72, "learning_rate": 3.8932453521866365e-06, "loss": 0.75, "step": 4854 }, { "epoch": 0.72, "learning_rate": 3.889453146209804e-06, "loss": 0.8086, "step": 4855 }, { "epoch": 0.72, "learning_rate": 3.885662342092684e-06, "loss": 0.8311, "step": 4856 }, { "epoch": 0.72, "learning_rate": 3.881872940704946e-06, "loss": 0.8394, "step": 4857 }, { "epoch": 0.72, "learning_rate": 3.8780849429159365e-06, "loss": 0.8647, "step": 4858 }, { "epoch": 0.72, "learning_rate": 3.874298349594692e-06, "loss": 0.8687, "step": 4859 }, { "epoch": 0.72, "learning_rate": 3.870513161609915e-06, "loss": 0.7441, "step": 4860 }, { "epoch": 0.72, "learning_rate": 3.866729379829986e-06, "loss": 0.7886, "step": 4861 }, { "epoch": 0.72, "learning_rate": 3.862947005122975e-06, "loss": 0.8325, "step": 4862 }, { "epoch": 0.72, "learning_rate": 3.859166038356612e-06, "loss": 0.8423, "step": 4863 }, { "epoch": 0.72, "learning_rate": 3.855386480398322e-06, "loss": 0.7197, "step": 4864 }, { "epoch": 0.72, "learning_rate": 3.851608332115192e-06, "loss": 0.2643, "step": 4865 }, { "epoch": 0.72, "learning_rate": 3.8478315943739895e-06, "loss": 0.7544, "step": 4866 }, { "epoch": 0.72, "learning_rate": 3.844056268041165e-06, "loss": 0.7896, "step": 4867 }, { "epoch": 0.72, "learning_rate": 3.840282353982836e-06, "loss": 0.7661, "step": 4868 }, { "epoch": 0.72, "learning_rate": 3.8365098530648045e-06, "loss": 0.7969, "step": 4869 }, { "epoch": 0.72, "learning_rate": 3.832738766152544e-06, "loss": 0.7881, "step": 4870 }, { "epoch": 0.72, "learning_rate": 3.828969094111197e-06, "loss": 0.7676, "step": 4871 }, { "epoch": 0.72, "learning_rate": 3.825200837805595e-06, "loss": 0.7783, "step": 4872 }, { "epoch": 0.72, "learning_rate": 3.8214339981002364e-06, "loss": 0.7827, "step": 4873 }, { "epoch": 0.72, "learning_rate": 3.817668575859292e-06, "loss": 0.8203, "step": 4874 }, { "epoch": 0.72, "learning_rate": 3.8139045719466197e-06, "loss": 0.7717, "step": 4875 }, { "epoch": 0.72, "learning_rate": 3.8101419872257327e-06, "loss": 0.8018, "step": 4876 }, { "epoch": 0.72, "learning_rate": 3.8063808225598407e-06, "loss": 0.793, "step": 4877 }, { "epoch": 0.72, "learning_rate": 3.802621078811811e-06, "loss": 0.7563, "step": 4878 }, { "epoch": 0.72, "learning_rate": 3.7988627568441884e-06, "loss": 0.7729, "step": 4879 }, { "epoch": 0.72, "learning_rate": 3.795105857519199e-06, "loss": 0.7812, "step": 4880 }, { "epoch": 0.72, "learning_rate": 3.791350381698735e-06, "loss": 0.8208, "step": 4881 }, { "epoch": 0.72, "learning_rate": 3.7875963302443597e-06, "loss": 0.7122, "step": 4882 }, { "epoch": 0.72, "learning_rate": 3.7838437040173216e-06, "loss": 0.8086, "step": 4883 }, { "epoch": 0.72, "learning_rate": 3.7800925038785274e-06, "loss": 0.7158, "step": 4884 }, { "epoch": 0.72, "learning_rate": 3.7763427306885725e-06, "loss": 0.8252, "step": 4885 }, { "epoch": 0.72, "learning_rate": 3.7725943853077105e-06, "loss": 0.8081, "step": 4886 }, { "epoch": 0.72, "learning_rate": 3.768847468595871e-06, "loss": 0.8169, "step": 4887 }, { "epoch": 0.72, "learning_rate": 3.7651019814126656e-06, "loss": 0.2904, "step": 4888 }, { "epoch": 0.72, "learning_rate": 3.7613579246173624e-06, "loss": 0.7896, "step": 4889 }, { "epoch": 0.72, "learning_rate": 3.7576152990689217e-06, "loss": 0.751, "step": 4890 }, { "epoch": 0.72, "learning_rate": 3.7538741056259478e-06, "loss": 0.7598, "step": 4891 }, { "epoch": 0.72, "learning_rate": 3.7501343451467386e-06, "loss": 0.793, "step": 4892 }, { "epoch": 0.72, "learning_rate": 3.746396018489261e-06, "loss": 0.7158, "step": 4893 }, { "epoch": 0.72, "learning_rate": 3.7426591265111445e-06, "loss": 0.769, "step": 4894 }, { "epoch": 0.72, "learning_rate": 3.738923670069694e-06, "loss": 0.7925, "step": 4895 }, { "epoch": 0.72, "learning_rate": 3.73518965002188e-06, "loss": 0.7817, "step": 4896 }, { "epoch": 0.72, "learning_rate": 3.7314570672243523e-06, "loss": 0.8032, "step": 4897 }, { "epoch": 0.72, "learning_rate": 3.7277259225334284e-06, "loss": 0.8467, "step": 4898 }, { "epoch": 0.72, "learning_rate": 3.7239962168050935e-06, "loss": 0.7554, "step": 4899 }, { "epoch": 0.72, "learning_rate": 3.7202679508950015e-06, "loss": 0.7896, "step": 4900 }, { "epoch": 0.72, "learning_rate": 3.716541125658475e-06, "loss": 0.7593, "step": 4901 }, { "epoch": 0.72, "learning_rate": 3.712815741950511e-06, "loss": 0.8154, "step": 4902 }, { "epoch": 0.73, "learning_rate": 3.7090918006257825e-06, "loss": 0.7847, "step": 4903 }, { "epoch": 0.73, "learning_rate": 3.7053693025386074e-06, "loss": 0.7388, "step": 4904 }, { "epoch": 0.73, "learning_rate": 3.701648248542995e-06, "loss": 0.8159, "step": 4905 }, { "epoch": 0.73, "learning_rate": 3.6979286394926204e-06, "loss": 0.7676, "step": 4906 }, { "epoch": 0.73, "learning_rate": 3.6942104762408183e-06, "loss": 0.8027, "step": 4907 }, { "epoch": 0.73, "learning_rate": 3.6904937596405975e-06, "loss": 0.7598, "step": 4908 }, { "epoch": 0.73, "learning_rate": 3.68677849054463e-06, "loss": 0.77, "step": 4909 }, { "epoch": 0.73, "learning_rate": 3.683064669805263e-06, "loss": 0.7695, "step": 4910 }, { "epoch": 0.73, "learning_rate": 3.6793522982745135e-06, "loss": 0.8418, "step": 4911 }, { "epoch": 0.73, "learning_rate": 3.6756413768040487e-06, "loss": 0.8062, "step": 4912 }, { "epoch": 0.73, "learning_rate": 3.671931906245224e-06, "loss": 0.7471, "step": 4913 }, { "epoch": 0.73, "learning_rate": 3.6682238874490463e-06, "loss": 0.791, "step": 4914 }, { "epoch": 0.73, "learning_rate": 3.664517321266199e-06, "loss": 0.769, "step": 4915 }, { "epoch": 0.73, "learning_rate": 3.6608122085470367e-06, "loss": 0.7366, "step": 4916 }, { "epoch": 0.73, "learning_rate": 3.6571085501415583e-06, "loss": 0.7505, "step": 4917 }, { "epoch": 0.73, "learning_rate": 3.6534063468994554e-06, "loss": 0.8271, "step": 4918 }, { "epoch": 0.73, "learning_rate": 3.649705599670067e-06, "loss": 0.7744, "step": 4919 }, { "epoch": 0.73, "learning_rate": 3.6460063093024113e-06, "loss": 0.8013, "step": 4920 }, { "epoch": 0.73, "learning_rate": 3.6423084766451622e-06, "loss": 0.8442, "step": 4921 }, { "epoch": 0.73, "learning_rate": 3.6386121025466626e-06, "loss": 0.7827, "step": 4922 }, { "epoch": 0.73, "learning_rate": 3.634917187854925e-06, "loss": 0.7656, "step": 4923 }, { "epoch": 0.73, "learning_rate": 3.6312237334176216e-06, "loss": 0.8169, "step": 4924 }, { "epoch": 0.73, "learning_rate": 3.6275317400820884e-06, "loss": 0.811, "step": 4925 }, { "epoch": 0.73, "learning_rate": 3.6238412086953356e-06, "loss": 0.7471, "step": 4926 }, { "epoch": 0.73, "learning_rate": 3.620152140104025e-06, "loss": 0.7209, "step": 4927 }, { "epoch": 0.73, "learning_rate": 3.6164645351544956e-06, "loss": 0.8311, "step": 4928 }, { "epoch": 0.73, "learning_rate": 3.612778394692741e-06, "loss": 0.7852, "step": 4929 }, { "epoch": 0.73, "learning_rate": 3.6090937195644205e-06, "loss": 0.811, "step": 4930 }, { "epoch": 0.73, "learning_rate": 3.6054105106148642e-06, "loss": 0.8174, "step": 4931 }, { "epoch": 0.73, "learning_rate": 3.6017287686890545e-06, "loss": 0.8066, "step": 4932 }, { "epoch": 0.73, "learning_rate": 3.5980484946316507e-06, "loss": 0.8232, "step": 4933 }, { "epoch": 0.73, "learning_rate": 3.594369689286963e-06, "loss": 0.7471, "step": 4934 }, { "epoch": 0.73, "learning_rate": 3.590692353498968e-06, "loss": 0.8433, "step": 4935 }, { "epoch": 0.73, "learning_rate": 3.5870164881113135e-06, "loss": 0.7837, "step": 4936 }, { "epoch": 0.73, "learning_rate": 3.583342093967299e-06, "loss": 0.7036, "step": 4937 }, { "epoch": 0.73, "learning_rate": 3.5796691719098886e-06, "loss": 0.748, "step": 4938 }, { "epoch": 0.73, "learning_rate": 3.5759977227817167e-06, "loss": 0.8052, "step": 4939 }, { "epoch": 0.73, "learning_rate": 3.572327747425066e-06, "loss": 0.7959, "step": 4940 }, { "epoch": 0.73, "learning_rate": 3.5686592466818992e-06, "loss": 0.8159, "step": 4941 }, { "epoch": 0.73, "learning_rate": 3.564992221393825e-06, "loss": 0.8018, "step": 4942 }, { "epoch": 0.73, "learning_rate": 3.5613266724021156e-06, "loss": 0.792, "step": 4943 }, { "epoch": 0.73, "learning_rate": 3.5576626005477153e-06, "loss": 0.8066, "step": 4944 }, { "epoch": 0.73, "learning_rate": 3.5540000066712156e-06, "loss": 0.7627, "step": 4945 }, { "epoch": 0.73, "learning_rate": 3.5503388916128824e-06, "loss": 0.7559, "step": 4946 }, { "epoch": 0.73, "learning_rate": 3.546679256212633e-06, "loss": 0.7583, "step": 4947 }, { "epoch": 0.73, "learning_rate": 3.5430211013100424e-06, "loss": 0.312, "step": 4948 }, { "epoch": 0.73, "learning_rate": 3.5393644277443596e-06, "loss": 0.8179, "step": 4949 }, { "epoch": 0.73, "learning_rate": 3.535709236354482e-06, "loss": 0.7549, "step": 4950 }, { "epoch": 0.73, "learning_rate": 3.532055527978967e-06, "loss": 0.7515, "step": 4951 }, { "epoch": 0.73, "learning_rate": 3.5284033034560415e-06, "loss": 0.7607, "step": 4952 }, { "epoch": 0.73, "learning_rate": 3.5247525636235802e-06, "loss": 0.7568, "step": 4953 }, { "epoch": 0.73, "learning_rate": 3.5211033093191282e-06, "loss": 0.7407, "step": 4954 }, { "epoch": 0.73, "learning_rate": 3.5174555413798805e-06, "loss": 0.3083, "step": 4955 }, { "epoch": 0.73, "learning_rate": 3.513809260642694e-06, "loss": 0.8574, "step": 4956 }, { "epoch": 0.73, "learning_rate": 3.510164467944089e-06, "loss": 0.7705, "step": 4957 }, { "epoch": 0.73, "learning_rate": 3.50652116412024e-06, "loss": 0.3071, "step": 4958 }, { "epoch": 0.73, "learning_rate": 3.502879350006977e-06, "loss": 0.7603, "step": 4959 }, { "epoch": 0.73, "learning_rate": 3.4992390264397967e-06, "loss": 0.7588, "step": 4960 }, { "epoch": 0.73, "learning_rate": 3.495600194253843e-06, "loss": 0.7881, "step": 4961 }, { "epoch": 0.73, "learning_rate": 3.491962854283932e-06, "loss": 0.7671, "step": 4962 }, { "epoch": 0.73, "learning_rate": 3.488327007364525e-06, "loss": 0.7529, "step": 4963 }, { "epoch": 0.73, "learning_rate": 3.48469265432974e-06, "loss": 0.3048, "step": 4964 }, { "epoch": 0.73, "learning_rate": 3.4810597960133665e-06, "loss": 0.7915, "step": 4965 }, { "epoch": 0.73, "learning_rate": 3.477428433248833e-06, "loss": 0.813, "step": 4966 }, { "epoch": 0.73, "learning_rate": 3.473798566869244e-06, "loss": 0.8452, "step": 4967 }, { "epoch": 0.73, "learning_rate": 3.4701701977073386e-06, "loss": 0.7891, "step": 4968 }, { "epoch": 0.73, "learning_rate": 3.4665433265955307e-06, "loss": 0.8306, "step": 4969 }, { "epoch": 0.73, "learning_rate": 3.4629179543658852e-06, "loss": 0.8164, "step": 4970 }, { "epoch": 0.74, "learning_rate": 3.459294081850121e-06, "loss": 0.8223, "step": 4971 }, { "epoch": 0.74, "learning_rate": 3.4556717098796124e-06, "loss": 0.7715, "step": 4972 }, { "epoch": 0.74, "learning_rate": 3.452050839285388e-06, "loss": 0.8062, "step": 4973 }, { "epoch": 0.74, "learning_rate": 3.448431470898138e-06, "loss": 0.7852, "step": 4974 }, { "epoch": 0.74, "learning_rate": 3.444813605548213e-06, "loss": 0.8203, "step": 4975 }, { "epoch": 0.74, "learning_rate": 3.4411972440655963e-06, "loss": 0.3362, "step": 4976 }, { "epoch": 0.74, "learning_rate": 3.437582387279946e-06, "loss": 0.7495, "step": 4977 }, { "epoch": 0.74, "learning_rate": 3.4339690360205757e-06, "loss": 0.7983, "step": 4978 }, { "epoch": 0.74, "learning_rate": 3.430357191116439e-06, "loss": 0.8174, "step": 4979 }, { "epoch": 0.74, "learning_rate": 3.426746853396162e-06, "loss": 0.7959, "step": 4980 }, { "epoch": 0.74, "learning_rate": 3.423138023688003e-06, "loss": 0.7651, "step": 4981 }, { "epoch": 0.74, "learning_rate": 3.419530702819893e-06, "loss": 0.3052, "step": 4982 }, { "epoch": 0.74, "learning_rate": 3.4159248916194144e-06, "loss": 0.8281, "step": 4983 }, { "epoch": 0.74, "learning_rate": 3.412320590913796e-06, "loss": 0.7441, "step": 4984 }, { "epoch": 0.74, "learning_rate": 3.4087178015299226e-06, "loss": 0.7983, "step": 4985 }, { "epoch": 0.74, "learning_rate": 3.405116524294331e-06, "loss": 0.7471, "step": 4986 }, { "epoch": 0.74, "learning_rate": 3.4015167600332166e-06, "loss": 0.8047, "step": 4987 }, { "epoch": 0.74, "learning_rate": 3.3979185095724298e-06, "loss": 0.731, "step": 4988 }, { "epoch": 0.74, "learning_rate": 3.3943217737374556e-06, "loss": 0.7197, "step": 4989 }, { "epoch": 0.74, "learning_rate": 3.390726553353455e-06, "loss": 0.3218, "step": 4990 }, { "epoch": 0.74, "learning_rate": 3.387132849245224e-06, "loss": 0.752, "step": 4991 }, { "epoch": 0.74, "learning_rate": 3.383540662237219e-06, "loss": 0.7485, "step": 4992 }, { "epoch": 0.74, "learning_rate": 3.379949993153554e-06, "loss": 0.8232, "step": 4993 }, { "epoch": 0.74, "learning_rate": 3.376360842817975e-06, "loss": 0.7322, "step": 4994 }, { "epoch": 0.74, "learning_rate": 3.3727732120539005e-06, "loss": 0.8027, "step": 4995 }, { "epoch": 0.74, "learning_rate": 3.369187101684387e-06, "loss": 0.8384, "step": 4996 }, { "epoch": 0.74, "learning_rate": 3.3656025125321512e-06, "loss": 0.8135, "step": 4997 }, { "epoch": 0.74, "learning_rate": 3.3620194454195565e-06, "loss": 0.8037, "step": 4998 }, { "epoch": 0.74, "learning_rate": 3.358437901168611e-06, "loss": 0.8135, "step": 4999 }, { "epoch": 0.74, "learning_rate": 3.354857880600988e-06, "loss": 0.752, "step": 5000 }, { "epoch": 0.74, "learning_rate": 3.351279384538e-06, "loss": 0.7583, "step": 5001 }, { "epoch": 0.74, "learning_rate": 3.3477024138006074e-06, "loss": 0.8208, "step": 5002 }, { "epoch": 0.74, "learning_rate": 3.3441269692094346e-06, "loss": 0.7866, "step": 5003 }, { "epoch": 0.74, "learning_rate": 3.3405530515847406e-06, "loss": 0.8032, "step": 5004 }, { "epoch": 0.74, "learning_rate": 3.336980661746446e-06, "loss": 0.7534, "step": 5005 }, { "epoch": 0.74, "learning_rate": 3.3334098005141123e-06, "loss": 0.8022, "step": 5006 }, { "epoch": 0.74, "learning_rate": 3.329840468706952e-06, "loss": 0.7515, "step": 5007 }, { "epoch": 0.74, "learning_rate": 3.3262726671438337e-06, "loss": 0.7021, "step": 5008 }, { "epoch": 0.74, "learning_rate": 3.322706396643264e-06, "loss": 0.7686, "step": 5009 }, { "epoch": 0.74, "learning_rate": 3.3191416580234093e-06, "loss": 0.8208, "step": 5010 }, { "epoch": 0.74, "learning_rate": 3.315578452102076e-06, "loss": 0.3289, "step": 5011 }, { "epoch": 0.74, "learning_rate": 3.3120167796967195e-06, "loss": 0.8325, "step": 5012 }, { "epoch": 0.74, "learning_rate": 3.3084566416244525e-06, "loss": 0.7817, "step": 5013 }, { "epoch": 0.74, "learning_rate": 3.3048980387020245e-06, "loss": 0.7886, "step": 5014 }, { "epoch": 0.74, "learning_rate": 3.3013409717458355e-06, "loss": 0.7544, "step": 5015 }, { "epoch": 0.74, "learning_rate": 3.2977854415719412e-06, "loss": 0.7554, "step": 5016 }, { "epoch": 0.74, "learning_rate": 3.2942314489960314e-06, "loss": 0.7642, "step": 5017 }, { "epoch": 0.74, "learning_rate": 3.290678994833457e-06, "loss": 0.7856, "step": 5018 }, { "epoch": 0.74, "learning_rate": 3.2871280798992065e-06, "loss": 0.7964, "step": 5019 }, { "epoch": 0.74, "learning_rate": 3.283578705007915e-06, "loss": 0.7539, "step": 5020 }, { "epoch": 0.74, "learning_rate": 3.280030870973874e-06, "loss": 0.7881, "step": 5021 }, { "epoch": 0.74, "learning_rate": 3.27648457861101e-06, "loss": 0.8291, "step": 5022 }, { "epoch": 0.74, "learning_rate": 3.2729398287328983e-06, "loss": 0.7046, "step": 5023 }, { "epoch": 0.74, "learning_rate": 3.2693966221527707e-06, "loss": 0.7695, "step": 5024 }, { "epoch": 0.74, "learning_rate": 3.2658549596834875e-06, "loss": 0.7534, "step": 5025 }, { "epoch": 0.74, "learning_rate": 3.262314842137573e-06, "loss": 0.813, "step": 5026 }, { "epoch": 0.74, "learning_rate": 3.258776270327184e-06, "loss": 0.7988, "step": 5027 }, { "epoch": 0.74, "learning_rate": 3.2552392450641248e-06, "loss": 0.7861, "step": 5028 }, { "epoch": 0.74, "learning_rate": 3.2517037671598516e-06, "loss": 0.8105, "step": 5029 }, { "epoch": 0.74, "learning_rate": 3.2481698374254556e-06, "loss": 0.7598, "step": 5030 }, { "epoch": 0.74, "learning_rate": 3.2446374566716854e-06, "loss": 0.7637, "step": 5031 }, { "epoch": 0.74, "learning_rate": 3.241106625708923e-06, "loss": 0.8032, "step": 5032 }, { "epoch": 0.74, "learning_rate": 3.237577345347196e-06, "loss": 0.8555, "step": 5033 }, { "epoch": 0.74, "learning_rate": 3.2340496163961855e-06, "loss": 0.8022, "step": 5034 }, { "epoch": 0.74, "learning_rate": 3.230523439665206e-06, "loss": 0.7725, "step": 5035 }, { "epoch": 0.74, "learning_rate": 3.2269988159632203e-06, "loss": 0.7944, "step": 5036 }, { "epoch": 0.74, "learning_rate": 3.2234757460988386e-06, "loss": 0.8003, "step": 5037 }, { "epoch": 0.75, "learning_rate": 3.2199542308803055e-06, "loss": 0.791, "step": 5038 }, { "epoch": 0.75, "learning_rate": 3.216434271115524e-06, "loss": 0.7695, "step": 5039 }, { "epoch": 0.75, "learning_rate": 3.2129158676120176e-06, "loss": 0.812, "step": 5040 }, { "epoch": 0.75, "learning_rate": 3.209399021176971e-06, "loss": 0.7393, "step": 5041 }, { "epoch": 0.75, "learning_rate": 3.205883732617212e-06, "loss": 0.7607, "step": 5042 }, { "epoch": 0.75, "learning_rate": 3.202370002739198e-06, "loss": 0.7368, "step": 5043 }, { "epoch": 0.75, "learning_rate": 3.1988578323490427e-06, "loss": 0.7446, "step": 5044 }, { "epoch": 0.75, "learning_rate": 3.1953472222524918e-06, "loss": 0.7959, "step": 5045 }, { "epoch": 0.75, "learning_rate": 3.191838173254934e-06, "loss": 0.8032, "step": 5046 }, { "epoch": 0.75, "learning_rate": 3.1883306861614104e-06, "loss": 0.7417, "step": 5047 }, { "epoch": 0.75, "learning_rate": 3.1848247617765915e-06, "loss": 0.8062, "step": 5048 }, { "epoch": 0.75, "learning_rate": 3.1813204009047902e-06, "loss": 0.7866, "step": 5049 }, { "epoch": 0.75, "learning_rate": 3.177817604349973e-06, "loss": 0.8833, "step": 5050 }, { "epoch": 0.75, "learning_rate": 3.17431637291573e-06, "loss": 0.8301, "step": 5051 }, { "epoch": 0.75, "learning_rate": 3.170816707405312e-06, "loss": 0.751, "step": 5052 }, { "epoch": 0.75, "learning_rate": 3.167318608621587e-06, "loss": 0.7412, "step": 5053 }, { "epoch": 0.75, "learning_rate": 3.1638220773670825e-06, "loss": 0.2949, "step": 5054 }, { "epoch": 0.75, "learning_rate": 3.160327114443963e-06, "loss": 0.7739, "step": 5055 }, { "epoch": 0.75, "learning_rate": 3.1568337206540246e-06, "loss": 0.7354, "step": 5056 }, { "epoch": 0.75, "learning_rate": 3.1533418967987172e-06, "loss": 0.8042, "step": 5057 }, { "epoch": 0.75, "learning_rate": 3.1498516436791113e-06, "loss": 0.7046, "step": 5058 }, { "epoch": 0.75, "learning_rate": 3.1463629620959347e-06, "loss": 0.7563, "step": 5059 }, { "epoch": 0.75, "learning_rate": 3.142875852849551e-06, "loss": 0.7852, "step": 5060 }, { "epoch": 0.75, "learning_rate": 3.1393903167399553e-06, "loss": 0.79, "step": 5061 }, { "epoch": 0.75, "learning_rate": 3.13590635456679e-06, "loss": 0.8071, "step": 5062 }, { "epoch": 0.75, "learning_rate": 3.1324239671293276e-06, "loss": 0.7227, "step": 5063 }, { "epoch": 0.75, "learning_rate": 3.128943155226489e-06, "loss": 0.7939, "step": 5064 }, { "epoch": 0.75, "learning_rate": 3.125463919656836e-06, "loss": 0.7935, "step": 5065 }, { "epoch": 0.75, "learning_rate": 3.1219862612185493e-06, "loss": 0.8486, "step": 5066 }, { "epoch": 0.75, "learning_rate": 3.118510180709471e-06, "loss": 0.811, "step": 5067 }, { "epoch": 0.75, "learning_rate": 3.115035678927063e-06, "loss": 0.8115, "step": 5068 }, { "epoch": 0.75, "learning_rate": 3.1115627566684415e-06, "loss": 0.7703, "step": 5069 }, { "epoch": 0.75, "learning_rate": 3.1080914147303465e-06, "loss": 0.7925, "step": 5070 }, { "epoch": 0.75, "learning_rate": 3.10462165390916e-06, "loss": 0.8257, "step": 5071 }, { "epoch": 0.75, "learning_rate": 3.1011534750009033e-06, "loss": 0.7495, "step": 5072 }, { "epoch": 0.75, "learning_rate": 3.097686878801237e-06, "loss": 0.7876, "step": 5073 }, { "epoch": 0.75, "learning_rate": 3.0942218661054533e-06, "loss": 0.835, "step": 5074 }, { "epoch": 0.75, "learning_rate": 3.090758437708482e-06, "loss": 0.6992, "step": 5075 }, { "epoch": 0.75, "learning_rate": 3.087296594404887e-06, "loss": 0.8052, "step": 5076 }, { "epoch": 0.75, "learning_rate": 3.083836336988876e-06, "loss": 0.7588, "step": 5077 }, { "epoch": 0.75, "learning_rate": 3.080377666254294e-06, "loss": 0.749, "step": 5078 }, { "epoch": 0.75, "learning_rate": 3.0769205829946048e-06, "loss": 0.8179, "step": 5079 }, { "epoch": 0.75, "learning_rate": 3.0734650880029293e-06, "loss": 0.8066, "step": 5080 }, { "epoch": 0.75, "learning_rate": 3.070011182072008e-06, "loss": 0.7256, "step": 5081 }, { "epoch": 0.75, "learning_rate": 3.0665588659942314e-06, "loss": 0.7949, "step": 5082 }, { "epoch": 0.75, "learning_rate": 3.0631081405616136e-06, "loss": 0.7676, "step": 5083 }, { "epoch": 0.75, "learning_rate": 3.059659006565804e-06, "loss": 0.8296, "step": 5084 }, { "epoch": 0.75, "learning_rate": 3.0562114647980966e-06, "loss": 0.7935, "step": 5085 }, { "epoch": 0.75, "learning_rate": 3.0527655160494117e-06, "loss": 0.7964, "step": 5086 }, { "epoch": 0.75, "learning_rate": 3.0493211611103034e-06, "loss": 0.6865, "step": 5087 }, { "epoch": 0.75, "learning_rate": 3.0458784007709685e-06, "loss": 0.811, "step": 5088 }, { "epoch": 0.75, "learning_rate": 3.0424372358212285e-06, "loss": 0.7698, "step": 5089 }, { "epoch": 0.75, "learning_rate": 3.038997667050546e-06, "loss": 0.7578, "step": 5090 }, { "epoch": 0.75, "learning_rate": 3.035559695248015e-06, "loss": 0.7925, "step": 5091 }, { "epoch": 0.75, "learning_rate": 3.032123321202357e-06, "loss": 0.2915, "step": 5092 }, { "epoch": 0.75, "learning_rate": 3.0286885457019398e-06, "loss": 0.7632, "step": 5093 }, { "epoch": 0.75, "learning_rate": 3.025255369534751e-06, "loss": 0.7993, "step": 5094 }, { "epoch": 0.75, "learning_rate": 3.021823793488423e-06, "loss": 0.7222, "step": 5095 }, { "epoch": 0.75, "learning_rate": 3.0183938183502147e-06, "loss": 0.8047, "step": 5096 }, { "epoch": 0.75, "learning_rate": 3.014965444907013e-06, "loss": 0.7876, "step": 5097 }, { "epoch": 0.75, "learning_rate": 3.01153867394535e-06, "loss": 0.7446, "step": 5098 }, { "epoch": 0.75, "learning_rate": 3.0081135062513813e-06, "loss": 0.8237, "step": 5099 }, { "epoch": 0.75, "learning_rate": 3.0046899426108924e-06, "loss": 0.771, "step": 5100 }, { "epoch": 0.75, "learning_rate": 3.0012679838093107e-06, "loss": 0.8257, "step": 5101 }, { "epoch": 0.75, "learning_rate": 2.997847630631685e-06, "loss": 0.7832, "step": 5102 }, { "epoch": 0.75, "learning_rate": 2.9944288838627055e-06, "loss": 0.7952, "step": 5103 }, { "epoch": 0.75, "learning_rate": 2.991011744286686e-06, "loss": 0.7588, "step": 5104 }, { "epoch": 0.75, "learning_rate": 2.987596212687571e-06, "loss": 0.8457, "step": 5105 }, { "epoch": 0.76, "learning_rate": 2.9841822898489457e-06, "loss": 0.8008, "step": 5106 }, { "epoch": 0.76, "learning_rate": 2.9807699765540144e-06, "loss": 0.7964, "step": 5107 }, { "epoch": 0.76, "learning_rate": 2.977359273585624e-06, "loss": 0.8057, "step": 5108 }, { "epoch": 0.76, "learning_rate": 2.9739501817262416e-06, "loss": 0.7964, "step": 5109 }, { "epoch": 0.76, "learning_rate": 2.970542701757967e-06, "loss": 0.7842, "step": 5110 }, { "epoch": 0.76, "learning_rate": 2.9671368344625375e-06, "loss": 0.7793, "step": 5111 }, { "epoch": 0.76, "learning_rate": 2.9637325806213115e-06, "loss": 0.7993, "step": 5112 }, { "epoch": 0.76, "learning_rate": 2.9603299410152774e-06, "loss": 0.7925, "step": 5113 }, { "epoch": 0.76, "learning_rate": 2.9569289164250647e-06, "loss": 0.8555, "step": 5114 }, { "epoch": 0.76, "learning_rate": 2.9535295076309156e-06, "loss": 0.7612, "step": 5115 }, { "epoch": 0.76, "learning_rate": 2.9501317154127184e-06, "loss": 0.7402, "step": 5116 }, { "epoch": 0.76, "learning_rate": 2.9467355405499788e-06, "loss": 0.7944, "step": 5117 }, { "epoch": 0.76, "learning_rate": 2.9433409838218307e-06, "loss": 0.7773, "step": 5118 }, { "epoch": 0.76, "learning_rate": 2.9399480460070486e-06, "loss": 0.791, "step": 5119 }, { "epoch": 0.76, "learning_rate": 2.9365567278840214e-06, "loss": 0.814, "step": 5120 }, { "epoch": 0.76, "learning_rate": 2.933167030230779e-06, "loss": 0.3159, "step": 5121 }, { "epoch": 0.76, "learning_rate": 2.9297789538249712e-06, "loss": 0.7793, "step": 5122 }, { "epoch": 0.76, "learning_rate": 2.9263924994438754e-06, "loss": 0.8452, "step": 5123 }, { "epoch": 0.76, "learning_rate": 2.923007667864405e-06, "loss": 0.8423, "step": 5124 }, { "epoch": 0.76, "learning_rate": 2.919624459863093e-06, "loss": 0.8438, "step": 5125 }, { "epoch": 0.76, "learning_rate": 2.916242876216101e-06, "loss": 0.7173, "step": 5126 }, { "epoch": 0.76, "learning_rate": 2.912862917699225e-06, "loss": 0.8447, "step": 5127 }, { "epoch": 0.76, "learning_rate": 2.9094845850878773e-06, "loss": 0.8413, "step": 5128 }, { "epoch": 0.76, "learning_rate": 2.9061078791571105e-06, "loss": 0.855, "step": 5129 }, { "epoch": 0.76, "learning_rate": 2.902732800681586e-06, "loss": 0.7705, "step": 5130 }, { "epoch": 0.76, "learning_rate": 2.8993593504356065e-06, "loss": 0.7852, "step": 5131 }, { "epoch": 0.76, "learning_rate": 2.8959875291931018e-06, "loss": 0.7969, "step": 5132 }, { "epoch": 0.76, "learning_rate": 2.892617337727619e-06, "loss": 0.7305, "step": 5133 }, { "epoch": 0.76, "learning_rate": 2.8892487768123356e-06, "loss": 0.8018, "step": 5134 }, { "epoch": 0.76, "learning_rate": 2.88588184722005e-06, "loss": 0.769, "step": 5135 }, { "epoch": 0.76, "learning_rate": 2.8825165497231964e-06, "loss": 0.7891, "step": 5136 }, { "epoch": 0.76, "learning_rate": 2.879152885093832e-06, "loss": 0.7793, "step": 5137 }, { "epoch": 0.76, "learning_rate": 2.8757908541036338e-06, "loss": 0.3008, "step": 5138 }, { "epoch": 0.76, "learning_rate": 2.8724304575239048e-06, "loss": 0.728, "step": 5139 }, { "epoch": 0.76, "learning_rate": 2.869071696125574e-06, "loss": 0.7544, "step": 5140 }, { "epoch": 0.76, "learning_rate": 2.865714570679199e-06, "loss": 0.8218, "step": 5141 }, { "epoch": 0.76, "learning_rate": 2.8623590819549653e-06, "loss": 0.7622, "step": 5142 }, { "epoch": 0.76, "learning_rate": 2.8590052307226646e-06, "loss": 0.3176, "step": 5143 }, { "epoch": 0.76, "learning_rate": 2.8556530177517326e-06, "loss": 0.8496, "step": 5144 }, { "epoch": 0.76, "learning_rate": 2.8523024438112236e-06, "loss": 0.8081, "step": 5145 }, { "epoch": 0.76, "learning_rate": 2.848953509669813e-06, "loss": 0.7837, "step": 5146 }, { "epoch": 0.76, "learning_rate": 2.8456062160957986e-06, "loss": 0.792, "step": 5147 }, { "epoch": 0.76, "learning_rate": 2.8422605638571042e-06, "loss": 0.8628, "step": 5148 }, { "epoch": 0.76, "learning_rate": 2.838916553721278e-06, "loss": 0.8232, "step": 5149 }, { "epoch": 0.76, "learning_rate": 2.8355741864554964e-06, "loss": 0.8213, "step": 5150 }, { "epoch": 0.76, "learning_rate": 2.832233462826548e-06, "loss": 0.7749, "step": 5151 }, { "epoch": 0.76, "learning_rate": 2.828894383600851e-06, "loss": 0.7876, "step": 5152 }, { "epoch": 0.76, "learning_rate": 2.8255569495444403e-06, "loss": 0.7207, "step": 5153 }, { "epoch": 0.76, "learning_rate": 2.822221161422983e-06, "loss": 0.8135, "step": 5154 }, { "epoch": 0.76, "learning_rate": 2.818887020001769e-06, "loss": 0.7925, "step": 5155 }, { "epoch": 0.76, "learning_rate": 2.8155545260456917e-06, "loss": 0.8149, "step": 5156 }, { "epoch": 0.76, "learning_rate": 2.8122236803192915e-06, "loss": 0.8169, "step": 5157 }, { "epoch": 0.76, "learning_rate": 2.8088944835867104e-06, "loss": 0.7788, "step": 5158 }, { "epoch": 0.76, "learning_rate": 2.805566936611728e-06, "loss": 0.6833, "step": 5159 }, { "epoch": 0.76, "learning_rate": 2.8022410401577347e-06, "loss": 0.7808, "step": 5160 }, { "epoch": 0.76, "learning_rate": 2.798916794987744e-06, "loss": 0.8027, "step": 5161 }, { "epoch": 0.76, "learning_rate": 2.795594201864398e-06, "loss": 0.7876, "step": 5162 }, { "epoch": 0.76, "learning_rate": 2.792273261549949e-06, "loss": 0.3162, "step": 5163 }, { "epoch": 0.76, "learning_rate": 2.7889539748062746e-06, "loss": 0.7554, "step": 5164 }, { "epoch": 0.76, "learning_rate": 2.7856363423948774e-06, "loss": 0.7561, "step": 5165 }, { "epoch": 0.76, "learning_rate": 2.782320365076874e-06, "loss": 0.7983, "step": 5166 }, { "epoch": 0.76, "learning_rate": 2.779006043613006e-06, "loss": 0.7915, "step": 5167 }, { "epoch": 0.76, "learning_rate": 2.775693378763633e-06, "loss": 0.8374, "step": 5168 }, { "epoch": 0.76, "learning_rate": 2.772382371288731e-06, "loss": 0.2827, "step": 5169 }, { "epoch": 0.76, "learning_rate": 2.7690730219479054e-06, "loss": 0.7905, "step": 5170 }, { "epoch": 0.76, "learning_rate": 2.7657653315003686e-06, "loss": 0.7812, "step": 5171 }, { "epoch": 0.76, "learning_rate": 2.762459300704966e-06, "loss": 0.79, "step": 5172 }, { "epoch": 0.77, "learning_rate": 2.7591549303201513e-06, "loss": 0.7939, "step": 5173 }, { "epoch": 0.77, "learning_rate": 2.7558522211039995e-06, "loss": 0.7695, "step": 5174 }, { "epoch": 0.77, "learning_rate": 2.752551173814212e-06, "loss": 0.7676, "step": 5175 }, { "epoch": 0.77, "learning_rate": 2.7492517892080982e-06, "loss": 0.8047, "step": 5176 }, { "epoch": 0.77, "learning_rate": 2.7459540680425912e-06, "loss": 0.7954, "step": 5177 }, { "epoch": 0.77, "learning_rate": 2.742658011074246e-06, "loss": 0.7651, "step": 5178 }, { "epoch": 0.77, "learning_rate": 2.7393636190592278e-06, "loss": 0.8008, "step": 5179 }, { "epoch": 0.77, "learning_rate": 2.7360708927533285e-06, "loss": 0.8042, "step": 5180 }, { "epoch": 0.77, "learning_rate": 2.7327798329119525e-06, "loss": 0.7573, "step": 5181 }, { "epoch": 0.77, "learning_rate": 2.729490440290118e-06, "loss": 0.7104, "step": 5182 }, { "epoch": 0.77, "learning_rate": 2.7262027156424733e-06, "loss": 0.8345, "step": 5183 }, { "epoch": 0.77, "learning_rate": 2.722916659723268e-06, "loss": 0.8013, "step": 5184 }, { "epoch": 0.77, "learning_rate": 2.7196322732863855e-06, "loss": 0.7754, "step": 5185 }, { "epoch": 0.77, "learning_rate": 2.716349557085315e-06, "loss": 0.7617, "step": 5186 }, { "epoch": 0.77, "learning_rate": 2.7130685118731615e-06, "loss": 0.3508, "step": 5187 }, { "epoch": 0.77, "learning_rate": 2.7097891384026562e-06, "loss": 0.7578, "step": 5188 }, { "epoch": 0.77, "learning_rate": 2.706511437426139e-06, "loss": 0.7852, "step": 5189 }, { "epoch": 0.77, "learning_rate": 2.703235409695566e-06, "loss": 0.8286, "step": 5190 }, { "epoch": 0.77, "learning_rate": 2.6999610559625156e-06, "loss": 0.7773, "step": 5191 }, { "epoch": 0.77, "learning_rate": 2.6966883769781737e-06, "loss": 0.7495, "step": 5192 }, { "epoch": 0.77, "learning_rate": 2.6934173734933524e-06, "loss": 0.8276, "step": 5193 }, { "epoch": 0.77, "learning_rate": 2.6901480462584707e-06, "loss": 0.7549, "step": 5194 }, { "epoch": 0.77, "learning_rate": 2.6868803960235624e-06, "loss": 0.7471, "step": 5195 }, { "epoch": 0.77, "learning_rate": 2.6836144235382864e-06, "loss": 0.7424, "step": 5196 }, { "epoch": 0.77, "learning_rate": 2.6803501295519085e-06, "loss": 0.7783, "step": 5197 }, { "epoch": 0.77, "learning_rate": 2.6770875148133058e-06, "loss": 0.7803, "step": 5198 }, { "epoch": 0.77, "learning_rate": 2.673826580070984e-06, "loss": 0.2963, "step": 5199 }, { "epoch": 0.77, "learning_rate": 2.670567326073047e-06, "loss": 0.8359, "step": 5200 }, { "epoch": 0.77, "learning_rate": 2.6673097535672287e-06, "loss": 0.8389, "step": 5201 }, { "epoch": 0.77, "learning_rate": 2.664053863300866e-06, "loss": 0.8208, "step": 5202 }, { "epoch": 0.77, "learning_rate": 2.6607996560209103e-06, "loss": 0.7578, "step": 5203 }, { "epoch": 0.77, "learning_rate": 2.6575471324739376e-06, "loss": 0.8281, "step": 5204 }, { "epoch": 0.77, "learning_rate": 2.6542962934061224e-06, "loss": 0.8257, "step": 5205 }, { "epoch": 0.77, "learning_rate": 2.6510471395632707e-06, "loss": 0.8384, "step": 5206 }, { "epoch": 0.77, "learning_rate": 2.6477996716907796e-06, "loss": 0.7529, "step": 5207 }, { "epoch": 0.77, "learning_rate": 2.6445538905336764e-06, "loss": 0.79, "step": 5208 }, { "epoch": 0.77, "learning_rate": 2.6413097968365996e-06, "loss": 0.7061, "step": 5209 }, { "epoch": 0.77, "learning_rate": 2.638067391343797e-06, "loss": 0.8027, "step": 5210 }, { "epoch": 0.77, "learning_rate": 2.6348266747991236e-06, "loss": 0.7212, "step": 5211 }, { "epoch": 0.77, "learning_rate": 2.631587647946061e-06, "loss": 0.8237, "step": 5212 }, { "epoch": 0.77, "learning_rate": 2.6283503115276875e-06, "loss": 0.7061, "step": 5213 }, { "epoch": 0.77, "learning_rate": 2.625114666286709e-06, "loss": 0.8281, "step": 5214 }, { "epoch": 0.77, "learning_rate": 2.621880712965431e-06, "loss": 0.2712, "step": 5215 }, { "epoch": 0.77, "learning_rate": 2.618648452305773e-06, "loss": 0.8101, "step": 5216 }, { "epoch": 0.77, "learning_rate": 2.6154178850492752e-06, "loss": 0.7773, "step": 5217 }, { "epoch": 0.77, "learning_rate": 2.6121890119370753e-06, "loss": 0.8066, "step": 5218 }, { "epoch": 0.77, "learning_rate": 2.608961833709941e-06, "loss": 0.7383, "step": 5219 }, { "epoch": 0.77, "learning_rate": 2.6057363511082255e-06, "loss": 0.7227, "step": 5220 }, { "epoch": 0.77, "learning_rate": 2.6025125648719153e-06, "loss": 0.8301, "step": 5221 }, { "epoch": 0.77, "learning_rate": 2.5992904757406025e-06, "loss": 0.812, "step": 5222 }, { "epoch": 0.77, "learning_rate": 2.5960700844534827e-06, "loss": 0.7827, "step": 5223 }, { "epoch": 0.77, "learning_rate": 2.592851391749368e-06, "loss": 0.8062, "step": 5224 }, { "epoch": 0.77, "learning_rate": 2.589634398366676e-06, "loss": 0.7417, "step": 5225 }, { "epoch": 0.77, "learning_rate": 2.586419105043442e-06, "loss": 0.7417, "step": 5226 }, { "epoch": 0.77, "learning_rate": 2.5832055125173095e-06, "loss": 0.7827, "step": 5227 }, { "epoch": 0.77, "learning_rate": 2.5799936215255216e-06, "loss": 0.3033, "step": 5228 }, { "epoch": 0.77, "learning_rate": 2.5767834328049444e-06, "loss": 0.7671, "step": 5229 }, { "epoch": 0.77, "learning_rate": 2.5735749470920446e-06, "loss": 0.8218, "step": 5230 }, { "epoch": 0.77, "learning_rate": 2.570368165122902e-06, "loss": 0.8291, "step": 5231 }, { "epoch": 0.77, "learning_rate": 2.5671630876332132e-06, "loss": 0.7861, "step": 5232 }, { "epoch": 0.77, "learning_rate": 2.563959715358262e-06, "loss": 0.8169, "step": 5233 }, { "epoch": 0.77, "learning_rate": 2.560758049032963e-06, "loss": 0.8218, "step": 5234 }, { "epoch": 0.77, "learning_rate": 2.557558089391827e-06, "loss": 0.79, "step": 5235 }, { "epoch": 0.77, "learning_rate": 2.5543598371689826e-06, "loss": 0.7617, "step": 5236 }, { "epoch": 0.77, "learning_rate": 2.551163293098159e-06, "loss": 0.6978, "step": 5237 }, { "epoch": 0.77, "learning_rate": 2.5479684579126905e-06, "loss": 0.7612, "step": 5238 }, { "epoch": 0.77, "learning_rate": 2.5447753323455294e-06, "loss": 0.7866, "step": 5239 }, { "epoch": 0.77, "learning_rate": 2.541583917129237e-06, "loss": 0.7832, "step": 5240 }, { "epoch": 0.78, "learning_rate": 2.5383942129959637e-06, "loss": 0.7827, "step": 5241 }, { "epoch": 0.78, "learning_rate": 2.5352062206774896e-06, "loss": 0.8159, "step": 5242 }, { "epoch": 0.78, "learning_rate": 2.532019940905186e-06, "loss": 0.7749, "step": 5243 }, { "epoch": 0.78, "learning_rate": 2.528835374410045e-06, "loss": 0.8394, "step": 5244 }, { "epoch": 0.78, "learning_rate": 2.5256525219226533e-06, "loss": 0.7227, "step": 5245 }, { "epoch": 0.78, "learning_rate": 2.5224713841732084e-06, "loss": 0.7734, "step": 5246 }, { "epoch": 0.78, "learning_rate": 2.51929196189152e-06, "loss": 0.7539, "step": 5247 }, { "epoch": 0.78, "learning_rate": 2.516114255806995e-06, "loss": 0.7886, "step": 5248 }, { "epoch": 0.78, "learning_rate": 2.5129382666486555e-06, "loss": 0.8052, "step": 5249 }, { "epoch": 0.78, "learning_rate": 2.5097639951451247e-06, "loss": 0.7881, "step": 5250 }, { "epoch": 0.78, "learning_rate": 2.5065914420246295e-06, "loss": 0.8076, "step": 5251 }, { "epoch": 0.78, "learning_rate": 2.5034206080150093e-06, "loss": 0.8037, "step": 5252 }, { "epoch": 0.78, "learning_rate": 2.500251493843705e-06, "loss": 0.7676, "step": 5253 }, { "epoch": 0.78, "learning_rate": 2.497084100237759e-06, "loss": 0.7354, "step": 5254 }, { "epoch": 0.78, "learning_rate": 2.493918427923829e-06, "loss": 0.8027, "step": 5255 }, { "epoch": 0.78, "learning_rate": 2.490754477628168e-06, "loss": 0.8213, "step": 5256 }, { "epoch": 0.78, "learning_rate": 2.4875922500766414e-06, "loss": 0.8037, "step": 5257 }, { "epoch": 0.78, "learning_rate": 2.4844317459947167e-06, "loss": 0.8247, "step": 5258 }, { "epoch": 0.78, "learning_rate": 2.4812729661074587e-06, "loss": 0.7461, "step": 5259 }, { "epoch": 0.78, "learning_rate": 2.4781159111395515e-06, "loss": 0.7031, "step": 5260 }, { "epoch": 0.78, "learning_rate": 2.474960581815269e-06, "loss": 0.79, "step": 5261 }, { "epoch": 0.78, "learning_rate": 2.471806978858501e-06, "loss": 0.8115, "step": 5262 }, { "epoch": 0.78, "learning_rate": 2.468655102992733e-06, "loss": 0.7749, "step": 5263 }, { "epoch": 0.78, "learning_rate": 2.4655049549410535e-06, "loss": 0.8013, "step": 5264 }, { "epoch": 0.78, "learning_rate": 2.462356535426166e-06, "loss": 0.7495, "step": 5265 }, { "epoch": 0.78, "learning_rate": 2.459209845170365e-06, "loss": 0.7295, "step": 5266 }, { "epoch": 0.78, "learning_rate": 2.4560648848955495e-06, "loss": 0.8188, "step": 5267 }, { "epoch": 0.78, "learning_rate": 2.4529216553232326e-06, "loss": 0.811, "step": 5268 }, { "epoch": 0.78, "learning_rate": 2.449780157174515e-06, "loss": 0.3337, "step": 5269 }, { "epoch": 0.78, "learning_rate": 2.4466403911701152e-06, "loss": 0.2997, "step": 5270 }, { "epoch": 0.78, "learning_rate": 2.443502358030344e-06, "loss": 0.7476, "step": 5271 }, { "epoch": 0.78, "learning_rate": 2.4403660584751134e-06, "loss": 0.77, "step": 5272 }, { "epoch": 0.78, "learning_rate": 2.4372314932239495e-06, "loss": 0.7837, "step": 5273 }, { "epoch": 0.78, "learning_rate": 2.4340986629959694e-06, "loss": 0.7539, "step": 5274 }, { "epoch": 0.78, "learning_rate": 2.430967568509892e-06, "loss": 0.8335, "step": 5275 }, { "epoch": 0.78, "learning_rate": 2.427838210484049e-06, "loss": 0.7461, "step": 5276 }, { "epoch": 0.78, "learning_rate": 2.42471058963636e-06, "loss": 0.7881, "step": 5277 }, { "epoch": 0.78, "learning_rate": 2.421584706684359e-06, "loss": 0.8232, "step": 5278 }, { "epoch": 0.78, "learning_rate": 2.4184605623451707e-06, "loss": 0.7959, "step": 5279 }, { "epoch": 0.78, "learning_rate": 2.4153381573355227e-06, "loss": 0.791, "step": 5280 }, { "epoch": 0.78, "learning_rate": 2.4122174923717525e-06, "loss": 0.72, "step": 5281 }, { "epoch": 0.78, "learning_rate": 2.409098568169784e-06, "loss": 0.7661, "step": 5282 }, { "epoch": 0.78, "learning_rate": 2.4059813854451586e-06, "loss": 0.645, "step": 5283 }, { "epoch": 0.78, "learning_rate": 2.4028659449130033e-06, "loss": 0.8179, "step": 5284 }, { "epoch": 0.78, "learning_rate": 2.3997522472880496e-06, "loss": 0.7764, "step": 5285 }, { "epoch": 0.78, "learning_rate": 2.3966402932846365e-06, "loss": 0.7668, "step": 5286 }, { "epoch": 0.78, "learning_rate": 2.393530083616694e-06, "loss": 0.8491, "step": 5287 }, { "epoch": 0.78, "learning_rate": 2.3904216189977534e-06, "loss": 0.7695, "step": 5288 }, { "epoch": 0.78, "learning_rate": 2.3873149001409533e-06, "loss": 0.6934, "step": 5289 }, { "epoch": 0.78, "learning_rate": 2.384209927759019e-06, "loss": 0.8281, "step": 5290 }, { "epoch": 0.78, "learning_rate": 2.3811067025642907e-06, "loss": 0.769, "step": 5291 }, { "epoch": 0.78, "learning_rate": 2.378005225268689e-06, "loss": 0.7527, "step": 5292 }, { "epoch": 0.78, "learning_rate": 2.3749054965837492e-06, "loss": 0.8052, "step": 5293 }, { "epoch": 0.78, "learning_rate": 2.371807517220601e-06, "loss": 0.834, "step": 5294 }, { "epoch": 0.78, "learning_rate": 2.3687112878899676e-06, "loss": 0.8179, "step": 5295 }, { "epoch": 0.78, "learning_rate": 2.365616809302184e-06, "loss": 0.791, "step": 5296 }, { "epoch": 0.78, "learning_rate": 2.3625240821671613e-06, "loss": 0.7739, "step": 5297 }, { "epoch": 0.78, "learning_rate": 2.3594331071944277e-06, "loss": 0.748, "step": 5298 }, { "epoch": 0.78, "learning_rate": 2.3563438850931076e-06, "loss": 0.8105, "step": 5299 }, { "epoch": 0.78, "learning_rate": 2.353256416571916e-06, "loss": 0.7778, "step": 5300 }, { "epoch": 0.78, "learning_rate": 2.3501707023391683e-06, "loss": 0.8105, "step": 5301 }, { "epoch": 0.78, "learning_rate": 2.3470867431027754e-06, "loss": 0.8218, "step": 5302 }, { "epoch": 0.78, "learning_rate": 2.3440045395702514e-06, "loss": 0.8418, "step": 5303 }, { "epoch": 0.78, "learning_rate": 2.3409240924487086e-06, "loss": 0.3137, "step": 5304 }, { "epoch": 0.78, "learning_rate": 2.3378454024448427e-06, "loss": 0.7036, "step": 5305 }, { "epoch": 0.78, "learning_rate": 2.334768470264963e-06, "loss": 0.7334, "step": 5306 }, { "epoch": 0.78, "learning_rate": 2.331693296614963e-06, "loss": 0.7544, "step": 5307 }, { "epoch": 0.78, "learning_rate": 2.3286198822003414e-06, "loss": 0.8057, "step": 5308 }, { "epoch": 0.79, "learning_rate": 2.325548227726194e-06, "loss": 0.7686, "step": 5309 }, { "epoch": 0.79, "learning_rate": 2.322478333897199e-06, "loss": 0.8433, "step": 5310 }, { "epoch": 0.79, "learning_rate": 2.3194102014176447e-06, "loss": 0.3177, "step": 5311 }, { "epoch": 0.79, "learning_rate": 2.3163438309914145e-06, "loss": 0.811, "step": 5312 }, { "epoch": 0.79, "learning_rate": 2.3132792233219814e-06, "loss": 0.8066, "step": 5313 }, { "epoch": 0.79, "learning_rate": 2.3102163791124167e-06, "loss": 0.8569, "step": 5314 }, { "epoch": 0.79, "learning_rate": 2.3071552990653844e-06, "loss": 0.7578, "step": 5315 }, { "epoch": 0.79, "learning_rate": 2.3040959838831488e-06, "loss": 0.7959, "step": 5316 }, { "epoch": 0.79, "learning_rate": 2.301038434267573e-06, "loss": 0.7544, "step": 5317 }, { "epoch": 0.79, "learning_rate": 2.2979826509200974e-06, "loss": 0.7852, "step": 5318 }, { "epoch": 0.79, "learning_rate": 2.2949286345417777e-06, "loss": 0.7593, "step": 5319 }, { "epoch": 0.79, "learning_rate": 2.2918763858332503e-06, "loss": 0.7231, "step": 5320 }, { "epoch": 0.79, "learning_rate": 2.2888259054947548e-06, "loss": 0.7749, "step": 5321 }, { "epoch": 0.79, "learning_rate": 2.285777194226121e-06, "loss": 0.7417, "step": 5322 }, { "epoch": 0.79, "learning_rate": 2.2827302527267693e-06, "loss": 0.7822, "step": 5323 }, { "epoch": 0.79, "learning_rate": 2.2796850816957227e-06, "loss": 0.7739, "step": 5324 }, { "epoch": 0.79, "learning_rate": 2.2766416818315897e-06, "loss": 0.8413, "step": 5325 }, { "epoch": 0.79, "learning_rate": 2.2736000538325807e-06, "loss": 0.8018, "step": 5326 }, { "epoch": 0.79, "learning_rate": 2.2705601983964933e-06, "loss": 0.7129, "step": 5327 }, { "epoch": 0.79, "learning_rate": 2.2675221162207153e-06, "loss": 0.7744, "step": 5328 }, { "epoch": 0.79, "learning_rate": 2.2644858080022403e-06, "loss": 0.748, "step": 5329 }, { "epoch": 0.79, "learning_rate": 2.2614512744376436e-06, "loss": 0.7461, "step": 5330 }, { "epoch": 0.79, "learning_rate": 2.258418516223094e-06, "loss": 0.8281, "step": 5331 }, { "epoch": 0.79, "learning_rate": 2.2553875340543617e-06, "loss": 0.8276, "step": 5332 }, { "epoch": 0.79, "learning_rate": 2.252358328626799e-06, "loss": 0.7476, "step": 5333 }, { "epoch": 0.79, "learning_rate": 2.249330900635359e-06, "loss": 0.2892, "step": 5334 }, { "epoch": 0.79, "learning_rate": 2.246305250774583e-06, "loss": 0.7056, "step": 5335 }, { "epoch": 0.79, "learning_rate": 2.2432813797386e-06, "loss": 0.7764, "step": 5336 }, { "epoch": 0.79, "learning_rate": 2.2402592882211418e-06, "loss": 0.7983, "step": 5337 }, { "epoch": 0.79, "learning_rate": 2.2372389769155235e-06, "loss": 0.8442, "step": 5338 }, { "epoch": 0.79, "learning_rate": 2.23422044651465e-06, "loss": 0.7842, "step": 5339 }, { "epoch": 0.79, "learning_rate": 2.2312036977110283e-06, "loss": 0.7661, "step": 5340 }, { "epoch": 0.79, "learning_rate": 2.2281887311967454e-06, "loss": 0.7544, "step": 5341 }, { "epoch": 0.79, "learning_rate": 2.2251755476634883e-06, "loss": 0.8242, "step": 5342 }, { "epoch": 0.79, "learning_rate": 2.222164147802528e-06, "loss": 0.7856, "step": 5343 }, { "epoch": 0.79, "learning_rate": 2.2191545323047257e-06, "loss": 0.7627, "step": 5344 }, { "epoch": 0.79, "learning_rate": 2.216146701860544e-06, "loss": 0.7959, "step": 5345 }, { "epoch": 0.79, "learning_rate": 2.213140657160021e-06, "loss": 0.811, "step": 5346 }, { "epoch": 0.79, "learning_rate": 2.2101363988928006e-06, "loss": 0.8335, "step": 5347 }, { "epoch": 0.79, "learning_rate": 2.207133927748104e-06, "loss": 0.7583, "step": 5348 }, { "epoch": 0.79, "learning_rate": 2.2041332444147447e-06, "loss": 0.7324, "step": 5349 }, { "epoch": 0.79, "learning_rate": 2.2011343495811353e-06, "loss": 0.749, "step": 5350 }, { "epoch": 0.79, "learning_rate": 2.1981372439352687e-06, "loss": 0.832, "step": 5351 }, { "epoch": 0.79, "learning_rate": 2.1951419281647267e-06, "loss": 0.7856, "step": 5352 }, { "epoch": 0.79, "learning_rate": 2.1921484029566887e-06, "loss": 0.7979, "step": 5353 }, { "epoch": 0.79, "learning_rate": 2.189156668997915e-06, "loss": 0.7729, "step": 5354 }, { "epoch": 0.79, "learning_rate": 2.1861667269747623e-06, "loss": 0.8037, "step": 5355 }, { "epoch": 0.79, "learning_rate": 2.1831785775731705e-06, "loss": 0.7949, "step": 5356 }, { "epoch": 0.79, "learning_rate": 2.1801922214786663e-06, "loss": 0.749, "step": 5357 }, { "epoch": 0.79, "learning_rate": 2.1772076593763757e-06, "loss": 0.7788, "step": 5358 }, { "epoch": 0.79, "learning_rate": 2.174224891951e-06, "loss": 0.8296, "step": 5359 }, { "epoch": 0.79, "learning_rate": 2.1712439198868408e-06, "loss": 0.8228, "step": 5360 }, { "epoch": 0.79, "learning_rate": 2.1682647438677782e-06, "loss": 0.7993, "step": 5361 }, { "epoch": 0.79, "learning_rate": 2.165287364577282e-06, "loss": 0.7549, "step": 5362 }, { "epoch": 0.79, "learning_rate": 2.1623117826984187e-06, "loss": 0.8096, "step": 5363 }, { "epoch": 0.79, "learning_rate": 2.1593379989138306e-06, "loss": 0.7407, "step": 5364 }, { "epoch": 0.79, "learning_rate": 2.1563660139057506e-06, "loss": 0.7783, "step": 5365 }, { "epoch": 0.79, "learning_rate": 2.1533958283560064e-06, "loss": 0.7456, "step": 5366 }, { "epoch": 0.79, "learning_rate": 2.1504274429460024e-06, "loss": 0.7339, "step": 5367 }, { "epoch": 0.79, "learning_rate": 2.1474608583567426e-06, "loss": 0.7998, "step": 5368 }, { "epoch": 0.79, "learning_rate": 2.1444960752687994e-06, "loss": 0.7998, "step": 5369 }, { "epoch": 0.79, "learning_rate": 2.141533094362347e-06, "loss": 0.7773, "step": 5370 }, { "epoch": 0.79, "learning_rate": 2.138571916317146e-06, "loss": 0.7891, "step": 5371 }, { "epoch": 0.79, "learning_rate": 2.135612541812534e-06, "loss": 0.8076, "step": 5372 }, { "epoch": 0.79, "learning_rate": 2.1326549715274467e-06, "loss": 0.7964, "step": 5373 }, { "epoch": 0.79, "learning_rate": 2.1296992061403898e-06, "loss": 0.2772, "step": 5374 }, { "epoch": 0.79, "learning_rate": 2.126745246329469e-06, "loss": 0.7656, "step": 5375 }, { "epoch": 0.8, "learning_rate": 2.1237930927723736e-06, "loss": 0.7178, "step": 5376 }, { "epoch": 0.8, "learning_rate": 2.1208427461463753e-06, "loss": 0.2944, "step": 5377 }, { "epoch": 0.8, "learning_rate": 2.117894207128327e-06, "loss": 0.7744, "step": 5378 }, { "epoch": 0.8, "learning_rate": 2.1149474763946777e-06, "loss": 0.7881, "step": 5379 }, { "epoch": 0.8, "learning_rate": 2.1120025546214516e-06, "loss": 0.7671, "step": 5380 }, { "epoch": 0.8, "learning_rate": 2.1090594424842694e-06, "loss": 0.7373, "step": 5381 }, { "epoch": 0.8, "learning_rate": 2.1061181406583184e-06, "loss": 0.7583, "step": 5382 }, { "epoch": 0.8, "learning_rate": 2.103178649818387e-06, "loss": 0.8013, "step": 5383 }, { "epoch": 0.8, "learning_rate": 2.1002409706388462e-06, "loss": 0.7656, "step": 5384 }, { "epoch": 0.8, "learning_rate": 2.097305103793643e-06, "loss": 0.7451, "step": 5385 }, { "epoch": 0.8, "learning_rate": 2.0943710499563164e-06, "loss": 0.834, "step": 5386 }, { "epoch": 0.8, "learning_rate": 2.0914388097999803e-06, "loss": 0.8037, "step": 5387 }, { "epoch": 0.8, "learning_rate": 2.088508383997344e-06, "loss": 0.3103, "step": 5388 }, { "epoch": 0.8, "learning_rate": 2.085579773220697e-06, "loss": 0.7646, "step": 5389 }, { "epoch": 0.8, "learning_rate": 2.0826529781419092e-06, "loss": 0.7974, "step": 5390 }, { "epoch": 0.8, "learning_rate": 2.079727999432434e-06, "loss": 0.7861, "step": 5391 }, { "epoch": 0.8, "learning_rate": 2.0768048377633065e-06, "loss": 0.7798, "step": 5392 }, { "epoch": 0.8, "learning_rate": 2.073883493805152e-06, "loss": 0.7129, "step": 5393 }, { "epoch": 0.8, "learning_rate": 2.070963968228179e-06, "loss": 0.7593, "step": 5394 }, { "epoch": 0.8, "learning_rate": 2.0680462617021644e-06, "loss": 0.7261, "step": 5395 }, { "epoch": 0.8, "learning_rate": 2.065130374896486e-06, "loss": 0.7983, "step": 5396 }, { "epoch": 0.8, "learning_rate": 2.0622163084800904e-06, "loss": 0.8486, "step": 5397 }, { "epoch": 0.8, "learning_rate": 2.059304063121518e-06, "loss": 0.771, "step": 5398 }, { "epoch": 0.8, "learning_rate": 2.0563936394888827e-06, "loss": 0.8208, "step": 5399 }, { "epoch": 0.8, "learning_rate": 2.0534850382498807e-06, "loss": 0.772, "step": 5400 }, { "epoch": 0.8, "learning_rate": 2.050578260071798e-06, "loss": 0.814, "step": 5401 }, { "epoch": 0.8, "learning_rate": 2.047673305621496e-06, "loss": 0.3116, "step": 5402 }, { "epoch": 0.8, "learning_rate": 2.0447701755654138e-06, "loss": 0.8193, "step": 5403 }, { "epoch": 0.8, "learning_rate": 2.0418688705695846e-06, "loss": 0.8052, "step": 5404 }, { "epoch": 0.8, "learning_rate": 2.038969391299609e-06, "loss": 0.7812, "step": 5405 }, { "epoch": 0.8, "learning_rate": 2.0360717384206785e-06, "loss": 0.7871, "step": 5406 }, { "epoch": 0.8, "learning_rate": 2.033175912597566e-06, "loss": 0.7563, "step": 5407 }, { "epoch": 0.8, "learning_rate": 2.030281914494612e-06, "loss": 0.7905, "step": 5408 }, { "epoch": 0.8, "learning_rate": 2.027389744775755e-06, "loss": 0.7832, "step": 5409 }, { "epoch": 0.8, "learning_rate": 2.0244994041045016e-06, "loss": 0.7686, "step": 5410 }, { "epoch": 0.8, "learning_rate": 2.021610893143947e-06, "loss": 0.8218, "step": 5411 }, { "epoch": 0.8, "learning_rate": 2.018724212556762e-06, "loss": 0.8506, "step": 5412 }, { "epoch": 0.8, "learning_rate": 2.0158393630051944e-06, "loss": 0.77, "step": 5413 }, { "epoch": 0.8, "learning_rate": 2.0129563451510814e-06, "loss": 0.7788, "step": 5414 }, { "epoch": 0.8, "learning_rate": 2.0100751596558333e-06, "loss": 0.8203, "step": 5415 }, { "epoch": 0.8, "learning_rate": 2.0071958071804385e-06, "loss": 0.2914, "step": 5416 }, { "epoch": 0.8, "learning_rate": 2.004318288385472e-06, "loss": 0.8472, "step": 5417 }, { "epoch": 0.8, "learning_rate": 2.0014426039310786e-06, "loss": 0.8354, "step": 5418 }, { "epoch": 0.8, "learning_rate": 1.9985687544769936e-06, "loss": 0.8335, "step": 5419 }, { "epoch": 0.8, "learning_rate": 1.995696740682521e-06, "loss": 0.7471, "step": 5420 }, { "epoch": 0.8, "learning_rate": 1.992826563206548e-06, "loss": 0.75, "step": 5421 }, { "epoch": 0.8, "learning_rate": 1.989958222707543e-06, "loss": 0.8286, "step": 5422 }, { "epoch": 0.8, "learning_rate": 1.9870917198435467e-06, "loss": 0.7847, "step": 5423 }, { "epoch": 0.8, "learning_rate": 1.9842270552721864e-06, "loss": 0.7783, "step": 5424 }, { "epoch": 0.8, "learning_rate": 1.9813642296506606e-06, "loss": 0.7847, "step": 5425 }, { "epoch": 0.8, "learning_rate": 1.9785032436357467e-06, "loss": 0.7915, "step": 5426 }, { "epoch": 0.8, "learning_rate": 1.9756440978838056e-06, "loss": 0.873, "step": 5427 }, { "epoch": 0.8, "learning_rate": 1.9727867930507706e-06, "loss": 0.7578, "step": 5428 }, { "epoch": 0.8, "learning_rate": 1.969931329792152e-06, "loss": 0.8101, "step": 5429 }, { "epoch": 0.8, "learning_rate": 1.967077708763043e-06, "loss": 0.2897, "step": 5430 }, { "epoch": 0.8, "learning_rate": 1.9642259306181088e-06, "loss": 0.7529, "step": 5431 }, { "epoch": 0.8, "learning_rate": 1.9613759960115986e-06, "loss": 0.7695, "step": 5432 }, { "epoch": 0.8, "learning_rate": 1.9585279055973296e-06, "loss": 0.3184, "step": 5433 }, { "epoch": 0.8, "learning_rate": 1.9556816600286997e-06, "loss": 0.7334, "step": 5434 }, { "epoch": 0.8, "learning_rate": 1.9528372599586896e-06, "loss": 0.74, "step": 5435 }, { "epoch": 0.8, "learning_rate": 1.949994706039845e-06, "loss": 0.8037, "step": 5436 }, { "epoch": 0.8, "learning_rate": 1.9471539989243005e-06, "loss": 0.8149, "step": 5437 }, { "epoch": 0.8, "learning_rate": 1.944315139263758e-06, "loss": 0.7754, "step": 5438 }, { "epoch": 0.8, "learning_rate": 1.9414781277094963e-06, "loss": 0.752, "step": 5439 }, { "epoch": 0.8, "learning_rate": 1.938642964912376e-06, "loss": 0.7939, "step": 5440 }, { "epoch": 0.8, "learning_rate": 1.93580965152283e-06, "loss": 0.7964, "step": 5441 }, { "epoch": 0.8, "learning_rate": 1.932978188190863e-06, "loss": 0.7891, "step": 5442 }, { "epoch": 0.8, "learning_rate": 1.9301485755660633e-06, "loss": 0.752, "step": 5443 }, { "epoch": 0.81, "learning_rate": 1.9273208142975865e-06, "loss": 0.7598, "step": 5444 }, { "epoch": 0.81, "learning_rate": 1.9244949050341723e-06, "loss": 0.2965, "step": 5445 }, { "epoch": 0.81, "learning_rate": 1.9216708484241275e-06, "loss": 0.8491, "step": 5446 }, { "epoch": 0.81, "learning_rate": 1.9188486451153353e-06, "loss": 0.7271, "step": 5447 }, { "epoch": 0.81, "learning_rate": 1.9160282957552614e-06, "loss": 0.7578, "step": 5448 }, { "epoch": 0.81, "learning_rate": 1.913209800990935e-06, "loss": 0.814, "step": 5449 }, { "epoch": 0.81, "learning_rate": 1.9103931614689653e-06, "loss": 0.2769, "step": 5450 }, { "epoch": 0.81, "learning_rate": 1.9075783778355383e-06, "loss": 0.792, "step": 5451 }, { "epoch": 0.81, "learning_rate": 1.9047654507364087e-06, "loss": 0.8071, "step": 5452 }, { "epoch": 0.81, "learning_rate": 1.9019543808169117e-06, "loss": 0.8286, "step": 5453 }, { "epoch": 0.81, "learning_rate": 1.8991451687219509e-06, "loss": 0.7764, "step": 5454 }, { "epoch": 0.81, "learning_rate": 1.8963378150960032e-06, "loss": 0.8623, "step": 5455 }, { "epoch": 0.81, "learning_rate": 1.8935323205831257e-06, "loss": 0.7637, "step": 5456 }, { "epoch": 0.81, "learning_rate": 1.8907286858269413e-06, "loss": 0.8691, "step": 5457 }, { "epoch": 0.81, "learning_rate": 1.8879269114706556e-06, "loss": 0.8208, "step": 5458 }, { "epoch": 0.81, "learning_rate": 1.8851269981570343e-06, "loss": 0.7764, "step": 5459 }, { "epoch": 0.81, "learning_rate": 1.8823289465284244e-06, "loss": 0.8184, "step": 5460 }, { "epoch": 0.81, "learning_rate": 1.8795327572267519e-06, "loss": 0.7676, "step": 5461 }, { "epoch": 0.81, "learning_rate": 1.8767384308935033e-06, "loss": 0.7202, "step": 5462 }, { "epoch": 0.81, "learning_rate": 1.8739459681697425e-06, "loss": 0.7822, "step": 5463 }, { "epoch": 0.81, "learning_rate": 1.871155369696105e-06, "loss": 0.7642, "step": 5464 }, { "epoch": 0.81, "learning_rate": 1.8683666361128028e-06, "loss": 0.8599, "step": 5465 }, { "epoch": 0.81, "learning_rate": 1.865579768059621e-06, "loss": 0.2931, "step": 5466 }, { "epoch": 0.81, "learning_rate": 1.8627947661759027e-06, "loss": 0.7778, "step": 5467 }, { "epoch": 0.81, "learning_rate": 1.8600116311005823e-06, "loss": 0.8354, "step": 5468 }, { "epoch": 0.81, "learning_rate": 1.857230363472149e-06, "loss": 0.8364, "step": 5469 }, { "epoch": 0.81, "learning_rate": 1.8544509639286756e-06, "loss": 0.8257, "step": 5470 }, { "epoch": 0.81, "learning_rate": 1.8516734331078068e-06, "loss": 0.7739, "step": 5471 }, { "epoch": 0.81, "learning_rate": 1.8488977716467438e-06, "loss": 0.7886, "step": 5472 }, { "epoch": 0.81, "learning_rate": 1.8461239801822761e-06, "loss": 0.7627, "step": 5473 }, { "epoch": 0.81, "learning_rate": 1.8433520593507515e-06, "loss": 0.731, "step": 5474 }, { "epoch": 0.81, "learning_rate": 1.8405820097881e-06, "loss": 0.7949, "step": 5475 }, { "epoch": 0.81, "learning_rate": 1.8378138321298122e-06, "loss": 0.8213, "step": 5476 }, { "epoch": 0.81, "learning_rate": 1.8350475270109536e-06, "loss": 0.7666, "step": 5477 }, { "epoch": 0.81, "learning_rate": 1.8322830950661607e-06, "loss": 0.7671, "step": 5478 }, { "epoch": 0.81, "learning_rate": 1.8295205369296443e-06, "loss": 0.7891, "step": 5479 }, { "epoch": 0.81, "learning_rate": 1.8267598532351727e-06, "loss": 0.3225, "step": 5480 }, { "epoch": 0.81, "learning_rate": 1.8240010446160973e-06, "loss": 0.8071, "step": 5481 }, { "epoch": 0.81, "learning_rate": 1.8212441117053293e-06, "loss": 0.7747, "step": 5482 }, { "epoch": 0.81, "learning_rate": 1.8184890551353574e-06, "loss": 0.7954, "step": 5483 }, { "epoch": 0.81, "learning_rate": 1.8157358755382427e-06, "loss": 0.7402, "step": 5484 }, { "epoch": 0.81, "learning_rate": 1.8129845735455986e-06, "loss": 0.7861, "step": 5485 }, { "epoch": 0.81, "learning_rate": 1.8102351497886262e-06, "loss": 0.7637, "step": 5486 }, { "epoch": 0.81, "learning_rate": 1.8074876048980838e-06, "loss": 0.7195, "step": 5487 }, { "epoch": 0.81, "learning_rate": 1.8047419395043086e-06, "loss": 0.8354, "step": 5488 }, { "epoch": 0.81, "learning_rate": 1.801998154237199e-06, "loss": 0.7866, "step": 5489 }, { "epoch": 0.81, "learning_rate": 1.7992562497262211e-06, "loss": 0.835, "step": 5490 }, { "epoch": 0.81, "learning_rate": 1.7965162266004177e-06, "loss": 0.6855, "step": 5491 }, { "epoch": 0.81, "learning_rate": 1.7937780854883936e-06, "loss": 0.8066, "step": 5492 }, { "epoch": 0.81, "learning_rate": 1.7910418270183195e-06, "loss": 0.8335, "step": 5493 }, { "epoch": 0.81, "learning_rate": 1.7883074518179445e-06, "loss": 0.8145, "step": 5494 }, { "epoch": 0.81, "learning_rate": 1.7855749605145722e-06, "loss": 0.8291, "step": 5495 }, { "epoch": 0.81, "learning_rate": 1.7828443537350871e-06, "loss": 0.8022, "step": 5496 }, { "epoch": 0.81, "learning_rate": 1.7801156321059332e-06, "loss": 0.7556, "step": 5497 }, { "epoch": 0.81, "learning_rate": 1.7773887962531211e-06, "loss": 0.7251, "step": 5498 }, { "epoch": 0.81, "learning_rate": 1.774663846802236e-06, "loss": 0.7866, "step": 5499 }, { "epoch": 0.81, "learning_rate": 1.771940784378422e-06, "loss": 0.7554, "step": 5500 }, { "epoch": 0.81, "learning_rate": 1.7692196096063985e-06, "loss": 0.7939, "step": 5501 }, { "epoch": 0.81, "learning_rate": 1.766500323110445e-06, "loss": 0.7886, "step": 5502 }, { "epoch": 0.81, "learning_rate": 1.763782925514409e-06, "loss": 0.3323, "step": 5503 }, { "epoch": 0.81, "learning_rate": 1.7610674174417108e-06, "loss": 0.7373, "step": 5504 }, { "epoch": 0.81, "learning_rate": 1.758353799515329e-06, "loss": 0.8066, "step": 5505 }, { "epoch": 0.81, "learning_rate": 1.7556420723578106e-06, "loss": 0.7627, "step": 5506 }, { "epoch": 0.81, "learning_rate": 1.752932236591275e-06, "loss": 0.7305, "step": 5507 }, { "epoch": 0.81, "learning_rate": 1.750224292837398e-06, "loss": 0.8496, "step": 5508 }, { "epoch": 0.81, "learning_rate": 1.7475182417174318e-06, "loss": 0.7549, "step": 5509 }, { "epoch": 0.81, "learning_rate": 1.7448140838521854e-06, "loss": 0.7715, "step": 5510 }, { "epoch": 0.81, "learning_rate": 1.7421118198620345e-06, "loss": 0.8311, "step": 5511 }, { "epoch": 0.82, "learning_rate": 1.739411450366929e-06, "loss": 0.835, "step": 5512 }, { "epoch": 0.82, "learning_rate": 1.7367129759863754e-06, "loss": 0.7773, "step": 5513 }, { "epoch": 0.82, "learning_rate": 1.7340163973394441e-06, "loss": 0.7231, "step": 5514 }, { "epoch": 0.82, "learning_rate": 1.7313217150447802e-06, "loss": 0.7866, "step": 5515 }, { "epoch": 0.82, "learning_rate": 1.7286289297205826e-06, "loss": 0.77, "step": 5516 }, { "epoch": 0.82, "learning_rate": 1.7259380419846272e-06, "loss": 0.8276, "step": 5517 }, { "epoch": 0.82, "learning_rate": 1.7232490524542434e-06, "loss": 0.7126, "step": 5518 }, { "epoch": 0.82, "learning_rate": 1.7205619617463276e-06, "loss": 0.8501, "step": 5519 }, { "epoch": 0.82, "learning_rate": 1.717876770477347e-06, "loss": 0.772, "step": 5520 }, { "epoch": 0.82, "learning_rate": 1.715193479263325e-06, "loss": 0.8374, "step": 5521 }, { "epoch": 0.82, "learning_rate": 1.7125120887198566e-06, "loss": 0.7822, "step": 5522 }, { "epoch": 0.82, "learning_rate": 1.7098325994620934e-06, "loss": 0.8091, "step": 5523 }, { "epoch": 0.82, "learning_rate": 1.7071550121047543e-06, "loss": 0.7297, "step": 5524 }, { "epoch": 0.82, "learning_rate": 1.7044793272621241e-06, "loss": 0.7759, "step": 5525 }, { "epoch": 0.82, "learning_rate": 1.7018055455480475e-06, "loss": 0.79, "step": 5526 }, { "epoch": 0.82, "learning_rate": 1.6991336675759318e-06, "loss": 0.7886, "step": 5527 }, { "epoch": 0.82, "learning_rate": 1.6964636939587541e-06, "loss": 0.8394, "step": 5528 }, { "epoch": 0.82, "learning_rate": 1.693795625309045e-06, "loss": 0.731, "step": 5529 }, { "epoch": 0.82, "learning_rate": 1.6911294622389075e-06, "loss": 0.7354, "step": 5530 }, { "epoch": 0.82, "learning_rate": 1.6884652053600027e-06, "loss": 0.7266, "step": 5531 }, { "epoch": 0.82, "learning_rate": 1.6858028552835503e-06, "loss": 0.7314, "step": 5532 }, { "epoch": 0.82, "learning_rate": 1.6831424126203444e-06, "loss": 0.8154, "step": 5533 }, { "epoch": 0.82, "learning_rate": 1.6804838779807264e-06, "loss": 0.7798, "step": 5534 }, { "epoch": 0.82, "learning_rate": 1.6778272519746174e-06, "loss": 0.8193, "step": 5535 }, { "epoch": 0.82, "learning_rate": 1.67517253521148e-06, "loss": 0.7671, "step": 5536 }, { "epoch": 0.82, "learning_rate": 1.6725197283003548e-06, "loss": 0.7051, "step": 5537 }, { "epoch": 0.82, "learning_rate": 1.6698688318498423e-06, "loss": 0.7578, "step": 5538 }, { "epoch": 0.82, "learning_rate": 1.6672198464680989e-06, "loss": 0.8105, "step": 5539 }, { "epoch": 0.82, "learning_rate": 1.6645727727628447e-06, "loss": 0.2991, "step": 5540 }, { "epoch": 0.82, "learning_rate": 1.6619276113413607e-06, "loss": 0.7324, "step": 5541 }, { "epoch": 0.82, "learning_rate": 1.6592843628104915e-06, "loss": 0.7573, "step": 5542 }, { "epoch": 0.82, "learning_rate": 1.6566430277766478e-06, "loss": 0.7935, "step": 5543 }, { "epoch": 0.82, "learning_rate": 1.6540036068457833e-06, "loss": 0.7944, "step": 5544 }, { "epoch": 0.82, "learning_rate": 1.6513661006234315e-06, "loss": 0.8633, "step": 5545 }, { "epoch": 0.82, "learning_rate": 1.648730509714681e-06, "loss": 0.7925, "step": 5546 }, { "epoch": 0.82, "learning_rate": 1.6460968347241756e-06, "loss": 0.7358, "step": 5547 }, { "epoch": 0.82, "learning_rate": 1.64346507625613e-06, "loss": 0.7842, "step": 5548 }, { "epoch": 0.82, "learning_rate": 1.6408352349143032e-06, "loss": 0.731, "step": 5549 }, { "epoch": 0.82, "learning_rate": 1.638207311302029e-06, "loss": 0.7881, "step": 5550 }, { "epoch": 0.82, "learning_rate": 1.6355813060221993e-06, "loss": 0.7544, "step": 5551 }, { "epoch": 0.82, "learning_rate": 1.6329572196772581e-06, "loss": 0.813, "step": 5552 }, { "epoch": 0.82, "learning_rate": 1.6303350528692163e-06, "loss": 0.7783, "step": 5553 }, { "epoch": 0.82, "learning_rate": 1.6277148061996385e-06, "loss": 0.8193, "step": 5554 }, { "epoch": 0.82, "learning_rate": 1.6250964802696545e-06, "loss": 0.751, "step": 5555 }, { "epoch": 0.82, "learning_rate": 1.6224800756799576e-06, "loss": 0.7842, "step": 5556 }, { "epoch": 0.82, "learning_rate": 1.6198655930307816e-06, "loss": 0.8345, "step": 5557 }, { "epoch": 0.82, "learning_rate": 1.6172530329219416e-06, "loss": 0.7988, "step": 5558 }, { "epoch": 0.82, "learning_rate": 1.6146423959527947e-06, "loss": 0.7832, "step": 5559 }, { "epoch": 0.82, "learning_rate": 1.6120336827222705e-06, "loss": 0.7607, "step": 5560 }, { "epoch": 0.82, "learning_rate": 1.6094268938288472e-06, "loss": 0.8115, "step": 5561 }, { "epoch": 0.82, "learning_rate": 1.606822029870564e-06, "loss": 0.8403, "step": 5562 }, { "epoch": 0.82, "learning_rate": 1.6042190914450217e-06, "loss": 0.8169, "step": 5563 }, { "epoch": 0.82, "learning_rate": 1.6016180791493741e-06, "loss": 0.7715, "step": 5564 }, { "epoch": 0.82, "learning_rate": 1.5990189935803402e-06, "loss": 0.7661, "step": 5565 }, { "epoch": 0.82, "learning_rate": 1.5964218353341898e-06, "loss": 0.7642, "step": 5566 }, { "epoch": 0.82, "learning_rate": 1.593826605006753e-06, "loss": 0.9038, "step": 5567 }, { "epoch": 0.82, "learning_rate": 1.5912333031934224e-06, "loss": 0.7373, "step": 5568 }, { "epoch": 0.82, "learning_rate": 1.588641930489141e-06, "loss": 0.812, "step": 5569 }, { "epoch": 0.82, "learning_rate": 1.586052487488411e-06, "loss": 0.8438, "step": 5570 }, { "epoch": 0.82, "learning_rate": 1.5834649747852958e-06, "loss": 0.7412, "step": 5571 }, { "epoch": 0.82, "learning_rate": 1.58087939297341e-06, "loss": 0.7969, "step": 5572 }, { "epoch": 0.82, "learning_rate": 1.5782957426459334e-06, "loss": 0.772, "step": 5573 }, { "epoch": 0.82, "learning_rate": 1.575714024395595e-06, "loss": 0.7412, "step": 5574 }, { "epoch": 0.82, "learning_rate": 1.5731342388146809e-06, "loss": 0.79, "step": 5575 }, { "epoch": 0.82, "learning_rate": 1.5705563864950412e-06, "loss": 0.7881, "step": 5576 }, { "epoch": 0.82, "learning_rate": 1.5679804680280719e-06, "loss": 0.8198, "step": 5577 }, { "epoch": 0.82, "learning_rate": 1.5654064840047367e-06, "loss": 0.7461, "step": 5578 }, { "epoch": 0.83, "learning_rate": 1.5628344350155477e-06, "loss": 0.7715, "step": 5579 }, { "epoch": 0.83, "learning_rate": 1.5602643216505719e-06, "loss": 0.7563, "step": 5580 }, { "epoch": 0.83, "learning_rate": 1.5576961444994398e-06, "loss": 0.7612, "step": 5581 }, { "epoch": 0.83, "learning_rate": 1.5551299041513324e-06, "loss": 0.8257, "step": 5582 }, { "epoch": 0.83, "learning_rate": 1.552565601194984e-06, "loss": 0.792, "step": 5583 }, { "epoch": 0.83, "learning_rate": 1.5500032362186923e-06, "loss": 0.7207, "step": 5584 }, { "epoch": 0.83, "learning_rate": 1.5474428098103022e-06, "loss": 0.771, "step": 5585 }, { "epoch": 0.83, "learning_rate": 1.5448843225572218e-06, "loss": 0.2783, "step": 5586 }, { "epoch": 0.83, "learning_rate": 1.5423277750464071e-06, "loss": 0.7393, "step": 5587 }, { "epoch": 0.83, "learning_rate": 1.5397731678643713e-06, "loss": 0.8271, "step": 5588 }, { "epoch": 0.83, "learning_rate": 1.5372205015971853e-06, "loss": 0.7637, "step": 5589 }, { "epoch": 0.83, "learning_rate": 1.534669776830473e-06, "loss": 0.7202, "step": 5590 }, { "epoch": 0.83, "learning_rate": 1.5321209941494075e-06, "loss": 0.728, "step": 5591 }, { "epoch": 0.83, "learning_rate": 1.5295741541387287e-06, "loss": 0.3461, "step": 5592 }, { "epoch": 0.83, "learning_rate": 1.5270292573827173e-06, "loss": 0.7505, "step": 5593 }, { "epoch": 0.83, "learning_rate": 1.524486304465218e-06, "loss": 0.8208, "step": 5594 }, { "epoch": 0.83, "learning_rate": 1.5219452959696269e-06, "loss": 0.7656, "step": 5595 }, { "epoch": 0.83, "learning_rate": 1.5194062324788872e-06, "loss": 0.7856, "step": 5596 }, { "epoch": 0.83, "learning_rate": 1.5168691145755087e-06, "loss": 0.7949, "step": 5597 }, { "epoch": 0.83, "learning_rate": 1.5143339428415426e-06, "loss": 0.7969, "step": 5598 }, { "epoch": 0.83, "learning_rate": 1.5118007178586024e-06, "loss": 0.7837, "step": 5599 }, { "epoch": 0.83, "learning_rate": 1.509269440207851e-06, "loss": 0.7715, "step": 5600 }, { "epoch": 0.83, "learning_rate": 1.506740110470002e-06, "loss": 0.8726, "step": 5601 }, { "epoch": 0.83, "learning_rate": 1.5042127292253284e-06, "loss": 0.7983, "step": 5602 }, { "epoch": 0.83, "learning_rate": 1.5016872970536523e-06, "loss": 0.835, "step": 5603 }, { "epoch": 0.83, "learning_rate": 1.4991638145343467e-06, "loss": 0.8218, "step": 5604 }, { "epoch": 0.83, "learning_rate": 1.4966422822463443e-06, "loss": 0.748, "step": 5605 }, { "epoch": 0.83, "learning_rate": 1.4941227007681214e-06, "loss": 0.7817, "step": 5606 }, { "epoch": 0.83, "learning_rate": 1.4916050706777185e-06, "loss": 0.7485, "step": 5607 }, { "epoch": 0.83, "learning_rate": 1.4890893925527118e-06, "loss": 0.7837, "step": 5608 }, { "epoch": 0.83, "learning_rate": 1.4865756669702425e-06, "loss": 0.3191, "step": 5609 }, { "epoch": 0.83, "learning_rate": 1.4840638945070051e-06, "loss": 0.8633, "step": 5610 }, { "epoch": 0.83, "learning_rate": 1.4815540757392354e-06, "loss": 0.7073, "step": 5611 }, { "epoch": 0.83, "learning_rate": 1.4790462112427317e-06, "loss": 0.8418, "step": 5612 }, { "epoch": 0.83, "learning_rate": 1.476540301592837e-06, "loss": 0.7847, "step": 5613 }, { "epoch": 0.83, "learning_rate": 1.474036347364446e-06, "loss": 0.7708, "step": 5614 }, { "epoch": 0.83, "learning_rate": 1.4715343491320122e-06, "loss": 0.293, "step": 5615 }, { "epoch": 0.83, "learning_rate": 1.4690343074695312e-06, "loss": 0.7832, "step": 5616 }, { "epoch": 0.83, "learning_rate": 1.466536222950552e-06, "loss": 0.7935, "step": 5617 }, { "epoch": 0.83, "learning_rate": 1.4640400961481815e-06, "loss": 0.7983, "step": 5618 }, { "epoch": 0.83, "learning_rate": 1.4615459276350663e-06, "loss": 0.7676, "step": 5619 }, { "epoch": 0.83, "learning_rate": 1.4590537179834174e-06, "loss": 0.7134, "step": 5620 }, { "epoch": 0.83, "learning_rate": 1.4565634677649786e-06, "loss": 0.7539, "step": 5621 }, { "epoch": 0.83, "learning_rate": 1.4540751775510598e-06, "loss": 0.7915, "step": 5622 }, { "epoch": 0.83, "learning_rate": 1.451588847912516e-06, "loss": 0.7993, "step": 5623 }, { "epoch": 0.83, "learning_rate": 1.4491044794197528e-06, "loss": 0.7412, "step": 5624 }, { "epoch": 0.83, "learning_rate": 1.4466220726427227e-06, "loss": 0.7661, "step": 5625 }, { "epoch": 0.83, "learning_rate": 1.444141628150928e-06, "loss": 0.8511, "step": 5626 }, { "epoch": 0.83, "learning_rate": 1.4416631465134278e-06, "loss": 0.7388, "step": 5627 }, { "epoch": 0.83, "learning_rate": 1.4391866282988266e-06, "loss": 0.7715, "step": 5628 }, { "epoch": 0.83, "learning_rate": 1.4367120740752772e-06, "loss": 0.8032, "step": 5629 }, { "epoch": 0.83, "learning_rate": 1.4342394844104824e-06, "loss": 0.8262, "step": 5630 }, { "epoch": 0.83, "learning_rate": 1.4317688598716928e-06, "loss": 0.7729, "step": 5631 }, { "epoch": 0.83, "learning_rate": 1.429300201025713e-06, "loss": 0.2886, "step": 5632 }, { "epoch": 0.83, "learning_rate": 1.4268335084388974e-06, "loss": 0.8257, "step": 5633 }, { "epoch": 0.83, "learning_rate": 1.424368782677138e-06, "loss": 0.7505, "step": 5634 }, { "epoch": 0.83, "learning_rate": 1.4219060243058879e-06, "loss": 0.7231, "step": 5635 }, { "epoch": 0.83, "learning_rate": 1.419445233890142e-06, "loss": 0.8228, "step": 5636 }, { "epoch": 0.83, "learning_rate": 1.4169864119944498e-06, "loss": 0.813, "step": 5637 }, { "epoch": 0.83, "learning_rate": 1.4145295591829023e-06, "loss": 0.7581, "step": 5638 }, { "epoch": 0.83, "learning_rate": 1.4120746760191407e-06, "loss": 0.7222, "step": 5639 }, { "epoch": 0.83, "learning_rate": 1.409621763066359e-06, "loss": 0.7217, "step": 5640 }, { "epoch": 0.83, "learning_rate": 1.4071708208872925e-06, "loss": 0.7637, "step": 5641 }, { "epoch": 0.83, "learning_rate": 1.4047218500442305e-06, "loss": 0.8003, "step": 5642 }, { "epoch": 0.83, "learning_rate": 1.402274851099006e-06, "loss": 0.7646, "step": 5643 }, { "epoch": 0.83, "learning_rate": 1.3998298246129983e-06, "loss": 0.7227, "step": 5644 }, { "epoch": 0.83, "learning_rate": 1.3973867711471378e-06, "loss": 0.2899, "step": 5645 }, { "epoch": 0.83, "learning_rate": 1.3949456912619075e-06, "loss": 0.7505, "step": 5646 }, { "epoch": 0.84, "learning_rate": 1.3925065855173204e-06, "loss": 0.7539, "step": 5647 }, { "epoch": 0.84, "learning_rate": 1.3900694544729554e-06, "loss": 0.7778, "step": 5648 }, { "epoch": 0.84, "learning_rate": 1.3876342986879243e-06, "loss": 0.7241, "step": 5649 }, { "epoch": 0.84, "learning_rate": 1.385201118720898e-06, "loss": 0.8696, "step": 5650 }, { "epoch": 0.84, "learning_rate": 1.3827699151300845e-06, "loss": 0.7954, "step": 5651 }, { "epoch": 0.84, "learning_rate": 1.3803406884732396e-06, "loss": 0.7217, "step": 5652 }, { "epoch": 0.84, "learning_rate": 1.3779134393076732e-06, "loss": 0.7156, "step": 5653 }, { "epoch": 0.84, "learning_rate": 1.375488168190232e-06, "loss": 0.75, "step": 5654 }, { "epoch": 0.84, "learning_rate": 1.3730648756773136e-06, "loss": 0.7632, "step": 5655 }, { "epoch": 0.84, "learning_rate": 1.3706435623248627e-06, "loss": 0.7954, "step": 5656 }, { "epoch": 0.84, "learning_rate": 1.3682242286883662e-06, "loss": 0.7886, "step": 5657 }, { "epoch": 0.84, "learning_rate": 1.3658068753228614e-06, "loss": 0.7773, "step": 5658 }, { "epoch": 0.84, "learning_rate": 1.363391502782927e-06, "loss": 0.3052, "step": 5659 }, { "epoch": 0.84, "learning_rate": 1.3609781116226883e-06, "loss": 0.7891, "step": 5660 }, { "epoch": 0.84, "learning_rate": 1.3585667023958193e-06, "loss": 0.752, "step": 5661 }, { "epoch": 0.84, "learning_rate": 1.3561572756555342e-06, "loss": 0.7944, "step": 5662 }, { "epoch": 0.84, "learning_rate": 1.3537498319545984e-06, "loss": 0.7271, "step": 5663 }, { "epoch": 0.84, "learning_rate": 1.3513443718453166e-06, "loss": 0.7319, "step": 5664 }, { "epoch": 0.84, "learning_rate": 1.3489408958795392e-06, "loss": 0.8252, "step": 5665 }, { "epoch": 0.84, "learning_rate": 1.3465394046086677e-06, "loss": 0.7651, "step": 5666 }, { "epoch": 0.84, "learning_rate": 1.3441398985836407e-06, "loss": 0.8662, "step": 5667 }, { "epoch": 0.84, "learning_rate": 1.3417423783549422e-06, "loss": 0.7349, "step": 5668 }, { "epoch": 0.84, "learning_rate": 1.3393468444726066e-06, "loss": 0.7051, "step": 5669 }, { "epoch": 0.84, "learning_rate": 1.3369532974862053e-06, "loss": 0.8188, "step": 5670 }, { "epoch": 0.84, "learning_rate": 1.3345617379448616e-06, "loss": 0.8003, "step": 5671 }, { "epoch": 0.84, "learning_rate": 1.3321721663972353e-06, "loss": 0.8008, "step": 5672 }, { "epoch": 0.84, "learning_rate": 1.3297845833915323e-06, "loss": 0.7192, "step": 5673 }, { "epoch": 0.84, "learning_rate": 1.3273989894755067e-06, "loss": 0.7393, "step": 5674 }, { "epoch": 0.84, "learning_rate": 1.3250153851964498e-06, "loss": 0.7886, "step": 5675 }, { "epoch": 0.84, "learning_rate": 1.3226337711012016e-06, "loss": 0.7783, "step": 5676 }, { "epoch": 0.84, "learning_rate": 1.3202541477361441e-06, "loss": 0.7542, "step": 5677 }, { "epoch": 0.84, "learning_rate": 1.3178765156471973e-06, "loss": 0.8096, "step": 5678 }, { "epoch": 0.84, "learning_rate": 1.3155008753798348e-06, "loss": 0.8267, "step": 5679 }, { "epoch": 0.84, "learning_rate": 1.3131272274790653e-06, "loss": 0.8086, "step": 5680 }, { "epoch": 0.84, "learning_rate": 1.3107555724894394e-06, "loss": 0.7769, "step": 5681 }, { "epoch": 0.84, "learning_rate": 1.3083859109550601e-06, "loss": 0.8179, "step": 5682 }, { "epoch": 0.84, "learning_rate": 1.306018243419559e-06, "loss": 0.7939, "step": 5683 }, { "epoch": 0.84, "learning_rate": 1.303652570426125e-06, "loss": 0.7988, "step": 5684 }, { "epoch": 0.84, "learning_rate": 1.3012888925174794e-06, "loss": 0.7603, "step": 5685 }, { "epoch": 0.84, "learning_rate": 1.2989272102358851e-06, "loss": 0.7754, "step": 5686 }, { "epoch": 0.84, "learning_rate": 1.2965675241231568e-06, "loss": 0.7842, "step": 5687 }, { "epoch": 0.84, "learning_rate": 1.2942098347206412e-06, "loss": 0.7656, "step": 5688 }, { "epoch": 0.84, "learning_rate": 1.2918541425692343e-06, "loss": 0.7202, "step": 5689 }, { "epoch": 0.84, "learning_rate": 1.2895004482093675e-06, "loss": 0.8267, "step": 5690 }, { "epoch": 0.84, "learning_rate": 1.2871487521810166e-06, "loss": 0.8013, "step": 5691 }, { "epoch": 0.84, "learning_rate": 1.2847990550237021e-06, "loss": 0.8071, "step": 5692 }, { "epoch": 0.84, "learning_rate": 1.282451357276483e-06, "loss": 0.7769, "step": 5693 }, { "epoch": 0.84, "learning_rate": 1.2801056594779548e-06, "loss": 0.7568, "step": 5694 }, { "epoch": 0.84, "learning_rate": 1.277761962166265e-06, "loss": 0.7617, "step": 5695 }, { "epoch": 0.84, "learning_rate": 1.2754202658790915e-06, "loss": 0.7925, "step": 5696 }, { "epoch": 0.84, "learning_rate": 1.2730805711536641e-06, "loss": 0.8242, "step": 5697 }, { "epoch": 0.84, "learning_rate": 1.2707428785267396e-06, "loss": 0.73, "step": 5698 }, { "epoch": 0.84, "learning_rate": 1.268407188534626e-06, "loss": 0.8027, "step": 5699 }, { "epoch": 0.84, "learning_rate": 1.266073501713172e-06, "loss": 0.7056, "step": 5700 }, { "epoch": 0.84, "learning_rate": 1.2637418185977602e-06, "loss": 0.8203, "step": 5701 }, { "epoch": 0.84, "learning_rate": 1.2614121397233191e-06, "loss": 0.7686, "step": 5702 }, { "epoch": 0.84, "learning_rate": 1.2590844656243107e-06, "loss": 0.8271, "step": 5703 }, { "epoch": 0.84, "learning_rate": 1.2567587968347461e-06, "loss": 0.3098, "step": 5704 }, { "epoch": 0.84, "learning_rate": 1.2544351338881721e-06, "loss": 0.7764, "step": 5705 }, { "epoch": 0.84, "learning_rate": 1.2521134773176745e-06, "loss": 0.7769, "step": 5706 }, { "epoch": 0.84, "learning_rate": 1.2497938276558786e-06, "loss": 0.7822, "step": 5707 }, { "epoch": 0.84, "learning_rate": 1.2474761854349483e-06, "loss": 0.814, "step": 5708 }, { "epoch": 0.84, "learning_rate": 1.2451605511865894e-06, "loss": 0.7588, "step": 5709 }, { "epoch": 0.84, "learning_rate": 1.2428469254420528e-06, "loss": 0.8267, "step": 5710 }, { "epoch": 0.84, "learning_rate": 1.2405353087321126e-06, "loss": 0.7461, "step": 5711 }, { "epoch": 0.84, "learning_rate": 1.2382257015870957e-06, "loss": 0.7803, "step": 5712 }, { "epoch": 0.84, "learning_rate": 1.2359181045368674e-06, "loss": 0.772, "step": 5713 }, { "epoch": 0.85, "learning_rate": 1.233612518110824e-06, "loss": 0.7715, "step": 5714 }, { "epoch": 0.85, "learning_rate": 1.2313089428379066e-06, "loss": 0.7388, "step": 5715 }, { "epoch": 0.85, "learning_rate": 1.2290073792465895e-06, "loss": 0.791, "step": 5716 }, { "epoch": 0.85, "learning_rate": 1.2267078278648937e-06, "loss": 0.8086, "step": 5717 }, { "epoch": 0.85, "learning_rate": 1.2244102892203758e-06, "loss": 0.8208, "step": 5718 }, { "epoch": 0.85, "learning_rate": 1.222114763840121e-06, "loss": 0.7734, "step": 5719 }, { "epoch": 0.85, "learning_rate": 1.2198212522507679e-06, "loss": 0.7583, "step": 5720 }, { "epoch": 0.85, "learning_rate": 1.2175297549784803e-06, "loss": 0.8667, "step": 5721 }, { "epoch": 0.85, "learning_rate": 1.2152402725489686e-06, "loss": 0.7705, "step": 5722 }, { "epoch": 0.85, "learning_rate": 1.2129528054874807e-06, "loss": 0.7886, "step": 5723 }, { "epoch": 0.85, "learning_rate": 1.2106673543187909e-06, "loss": 0.7734, "step": 5724 }, { "epoch": 0.85, "learning_rate": 1.2083839195672253e-06, "loss": 0.7988, "step": 5725 }, { "epoch": 0.85, "learning_rate": 1.2061025017566374e-06, "loss": 0.8042, "step": 5726 }, { "epoch": 0.85, "learning_rate": 1.2038231014104273e-06, "loss": 0.7222, "step": 5727 }, { "epoch": 0.85, "learning_rate": 1.2015457190515223e-06, "loss": 0.2944, "step": 5728 }, { "epoch": 0.85, "learning_rate": 1.199270355202391e-06, "loss": 0.7024, "step": 5729 }, { "epoch": 0.85, "learning_rate": 1.1969970103850426e-06, "loss": 0.7695, "step": 5730 }, { "epoch": 0.85, "learning_rate": 1.1947256851210176e-06, "loss": 0.8535, "step": 5731 }, { "epoch": 0.85, "learning_rate": 1.1924563799313937e-06, "loss": 0.7988, "step": 5732 }, { "epoch": 0.85, "learning_rate": 1.190189095336791e-06, "loss": 0.7998, "step": 5733 }, { "epoch": 0.85, "learning_rate": 1.1879238318573573e-06, "loss": 0.8311, "step": 5734 }, { "epoch": 0.85, "learning_rate": 1.1856605900127848e-06, "loss": 0.7168, "step": 5735 }, { "epoch": 0.85, "learning_rate": 1.183399370322297e-06, "loss": 0.7241, "step": 5736 }, { "epoch": 0.85, "learning_rate": 1.1811401733046523e-06, "loss": 0.7915, "step": 5737 }, { "epoch": 0.85, "learning_rate": 1.1788829994781525e-06, "loss": 0.8457, "step": 5738 }, { "epoch": 0.85, "learning_rate": 1.1766278493606253e-06, "loss": 0.8369, "step": 5739 }, { "epoch": 0.85, "learning_rate": 1.1743747234694437e-06, "loss": 0.7769, "step": 5740 }, { "epoch": 0.85, "learning_rate": 1.1721236223215092e-06, "loss": 0.3274, "step": 5741 }, { "epoch": 0.85, "learning_rate": 1.1698745464332595e-06, "loss": 0.7261, "step": 5742 }, { "epoch": 0.85, "learning_rate": 1.1676274963206747e-06, "loss": 0.8081, "step": 5743 }, { "epoch": 0.85, "learning_rate": 1.1653824724992601e-06, "loss": 0.769, "step": 5744 }, { "epoch": 0.85, "learning_rate": 1.163139475484063e-06, "loss": 0.8022, "step": 5745 }, { "epoch": 0.85, "learning_rate": 1.1608985057896638e-06, "loss": 0.8184, "step": 5746 }, { "epoch": 0.85, "learning_rate": 1.1586595639301768e-06, "loss": 0.7544, "step": 5747 }, { "epoch": 0.85, "learning_rate": 1.1564226504192532e-06, "loss": 0.79, "step": 5748 }, { "epoch": 0.85, "learning_rate": 1.1541877657700784e-06, "loss": 0.7715, "step": 5749 }, { "epoch": 0.85, "learning_rate": 1.1519549104953686e-06, "loss": 0.7866, "step": 5750 }, { "epoch": 0.85, "learning_rate": 1.1497240851073798e-06, "loss": 0.2767, "step": 5751 }, { "epoch": 0.85, "learning_rate": 1.1474952901178982e-06, "loss": 0.6943, "step": 5752 }, { "epoch": 0.85, "learning_rate": 1.1452685260382501e-06, "loss": 0.2902, "step": 5753 }, { "epoch": 0.85, "learning_rate": 1.143043793379287e-06, "loss": 0.6895, "step": 5754 }, { "epoch": 0.85, "learning_rate": 1.1408210926513997e-06, "loss": 0.7529, "step": 5755 }, { "epoch": 0.85, "learning_rate": 1.1386004243645143e-06, "loss": 0.769, "step": 5756 }, { "epoch": 0.85, "learning_rate": 1.1363817890280892e-06, "loss": 0.7773, "step": 5757 }, { "epoch": 0.85, "learning_rate": 1.1341651871511106e-06, "loss": 0.832, "step": 5758 }, { "epoch": 0.85, "learning_rate": 1.1319506192421092e-06, "loss": 0.7554, "step": 5759 }, { "epoch": 0.85, "learning_rate": 1.129738085809139e-06, "loss": 0.7905, "step": 5760 }, { "epoch": 0.85, "learning_rate": 1.1275275873597957e-06, "loss": 0.7832, "step": 5761 }, { "epoch": 0.85, "learning_rate": 1.1253191244012007e-06, "loss": 0.8184, "step": 5762 }, { "epoch": 0.85, "learning_rate": 1.1231126974400108e-06, "loss": 0.2966, "step": 5763 }, { "epoch": 0.85, "learning_rate": 1.1209083069824212e-06, "loss": 0.8213, "step": 5764 }, { "epoch": 0.85, "learning_rate": 1.1187059535341505e-06, "loss": 0.6914, "step": 5765 }, { "epoch": 0.85, "learning_rate": 1.1165056376004558e-06, "loss": 0.7466, "step": 5766 }, { "epoch": 0.85, "learning_rate": 1.1143073596861276e-06, "loss": 0.771, "step": 5767 }, { "epoch": 0.85, "learning_rate": 1.1121111202954836e-06, "loss": 0.7603, "step": 5768 }, { "epoch": 0.85, "learning_rate": 1.1099169199323823e-06, "loss": 0.6914, "step": 5769 }, { "epoch": 0.85, "learning_rate": 1.107724759100205e-06, "loss": 0.731, "step": 5770 }, { "epoch": 0.85, "learning_rate": 1.1055346383018683e-06, "loss": 0.7549, "step": 5771 }, { "epoch": 0.85, "learning_rate": 1.1033465580398273e-06, "loss": 0.8149, "step": 5772 }, { "epoch": 0.85, "learning_rate": 1.1011605188160579e-06, "loss": 0.834, "step": 5773 }, { "epoch": 0.85, "learning_rate": 1.0989765211320792e-06, "loss": 0.7944, "step": 5774 }, { "epoch": 0.85, "learning_rate": 1.096794565488929e-06, "loss": 0.79, "step": 5775 }, { "epoch": 0.85, "learning_rate": 1.0946146523871882e-06, "loss": 0.311, "step": 5776 }, { "epoch": 0.85, "learning_rate": 1.0924367823269644e-06, "loss": 0.7939, "step": 5777 }, { "epoch": 0.85, "learning_rate": 1.090260955807897e-06, "loss": 0.7593, "step": 5778 }, { "epoch": 0.85, "learning_rate": 1.0880871733291532e-06, "loss": 0.7773, "step": 5779 }, { "epoch": 0.85, "learning_rate": 1.0859154353894385e-06, "loss": 0.8018, "step": 5780 }, { "epoch": 0.85, "learning_rate": 1.0837457424869823e-06, "loss": 0.7925, "step": 5781 }, { "epoch": 0.86, "learning_rate": 1.0815780951195521e-06, "loss": 0.7, "step": 5782 }, { "epoch": 0.86, "learning_rate": 1.0794124937844341e-06, "loss": 0.7456, "step": 5783 }, { "epoch": 0.86, "learning_rate": 1.077248938978458e-06, "loss": 0.749, "step": 5784 }, { "epoch": 0.86, "learning_rate": 1.0750874311979786e-06, "loss": 0.8091, "step": 5785 }, { "epoch": 0.86, "learning_rate": 1.0729279709388796e-06, "loss": 0.8003, "step": 5786 }, { "epoch": 0.86, "learning_rate": 1.0707705586965812e-06, "loss": 0.7329, "step": 5787 }, { "epoch": 0.86, "learning_rate": 1.0686151949660217e-06, "loss": 0.7739, "step": 5788 }, { "epoch": 0.86, "learning_rate": 1.0664618802416814e-06, "loss": 0.7427, "step": 5789 }, { "epoch": 0.86, "learning_rate": 1.0643106150175664e-06, "loss": 0.7935, "step": 5790 }, { "epoch": 0.86, "learning_rate": 1.0621613997872115e-06, "loss": 0.2903, "step": 5791 }, { "epoch": 0.86, "learning_rate": 1.0600142350436816e-06, "loss": 0.7617, "step": 5792 }, { "epoch": 0.86, "learning_rate": 1.05786912127957e-06, "loss": 0.7217, "step": 5793 }, { "epoch": 0.86, "learning_rate": 1.0557260589870022e-06, "loss": 0.6851, "step": 5794 }, { "epoch": 0.86, "learning_rate": 1.0535850486576372e-06, "loss": 0.8232, "step": 5795 }, { "epoch": 0.86, "learning_rate": 1.0514460907826473e-06, "loss": 0.8003, "step": 5796 }, { "epoch": 0.86, "learning_rate": 1.0493091858527538e-06, "loss": 0.7231, "step": 5797 }, { "epoch": 0.86, "learning_rate": 1.0471743343581907e-06, "loss": 0.8018, "step": 5798 }, { "epoch": 0.86, "learning_rate": 1.0450415367887324e-06, "loss": 0.8027, "step": 5799 }, { "epoch": 0.86, "learning_rate": 1.0429107936336803e-06, "loss": 0.7744, "step": 5800 }, { "epoch": 0.86, "learning_rate": 1.0407821053818535e-06, "loss": 0.7485, "step": 5801 }, { "epoch": 0.86, "learning_rate": 1.0386554725216158e-06, "loss": 0.8154, "step": 5802 }, { "epoch": 0.86, "learning_rate": 1.0365308955408459e-06, "loss": 0.7368, "step": 5803 }, { "epoch": 0.86, "learning_rate": 1.0344083749269608e-06, "loss": 0.75, "step": 5804 }, { "epoch": 0.86, "learning_rate": 1.0322879111669004e-06, "loss": 0.8105, "step": 5805 }, { "epoch": 0.86, "learning_rate": 1.0301695047471326e-06, "loss": 0.7246, "step": 5806 }, { "epoch": 0.86, "learning_rate": 1.0280531561536567e-06, "loss": 0.7603, "step": 5807 }, { "epoch": 0.86, "learning_rate": 1.025938865871996e-06, "loss": 0.8384, "step": 5808 }, { "epoch": 0.86, "learning_rate": 1.0238266343872028e-06, "loss": 0.7969, "step": 5809 }, { "epoch": 0.86, "learning_rate": 1.0217164621838605e-06, "loss": 0.812, "step": 5810 }, { "epoch": 0.86, "learning_rate": 1.019608349746074e-06, "loss": 0.7847, "step": 5811 }, { "epoch": 0.86, "learning_rate": 1.017502297557481e-06, "loss": 0.813, "step": 5812 }, { "epoch": 0.86, "learning_rate": 1.015398306101245e-06, "loss": 0.8301, "step": 5813 }, { "epoch": 0.86, "learning_rate": 1.0132963758600533e-06, "loss": 0.7202, "step": 5814 }, { "epoch": 0.86, "learning_rate": 1.0111965073161268e-06, "loss": 0.7275, "step": 5815 }, { "epoch": 0.86, "learning_rate": 1.0090987009512055e-06, "loss": 0.7729, "step": 5816 }, { "epoch": 0.86, "learning_rate": 1.0070029572465657e-06, "loss": 0.8179, "step": 5817 }, { "epoch": 0.86, "learning_rate": 1.0049092766830015e-06, "loss": 0.7588, "step": 5818 }, { "epoch": 0.86, "learning_rate": 1.0028176597408378e-06, "loss": 0.7119, "step": 5819 }, { "epoch": 0.86, "learning_rate": 1.0007281068999286e-06, "loss": 0.8101, "step": 5820 }, { "epoch": 0.86, "learning_rate": 9.986406186396503e-07, "loss": 0.7974, "step": 5821 }, { "epoch": 0.86, "learning_rate": 9.965551954389042e-07, "loss": 0.8188, "step": 5822 }, { "epoch": 0.86, "learning_rate": 9.944718377761242e-07, "loss": 0.7812, "step": 5823 }, { "epoch": 0.86, "learning_rate": 9.923905461292638e-07, "loss": 0.8091, "step": 5824 }, { "epoch": 0.86, "learning_rate": 9.903113209758098e-07, "loss": 0.7393, "step": 5825 }, { "epoch": 0.86, "learning_rate": 9.88234162792767e-07, "loss": 0.792, "step": 5826 }, { "epoch": 0.86, "learning_rate": 9.861590720566684e-07, "loss": 0.8486, "step": 5827 }, { "epoch": 0.86, "learning_rate": 9.840860492435778e-07, "loss": 0.8369, "step": 5828 }, { "epoch": 0.86, "learning_rate": 9.820150948290797e-07, "loss": 0.8442, "step": 5829 }, { "epoch": 0.86, "learning_rate": 9.799462092882806e-07, "loss": 0.7832, "step": 5830 }, { "epoch": 0.86, "learning_rate": 9.77879393095823e-07, "loss": 0.7456, "step": 5831 }, { "epoch": 0.86, "learning_rate": 9.758146467258645e-07, "loss": 0.8096, "step": 5832 }, { "epoch": 0.86, "learning_rate": 9.737519706520938e-07, "loss": 0.7871, "step": 5833 }, { "epoch": 0.86, "learning_rate": 9.716913653477223e-07, "loss": 0.8062, "step": 5834 }, { "epoch": 0.86, "learning_rate": 9.696328312854842e-07, "loss": 0.7554, "step": 5835 }, { "epoch": 0.86, "learning_rate": 9.67576368937645e-07, "loss": 0.7324, "step": 5836 }, { "epoch": 0.86, "learning_rate": 9.655219787759862e-07, "loss": 0.3182, "step": 5837 }, { "epoch": 0.86, "learning_rate": 9.634696612718242e-07, "loss": 0.7974, "step": 5838 }, { "epoch": 0.86, "learning_rate": 9.614194168959912e-07, "loss": 0.731, "step": 5839 }, { "epoch": 0.86, "learning_rate": 9.593712461188442e-07, "loss": 0.7739, "step": 5840 }, { "epoch": 0.86, "learning_rate": 9.57325149410273e-07, "loss": 0.7676, "step": 5841 }, { "epoch": 0.86, "learning_rate": 9.552811272396822e-07, "loss": 0.7988, "step": 5842 }, { "epoch": 0.86, "learning_rate": 9.532391800760033e-07, "loss": 0.7715, "step": 5843 }, { "epoch": 0.86, "learning_rate": 9.511993083876958e-07, "loss": 0.8203, "step": 5844 }, { "epoch": 0.86, "learning_rate": 9.491615126427356e-07, "loss": 0.8081, "step": 5845 }, { "epoch": 0.86, "learning_rate": 9.471257933086308e-07, "loss": 0.3162, "step": 5846 }, { "epoch": 0.86, "learning_rate": 9.450921508524057e-07, "loss": 0.8179, "step": 5847 }, { "epoch": 0.86, "learning_rate": 9.430605857406117e-07, "loss": 0.8052, "step": 5848 }, { "epoch": 0.86, "learning_rate": 9.410310984393245e-07, "loss": 0.7676, "step": 5849 }, { "epoch": 0.87, "learning_rate": 9.390036894141397e-07, "loss": 0.7744, "step": 5850 }, { "epoch": 0.87, "learning_rate": 9.369783591301818e-07, "loss": 0.7524, "step": 5851 }, { "epoch": 0.87, "learning_rate": 9.349551080520913e-07, "loss": 0.8037, "step": 5852 }, { "epoch": 0.87, "learning_rate": 9.329339366440349e-07, "loss": 0.8159, "step": 5853 }, { "epoch": 0.87, "learning_rate": 9.309148453697059e-07, "loss": 0.8135, "step": 5854 }, { "epoch": 0.87, "learning_rate": 9.28897834692315e-07, "loss": 0.7656, "step": 5855 }, { "epoch": 0.87, "learning_rate": 9.268829050745964e-07, "loss": 0.8179, "step": 5856 }, { "epoch": 0.87, "learning_rate": 9.248700569788105e-07, "loss": 0.7876, "step": 5857 }, { "epoch": 0.87, "learning_rate": 9.228592908667344e-07, "loss": 0.8032, "step": 5858 }, { "epoch": 0.87, "learning_rate": 9.208506071996759e-07, "loss": 0.7739, "step": 5859 }, { "epoch": 0.87, "learning_rate": 9.188440064384541e-07, "loss": 0.7295, "step": 5860 }, { "epoch": 0.87, "learning_rate": 9.168394890434184e-07, "loss": 0.7568, "step": 5861 }, { "epoch": 0.87, "learning_rate": 9.148370554744402e-07, "loss": 0.2955, "step": 5862 }, { "epoch": 0.87, "learning_rate": 9.128367061909072e-07, "loss": 0.7964, "step": 5863 }, { "epoch": 0.87, "learning_rate": 9.10838441651738e-07, "loss": 0.8022, "step": 5864 }, { "epoch": 0.87, "learning_rate": 9.088422623153603e-07, "loss": 0.7583, "step": 5865 }, { "epoch": 0.87, "learning_rate": 9.068481686397324e-07, "loss": 0.7651, "step": 5866 }, { "epoch": 0.87, "learning_rate": 9.048561610823359e-07, "loss": 0.7886, "step": 5867 }, { "epoch": 0.87, "learning_rate": 9.028662401001664e-07, "loss": 0.8315, "step": 5868 }, { "epoch": 0.87, "learning_rate": 9.008784061497455e-07, "loss": 0.8071, "step": 5869 }, { "epoch": 0.87, "learning_rate": 8.988926596871128e-07, "loss": 0.7822, "step": 5870 }, { "epoch": 0.87, "learning_rate": 8.969090011678328e-07, "loss": 0.7505, "step": 5871 }, { "epoch": 0.87, "learning_rate": 8.949274310469936e-07, "loss": 0.8237, "step": 5872 }, { "epoch": 0.87, "learning_rate": 8.929479497791926e-07, "loss": 0.7402, "step": 5873 }, { "epoch": 0.87, "learning_rate": 8.909705578185601e-07, "loss": 0.7593, "step": 5874 }, { "epoch": 0.87, "learning_rate": 8.889952556187376e-07, "loss": 0.7891, "step": 5875 }, { "epoch": 0.87, "learning_rate": 8.87022043632898e-07, "loss": 0.75, "step": 5876 }, { "epoch": 0.87, "learning_rate": 8.850509223137249e-07, "loss": 0.8203, "step": 5877 }, { "epoch": 0.87, "learning_rate": 8.830818921134255e-07, "loss": 0.3445, "step": 5878 }, { "epoch": 0.87, "learning_rate": 8.811149534837271e-07, "loss": 0.8071, "step": 5879 }, { "epoch": 0.87, "learning_rate": 8.791501068758823e-07, "loss": 0.8247, "step": 5880 }, { "epoch": 0.87, "learning_rate": 8.771873527406549e-07, "loss": 0.7788, "step": 5881 }, { "epoch": 0.87, "learning_rate": 8.752266915283347e-07, "loss": 0.7407, "step": 5882 }, { "epoch": 0.87, "learning_rate": 8.732681236887264e-07, "loss": 0.7358, "step": 5883 }, { "epoch": 0.87, "learning_rate": 8.713116496711605e-07, "loss": 0.7544, "step": 5884 }, { "epoch": 0.87, "learning_rate": 8.693572699244879e-07, "loss": 0.7217, "step": 5885 }, { "epoch": 0.87, "learning_rate": 8.674049848970667e-07, "loss": 0.6897, "step": 5886 }, { "epoch": 0.87, "learning_rate": 8.654547950367898e-07, "loss": 0.7949, "step": 5887 }, { "epoch": 0.87, "learning_rate": 8.635067007910581e-07, "loss": 0.7861, "step": 5888 }, { "epoch": 0.87, "learning_rate": 8.615607026068018e-07, "loss": 0.8174, "step": 5889 }, { "epoch": 0.87, "learning_rate": 8.596168009304617e-07, "loss": 0.8027, "step": 5890 }, { "epoch": 0.87, "learning_rate": 8.576749962079989e-07, "loss": 0.7666, "step": 5891 }, { "epoch": 0.87, "learning_rate": 8.55735288884899e-07, "loss": 0.8389, "step": 5892 }, { "epoch": 0.87, "learning_rate": 8.537976794061587e-07, "loss": 0.7559, "step": 5893 }, { "epoch": 0.87, "learning_rate": 8.51862168216303e-07, "loss": 0.812, "step": 5894 }, { "epoch": 0.87, "learning_rate": 8.49928755759365e-07, "loss": 0.7319, "step": 5895 }, { "epoch": 0.87, "learning_rate": 8.479974424789017e-07, "loss": 0.7295, "step": 5896 }, { "epoch": 0.87, "learning_rate": 8.460682288179911e-07, "loss": 0.8198, "step": 5897 }, { "epoch": 0.87, "learning_rate": 8.441411152192247e-07, "loss": 0.8232, "step": 5898 }, { "epoch": 0.87, "learning_rate": 8.422161021247122e-07, "loss": 0.7812, "step": 5899 }, { "epoch": 0.87, "learning_rate": 8.402931899760869e-07, "loss": 0.7466, "step": 5900 }, { "epoch": 0.87, "learning_rate": 8.383723792144916e-07, "loss": 0.7334, "step": 5901 }, { "epoch": 0.87, "learning_rate": 8.36453670280597e-07, "loss": 0.7417, "step": 5902 }, { "epoch": 0.87, "learning_rate": 8.345370636145844e-07, "loss": 0.7808, "step": 5903 }, { "epoch": 0.87, "learning_rate": 8.326225596561521e-07, "loss": 0.7832, "step": 5904 }, { "epoch": 0.87, "learning_rate": 8.307101588445232e-07, "loss": 0.749, "step": 5905 }, { "epoch": 0.87, "learning_rate": 8.287998616184312e-07, "loss": 0.7578, "step": 5906 }, { "epoch": 0.87, "learning_rate": 8.268916684161276e-07, "loss": 0.8193, "step": 5907 }, { "epoch": 0.87, "learning_rate": 8.249855796753881e-07, "loss": 0.7778, "step": 5908 }, { "epoch": 0.87, "learning_rate": 8.23081595833497e-07, "loss": 0.7588, "step": 5909 }, { "epoch": 0.87, "learning_rate": 8.211797173272617e-07, "loss": 0.3357, "step": 5910 }, { "epoch": 0.87, "learning_rate": 8.192799445930044e-07, "loss": 0.751, "step": 5911 }, { "epoch": 0.87, "learning_rate": 8.173822780665608e-07, "loss": 0.8003, "step": 5912 }, { "epoch": 0.87, "learning_rate": 8.154867181832915e-07, "loss": 0.7783, "step": 5913 }, { "epoch": 0.87, "learning_rate": 8.135932653780642e-07, "loss": 0.8052, "step": 5914 }, { "epoch": 0.87, "learning_rate": 8.117019200852716e-07, "loss": 0.2882, "step": 5915 }, { "epoch": 0.87, "learning_rate": 8.098126827388187e-07, "loss": 0.8281, "step": 5916 }, { "epoch": 0.88, "learning_rate": 8.079255537721253e-07, "loss": 0.7578, "step": 5917 }, { "epoch": 0.88, "learning_rate": 8.060405336181343e-07, "loss": 0.7861, "step": 5918 }, { "epoch": 0.88, "learning_rate": 8.041576227092963e-07, "loss": 0.7979, "step": 5919 }, { "epoch": 0.88, "learning_rate": 8.022768214775811e-07, "loss": 0.7412, "step": 5920 }, { "epoch": 0.88, "learning_rate": 8.003981303544795e-07, "loss": 0.7905, "step": 5921 }, { "epoch": 0.88, "learning_rate": 7.985215497709909e-07, "loss": 0.7256, "step": 5922 }, { "epoch": 0.88, "learning_rate": 7.966470801576354e-07, "loss": 0.791, "step": 5923 }, { "epoch": 0.88, "learning_rate": 7.947747219444468e-07, "loss": 0.8286, "step": 5924 }, { "epoch": 0.88, "learning_rate": 7.929044755609728e-07, "loss": 0.7588, "step": 5925 }, { "epoch": 0.88, "learning_rate": 7.91036341436282e-07, "loss": 0.8105, "step": 5926 }, { "epoch": 0.88, "learning_rate": 7.891703199989509e-07, "loss": 0.8237, "step": 5927 }, { "epoch": 0.88, "learning_rate": 7.873064116770802e-07, "loss": 0.8149, "step": 5928 }, { "epoch": 0.88, "learning_rate": 7.854446168982777e-07, "loss": 0.7783, "step": 5929 }, { "epoch": 0.88, "learning_rate": 7.835849360896697e-07, "loss": 0.8716, "step": 5930 }, { "epoch": 0.88, "learning_rate": 7.817273696778994e-07, "loss": 0.7754, "step": 5931 }, { "epoch": 0.88, "learning_rate": 7.798719180891223e-07, "loss": 0.7246, "step": 5932 }, { "epoch": 0.88, "learning_rate": 7.780185817490082e-07, "loss": 0.7905, "step": 5933 }, { "epoch": 0.88, "learning_rate": 7.761673610827447e-07, "loss": 0.7549, "step": 5934 }, { "epoch": 0.88, "learning_rate": 7.743182565150286e-07, "loss": 0.7891, "step": 5935 }, { "epoch": 0.88, "learning_rate": 7.724712684700819e-07, "loss": 0.772, "step": 5936 }, { "epoch": 0.88, "learning_rate": 7.706263973716266e-07, "loss": 0.7954, "step": 5937 }, { "epoch": 0.88, "learning_rate": 7.687836436429086e-07, "loss": 0.7485, "step": 5938 }, { "epoch": 0.88, "learning_rate": 7.669430077066887e-07, "loss": 0.793, "step": 5939 }, { "epoch": 0.88, "learning_rate": 7.651044899852367e-07, "loss": 0.7812, "step": 5940 }, { "epoch": 0.88, "learning_rate": 7.632680909003398e-07, "loss": 0.7783, "step": 5941 }, { "epoch": 0.88, "learning_rate": 7.614338108732944e-07, "loss": 0.7822, "step": 5942 }, { "epoch": 0.88, "learning_rate": 7.59601650324917e-07, "loss": 0.3201, "step": 5943 }, { "epoch": 0.88, "learning_rate": 7.577716096755383e-07, "loss": 0.7563, "step": 5944 }, { "epoch": 0.88, "learning_rate": 7.559436893449968e-07, "loss": 0.7319, "step": 5945 }, { "epoch": 0.88, "learning_rate": 7.541178897526447e-07, "loss": 0.6865, "step": 5946 }, { "epoch": 0.88, "learning_rate": 7.522942113173559e-07, "loss": 0.6982, "step": 5947 }, { "epoch": 0.88, "learning_rate": 7.504726544575069e-07, "loss": 0.7954, "step": 5948 }, { "epoch": 0.88, "learning_rate": 7.48653219590999e-07, "loss": 0.7578, "step": 5949 }, { "epoch": 0.88, "learning_rate": 7.468359071352338e-07, "loss": 0.8062, "step": 5950 }, { "epoch": 0.88, "learning_rate": 7.450207175071356e-07, "loss": 0.7671, "step": 5951 }, { "epoch": 0.88, "learning_rate": 7.43207651123139e-07, "loss": 0.7773, "step": 5952 }, { "epoch": 0.88, "learning_rate": 7.413967083991925e-07, "loss": 0.7466, "step": 5953 }, { "epoch": 0.88, "learning_rate": 7.395878897507525e-07, "loss": 0.7881, "step": 5954 }, { "epoch": 0.88, "learning_rate": 7.377811955927928e-07, "loss": 0.7695, "step": 5955 }, { "epoch": 0.88, "learning_rate": 7.359766263397994e-07, "loss": 0.8096, "step": 5956 }, { "epoch": 0.88, "learning_rate": 7.341741824057713e-07, "loss": 0.7554, "step": 5957 }, { "epoch": 0.88, "learning_rate": 7.323738642042178e-07, "loss": 0.8398, "step": 5958 }, { "epoch": 0.88, "learning_rate": 7.305756721481605e-07, "loss": 0.6831, "step": 5959 }, { "epoch": 0.88, "learning_rate": 7.28779606650134e-07, "loss": 0.8091, "step": 5960 }, { "epoch": 0.88, "learning_rate": 7.269856681221854e-07, "loss": 0.8125, "step": 5961 }, { "epoch": 0.88, "learning_rate": 7.251938569758777e-07, "loss": 0.8403, "step": 5962 }, { "epoch": 0.88, "learning_rate": 7.234041736222752e-07, "loss": 0.813, "step": 5963 }, { "epoch": 0.88, "learning_rate": 7.216166184719653e-07, "loss": 0.7739, "step": 5964 }, { "epoch": 0.88, "learning_rate": 7.198311919350387e-07, "loss": 0.7349, "step": 5965 }, { "epoch": 0.88, "learning_rate": 7.180478944211055e-07, "loss": 0.7695, "step": 5966 }, { "epoch": 0.88, "learning_rate": 7.162667263392819e-07, "loss": 0.75, "step": 5967 }, { "epoch": 0.88, "learning_rate": 7.144876880981955e-07, "loss": 0.8022, "step": 5968 }, { "epoch": 0.88, "learning_rate": 7.127107801059896e-07, "loss": 0.6821, "step": 5969 }, { "epoch": 0.88, "learning_rate": 7.109360027703139e-07, "loss": 0.7891, "step": 5970 }, { "epoch": 0.88, "learning_rate": 7.091633564983314e-07, "loss": 0.7637, "step": 5971 }, { "epoch": 0.88, "learning_rate": 7.073928416967179e-07, "loss": 0.7622, "step": 5972 }, { "epoch": 0.88, "learning_rate": 7.056244587716565e-07, "loss": 0.7705, "step": 5973 }, { "epoch": 0.88, "learning_rate": 7.03858208128847e-07, "loss": 0.7974, "step": 5974 }, { "epoch": 0.88, "learning_rate": 7.020940901734918e-07, "loss": 0.7871, "step": 5975 }, { "epoch": 0.88, "learning_rate": 7.003321053103107e-07, "loss": 0.75, "step": 5976 }, { "epoch": 0.88, "learning_rate": 6.985722539435313e-07, "loss": 0.7803, "step": 5977 }, { "epoch": 0.88, "learning_rate": 6.96814536476893e-07, "loss": 0.7983, "step": 5978 }, { "epoch": 0.88, "learning_rate": 6.950589533136454e-07, "loss": 0.7969, "step": 5979 }, { "epoch": 0.88, "learning_rate": 6.933055048565473e-07, "loss": 0.287, "step": 5980 }, { "epoch": 0.88, "learning_rate": 6.915541915078672e-07, "loss": 0.8257, "step": 5981 }, { "epoch": 0.88, "learning_rate": 6.898050136693879e-07, "loss": 0.7734, "step": 5982 }, { "epoch": 0.88, "learning_rate": 6.880579717423985e-07, "loss": 0.7437, "step": 5983 }, { "epoch": 0.88, "learning_rate": 6.863130661276974e-07, "loss": 0.7778, "step": 5984 }, { "epoch": 0.89, "learning_rate": 6.845702972255974e-07, "loss": 0.7461, "step": 5985 }, { "epoch": 0.89, "learning_rate": 6.828296654359146e-07, "loss": 0.7461, "step": 5986 }, { "epoch": 0.89, "learning_rate": 6.810911711579826e-07, "loss": 0.7959, "step": 5987 }, { "epoch": 0.89, "learning_rate": 6.793548147906393e-07, "loss": 0.8179, "step": 5988 }, { "epoch": 0.89, "learning_rate": 6.776205967322303e-07, "loss": 0.7847, "step": 5989 }, { "epoch": 0.89, "learning_rate": 6.758885173806184e-07, "loss": 0.8037, "step": 5990 }, { "epoch": 0.89, "learning_rate": 6.741585771331672e-07, "loss": 0.7778, "step": 5991 }, { "epoch": 0.89, "learning_rate": 6.724307763867555e-07, "loss": 0.7534, "step": 5992 }, { "epoch": 0.89, "learning_rate": 6.707051155377686e-07, "loss": 0.8027, "step": 5993 }, { "epoch": 0.89, "learning_rate": 6.689815949820999e-07, "loss": 0.8223, "step": 5994 }, { "epoch": 0.89, "learning_rate": 6.672602151151564e-07, "loss": 0.8374, "step": 5995 }, { "epoch": 0.89, "learning_rate": 6.655409763318498e-07, "loss": 0.2925, "step": 5996 }, { "epoch": 0.89, "learning_rate": 6.638238790265983e-07, "loss": 0.8022, "step": 5997 }, { "epoch": 0.89, "learning_rate": 6.621089235933375e-07, "loss": 0.7437, "step": 5998 }, { "epoch": 0.89, "learning_rate": 6.603961104255018e-07, "loss": 0.6819, "step": 5999 }, { "epoch": 0.89, "learning_rate": 6.586854399160425e-07, "loss": 0.8242, "step": 6000 }, { "epoch": 0.89, "learning_rate": 6.569769124574133e-07, "loss": 0.8198, "step": 6001 }, { "epoch": 0.89, "learning_rate": 6.552705284415773e-07, "loss": 0.7773, "step": 6002 }, { "epoch": 0.89, "learning_rate": 6.535662882600091e-07, "loss": 0.7578, "step": 6003 }, { "epoch": 0.89, "learning_rate": 6.518641923036884e-07, "loss": 0.7607, "step": 6004 }, { "epoch": 0.89, "learning_rate": 6.501642409631059e-07, "loss": 0.7695, "step": 6005 }, { "epoch": 0.89, "learning_rate": 6.484664346282555e-07, "loss": 0.7598, "step": 6006 }, { "epoch": 0.89, "learning_rate": 6.46770773688642e-07, "loss": 0.769, "step": 6007 }, { "epoch": 0.89, "learning_rate": 6.45077258533281e-07, "loss": 0.7266, "step": 6008 }, { "epoch": 0.89, "learning_rate": 6.433858895506895e-07, "loss": 0.7817, "step": 6009 }, { "epoch": 0.89, "learning_rate": 6.416966671288949e-07, "loss": 0.3, "step": 6010 }, { "epoch": 0.89, "learning_rate": 6.400095916554361e-07, "loss": 0.8022, "step": 6011 }, { "epoch": 0.89, "learning_rate": 6.383246635173512e-07, "loss": 0.7744, "step": 6012 }, { "epoch": 0.89, "learning_rate": 6.366418831011955e-07, "loss": 0.7852, "step": 6013 }, { "epoch": 0.89, "learning_rate": 6.349612507930236e-07, "loss": 0.7915, "step": 6014 }, { "epoch": 0.89, "learning_rate": 6.332827669783981e-07, "loss": 0.8276, "step": 6015 }, { "epoch": 0.89, "learning_rate": 6.316064320423953e-07, "loss": 0.8169, "step": 6016 }, { "epoch": 0.89, "learning_rate": 6.299322463695912e-07, "loss": 0.8066, "step": 6017 }, { "epoch": 0.89, "learning_rate": 6.282602103440705e-07, "loss": 0.772, "step": 6018 }, { "epoch": 0.89, "learning_rate": 6.265903243494286e-07, "loss": 0.7725, "step": 6019 }, { "epoch": 0.89, "learning_rate": 6.249225887687615e-07, "loss": 0.7637, "step": 6020 }, { "epoch": 0.89, "learning_rate": 6.232570039846786e-07, "loss": 0.3281, "step": 6021 }, { "epoch": 0.89, "learning_rate": 6.215935703792908e-07, "loss": 0.7891, "step": 6022 }, { "epoch": 0.89, "learning_rate": 6.199322883342152e-07, "loss": 0.7485, "step": 6023 }, { "epoch": 0.89, "learning_rate": 6.182731582305801e-07, "loss": 0.7852, "step": 6024 }, { "epoch": 0.89, "learning_rate": 6.166161804490145e-07, "loss": 0.8208, "step": 6025 }, { "epoch": 0.89, "learning_rate": 6.14961355369661e-07, "loss": 0.7515, "step": 6026 }, { "epoch": 0.89, "learning_rate": 6.133086833721569e-07, "loss": 0.7397, "step": 6027 }, { "epoch": 0.89, "learning_rate": 6.116581648356557e-07, "loss": 0.7378, "step": 6028 }, { "epoch": 0.89, "learning_rate": 6.100098001388155e-07, "loss": 0.7812, "step": 6029 }, { "epoch": 0.89, "learning_rate": 6.083635896597951e-07, "loss": 0.7686, "step": 6030 }, { "epoch": 0.89, "learning_rate": 6.067195337762644e-07, "loss": 0.751, "step": 6031 }, { "epoch": 0.89, "learning_rate": 6.050776328653929e-07, "loss": 0.8057, "step": 6032 }, { "epoch": 0.89, "learning_rate": 6.034378873038638e-07, "loss": 0.7544, "step": 6033 }, { "epoch": 0.89, "learning_rate": 6.018002974678616e-07, "loss": 0.7402, "step": 6034 }, { "epoch": 0.89, "learning_rate": 6.001648637330726e-07, "loss": 0.7646, "step": 6035 }, { "epoch": 0.89, "learning_rate": 5.985315864746965e-07, "loss": 0.7451, "step": 6036 }, { "epoch": 0.89, "learning_rate": 5.969004660674294e-07, "loss": 0.7412, "step": 6037 }, { "epoch": 0.89, "learning_rate": 5.952715028854795e-07, "loss": 0.7871, "step": 6038 }, { "epoch": 0.89, "learning_rate": 5.936446973025612e-07, "loss": 0.7808, "step": 6039 }, { "epoch": 0.89, "learning_rate": 5.920200496918837e-07, "loss": 0.7573, "step": 6040 }, { "epoch": 0.89, "learning_rate": 5.903975604261725e-07, "loss": 0.7998, "step": 6041 }, { "epoch": 0.89, "learning_rate": 5.887772298776496e-07, "loss": 0.8203, "step": 6042 }, { "epoch": 0.89, "learning_rate": 5.871590584180497e-07, "loss": 0.7578, "step": 6043 }, { "epoch": 0.89, "learning_rate": 5.855430464186052e-07, "loss": 0.813, "step": 6044 }, { "epoch": 0.89, "learning_rate": 5.839291942500547e-07, "loss": 0.7197, "step": 6045 }, { "epoch": 0.89, "learning_rate": 5.823175022826444e-07, "loss": 0.8062, "step": 6046 }, { "epoch": 0.89, "learning_rate": 5.807079708861252e-07, "loss": 0.8008, "step": 6047 }, { "epoch": 0.89, "learning_rate": 5.791006004297451e-07, "loss": 0.8086, "step": 6048 }, { "epoch": 0.89, "learning_rate": 5.774953912822634e-07, "loss": 0.7817, "step": 6049 }, { "epoch": 0.89, "learning_rate": 5.758923438119413e-07, "loss": 0.7251, "step": 6050 }, { "epoch": 0.89, "learning_rate": 5.742914583865434e-07, "loss": 0.7737, "step": 6051 }, { "epoch": 0.9, "learning_rate": 5.726927353733424e-07, "loss": 0.751, "step": 6052 }, { "epoch": 0.9, "learning_rate": 5.710961751391075e-07, "loss": 0.8076, "step": 6053 }, { "epoch": 0.9, "learning_rate": 5.695017780501188e-07, "loss": 0.7871, "step": 6054 }, { "epoch": 0.9, "learning_rate": 5.679095444721538e-07, "loss": 0.8203, "step": 6055 }, { "epoch": 0.9, "learning_rate": 5.663194747705014e-07, "loss": 0.792, "step": 6056 }, { "epoch": 0.9, "learning_rate": 5.647315693099464e-07, "loss": 0.7876, "step": 6057 }, { "epoch": 0.9, "learning_rate": 5.631458284547797e-07, "loss": 0.8193, "step": 6058 }, { "epoch": 0.9, "learning_rate": 5.615622525688002e-07, "loss": 0.8027, "step": 6059 }, { "epoch": 0.9, "learning_rate": 5.59980842015303e-07, "loss": 0.7417, "step": 6060 }, { "epoch": 0.9, "learning_rate": 5.5840159715709e-07, "loss": 0.8267, "step": 6061 }, { "epoch": 0.9, "learning_rate": 5.568245183564669e-07, "loss": 0.7915, "step": 6062 }, { "epoch": 0.9, "learning_rate": 5.552496059752399e-07, "loss": 0.8042, "step": 6063 }, { "epoch": 0.9, "learning_rate": 5.536768603747222e-07, "loss": 0.7954, "step": 6064 }, { "epoch": 0.9, "learning_rate": 5.521062819157264e-07, "loss": 0.79, "step": 6065 }, { "epoch": 0.9, "learning_rate": 5.505378709585662e-07, "loss": 0.7383, "step": 6066 }, { "epoch": 0.9, "learning_rate": 5.489716278630652e-07, "loss": 0.7959, "step": 6067 }, { "epoch": 0.9, "learning_rate": 5.474075529885425e-07, "loss": 0.8091, "step": 6068 }, { "epoch": 0.9, "learning_rate": 5.458456466938233e-07, "loss": 0.7944, "step": 6069 }, { "epoch": 0.9, "learning_rate": 5.442859093372354e-07, "loss": 0.8867, "step": 6070 }, { "epoch": 0.9, "learning_rate": 5.42728341276606e-07, "loss": 0.8018, "step": 6071 }, { "epoch": 0.9, "learning_rate": 5.411729428692691e-07, "loss": 0.8062, "step": 6072 }, { "epoch": 0.9, "learning_rate": 5.396197144720572e-07, "loss": 0.7378, "step": 6073 }, { "epoch": 0.9, "learning_rate": 5.380686564413063e-07, "loss": 0.3372, "step": 6074 }, { "epoch": 0.9, "learning_rate": 5.365197691328561e-07, "loss": 0.7446, "step": 6075 }, { "epoch": 0.9, "learning_rate": 5.349730529020436e-07, "loss": 0.7666, "step": 6076 }, { "epoch": 0.9, "learning_rate": 5.33428508103715e-07, "loss": 0.7451, "step": 6077 }, { "epoch": 0.9, "learning_rate": 5.318861350922111e-07, "loss": 0.7031, "step": 6078 }, { "epoch": 0.9, "learning_rate": 5.303459342213779e-07, "loss": 0.8159, "step": 6079 }, { "epoch": 0.9, "learning_rate": 5.28807905844565e-07, "loss": 0.8384, "step": 6080 }, { "epoch": 0.9, "learning_rate": 5.272720503146201e-07, "loss": 0.8135, "step": 6081 }, { "epoch": 0.9, "learning_rate": 5.257383679838912e-07, "loss": 0.7637, "step": 6082 }, { "epoch": 0.9, "learning_rate": 5.242068592042349e-07, "loss": 0.8433, "step": 6083 }, { "epoch": 0.9, "learning_rate": 5.226775243269999e-07, "loss": 0.7788, "step": 6084 }, { "epoch": 0.9, "learning_rate": 5.211503637030435e-07, "loss": 0.8115, "step": 6085 }, { "epoch": 0.9, "learning_rate": 5.19625377682722e-07, "loss": 0.7979, "step": 6086 }, { "epoch": 0.9, "learning_rate": 5.181025666158889e-07, "loss": 0.7798, "step": 6087 }, { "epoch": 0.9, "learning_rate": 5.165819308519049e-07, "loss": 0.8052, "step": 6088 }, { "epoch": 0.9, "learning_rate": 5.150634707396263e-07, "loss": 0.793, "step": 6089 }, { "epoch": 0.9, "learning_rate": 5.135471866274167e-07, "loss": 0.7505, "step": 6090 }, { "epoch": 0.9, "learning_rate": 5.120330788631334e-07, "loss": 0.7764, "step": 6091 }, { "epoch": 0.9, "learning_rate": 5.105211477941374e-07, "loss": 0.7827, "step": 6092 }, { "epoch": 0.9, "learning_rate": 5.090113937672925e-07, "loss": 0.3127, "step": 6093 }, { "epoch": 0.9, "learning_rate": 5.075038171289603e-07, "loss": 0.7983, "step": 6094 }, { "epoch": 0.9, "learning_rate": 5.059984182250022e-07, "loss": 0.8105, "step": 6095 }, { "epoch": 0.9, "learning_rate": 5.044951974007838e-07, "loss": 0.7097, "step": 6096 }, { "epoch": 0.9, "learning_rate": 5.029941550011663e-07, "loss": 0.7954, "step": 6097 }, { "epoch": 0.9, "learning_rate": 5.014952913705162e-07, "loss": 0.7783, "step": 6098 }, { "epoch": 0.9, "learning_rate": 4.999986068526941e-07, "loss": 0.8042, "step": 6099 }, { "epoch": 0.9, "learning_rate": 4.985041017910653e-07, "loss": 0.7812, "step": 6100 }, { "epoch": 0.9, "learning_rate": 4.970117765284943e-07, "loss": 0.791, "step": 6101 }, { "epoch": 0.9, "learning_rate": 4.955216314073452e-07, "loss": 0.7891, "step": 6102 }, { "epoch": 0.9, "learning_rate": 4.940336667694834e-07, "loss": 0.729, "step": 6103 }, { "epoch": 0.9, "learning_rate": 4.925478829562668e-07, "loss": 0.7476, "step": 6104 }, { "epoch": 0.9, "learning_rate": 4.910642803085631e-07, "loss": 0.7998, "step": 6105 }, { "epoch": 0.9, "learning_rate": 4.895828591667351e-07, "loss": 0.7959, "step": 6106 }, { "epoch": 0.9, "learning_rate": 4.881036198706446e-07, "loss": 0.7612, "step": 6107 }, { "epoch": 0.9, "learning_rate": 4.866265627596522e-07, "loss": 0.8198, "step": 6108 }, { "epoch": 0.9, "learning_rate": 4.851516881726181e-07, "loss": 0.7729, "step": 6109 }, { "epoch": 0.9, "learning_rate": 4.83678996447906e-07, "loss": 0.7974, "step": 6110 }, { "epoch": 0.9, "learning_rate": 4.822084879233746e-07, "loss": 0.7256, "step": 6111 }, { "epoch": 0.9, "learning_rate": 4.807401629363806e-07, "loss": 0.7822, "step": 6112 }, { "epoch": 0.9, "learning_rate": 4.792740218237835e-07, "loss": 0.7954, "step": 6113 }, { "epoch": 0.9, "learning_rate": 4.778100649219398e-07, "loss": 0.7803, "step": 6114 }, { "epoch": 0.9, "learning_rate": 4.763482925667051e-07, "loss": 0.7588, "step": 6115 }, { "epoch": 0.9, "learning_rate": 4.74888705093437e-07, "loss": 0.7715, "step": 6116 }, { "epoch": 0.9, "learning_rate": 4.7343130283698193e-07, "loss": 0.8179, "step": 6117 }, { "epoch": 0.9, "learning_rate": 4.7197608613169685e-07, "loss": 0.8228, "step": 6118 }, { "epoch": 0.9, "learning_rate": 4.705230553114326e-07, "loss": 0.7744, "step": 6119 }, { "epoch": 0.91, "learning_rate": 4.6907221070953803e-07, "loss": 0.7969, "step": 6120 }, { "epoch": 0.91, "learning_rate": 4.6762355265885793e-07, "loss": 0.8008, "step": 6121 }, { "epoch": 0.91, "learning_rate": 4.661770814917399e-07, "loss": 0.7207, "step": 6122 }, { "epoch": 0.91, "learning_rate": 4.6473279754002844e-07, "loss": 0.7788, "step": 6123 }, { "epoch": 0.91, "learning_rate": 4.6329070113506847e-07, "loss": 0.7812, "step": 6124 }, { "epoch": 0.91, "learning_rate": 4.618507926076954e-07, "loss": 0.8198, "step": 6125 }, { "epoch": 0.91, "learning_rate": 4.604130722882516e-07, "loss": 0.7129, "step": 6126 }, { "epoch": 0.91, "learning_rate": 4.5897754050657104e-07, "loss": 0.295, "step": 6127 }, { "epoch": 0.91, "learning_rate": 4.575441975919914e-07, "loss": 0.7334, "step": 6128 }, { "epoch": 0.91, "learning_rate": 4.56113043873343e-07, "loss": 0.7563, "step": 6129 }, { "epoch": 0.91, "learning_rate": 4.546840796789553e-07, "loss": 0.751, "step": 6130 }, { "epoch": 0.91, "learning_rate": 4.532573053366585e-07, "loss": 0.7827, "step": 6131 }, { "epoch": 0.91, "learning_rate": 4.518327211737761e-07, "loss": 0.7075, "step": 6132 }, { "epoch": 0.91, "learning_rate": 4.504103275171323e-07, "loss": 0.7427, "step": 6133 }, { "epoch": 0.91, "learning_rate": 4.4899012469304725e-07, "loss": 0.705, "step": 6134 }, { "epoch": 0.91, "learning_rate": 4.4757211302733806e-07, "loss": 0.7104, "step": 6135 }, { "epoch": 0.91, "learning_rate": 4.4615629284532005e-07, "loss": 0.7373, "step": 6136 }, { "epoch": 0.91, "learning_rate": 4.447426644718067e-07, "loss": 0.7725, "step": 6137 }, { "epoch": 0.91, "learning_rate": 4.433312282311064e-07, "loss": 0.8159, "step": 6138 }, { "epoch": 0.91, "learning_rate": 4.4192198444702685e-07, "loss": 0.814, "step": 6139 }, { "epoch": 0.91, "learning_rate": 4.4051493344286934e-07, "loss": 0.7642, "step": 6140 }, { "epoch": 0.91, "learning_rate": 4.3911007554143683e-07, "loss": 0.7241, "step": 6141 }, { "epoch": 0.91, "learning_rate": 4.3770741106502704e-07, "loss": 0.7485, "step": 6142 }, { "epoch": 0.91, "learning_rate": 4.3630694033543255e-07, "loss": 0.7529, "step": 6143 }, { "epoch": 0.91, "learning_rate": 4.3490866367394525e-07, "loss": 0.7314, "step": 6144 }, { "epoch": 0.91, "learning_rate": 4.3351258140135186e-07, "loss": 0.7632, "step": 6145 }, { "epoch": 0.91, "learning_rate": 4.3211869383793735e-07, "loss": 0.8027, "step": 6146 }, { "epoch": 0.91, "learning_rate": 4.3072700130348255e-07, "loss": 0.7036, "step": 6147 }, { "epoch": 0.91, "learning_rate": 4.2933750411726425e-07, "loss": 0.2764, "step": 6148 }, { "epoch": 0.91, "learning_rate": 4.279502025980564e-07, "loss": 0.7617, "step": 6149 }, { "epoch": 0.91, "learning_rate": 4.2656509706412774e-07, "loss": 0.7266, "step": 6150 }, { "epoch": 0.91, "learning_rate": 4.2518218783324404e-07, "loss": 0.7661, "step": 6151 }, { "epoch": 0.91, "learning_rate": 4.2380147522266937e-07, "loss": 0.7979, "step": 6152 }, { "epoch": 0.91, "learning_rate": 4.2242295954915913e-07, "loss": 0.791, "step": 6153 }, { "epoch": 0.91, "learning_rate": 4.210466411289704e-07, "loss": 0.8145, "step": 6154 }, { "epoch": 0.91, "learning_rate": 4.1967252027785066e-07, "loss": 0.748, "step": 6155 }, { "epoch": 0.91, "learning_rate": 4.1830059731104657e-07, "loss": 0.7529, "step": 6156 }, { "epoch": 0.91, "learning_rate": 4.169308725433008e-07, "loss": 0.7529, "step": 6157 }, { "epoch": 0.91, "learning_rate": 4.1556334628884973e-07, "loss": 0.8018, "step": 6158 }, { "epoch": 0.91, "learning_rate": 4.1419801886142584e-07, "loss": 0.8169, "step": 6159 }, { "epoch": 0.91, "learning_rate": 4.128348905742585e-07, "loss": 0.7593, "step": 6160 }, { "epoch": 0.91, "learning_rate": 4.1147396174007094e-07, "loss": 0.7651, "step": 6161 }, { "epoch": 0.91, "learning_rate": 4.1011523267108333e-07, "loss": 0.8247, "step": 6162 }, { "epoch": 0.91, "learning_rate": 4.087587036790119e-07, "loss": 0.7837, "step": 6163 }, { "epoch": 0.91, "learning_rate": 4.0740437507506226e-07, "loss": 0.7563, "step": 6164 }, { "epoch": 0.91, "learning_rate": 4.060522471699435e-07, "loss": 0.7656, "step": 6165 }, { "epoch": 0.91, "learning_rate": 4.0470232027385424e-07, "loss": 0.8237, "step": 6166 }, { "epoch": 0.91, "learning_rate": 4.0335459469649117e-07, "loss": 0.7764, "step": 6167 }, { "epoch": 0.91, "learning_rate": 4.0200907074704367e-07, "loss": 0.7329, "step": 6168 }, { "epoch": 0.91, "learning_rate": 4.0066574873419697e-07, "loss": 0.793, "step": 6169 }, { "epoch": 0.91, "learning_rate": 3.9932462896613124e-07, "loss": 0.8481, "step": 6170 }, { "epoch": 0.91, "learning_rate": 3.979857117505226e-07, "loss": 0.2988, "step": 6171 }, { "epoch": 0.91, "learning_rate": 3.9664899739453753e-07, "loss": 0.7847, "step": 6172 }, { "epoch": 0.91, "learning_rate": 3.9531448620484304e-07, "loss": 0.7886, "step": 6173 }, { "epoch": 0.91, "learning_rate": 3.9398217848759637e-07, "loss": 0.7856, "step": 6174 }, { "epoch": 0.91, "learning_rate": 3.926520745484541e-07, "loss": 0.7461, "step": 6175 }, { "epoch": 0.91, "learning_rate": 3.913241746925589e-07, "loss": 0.7744, "step": 6176 }, { "epoch": 0.91, "learning_rate": 3.899984792245548e-07, "loss": 0.7983, "step": 6177 }, { "epoch": 0.91, "learning_rate": 3.8867498844857964e-07, "loss": 0.8662, "step": 6178 }, { "epoch": 0.91, "learning_rate": 3.873537026682617e-07, "loss": 0.7827, "step": 6179 }, { "epoch": 0.91, "learning_rate": 3.8603462218672837e-07, "loss": 0.79, "step": 6180 }, { "epoch": 0.91, "learning_rate": 3.847177473065955e-07, "loss": 0.7422, "step": 6181 }, { "epoch": 0.91, "learning_rate": 3.8340307832997693e-07, "loss": 0.7661, "step": 6182 }, { "epoch": 0.91, "learning_rate": 3.820906155584803e-07, "loss": 0.7427, "step": 6183 }, { "epoch": 0.91, "learning_rate": 3.8078035929320467e-07, "loss": 0.7788, "step": 6184 }, { "epoch": 0.91, "learning_rate": 3.7947230983474304e-07, "loss": 0.7656, "step": 6185 }, { "epoch": 0.91, "learning_rate": 3.781664674831875e-07, "loss": 0.7627, "step": 6186 }, { "epoch": 0.91, "learning_rate": 3.7686283253811516e-07, "loss": 0.7876, "step": 6187 }, { "epoch": 0.92, "learning_rate": 3.755614052986056e-07, "loss": 0.7729, "step": 6188 }, { "epoch": 0.92, "learning_rate": 3.742621860632245e-07, "loss": 0.7896, "step": 6189 }, { "epoch": 0.92, "learning_rate": 3.729651751300334e-07, "loss": 0.8398, "step": 6190 }, { "epoch": 0.92, "learning_rate": 3.71670372796592e-07, "loss": 0.7622, "step": 6191 }, { "epoch": 0.92, "learning_rate": 3.703777793599461e-07, "loss": 0.689, "step": 6192 }, { "epoch": 0.92, "learning_rate": 3.690873951166385e-07, "loss": 0.8022, "step": 6193 }, { "epoch": 0.92, "learning_rate": 3.6779922036270234e-07, "loss": 0.7554, "step": 6194 }, { "epoch": 0.92, "learning_rate": 3.66513255393669e-07, "loss": 0.3113, "step": 6195 }, { "epoch": 0.92, "learning_rate": 3.652295005045603e-07, "loss": 0.8052, "step": 6196 }, { "epoch": 0.92, "learning_rate": 3.639479559898895e-07, "loss": 0.7974, "step": 6197 }, { "epoch": 0.92, "learning_rate": 3.626686221436648e-07, "loss": 0.8062, "step": 6198 }, { "epoch": 0.92, "learning_rate": 3.613914992593825e-07, "loss": 0.7905, "step": 6199 }, { "epoch": 0.92, "learning_rate": 3.6011658763003944e-07, "loss": 0.7998, "step": 6200 }, { "epoch": 0.92, "learning_rate": 3.588438875481226e-07, "loss": 0.7329, "step": 6201 }, { "epoch": 0.92, "learning_rate": 3.575733993056063e-07, "loss": 0.7671, "step": 6202 }, { "epoch": 0.92, "learning_rate": 3.56305123193964e-07, "loss": 0.7915, "step": 6203 }, { "epoch": 0.92, "learning_rate": 3.550390595041564e-07, "loss": 0.7764, "step": 6204 }, { "epoch": 0.92, "learning_rate": 3.5377520852664217e-07, "loss": 0.8149, "step": 6205 }, { "epoch": 0.92, "learning_rate": 3.525135705513694e-07, "loss": 0.7881, "step": 6206 }, { "epoch": 0.92, "learning_rate": 3.512541458677754e-07, "loss": 0.7866, "step": 6207 }, { "epoch": 0.92, "learning_rate": 3.4999693476479577e-07, "loss": 0.7769, "step": 6208 }, { "epoch": 0.92, "learning_rate": 3.4874193753085426e-07, "loss": 0.792, "step": 6209 }, { "epoch": 0.92, "learning_rate": 3.474891544538683e-07, "loss": 0.7847, "step": 6210 }, { "epoch": 0.92, "learning_rate": 3.46238585821248e-07, "loss": 0.7295, "step": 6211 }, { "epoch": 0.92, "learning_rate": 3.4499023191989055e-07, "loss": 0.7559, "step": 6212 }, { "epoch": 0.92, "learning_rate": 3.437440930361924e-07, "loss": 0.7974, "step": 6213 }, { "epoch": 0.92, "learning_rate": 3.425001694560381e-07, "loss": 0.8213, "step": 6214 }, { "epoch": 0.92, "learning_rate": 3.412584614648018e-07, "loss": 0.7485, "step": 6215 }, { "epoch": 0.92, "learning_rate": 3.4001896934735436e-07, "loss": 0.8198, "step": 6216 }, { "epoch": 0.92, "learning_rate": 3.3878169338805276e-07, "loss": 0.7524, "step": 6217 }, { "epoch": 0.92, "learning_rate": 3.3754663387075116e-07, "loss": 0.7925, "step": 6218 }, { "epoch": 0.92, "learning_rate": 3.363137910787906e-07, "loss": 0.2733, "step": 6219 }, { "epoch": 0.92, "learning_rate": 3.3508316529500596e-07, "loss": 0.7168, "step": 6220 }, { "epoch": 0.92, "learning_rate": 3.3385475680172366e-07, "loss": 0.8062, "step": 6221 }, { "epoch": 0.92, "learning_rate": 3.3262856588076044e-07, "loss": 0.7134, "step": 6222 }, { "epoch": 0.92, "learning_rate": 3.314045928134224e-07, "loss": 0.7749, "step": 6223 }, { "epoch": 0.92, "learning_rate": 3.3018283788051386e-07, "loss": 0.8545, "step": 6224 }, { "epoch": 0.92, "learning_rate": 3.289633013623206e-07, "loss": 0.7837, "step": 6225 }, { "epoch": 0.92, "learning_rate": 3.277459835386276e-07, "loss": 0.7725, "step": 6226 }, { "epoch": 0.92, "learning_rate": 3.265308846887061e-07, "loss": 0.7607, "step": 6227 }, { "epoch": 0.92, "learning_rate": 3.253180050913185e-07, "loss": 0.8027, "step": 6228 }, { "epoch": 0.92, "learning_rate": 3.241073450247223e-07, "loss": 0.812, "step": 6229 }, { "epoch": 0.92, "learning_rate": 3.2289890476665975e-07, "loss": 0.8452, "step": 6230 }, { "epoch": 0.92, "learning_rate": 3.216926845943702e-07, "loss": 0.7432, "step": 6231 }, { "epoch": 0.92, "learning_rate": 3.2048868478457673e-07, "loss": 0.7498, "step": 6232 }, { "epoch": 0.92, "learning_rate": 3.192869056134984e-07, "loss": 0.7471, "step": 6233 }, { "epoch": 0.92, "learning_rate": 3.180873473568447e-07, "loss": 0.7617, "step": 6234 }, { "epoch": 0.92, "learning_rate": 3.168900102898109e-07, "loss": 0.7681, "step": 6235 }, { "epoch": 0.92, "learning_rate": 3.1569489468708746e-07, "loss": 0.7959, "step": 6236 }, { "epoch": 0.92, "learning_rate": 3.145020008228539e-07, "loss": 0.7051, "step": 6237 }, { "epoch": 0.92, "learning_rate": 3.133113289707779e-07, "loss": 0.7495, "step": 6238 }, { "epoch": 0.92, "learning_rate": 3.12122879404021e-07, "loss": 0.8184, "step": 6239 }, { "epoch": 0.92, "learning_rate": 3.1093665239523175e-07, "loss": 0.7485, "step": 6240 }, { "epoch": 0.92, "learning_rate": 3.097526482165503e-07, "loss": 0.7705, "step": 6241 }, { "epoch": 0.92, "learning_rate": 3.0857086713960706e-07, "loss": 0.7476, "step": 6242 }, { "epoch": 0.92, "learning_rate": 3.073913094355219e-07, "loss": 0.8213, "step": 6243 }, { "epoch": 0.92, "learning_rate": 3.0621397537490494e-07, "loss": 0.7749, "step": 6244 }, { "epoch": 0.92, "learning_rate": 3.050388652278558e-07, "loss": 0.7725, "step": 6245 }, { "epoch": 0.92, "learning_rate": 3.038659792639631e-07, "loss": 0.8232, "step": 6246 }, { "epoch": 0.92, "learning_rate": 3.0269531775230733e-07, "loss": 0.7781, "step": 6247 }, { "epoch": 0.92, "learning_rate": 3.0152688096145687e-07, "loss": 0.8115, "step": 6248 }, { "epoch": 0.92, "learning_rate": 3.003606691594696e-07, "loss": 0.7764, "step": 6249 }, { "epoch": 0.92, "learning_rate": 2.991966826138959e-07, "loss": 0.7812, "step": 6250 }, { "epoch": 0.92, "learning_rate": 2.9803492159177103e-07, "loss": 0.7275, "step": 6251 }, { "epoch": 0.92, "learning_rate": 2.968753863596241e-07, "loss": 0.7544, "step": 6252 }, { "epoch": 0.92, "learning_rate": 2.9571807718347e-07, "loss": 0.8687, "step": 6253 }, { "epoch": 0.92, "learning_rate": 2.945629943288131e-07, "loss": 0.7991, "step": 6254 }, { "epoch": 0.93, "learning_rate": 2.9341013806065243e-07, "loss": 0.7544, "step": 6255 }, { "epoch": 0.93, "learning_rate": 2.9225950864346876e-07, "loss": 0.7642, "step": 6256 }, { "epoch": 0.93, "learning_rate": 2.911111063412353e-07, "loss": 0.7695, "step": 6257 }, { "epoch": 0.93, "learning_rate": 2.8996493141741686e-07, "loss": 0.7661, "step": 6258 }, { "epoch": 0.93, "learning_rate": 2.888209841349632e-07, "loss": 0.7734, "step": 6259 }, { "epoch": 0.93, "learning_rate": 2.8767926475631426e-07, "loss": 0.8086, "step": 6260 }, { "epoch": 0.93, "learning_rate": 2.865397735434006e-07, "loss": 0.7383, "step": 6261 }, { "epoch": 0.93, "learning_rate": 2.8540251075763857e-07, "loss": 0.8062, "step": 6262 }, { "epoch": 0.93, "learning_rate": 2.8426747665993517e-07, "loss": 0.291, "step": 6263 }, { "epoch": 0.93, "learning_rate": 2.831346715106864e-07, "loss": 0.791, "step": 6264 }, { "epoch": 0.93, "learning_rate": 2.8200409556977894e-07, "loss": 0.8237, "step": 6265 }, { "epoch": 0.93, "learning_rate": 2.8087574909657965e-07, "loss": 0.7979, "step": 6266 }, { "epoch": 0.93, "learning_rate": 2.7974963234995266e-07, "loss": 0.7534, "step": 6267 }, { "epoch": 0.93, "learning_rate": 2.7862574558825017e-07, "loss": 0.8022, "step": 6268 }, { "epoch": 0.93, "learning_rate": 2.7750408906930704e-07, "loss": 0.8599, "step": 6269 }, { "epoch": 0.93, "learning_rate": 2.7638466305045073e-07, "loss": 0.7578, "step": 6270 }, { "epoch": 0.93, "learning_rate": 2.752674677884948e-07, "loss": 0.7822, "step": 6271 }, { "epoch": 0.93, "learning_rate": 2.74152503539743e-07, "loss": 0.7349, "step": 6272 }, { "epoch": 0.93, "learning_rate": 2.7303977055998743e-07, "loss": 0.7554, "step": 6273 }, { "epoch": 0.93, "learning_rate": 2.719292691045061e-07, "loss": 0.8823, "step": 6274 }, { "epoch": 0.93, "learning_rate": 2.708209994280675e-07, "loss": 0.7241, "step": 6275 }, { "epoch": 0.93, "learning_rate": 2.697149617849237e-07, "loss": 0.7056, "step": 6276 }, { "epoch": 0.93, "learning_rate": 2.6861115642882073e-07, "loss": 0.7197, "step": 6277 }, { "epoch": 0.93, "learning_rate": 2.675095836129915e-07, "loss": 0.8042, "step": 6278 }, { "epoch": 0.93, "learning_rate": 2.6641024359015056e-07, "loss": 0.7329, "step": 6279 }, { "epoch": 0.93, "learning_rate": 2.653131366125061e-07, "loss": 0.769, "step": 6280 }, { "epoch": 0.93, "learning_rate": 2.6421826293175357e-07, "loss": 0.7993, "step": 6281 }, { "epoch": 0.93, "learning_rate": 2.6312562279907416e-07, "loss": 0.7617, "step": 6282 }, { "epoch": 0.93, "learning_rate": 2.620352164651374e-07, "loss": 0.3093, "step": 6283 }, { "epoch": 0.93, "learning_rate": 2.6094704418009984e-07, "loss": 0.7939, "step": 6284 }, { "epoch": 0.93, "learning_rate": 2.5986110619360626e-07, "loss": 0.7505, "step": 6285 }, { "epoch": 0.93, "learning_rate": 2.587774027547918e-07, "loss": 0.8442, "step": 6286 }, { "epoch": 0.93, "learning_rate": 2.576959341122698e-07, "loss": 0.7954, "step": 6287 }, { "epoch": 0.93, "learning_rate": 2.566167005141529e-07, "loss": 0.7607, "step": 6288 }, { "epoch": 0.93, "learning_rate": 2.555397022080297e-07, "loss": 0.708, "step": 6289 }, { "epoch": 0.93, "learning_rate": 2.544649394409848e-07, "loss": 0.7588, "step": 6290 }, { "epoch": 0.93, "learning_rate": 2.5339241245958767e-07, "loss": 0.7881, "step": 6291 }, { "epoch": 0.93, "learning_rate": 2.523221215098881e-07, "loss": 0.7671, "step": 6292 }, { "epoch": 0.93, "learning_rate": 2.5125406683743417e-07, "loss": 0.8325, "step": 6293 }, { "epoch": 0.93, "learning_rate": 2.5018824868725087e-07, "loss": 0.7759, "step": 6294 }, { "epoch": 0.93, "learning_rate": 2.491246673038572e-07, "loss": 0.7559, "step": 6295 }, { "epoch": 0.93, "learning_rate": 2.480633229312557e-07, "loss": 0.7769, "step": 6296 }, { "epoch": 0.93, "learning_rate": 2.4700421581293375e-07, "loss": 0.8223, "step": 6297 }, { "epoch": 0.93, "learning_rate": 2.4594734619187155e-07, "loss": 0.7598, "step": 6298 }, { "epoch": 0.93, "learning_rate": 2.4489271431053066e-07, "loss": 0.7798, "step": 6299 }, { "epoch": 0.93, "learning_rate": 2.438403204108597e-07, "loss": 0.79, "step": 6300 }, { "epoch": 0.93, "learning_rate": 2.427901647342967e-07, "loss": 0.7676, "step": 6301 }, { "epoch": 0.93, "learning_rate": 2.4174224752176345e-07, "loss": 0.7876, "step": 6302 }, { "epoch": 0.93, "learning_rate": 2.40696569013672e-07, "loss": 0.7393, "step": 6303 }, { "epoch": 0.93, "learning_rate": 2.396531294499149e-07, "loss": 0.8276, "step": 6304 }, { "epoch": 0.93, "learning_rate": 2.386119290698752e-07, "loss": 0.8145, "step": 6305 }, { "epoch": 0.93, "learning_rate": 2.3757296811242281e-07, "loss": 0.7842, "step": 6306 }, { "epoch": 0.93, "learning_rate": 2.3653624681591048e-07, "loss": 0.7954, "step": 6307 }, { "epoch": 0.93, "learning_rate": 2.3550176541818015e-07, "loss": 0.7358, "step": 6308 }, { "epoch": 0.93, "learning_rate": 2.3446952415655977e-07, "loss": 0.7168, "step": 6309 }, { "epoch": 0.93, "learning_rate": 2.334395232678599e-07, "loss": 0.7649, "step": 6310 }, { "epoch": 0.93, "learning_rate": 2.3241176298838153e-07, "loss": 0.7651, "step": 6311 }, { "epoch": 0.93, "learning_rate": 2.3138624355391049e-07, "loss": 0.3406, "step": 6312 }, { "epoch": 0.93, "learning_rate": 2.3036296519971413e-07, "loss": 0.6938, "step": 6313 }, { "epoch": 0.93, "learning_rate": 2.2934192816055355e-07, "loss": 0.7646, "step": 6314 }, { "epoch": 0.93, "learning_rate": 2.283231326706681e-07, "loss": 0.3262, "step": 6315 }, { "epoch": 0.93, "learning_rate": 2.273065789637896e-07, "loss": 0.769, "step": 6316 }, { "epoch": 0.93, "learning_rate": 2.2629226727312936e-07, "loss": 0.7549, "step": 6317 }, { "epoch": 0.93, "learning_rate": 2.2528019783138678e-07, "loss": 0.2981, "step": 6318 }, { "epoch": 0.93, "learning_rate": 2.2427037087074942e-07, "loss": 0.7847, "step": 6319 }, { "epoch": 0.93, "learning_rate": 2.2326278662288648e-07, "loss": 0.7861, "step": 6320 }, { "epoch": 0.93, "learning_rate": 2.2225744531895632e-07, "loss": 0.7388, "step": 6321 }, { "epoch": 0.93, "learning_rate": 2.2125434718959892e-07, "loss": 0.7183, "step": 6322 }, { "epoch": 0.94, "learning_rate": 2.2025349246494021e-07, "loss": 0.8096, "step": 6323 }, { "epoch": 0.94, "learning_rate": 2.192548813745965e-07, "loss": 0.7681, "step": 6324 }, { "epoch": 0.94, "learning_rate": 2.182585141476623e-07, "loss": 0.7969, "step": 6325 }, { "epoch": 0.94, "learning_rate": 2.1726439101272145e-07, "loss": 0.7485, "step": 6326 }, { "epoch": 0.94, "learning_rate": 2.1627251219784262e-07, "loss": 0.7944, "step": 6327 }, { "epoch": 0.94, "learning_rate": 2.1528287793057934e-07, "loss": 0.3145, "step": 6328 }, { "epoch": 0.94, "learning_rate": 2.142954884379689e-07, "loss": 0.7344, "step": 6329 }, { "epoch": 0.94, "learning_rate": 2.133103439465356e-07, "loss": 0.7314, "step": 6330 }, { "epoch": 0.94, "learning_rate": 2.123274446822865e-07, "loss": 0.7466, "step": 6331 }, { "epoch": 0.94, "learning_rate": 2.1134679087071676e-07, "loss": 0.3016, "step": 6332 }, { "epoch": 0.94, "learning_rate": 2.1036838273680305e-07, "loss": 0.3137, "step": 6333 }, { "epoch": 0.94, "learning_rate": 2.0939222050500806e-07, "loss": 0.3126, "step": 6334 }, { "epoch": 0.94, "learning_rate": 2.0841830439928045e-07, "loss": 0.686, "step": 6335 }, { "epoch": 0.94, "learning_rate": 2.074466346430515e-07, "loss": 0.771, "step": 6336 }, { "epoch": 0.94, "learning_rate": 2.0647721145923948e-07, "loss": 0.7632, "step": 6337 }, { "epoch": 0.94, "learning_rate": 2.0551003507024546e-07, "loss": 0.7988, "step": 6338 }, { "epoch": 0.94, "learning_rate": 2.0454510569795416e-07, "loss": 0.8389, "step": 6339 }, { "epoch": 0.94, "learning_rate": 2.0358242356373735e-07, "loss": 0.772, "step": 6340 }, { "epoch": 0.94, "learning_rate": 2.0262198888845064e-07, "loss": 0.7661, "step": 6341 }, { "epoch": 0.94, "learning_rate": 2.016638018924344e-07, "loss": 0.749, "step": 6342 }, { "epoch": 0.94, "learning_rate": 2.0070786279550836e-07, "loss": 0.7695, "step": 6343 }, { "epoch": 0.94, "learning_rate": 1.9975417181698487e-07, "loss": 0.7046, "step": 6344 }, { "epoch": 0.94, "learning_rate": 1.9880272917565447e-07, "loss": 0.8198, "step": 6345 }, { "epoch": 0.94, "learning_rate": 1.9785353508979476e-07, "loss": 0.7427, "step": 6346 }, { "epoch": 0.94, "learning_rate": 1.9690658977716382e-07, "loss": 0.7441, "step": 6347 }, { "epoch": 0.94, "learning_rate": 1.9596189345501003e-07, "loss": 0.7583, "step": 6348 }, { "epoch": 0.94, "learning_rate": 1.95019446340059e-07, "loss": 0.771, "step": 6349 }, { "epoch": 0.94, "learning_rate": 1.9407924864852657e-07, "loss": 0.7466, "step": 6350 }, { "epoch": 0.94, "learning_rate": 1.9314130059610693e-07, "loss": 0.7778, "step": 6351 }, { "epoch": 0.94, "learning_rate": 1.9220560239798235e-07, "loss": 0.7456, "step": 6352 }, { "epoch": 0.94, "learning_rate": 1.9127215426881673e-07, "loss": 0.7061, "step": 6353 }, { "epoch": 0.94, "learning_rate": 1.9034095642275763e-07, "loss": 0.7964, "step": 6354 }, { "epoch": 0.94, "learning_rate": 1.894120090734397e-07, "loss": 0.8223, "step": 6355 }, { "epoch": 0.94, "learning_rate": 1.8848531243397471e-07, "loss": 0.7925, "step": 6356 }, { "epoch": 0.94, "learning_rate": 1.8756086671696594e-07, "loss": 0.7881, "step": 6357 }, { "epoch": 0.94, "learning_rate": 1.8663867213449373e-07, "loss": 0.7837, "step": 6358 }, { "epoch": 0.94, "learning_rate": 1.857187288981266e-07, "loss": 0.6865, "step": 6359 }, { "epoch": 0.94, "learning_rate": 1.8480103721891462e-07, "loss": 0.7979, "step": 6360 }, { "epoch": 0.94, "learning_rate": 1.838855973073883e-07, "loss": 0.7568, "step": 6361 }, { "epoch": 0.94, "learning_rate": 1.8297240937356742e-07, "loss": 0.2937, "step": 6362 }, { "epoch": 0.94, "learning_rate": 1.8206147362695214e-07, "loss": 0.7988, "step": 6363 }, { "epoch": 0.94, "learning_rate": 1.8115279027652533e-07, "loss": 0.7593, "step": 6364 }, { "epoch": 0.94, "learning_rate": 1.8024635953075353e-07, "loss": 0.7808, "step": 6365 }, { "epoch": 0.94, "learning_rate": 1.793421815975871e-07, "loss": 0.8208, "step": 6366 }, { "epoch": 0.94, "learning_rate": 1.784402566844601e-07, "loss": 0.8086, "step": 6367 }, { "epoch": 0.94, "learning_rate": 1.775405849982892e-07, "loss": 0.7974, "step": 6368 }, { "epoch": 0.94, "learning_rate": 1.7664316674547155e-07, "loss": 0.771, "step": 6369 }, { "epoch": 0.94, "learning_rate": 1.7574800213189137e-07, "loss": 0.7505, "step": 6370 }, { "epoch": 0.94, "learning_rate": 1.7485509136291322e-07, "loss": 0.7974, "step": 6371 }, { "epoch": 0.94, "learning_rate": 1.7396443464338663e-07, "loss": 0.2888, "step": 6372 }, { "epoch": 0.94, "learning_rate": 1.7307603217764257e-07, "loss": 0.7651, "step": 6373 }, { "epoch": 0.94, "learning_rate": 1.7218988416949355e-07, "loss": 0.3091, "step": 6374 }, { "epoch": 0.94, "learning_rate": 1.713059908222392e-07, "loss": 0.7417, "step": 6375 }, { "epoch": 0.94, "learning_rate": 1.704243523386573e-07, "loss": 0.7471, "step": 6376 }, { "epoch": 0.94, "learning_rate": 1.6954496892101047e-07, "loss": 0.814, "step": 6377 }, { "epoch": 0.94, "learning_rate": 1.6866784077104402e-07, "loss": 0.7554, "step": 6378 }, { "epoch": 0.94, "learning_rate": 1.677929680899848e-07, "loss": 0.7637, "step": 6379 }, { "epoch": 0.94, "learning_rate": 1.6692035107854332e-07, "loss": 0.7402, "step": 6380 }, { "epoch": 0.94, "learning_rate": 1.660499899369139e-07, "loss": 0.7583, "step": 6381 }, { "epoch": 0.94, "learning_rate": 1.6518188486476794e-07, "loss": 0.7783, "step": 6382 }, { "epoch": 0.94, "learning_rate": 1.6431603606126722e-07, "loss": 0.8237, "step": 6383 }, { "epoch": 0.94, "learning_rate": 1.6345244372504842e-07, "loss": 0.8003, "step": 6384 }, { "epoch": 0.94, "learning_rate": 1.6259110805423528e-07, "loss": 0.7998, "step": 6385 }, { "epoch": 0.94, "learning_rate": 1.6173202924643305e-07, "loss": 0.7896, "step": 6386 }, { "epoch": 0.94, "learning_rate": 1.6087520749872633e-07, "loss": 0.3005, "step": 6387 }, { "epoch": 0.94, "learning_rate": 1.600206430076856e-07, "loss": 0.772, "step": 6388 }, { "epoch": 0.94, "learning_rate": 1.5916833596936188e-07, "loss": 0.8271, "step": 6389 }, { "epoch": 0.94, "learning_rate": 1.583182865792876e-07, "loss": 0.8267, "step": 6390 }, { "epoch": 0.95, "learning_rate": 1.5747049503248013e-07, "loss": 0.7622, "step": 6391 }, { "epoch": 0.95, "learning_rate": 1.5662496152343275e-07, "loss": 0.7642, "step": 6392 }, { "epoch": 0.95, "learning_rate": 1.557816862461292e-07, "loss": 0.7627, "step": 6393 }, { "epoch": 0.95, "learning_rate": 1.549406693940292e-07, "loss": 0.856, "step": 6394 }, { "epoch": 0.95, "learning_rate": 1.5410191116007388e-07, "loss": 0.7471, "step": 6395 }, { "epoch": 0.95, "learning_rate": 1.532654117366894e-07, "loss": 0.7017, "step": 6396 }, { "epoch": 0.95, "learning_rate": 1.5243117131578445e-07, "loss": 0.7603, "step": 6397 }, { "epoch": 0.95, "learning_rate": 1.5159919008874368e-07, "loss": 0.7812, "step": 6398 }, { "epoch": 0.95, "learning_rate": 1.5076946824644002e-07, "loss": 0.748, "step": 6399 }, { "epoch": 0.95, "learning_rate": 1.4994200597922337e-07, "loss": 0.8384, "step": 6400 }, { "epoch": 0.95, "learning_rate": 1.491168034769297e-07, "loss": 0.7876, "step": 6401 }, { "epoch": 0.95, "learning_rate": 1.4829386092887199e-07, "loss": 0.7344, "step": 6402 }, { "epoch": 0.95, "learning_rate": 1.4747317852384702e-07, "loss": 0.7803, "step": 6403 }, { "epoch": 0.95, "learning_rate": 1.4665475645013417e-07, "loss": 0.79, "step": 6404 }, { "epoch": 0.95, "learning_rate": 1.458385948954899e-07, "loss": 0.7031, "step": 6405 }, { "epoch": 0.95, "learning_rate": 1.4502469404715892e-07, "loss": 0.7744, "step": 6406 }, { "epoch": 0.95, "learning_rate": 1.442130540918607e-07, "loss": 0.2791, "step": 6407 }, { "epoch": 0.95, "learning_rate": 1.4340367521579969e-07, "loss": 0.7959, "step": 6408 }, { "epoch": 0.95, "learning_rate": 1.4259655760466063e-07, "loss": 0.7827, "step": 6409 }, { "epoch": 0.95, "learning_rate": 1.4179170144360876e-07, "loss": 0.8018, "step": 6410 }, { "epoch": 0.95, "learning_rate": 1.4098910691729195e-07, "loss": 0.7383, "step": 6411 }, { "epoch": 0.95, "learning_rate": 1.4018877420983956e-07, "loss": 0.7388, "step": 6412 }, { "epoch": 0.95, "learning_rate": 1.3939070350485918e-07, "loss": 0.8081, "step": 6413 }, { "epoch": 0.95, "learning_rate": 1.385948949854432e-07, "loss": 0.7114, "step": 6414 }, { "epoch": 0.95, "learning_rate": 1.378013488341612e-07, "loss": 0.7842, "step": 6415 }, { "epoch": 0.95, "learning_rate": 1.3701006523306747e-07, "loss": 0.7432, "step": 6416 }, { "epoch": 0.95, "learning_rate": 1.3622104436369465e-07, "loss": 0.7568, "step": 6417 }, { "epoch": 0.95, "learning_rate": 1.3543428640705568e-07, "loss": 0.8184, "step": 6418 }, { "epoch": 0.95, "learning_rate": 1.3464979154364844e-07, "loss": 0.7354, "step": 6419 }, { "epoch": 0.95, "learning_rate": 1.338675599534478e-07, "loss": 0.7827, "step": 6420 }, { "epoch": 0.95, "learning_rate": 1.3308759181590912e-07, "loss": 0.7749, "step": 6421 }, { "epoch": 0.95, "learning_rate": 1.323098873099715e-07, "loss": 0.7783, "step": 6422 }, { "epoch": 0.95, "learning_rate": 1.3153444661405335e-07, "loss": 0.7617, "step": 6423 }, { "epoch": 0.95, "learning_rate": 1.307612699060523e-07, "loss": 0.7549, "step": 6424 }, { "epoch": 0.95, "learning_rate": 1.2999035736334874e-07, "loss": 0.7759, "step": 6425 }, { "epoch": 0.95, "learning_rate": 1.2922170916280118e-07, "loss": 0.8345, "step": 6426 }, { "epoch": 0.95, "learning_rate": 1.2845532548075301e-07, "loss": 0.7354, "step": 6427 }, { "epoch": 0.95, "learning_rate": 1.2769120649302247e-07, "loss": 0.7207, "step": 6428 }, { "epoch": 0.95, "learning_rate": 1.2692935237491154e-07, "loss": 0.8125, "step": 6429 }, { "epoch": 0.95, "learning_rate": 1.2616976330120268e-07, "loss": 0.8394, "step": 6430 }, { "epoch": 0.95, "learning_rate": 1.2541243944615755e-07, "loss": 0.7607, "step": 6431 }, { "epoch": 0.95, "learning_rate": 1.2465738098352053e-07, "loss": 0.7373, "step": 6432 }, { "epoch": 0.95, "learning_rate": 1.2390458808651085e-07, "loss": 0.7422, "step": 6433 }, { "epoch": 0.95, "learning_rate": 1.2315406092783478e-07, "loss": 0.8135, "step": 6434 }, { "epoch": 0.95, "learning_rate": 1.2240579967967348e-07, "loss": 0.7725, "step": 6435 }, { "epoch": 0.95, "learning_rate": 1.2165980451369186e-07, "loss": 0.7603, "step": 6436 }, { "epoch": 0.95, "learning_rate": 1.2091607560103304e-07, "loss": 0.7656, "step": 6437 }, { "epoch": 0.95, "learning_rate": 1.2017461311231938e-07, "loss": 0.7905, "step": 6438 }, { "epoch": 0.95, "learning_rate": 1.1943541721765596e-07, "loss": 0.7446, "step": 6439 }, { "epoch": 0.95, "learning_rate": 1.186984880866271e-07, "loss": 0.7905, "step": 6440 }, { "epoch": 0.95, "learning_rate": 1.1796382588829425e-07, "loss": 0.811, "step": 6441 }, { "epoch": 0.95, "learning_rate": 1.1723143079120369e-07, "loss": 0.793, "step": 6442 }, { "epoch": 0.95, "learning_rate": 1.1650130296337548e-07, "loss": 0.687, "step": 6443 }, { "epoch": 0.95, "learning_rate": 1.157734425723156e-07, "loss": 0.7534, "step": 6444 }, { "epoch": 0.95, "learning_rate": 1.1504784978500606e-07, "loss": 0.7969, "step": 6445 }, { "epoch": 0.95, "learning_rate": 1.1432452476790922e-07, "loss": 0.8535, "step": 6446 }, { "epoch": 0.95, "learning_rate": 1.1360346768696907e-07, "loss": 0.7739, "step": 6447 }, { "epoch": 0.95, "learning_rate": 1.1288467870760766e-07, "loss": 0.7832, "step": 6448 }, { "epoch": 0.95, "learning_rate": 1.1216815799472647e-07, "loss": 0.7959, "step": 6449 }, { "epoch": 0.95, "learning_rate": 1.1145390571270731e-07, "loss": 0.7817, "step": 6450 }, { "epoch": 0.95, "learning_rate": 1.1074192202541134e-07, "loss": 0.8169, "step": 6451 }, { "epoch": 0.95, "learning_rate": 1.1003220709617901e-07, "loss": 0.7349, "step": 6452 }, { "epoch": 0.95, "learning_rate": 1.093247610878323e-07, "loss": 0.8022, "step": 6453 }, { "epoch": 0.95, "learning_rate": 1.0861958416266805e-07, "loss": 0.7656, "step": 6454 }, { "epoch": 0.95, "learning_rate": 1.0791667648246796e-07, "loss": 0.7686, "step": 6455 }, { "epoch": 0.95, "learning_rate": 1.0721603820848748e-07, "loss": 0.8315, "step": 6456 }, { "epoch": 0.95, "learning_rate": 1.0651766950146913e-07, "loss": 0.7412, "step": 6457 }, { "epoch": 0.96, "learning_rate": 1.058215705216259e-07, "loss": 0.7661, "step": 6458 }, { "epoch": 0.96, "learning_rate": 1.0512774142865667e-07, "loss": 0.7651, "step": 6459 }, { "epoch": 0.96, "learning_rate": 1.0443618238173636e-07, "loss": 0.2893, "step": 6460 }, { "epoch": 0.96, "learning_rate": 1.0374689353952027e-07, "loss": 0.834, "step": 6461 }, { "epoch": 0.96, "learning_rate": 1.0305987506014193e-07, "loss": 0.7563, "step": 6462 }, { "epoch": 0.96, "learning_rate": 1.0237512710121522e-07, "loss": 0.8105, "step": 6463 }, { "epoch": 0.96, "learning_rate": 1.0169264981983229e-07, "loss": 0.8193, "step": 6464 }, { "epoch": 0.96, "learning_rate": 1.0101244337256567e-07, "loss": 0.8037, "step": 6465 }, { "epoch": 0.96, "learning_rate": 1.0033450791546495e-07, "loss": 0.7891, "step": 6466 }, { "epoch": 0.96, "learning_rate": 9.965884360405909e-08, "loss": 0.7817, "step": 6467 }, { "epoch": 0.96, "learning_rate": 9.898545059335852e-08, "loss": 0.8115, "step": 6468 }, { "epoch": 0.96, "learning_rate": 9.831432903784744e-08, "loss": 0.7871, "step": 6469 }, { "epoch": 0.96, "learning_rate": 9.764547909149602e-08, "loss": 0.7695, "step": 6470 }, { "epoch": 0.96, "learning_rate": 9.697890090774597e-08, "loss": 0.3335, "step": 6471 }, { "epoch": 0.96, "learning_rate": 9.631459463952275e-08, "loss": 0.752, "step": 6472 }, { "epoch": 0.96, "learning_rate": 9.565256043922999e-08, "loss": 0.7808, "step": 6473 }, { "epoch": 0.96, "learning_rate": 9.499279845874843e-08, "loss": 0.7437, "step": 6474 }, { "epoch": 0.96, "learning_rate": 9.433530884943698e-08, "loss": 0.7686, "step": 6475 }, { "epoch": 0.96, "learning_rate": 9.368009176213722e-08, "loss": 0.7593, "step": 6476 }, { "epoch": 0.96, "learning_rate": 9.302714734716334e-08, "loss": 0.8677, "step": 6477 }, { "epoch": 0.96, "learning_rate": 9.237647575431553e-08, "loss": 0.7539, "step": 6478 }, { "epoch": 0.96, "learning_rate": 9.172807713286547e-08, "loss": 0.7817, "step": 6479 }, { "epoch": 0.96, "learning_rate": 9.108195163156642e-08, "loss": 0.7944, "step": 6480 }, { "epoch": 0.96, "learning_rate": 9.0438099398652e-08, "loss": 0.749, "step": 6481 }, { "epoch": 0.96, "learning_rate": 8.979652058183185e-08, "loss": 0.8467, "step": 6482 }, { "epoch": 0.96, "learning_rate": 8.91572153282938e-08, "loss": 0.7822, "step": 6483 }, { "epoch": 0.96, "learning_rate": 8.852018378470606e-08, "loss": 0.7695, "step": 6484 }, { "epoch": 0.96, "learning_rate": 8.788542609721284e-08, "loss": 0.7852, "step": 6485 }, { "epoch": 0.96, "learning_rate": 8.725294241143988e-08, "loss": 0.8262, "step": 6486 }, { "epoch": 0.96, "learning_rate": 8.66227328724878e-08, "loss": 0.771, "step": 6487 }, { "epoch": 0.96, "learning_rate": 8.599479762493756e-08, "loss": 0.769, "step": 6488 }, { "epoch": 0.96, "learning_rate": 8.536913681284731e-08, "loss": 0.728, "step": 6489 }, { "epoch": 0.96, "learning_rate": 8.474575057975332e-08, "loss": 0.8247, "step": 6490 }, { "epoch": 0.96, "learning_rate": 8.41246390686734e-08, "loss": 0.7329, "step": 6491 }, { "epoch": 0.96, "learning_rate": 8.350580242209805e-08, "loss": 0.7534, "step": 6492 }, { "epoch": 0.96, "learning_rate": 8.288924078199922e-08, "loss": 0.7881, "step": 6493 }, { "epoch": 0.96, "learning_rate": 8.227495428982601e-08, "loss": 0.7446, "step": 6494 }, { "epoch": 0.96, "learning_rate": 8.16629430865068e-08, "loss": 0.7646, "step": 6495 }, { "epoch": 0.96, "learning_rate": 8.105320731244703e-08, "loss": 0.8066, "step": 6496 }, { "epoch": 0.96, "learning_rate": 8.044574710752928e-08, "loss": 0.7993, "step": 6497 }, { "epoch": 0.96, "learning_rate": 7.984056261111428e-08, "loss": 0.8042, "step": 6498 }, { "epoch": 0.96, "learning_rate": 7.923765396204208e-08, "loss": 0.7056, "step": 6499 }, { "epoch": 0.96, "learning_rate": 7.863702129863093e-08, "loss": 0.769, "step": 6500 }, { "epoch": 0.96, "learning_rate": 7.803866475867394e-08, "loss": 0.8174, "step": 6501 }, { "epoch": 0.96, "learning_rate": 7.744258447944464e-08, "loss": 0.7393, "step": 6502 }, { "epoch": 0.96, "learning_rate": 7.684878059769363e-08, "loss": 0.7969, "step": 6503 }, { "epoch": 0.96, "learning_rate": 7.625725324965084e-08, "loss": 0.3057, "step": 6504 }, { "epoch": 0.96, "learning_rate": 7.566800257101881e-08, "loss": 0.7441, "step": 6505 }, { "epoch": 0.96, "learning_rate": 7.508102869698386e-08, "loss": 0.8076, "step": 6506 }, { "epoch": 0.96, "learning_rate": 7.449633176220717e-08, "loss": 0.811, "step": 6507 }, { "epoch": 0.96, "learning_rate": 7.391391190082808e-08, "loss": 0.7769, "step": 6508 }, { "epoch": 0.96, "learning_rate": 7.333376924646197e-08, "loss": 0.8193, "step": 6509 }, { "epoch": 0.96, "learning_rate": 7.275590393220456e-08, "loss": 0.6816, "step": 6510 }, { "epoch": 0.96, "learning_rate": 7.218031609062538e-08, "loss": 0.8101, "step": 6511 }, { "epoch": 0.96, "learning_rate": 7.160700585377655e-08, "loss": 0.7935, "step": 6512 }, { "epoch": 0.96, "learning_rate": 7.103597335318402e-08, "loss": 0.7583, "step": 6513 }, { "epoch": 0.96, "learning_rate": 7.046721871985074e-08, "loss": 0.7754, "step": 6514 }, { "epoch": 0.96, "learning_rate": 6.990074208425901e-08, "loss": 0.8286, "step": 6515 }, { "epoch": 0.96, "learning_rate": 6.93365435763682e-08, "loss": 0.7974, "step": 6516 }, { "epoch": 0.96, "learning_rate": 6.877462332561479e-08, "loss": 0.8149, "step": 6517 }, { "epoch": 0.96, "learning_rate": 6.821498146091232e-08, "loss": 0.7617, "step": 6518 }, { "epoch": 0.96, "learning_rate": 6.765761811065141e-08, "loss": 0.7524, "step": 6519 }, { "epoch": 0.96, "learning_rate": 6.710253340270089e-08, "loss": 0.7656, "step": 6520 }, { "epoch": 0.96, "learning_rate": 6.654972746440557e-08, "loss": 0.8335, "step": 6521 }, { "epoch": 0.96, "learning_rate": 6.599920042258956e-08, "loss": 0.832, "step": 6522 }, { "epoch": 0.96, "learning_rate": 6.545095240355182e-08, "loss": 0.7529, "step": 6523 }, { "epoch": 0.96, "learning_rate": 6.49049835330684e-08, "loss": 0.8022, "step": 6524 }, { "epoch": 0.96, "learning_rate": 6.436129393639579e-08, "loss": 0.79, "step": 6525 }, { "epoch": 0.97, "learning_rate": 6.381988373826531e-08, "loss": 0.7129, "step": 6526 }, { "epoch": 0.97, "learning_rate": 6.328075306288317e-08, "loss": 0.7383, "step": 6527 }, { "epoch": 0.97, "learning_rate": 6.274390203393488e-08, "loss": 0.7769, "step": 6528 }, { "epoch": 0.97, "learning_rate": 6.220933077458413e-08, "loss": 0.2939, "step": 6529 }, { "epoch": 0.97, "learning_rate": 6.167703940747172e-08, "loss": 0.7524, "step": 6530 }, { "epoch": 0.97, "learning_rate": 6.114702805471107e-08, "loss": 0.7871, "step": 6531 }, { "epoch": 0.97, "learning_rate": 6.061929683789714e-08, "loss": 0.7622, "step": 6532 }, { "epoch": 0.97, "learning_rate": 6.009384587809975e-08, "loss": 0.7842, "step": 6533 }, { "epoch": 0.97, "learning_rate": 5.95706752958658e-08, "loss": 0.752, "step": 6534 }, { "epoch": 0.97, "learning_rate": 5.90497852112204e-08, "loss": 0.7686, "step": 6535 }, { "epoch": 0.97, "learning_rate": 5.8531175743661294e-08, "loss": 0.7539, "step": 6536 }, { "epoch": 0.97, "learning_rate": 5.801484701216997e-08, "loss": 0.8169, "step": 6537 }, { "epoch": 0.97, "learning_rate": 5.750079913519835e-08, "loss": 0.8032, "step": 6538 }, { "epoch": 0.97, "learning_rate": 5.698903223067653e-08, "loss": 0.8345, "step": 6539 }, { "epoch": 0.97, "learning_rate": 5.647954641601505e-08, "loss": 0.7178, "step": 6540 }, { "epoch": 0.97, "learning_rate": 5.597234180809597e-08, "loss": 0.8022, "step": 6541 }, { "epoch": 0.97, "learning_rate": 5.5467418523281745e-08, "loss": 0.7534, "step": 6542 }, { "epoch": 0.97, "learning_rate": 5.496477667740974e-08, "loss": 0.8022, "step": 6543 }, { "epoch": 0.97, "learning_rate": 5.446441638579214e-08, "loss": 0.7896, "step": 6544 }, { "epoch": 0.97, "learning_rate": 5.3966337763223795e-08, "loss": 0.749, "step": 6545 }, { "epoch": 0.97, "learning_rate": 5.347054092396886e-08, "loss": 0.7861, "step": 6546 }, { "epoch": 0.97, "learning_rate": 5.297702598177301e-08, "loss": 0.7598, "step": 6547 }, { "epoch": 0.97, "learning_rate": 5.2485793049855685e-08, "loss": 0.7993, "step": 6548 }, { "epoch": 0.97, "learning_rate": 5.19968422409145e-08, "loss": 0.7905, "step": 6549 }, { "epoch": 0.97, "learning_rate": 5.1510173667121966e-08, "loss": 0.7837, "step": 6550 }, { "epoch": 0.97, "learning_rate": 5.102578744012876e-08, "loss": 0.7705, "step": 6551 }, { "epoch": 0.97, "learning_rate": 5.054368367106044e-08, "loss": 0.7354, "step": 6552 }, { "epoch": 0.97, "learning_rate": 5.006386247051964e-08, "loss": 0.7798, "step": 6553 }, { "epoch": 0.97, "learning_rate": 4.958632394858498e-08, "loss": 0.7798, "step": 6554 }, { "epoch": 0.97, "learning_rate": 4.911106821481215e-08, "loss": 0.7529, "step": 6555 }, { "epoch": 0.97, "learning_rate": 4.863809537823283e-08, "loss": 0.7256, "step": 6556 }, { "epoch": 0.97, "learning_rate": 4.8167405547353555e-08, "loss": 0.7456, "step": 6557 }, { "epoch": 0.97, "learning_rate": 4.769899883016016e-08, "loss": 0.7539, "step": 6558 }, { "epoch": 0.97, "learning_rate": 4.723287533411003e-08, "loss": 0.7954, "step": 6559 }, { "epoch": 0.97, "learning_rate": 4.676903516614206e-08, "loss": 0.7646, "step": 6560 }, { "epoch": 0.97, "learning_rate": 4.630747843266781e-08, "loss": 0.7085, "step": 6561 }, { "epoch": 0.97, "learning_rate": 4.5848205239575894e-08, "loss": 0.8296, "step": 6562 }, { "epoch": 0.97, "learning_rate": 4.5391215692232036e-08, "loss": 0.3153, "step": 6563 }, { "epoch": 0.97, "learning_rate": 4.493650989547682e-08, "loss": 0.7773, "step": 6564 }, { "epoch": 0.97, "learning_rate": 4.4484087953625685e-08, "loss": 0.7788, "step": 6565 }, { "epoch": 0.97, "learning_rate": 4.403394997047339e-08, "loss": 0.7085, "step": 6566 }, { "epoch": 0.97, "learning_rate": 4.358609604928732e-08, "loss": 0.7871, "step": 6567 }, { "epoch": 0.97, "learning_rate": 4.314052629281418e-08, "loss": 0.7964, "step": 6568 }, { "epoch": 0.97, "learning_rate": 4.269724080327553e-08, "loss": 0.7744, "step": 6569 }, { "epoch": 0.97, "learning_rate": 4.2256239682365585e-08, "loss": 0.8027, "step": 6570 }, { "epoch": 0.97, "learning_rate": 4.1817523031258965e-08, "loss": 0.8398, "step": 6571 }, { "epoch": 0.97, "learning_rate": 4.1381090950605165e-08, "loss": 0.7637, "step": 6572 }, { "epoch": 0.97, "learning_rate": 4.094694354052742e-08, "loss": 0.2905, "step": 6573 }, { "epoch": 0.97, "learning_rate": 4.0515080900627165e-08, "loss": 0.769, "step": 6574 }, { "epoch": 0.97, "learning_rate": 4.0085503129980716e-08, "loss": 0.7798, "step": 6575 }, { "epoch": 0.97, "learning_rate": 3.9658210327139236e-08, "loss": 0.8623, "step": 6576 }, { "epoch": 0.97, "learning_rate": 3.923320259013208e-08, "loss": 0.7192, "step": 6577 }, { "epoch": 0.97, "learning_rate": 3.881048001646237e-08, "loss": 0.8096, "step": 6578 }, { "epoch": 0.97, "learning_rate": 3.8390042703110305e-08, "loss": 0.7175, "step": 6579 }, { "epoch": 0.97, "learning_rate": 3.797189074652874e-08, "loss": 0.8086, "step": 6580 }, { "epoch": 0.97, "learning_rate": 3.755602424265203e-08, "loss": 0.7505, "step": 6581 }, { "epoch": 0.97, "learning_rate": 3.714244328688388e-08, "loss": 0.8018, "step": 6582 }, { "epoch": 0.97, "learning_rate": 3.6731147974108374e-08, "loss": 0.7827, "step": 6583 }, { "epoch": 0.97, "learning_rate": 3.632213839868226e-08, "loss": 0.8228, "step": 6584 }, { "epoch": 0.97, "learning_rate": 3.5915414654440486e-08, "loss": 0.7563, "step": 6585 }, { "epoch": 0.97, "learning_rate": 3.551097683468951e-08, "loss": 0.3293, "step": 6586 }, { "epoch": 0.97, "learning_rate": 3.5108825032217355e-08, "loss": 0.769, "step": 6587 }, { "epoch": 0.97, "learning_rate": 3.470895933928131e-08, "loss": 0.7651, "step": 6588 }, { "epoch": 0.97, "learning_rate": 3.431137984761912e-08, "loss": 0.7905, "step": 6589 }, { "epoch": 0.97, "learning_rate": 3.391608664844115e-08, "loss": 0.7834, "step": 6590 }, { "epoch": 0.97, "learning_rate": 3.3523079832434854e-08, "loss": 0.8145, "step": 6591 }, { "epoch": 0.97, "learning_rate": 3.313235948976146e-08, "loss": 0.8135, "step": 6592 }, { "epoch": 0.98, "learning_rate": 3.2743925710058135e-08, "loss": 0.7651, "step": 6593 }, { "epoch": 0.98, "learning_rate": 3.235777858244027e-08, "loss": 0.7534, "step": 6594 }, { "epoch": 0.98, "learning_rate": 3.197391819549478e-08, "loss": 0.7622, "step": 6595 }, { "epoch": 0.98, "learning_rate": 3.1592344637284554e-08, "loss": 0.7559, "step": 6596 }, { "epoch": 0.98, "learning_rate": 3.1213057995350684e-08, "loss": 0.7549, "step": 6597 }, { "epoch": 0.98, "learning_rate": 3.0836058356708e-08, "loss": 0.3024, "step": 6598 }, { "epoch": 0.98, "learning_rate": 3.04613458078451e-08, "loss": 0.7876, "step": 6599 }, { "epoch": 0.98, "learning_rate": 3.008892043472655e-08, "loss": 0.7988, "step": 6600 }, { "epoch": 0.98, "learning_rate": 2.9718782322794015e-08, "loss": 0.7134, "step": 6601 }, { "epoch": 0.98, "learning_rate": 2.9350931556964e-08, "loss": 0.8179, "step": 6602 }, { "epoch": 0.98, "learning_rate": 2.8985368221625677e-08, "loss": 0.8008, "step": 6603 }, { "epoch": 0.98, "learning_rate": 2.8622092400647507e-08, "loss": 0.8076, "step": 6604 }, { "epoch": 0.98, "learning_rate": 2.8261104177368382e-08, "loss": 0.7412, "step": 6605 }, { "epoch": 0.98, "learning_rate": 2.79024036346065e-08, "loss": 0.7563, "step": 6606 }, { "epoch": 0.98, "learning_rate": 2.7545990854653816e-08, "loss": 0.7407, "step": 6607 }, { "epoch": 0.98, "learning_rate": 2.719186591927603e-08, "loss": 0.7712, "step": 6608 }, { "epoch": 0.98, "learning_rate": 2.6840028909715933e-08, "loss": 0.7686, "step": 6609 }, { "epoch": 0.98, "learning_rate": 2.649047990669118e-08, "loss": 0.7129, "step": 6610 }, { "epoch": 0.98, "learning_rate": 2.6143218990393182e-08, "loss": 0.7358, "step": 6611 }, { "epoch": 0.98, "learning_rate": 2.5798246240489323e-08, "loss": 0.6992, "step": 6612 }, { "epoch": 0.98, "learning_rate": 2.5455561736122958e-08, "loss": 0.7476, "step": 6613 }, { "epoch": 0.98, "learning_rate": 2.5115165555911204e-08, "loss": 0.7441, "step": 6614 }, { "epoch": 0.98, "learning_rate": 2.4777057777946034e-08, "loss": 0.7959, "step": 6615 }, { "epoch": 0.98, "learning_rate": 2.4441238479795404e-08, "loss": 0.3098, "step": 6616 }, { "epoch": 0.98, "learning_rate": 2.4107707738501017e-08, "loss": 0.8208, "step": 6617 }, { "epoch": 0.98, "learning_rate": 2.377646563058056e-08, "loss": 0.7783, "step": 6618 }, { "epoch": 0.98, "learning_rate": 2.344751223202768e-08, "loss": 0.7393, "step": 6619 }, { "epoch": 0.98, "learning_rate": 2.3120847618308683e-08, "loss": 0.7729, "step": 6620 }, { "epoch": 0.98, "learning_rate": 2.2796471864364733e-08, "loss": 0.7524, "step": 6621 }, { "epoch": 0.98, "learning_rate": 2.2474385044615188e-08, "loss": 0.7485, "step": 6622 }, { "epoch": 0.98, "learning_rate": 2.2154587232949833e-08, "loss": 0.8364, "step": 6623 }, { "epoch": 0.98, "learning_rate": 2.183707850273664e-08, "loss": 0.7905, "step": 6624 }, { "epoch": 0.98, "learning_rate": 2.1521858926817353e-08, "loss": 0.7266, "step": 6625 }, { "epoch": 0.98, "learning_rate": 2.1208928577508557e-08, "loss": 0.7734, "step": 6626 }, { "epoch": 0.98, "learning_rate": 2.0898287526601725e-08, "loss": 0.8643, "step": 6627 }, { "epoch": 0.98, "learning_rate": 2.0589935845363174e-08, "loss": 0.8057, "step": 6628 }, { "epoch": 0.98, "learning_rate": 2.028387360453188e-08, "loss": 0.7412, "step": 6629 }, { "epoch": 0.98, "learning_rate": 1.9980100874326115e-08, "loss": 0.7607, "step": 6630 }, { "epoch": 0.98, "learning_rate": 1.967861772443458e-08, "loss": 0.8091, "step": 6631 }, { "epoch": 0.98, "learning_rate": 1.9379424224024168e-08, "loss": 0.7188, "step": 6632 }, { "epoch": 0.98, "learning_rate": 1.908252044173331e-08, "loss": 0.7764, "step": 6633 }, { "epoch": 0.98, "learning_rate": 1.8787906445676406e-08, "loss": 0.7563, "step": 6634 }, { "epoch": 0.98, "learning_rate": 1.8495582303443837e-08, "loss": 0.7837, "step": 6635 }, { "epoch": 0.98, "learning_rate": 1.8205548082099733e-08, "loss": 0.7939, "step": 6636 }, { "epoch": 0.98, "learning_rate": 1.791780384818087e-08, "loss": 0.7944, "step": 6637 }, { "epoch": 0.98, "learning_rate": 1.763234966770333e-08, "loss": 0.793, "step": 6638 }, { "epoch": 0.98, "learning_rate": 1.7349185606151396e-08, "loss": 0.7402, "step": 6639 }, { "epoch": 0.98, "learning_rate": 1.7068311728490882e-08, "loss": 0.3091, "step": 6640 }, { "epoch": 0.98, "learning_rate": 1.678972809915802e-08, "loss": 0.8281, "step": 6641 }, { "epoch": 0.98, "learning_rate": 1.6513434782062797e-08, "loss": 0.7969, "step": 6642 }, { "epoch": 0.98, "learning_rate": 1.623943184059229e-08, "loss": 0.7749, "step": 6643 }, { "epoch": 0.98, "learning_rate": 1.5967719337608434e-08, "loss": 0.7607, "step": 6644 }, { "epoch": 0.98, "learning_rate": 1.569829733544581e-08, "loss": 0.7676, "step": 6645 }, { "epoch": 0.98, "learning_rate": 1.543116589591387e-08, "loss": 0.7402, "step": 6646 }, { "epoch": 0.98, "learning_rate": 1.5166325080295808e-08, "loss": 0.771, "step": 6647 }, { "epoch": 0.98, "learning_rate": 1.4903774949354133e-08, "loss": 0.769, "step": 6648 }, { "epoch": 0.98, "learning_rate": 1.4643515563318444e-08, "loss": 0.8008, "step": 6649 }, { "epoch": 0.98, "learning_rate": 1.4385546981897647e-08, "loss": 0.7935, "step": 6650 }, { "epoch": 0.98, "learning_rate": 1.4129869264275508e-08, "loss": 0.8071, "step": 6651 }, { "epoch": 0.98, "learning_rate": 1.3876482469106223e-08, "loss": 0.7979, "step": 6652 }, { "epoch": 0.98, "learning_rate": 1.3625386654522176e-08, "loss": 0.813, "step": 6653 }, { "epoch": 0.98, "learning_rate": 1.3376581878129514e-08, "loss": 0.8315, "step": 6654 }, { "epoch": 0.98, "learning_rate": 1.3130068197007018e-08, "loss": 0.771, "step": 6655 }, { "epoch": 0.98, "learning_rate": 1.288584566770945e-08, "loss": 0.7192, "step": 6656 }, { "epoch": 0.98, "learning_rate": 1.264391434626533e-08, "loss": 0.8042, "step": 6657 }, { "epoch": 0.98, "learning_rate": 1.240427428817692e-08, "loss": 0.7749, "step": 6658 }, { "epoch": 0.98, "learning_rate": 1.216692554842358e-08, "loss": 0.7104, "step": 6659 }, { "epoch": 0.98, "learning_rate": 1.1931868181455087e-08, "loss": 0.7832, "step": 6660 }, { "epoch": 0.99, "learning_rate": 1.1699102241198302e-08, "loss": 0.7612, "step": 6661 }, { "epoch": 0.99, "learning_rate": 1.1468627781052732e-08, "loss": 0.7988, "step": 6662 }, { "epoch": 0.99, "learning_rate": 1.1240444853894972e-08, "loss": 0.3474, "step": 6663 }, { "epoch": 0.99, "learning_rate": 1.1014553512072036e-08, "loss": 0.7666, "step": 6664 }, { "epoch": 0.99, "learning_rate": 1.0790953807406912e-08, "loss": 0.8384, "step": 6665 }, { "epoch": 0.99, "learning_rate": 1.056964579119968e-08, "loss": 0.8027, "step": 6666 }, { "epoch": 0.99, "learning_rate": 1.0350629514218613e-08, "loss": 0.8325, "step": 6667 }, { "epoch": 0.99, "learning_rate": 1.01339050267113e-08, "loss": 0.7983, "step": 6668 }, { "epoch": 0.99, "learning_rate": 9.919472378397966e-09, "loss": 0.7739, "step": 6669 }, { "epoch": 0.99, "learning_rate": 9.707331618472593e-09, "loss": 0.7607, "step": 6670 }, { "epoch": 0.99, "learning_rate": 9.49748279560514e-09, "loss": 0.7524, "step": 6671 }, { "epoch": 0.99, "learning_rate": 9.289925957935985e-09, "loss": 0.2792, "step": 6672 }, { "epoch": 0.99, "learning_rate": 9.084661153083706e-09, "loss": 0.7017, "step": 6673 }, { "epoch": 0.99, "learning_rate": 8.881688428139523e-09, "loss": 0.7578, "step": 6674 }, { "epoch": 0.99, "learning_rate": 8.681007829667298e-09, "loss": 0.2969, "step": 6675 }, { "epoch": 0.99, "learning_rate": 8.482619403707981e-09, "loss": 0.8018, "step": 6676 }, { "epoch": 0.99, "learning_rate": 8.286523195774054e-09, "loss": 0.8223, "step": 6677 }, { "epoch": 0.99, "learning_rate": 8.092719250853975e-09, "loss": 0.7388, "step": 6678 }, { "epoch": 0.99, "learning_rate": 7.901207613408845e-09, "loss": 0.7886, "step": 6679 }, { "epoch": 0.99, "learning_rate": 7.71198832737463e-09, "loss": 0.7383, "step": 6680 }, { "epoch": 0.99, "learning_rate": 7.525061436162162e-09, "loss": 0.3213, "step": 6681 }, { "epoch": 0.99, "learning_rate": 7.340426982654914e-09, "loss": 0.7485, "step": 6682 }, { "epoch": 0.99, "learning_rate": 7.1580850092112245e-09, "loss": 0.7407, "step": 6683 }, { "epoch": 0.99, "learning_rate": 6.978035557664298e-09, "loss": 0.7461, "step": 6684 }, { "epoch": 0.99, "learning_rate": 6.800278669317762e-09, "loss": 0.7129, "step": 6685 }, { "epoch": 0.99, "learning_rate": 6.624814384954548e-09, "loss": 0.7417, "step": 6686 }, { "epoch": 0.99, "learning_rate": 6.451642744828013e-09, "loss": 0.7598, "step": 6687 }, { "epoch": 0.99, "learning_rate": 6.2807637886663775e-09, "loss": 0.793, "step": 6688 }, { "epoch": 0.99, "learning_rate": 6.112177555672727e-09, "loss": 0.7817, "step": 6689 }, { "epoch": 0.99, "learning_rate": 5.945884084522791e-09, "loss": 0.751, "step": 6690 }, { "epoch": 0.99, "learning_rate": 5.781883413367162e-09, "loss": 0.7556, "step": 6691 }, { "epoch": 0.99, "learning_rate": 5.6201755798313e-09, "loss": 0.7979, "step": 6692 }, { "epoch": 0.99, "learning_rate": 5.460760621012196e-09, "loss": 0.7749, "step": 6693 }, { "epoch": 0.99, "learning_rate": 5.303638573482817e-09, "loss": 0.7456, "step": 6694 }, { "epoch": 0.99, "learning_rate": 5.148809473289884e-09, "loss": 0.7827, "step": 6695 }, { "epoch": 0.99, "learning_rate": 4.996273355953873e-09, "loss": 0.8062, "step": 6696 }, { "epoch": 0.99, "learning_rate": 4.8460302564679044e-09, "loss": 0.7576, "step": 6697 }, { "epoch": 0.99, "learning_rate": 4.69808020930218e-09, "loss": 0.834, "step": 6698 }, { "epoch": 0.99, "learning_rate": 4.55242324839622e-09, "loss": 0.771, "step": 6699 }, { "epoch": 0.99, "learning_rate": 4.4090594071688475e-09, "loss": 0.7163, "step": 6700 }, { "epoch": 0.99, "learning_rate": 4.267988718508198e-09, "loss": 0.7192, "step": 6701 }, { "epoch": 0.99, "learning_rate": 4.129211214778384e-09, "loss": 0.8091, "step": 6702 }, { "epoch": 0.99, "learning_rate": 3.99272692781838e-09, "loss": 0.8008, "step": 6703 }, { "epoch": 0.99, "learning_rate": 3.858535888938697e-09, "loss": 0.7808, "step": 6704 }, { "epoch": 0.99, "learning_rate": 3.726638128925819e-09, "loss": 0.7251, "step": 6705 }, { "epoch": 0.99, "learning_rate": 3.597033678038875e-09, "loss": 0.7505, "step": 6706 }, { "epoch": 0.99, "learning_rate": 3.469722566010747e-09, "loss": 0.8062, "step": 6707 }, { "epoch": 0.99, "learning_rate": 3.344704822049183e-09, "loss": 0.7891, "step": 6708 }, { "epoch": 0.99, "learning_rate": 3.2219804748367946e-09, "loss": 0.7788, "step": 6709 }, { "epoch": 0.99, "learning_rate": 3.1015495525255067e-09, "loss": 0.8438, "step": 6710 }, { "epoch": 0.99, "learning_rate": 2.9834120827465506e-09, "loss": 0.7676, "step": 6711 }, { "epoch": 0.99, "learning_rate": 2.8675680926015805e-09, "loss": 0.7769, "step": 6712 }, { "epoch": 0.99, "learning_rate": 2.7540176086671145e-09, "loss": 0.75, "step": 6713 }, { "epoch": 0.99, "learning_rate": 2.642760656994536e-09, "loss": 0.792, "step": 6714 }, { "epoch": 0.99, "learning_rate": 2.5337972631067633e-09, "loss": 0.7378, "step": 6715 }, { "epoch": 0.99, "learning_rate": 2.4271274520015766e-09, "loss": 0.7749, "step": 6716 }, { "epoch": 0.99, "learning_rate": 2.3227512481516224e-09, "loss": 0.7227, "step": 6717 }, { "epoch": 0.99, "learning_rate": 2.2206686755033013e-09, "loss": 0.7925, "step": 6718 }, { "epoch": 0.99, "learning_rate": 2.1208797574745477e-09, "loss": 0.7852, "step": 6719 }, { "epoch": 0.99, "learning_rate": 2.02338451695816e-09, "loss": 0.7393, "step": 6720 }, { "epoch": 0.99, "learning_rate": 1.928182976322912e-09, "loss": 0.7754, "step": 6721 }, { "epoch": 0.99, "learning_rate": 1.835275157409111e-09, "loss": 0.6978, "step": 6722 }, { "epoch": 0.99, "learning_rate": 1.7446610815308184e-09, "loss": 0.7847, "step": 6723 }, { "epoch": 0.99, "learning_rate": 1.6563407694758504e-09, "loss": 0.7842, "step": 6724 }, { "epoch": 0.99, "learning_rate": 1.5703142415079974e-09, "loss": 0.7227, "step": 6725 }, { "epoch": 0.99, "learning_rate": 1.486581517361474e-09, "loss": 0.79, "step": 6726 }, { "epoch": 0.99, "learning_rate": 1.4051426162464687e-09, "loss": 0.8047, "step": 6727 }, { "epoch": 0.99, "learning_rate": 1.3259975568469253e-09, "loss": 0.7544, "step": 6728 }, { "epoch": 1.0, "learning_rate": 1.2491463573194307e-09, "loss": 0.7383, "step": 6729 }, { "epoch": 1.0, "learning_rate": 1.174589035295437e-09, "loss": 0.7627, "step": 6730 }, { "epoch": 1.0, "learning_rate": 1.10232560787904e-09, "loss": 0.7925, "step": 6731 }, { "epoch": 1.0, "learning_rate": 1.0323560916492003e-09, "loss": 0.793, "step": 6732 }, { "epoch": 1.0, "learning_rate": 9.646805026586325e-10, "loss": 0.7388, "step": 6733 }, { "epoch": 1.0, "learning_rate": 8.992988564315852e-10, "loss": 0.772, "step": 6734 }, { "epoch": 1.0, "learning_rate": 8.362111679682817e-10, "loss": 0.7251, "step": 6735 }, { "epoch": 1.0, "learning_rate": 7.754174517426994e-10, "loss": 0.7778, "step": 6736 }, { "epoch": 1.0, "learning_rate": 7.1691772170146e-10, "loss": 0.2495, "step": 6737 }, { "epoch": 1.0, "learning_rate": 6.607119912649395e-10, "loss": 0.7803, "step": 6738 }, { "epoch": 1.0, "learning_rate": 6.068002733294887e-10, "loss": 0.7378, "step": 6739 }, { "epoch": 1.0, "learning_rate": 5.551825802607713e-10, "loss": 0.7583, "step": 6740 }, { "epoch": 1.0, "learning_rate": 5.058589239026468e-10, "loss": 0.7554, "step": 6741 }, { "epoch": 1.0, "learning_rate": 4.588293155693979e-10, "loss": 0.769, "step": 6742 }, { "epoch": 1.0, "learning_rate": 4.140937660512823e-10, "loss": 0.7812, "step": 6743 }, { "epoch": 1.0, "learning_rate": 3.716522856112015e-10, "loss": 0.7744, "step": 6744 }, { "epoch": 1.0, "learning_rate": 3.3150488398581147e-10, "loss": 0.8354, "step": 6745 }, { "epoch": 1.0, "learning_rate": 2.9365157038663273e-10, "loss": 0.2828, "step": 6746 }, { "epoch": 1.0, "learning_rate": 2.5809235349560924e-10, "loss": 0.7642, "step": 6747 }, { "epoch": 1.0, "learning_rate": 2.2482724147177005e-10, "loss": 0.7759, "step": 6748 }, { "epoch": 1.0, "learning_rate": 1.9385624194789843e-10, "loss": 0.7446, "step": 6749 }, { "epoch": 1.0, "learning_rate": 1.6517936202720132e-10, "loss": 0.7466, "step": 6750 }, { "epoch": 1.0, "learning_rate": 1.3879660828997055e-10, "loss": 0.8369, "step": 6751 }, { "epoch": 1.0, "learning_rate": 1.1470798678803186e-10, "loss": 0.7944, "step": 6752 }, { "epoch": 1.0, "learning_rate": 9.291350304918568e-11, "loss": 0.7598, "step": 6753 }, { "epoch": 1.0, "learning_rate": 7.341316207165606e-11, "loss": 0.8462, "step": 6754 }, { "epoch": 1.0, "learning_rate": 5.620696832964179e-11, "loss": 0.7642, "step": 6755 }, { "epoch": 1.0, "learning_rate": 4.1294925769985726e-11, "loss": 0.7534, "step": 6756 }, { "epoch": 1.0, "learning_rate": 2.867703781550546e-11, "loss": 0.7983, "step": 6757 }, { "epoch": 1.0, "learning_rate": 1.8353307359442184e-11, "loss": 0.7275, "step": 6758 }, { "epoch": 1.0, "learning_rate": 1.0323736772122062e-11, "loss": 0.7441, "step": 6759 }, { "epoch": 1.0, "learning_rate": 4.5883278931846405e-12, "loss": 0.7891, "step": 6760 }, { "epoch": 1.0, "learning_rate": 1.1470820393544302e-12, "loss": 0.3315, "step": 6761 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.533, "step": 6762 }, { "epoch": 1.0, "step": 6762, "total_flos": 3565969442308096.0, "train_loss": 0.801395031518042, "train_runtime": 44734.6205, "train_samples_per_second": 19.347, "train_steps_per_second": 0.151 } ], "max_steps": 6762, "num_train_epochs": 1, "total_flos": 3565969442308096.0, "trial_name": null, "trial_params": null }