diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,40597 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 6762, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 9.852216748768474e-08, + "loss": 1.1201, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.9704433497536947e-07, + "loss": 1.1611, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 2.955665024630542e-07, + "loss": 1.1011, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 3.9408866995073894e-07, + "loss": 1.1499, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 4.926108374384237e-07, + "loss": 1.1401, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 5.911330049261084e-07, + "loss": 1.1104, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 6.896551724137931e-07, + "loss": 1.0806, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 7.881773399014779e-07, + "loss": 1.1387, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 8.866995073891626e-07, + "loss": 1.1221, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 9.852216748768474e-07, + "loss": 1.0835, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.0837438423645322e-06, + "loss": 1.0166, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 1.1822660098522167e-06, + "loss": 1.0205, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 1.2807881773399017e-06, + "loss": 1.0645, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 1.3793103448275862e-06, + "loss": 1.0811, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 1.4778325123152712e-06, + "loss": 1.0117, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 1.5763546798029558e-06, + "loss": 1.0278, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 1.6748768472906405e-06, + "loss": 0.3706, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 1.7733990147783253e-06, + "loss": 0.9995, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 1.8719211822660098e-06, + "loss": 0.9595, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 1.970443349753695e-06, + "loss": 1.0039, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 2.0689655172413796e-06, + "loss": 0.9785, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 2.1674876847290643e-06, + "loss": 0.9346, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 2.266009852216749e-06, + "loss": 0.8794, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 2.3645320197044334e-06, + "loss": 0.9219, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 2.4630541871921186e-06, + "loss": 0.9341, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 2.5615763546798034e-06, + "loss": 0.9331, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 2.660098522167488e-06, + "loss": 0.9409, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 2.7586206896551725e-06, + "loss": 0.9766, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 2.8571428571428573e-06, + "loss": 0.9434, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 2.9556650246305424e-06, + "loss": 0.9146, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 3.054187192118227e-06, + "loss": 0.8652, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 3.1527093596059115e-06, + "loss": 0.9497, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 3.2512315270935963e-06, + "loss": 0.8242, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 3.349753694581281e-06, + "loss": 0.8765, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 3.448275862068966e-06, + "loss": 0.9253, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 3.5467980295566506e-06, + "loss": 0.8809, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 3.6453201970443354e-06, + "loss": 0.9053, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 3.7438423645320197e-06, + "loss": 0.9141, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 3.842364532019705e-06, + "loss": 0.8716, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 3.94088669950739e-06, + "loss": 0.8281, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.039408866995074e-06, + "loss": 0.8843, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 4.137931034482759e-06, + "loss": 0.916, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 4.236453201970444e-06, + "loss": 0.9263, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 4.334975369458129e-06, + "loss": 0.9087, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 4.4334975369458135e-06, + "loss": 0.8799, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 4.532019704433498e-06, + "loss": 0.8257, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 4.630541871921182e-06, + "loss": 0.8774, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 4.729064039408867e-06, + "loss": 0.8799, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 4.8275862068965525e-06, + "loss": 0.8516, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 4.926108374384237e-06, + "loss": 0.8325, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 5.024630541871922e-06, + "loss": 0.8599, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 5.123152709359607e-06, + "loss": 0.813, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 5.2216748768472915e-06, + "loss": 0.8672, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 5.320197044334976e-06, + "loss": 0.7798, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 5.41871921182266e-06, + "loss": 0.8457, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 5.517241379310345e-06, + "loss": 0.8921, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 5.61576354679803e-06, + "loss": 0.8906, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 5.7142857142857145e-06, + "loss": 0.8481, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 5.812807881773399e-06, + "loss": 0.8862, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 5.911330049261085e-06, + "loss": 0.3048, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 6.00985221674877e-06, + "loss": 0.8677, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 6.108374384236454e-06, + "loss": 0.8057, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 6.206896551724138e-06, + "loss": 0.8423, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 6.305418719211823e-06, + "loss": 0.8965, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 6.403940886699508e-06, + "loss": 0.8833, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 6.502463054187193e-06, + "loss": 0.2915, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 6.600985221674877e-06, + "loss": 0.8442, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 6.699507389162562e-06, + "loss": 0.8755, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 6.798029556650246e-06, + "loss": 0.9082, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 6.896551724137932e-06, + "loss": 0.8525, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 6.995073891625616e-06, + "loss": 0.8345, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 7.093596059113301e-06, + "loss": 0.8433, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 7.192118226600986e-06, + "loss": 0.8062, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 7.290640394088671e-06, + "loss": 0.8901, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 7.3891625615763555e-06, + "loss": 0.834, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 7.487684729064039e-06, + "loss": 0.8604, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 7.586206896551724e-06, + "loss": 0.8462, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 7.68472906403941e-06, + "loss": 0.8369, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 7.783251231527095e-06, + "loss": 0.7983, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 7.88177339901478e-06, + "loss": 0.8394, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 7.980295566502464e-06, + "loss": 0.8672, + "step": 81 + }, + { + "epoch": 0.01, + "learning_rate": 8.078817733990149e-06, + "loss": 0.8535, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 8.177339901477834e-06, + "loss": 0.8423, + "step": 83 + }, + { + "epoch": 0.01, + "learning_rate": 8.275862068965518e-06, + "loss": 0.8374, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 8.374384236453203e-06, + "loss": 0.7959, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 8.472906403940888e-06, + "loss": 0.9478, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 8.571428571428571e-06, + "loss": 0.8853, + "step": 87 + }, + { + "epoch": 0.01, + "learning_rate": 8.669950738916257e-06, + "loss": 0.2842, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 8.768472906403942e-06, + "loss": 0.8574, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 8.866995073891627e-06, + "loss": 0.7847, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 8.965517241379312e-06, + "loss": 0.8765, + "step": 91 + }, + { + "epoch": 0.01, + "learning_rate": 9.064039408866996e-06, + "loss": 0.8589, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 9.162561576354681e-06, + "loss": 0.8833, + "step": 93 + }, + { + "epoch": 0.01, + "learning_rate": 9.261083743842364e-06, + "loss": 0.8564, + "step": 94 + }, + { + "epoch": 0.01, + "learning_rate": 9.359605911330049e-06, + "loss": 0.8369, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 9.458128078817734e-06, + "loss": 0.8403, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 9.55665024630542e-06, + "loss": 0.8623, + "step": 97 + }, + { + "epoch": 0.01, + "learning_rate": 9.655172413793105e-06, + "loss": 0.8584, + "step": 98 + }, + { + "epoch": 0.01, + "learning_rate": 9.75369458128079e-06, + "loss": 0.8247, + "step": 99 + }, + { + "epoch": 0.01, + "learning_rate": 9.852216748768475e-06, + "loss": 0.855, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 9.95073891625616e-06, + "loss": 0.8438, + "step": 101 + }, + { + "epoch": 0.02, + "learning_rate": 1.0049261083743844e-05, + "loss": 0.812, + "step": 102 + }, + { + "epoch": 0.02, + "learning_rate": 1.0147783251231529e-05, + "loss": 0.8042, + "step": 103 + }, + { + "epoch": 0.02, + "learning_rate": 1.0246305418719214e-05, + "loss": 0.8945, + "step": 104 + }, + { + "epoch": 0.02, + "learning_rate": 1.0344827586206898e-05, + "loss": 0.8906, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 1.0443349753694583e-05, + "loss": 0.8232, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 1.0541871921182268e-05, + "loss": 0.8232, + "step": 107 + }, + { + "epoch": 0.02, + "learning_rate": 1.0640394088669953e-05, + "loss": 0.8374, + "step": 108 + }, + { + "epoch": 0.02, + "learning_rate": 1.0738916256157637e-05, + "loss": 0.854, + "step": 109 + }, + { + "epoch": 0.02, + "learning_rate": 1.083743842364532e-05, + "loss": 0.8496, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 1.0935960591133005e-05, + "loss": 0.8608, + "step": 111 + }, + { + "epoch": 0.02, + "learning_rate": 1.103448275862069e-05, + "loss": 0.7869, + "step": 112 + }, + { + "epoch": 0.02, + "learning_rate": 1.1133004926108375e-05, + "loss": 0.7915, + "step": 113 + }, + { + "epoch": 0.02, + "learning_rate": 1.123152709359606e-05, + "loss": 0.8296, + "step": 114 + }, + { + "epoch": 0.02, + "learning_rate": 1.1330049261083744e-05, + "loss": 0.8325, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 1.1428571428571429e-05, + "loss": 0.8228, + "step": 116 + }, + { + "epoch": 0.02, + "learning_rate": 1.1527093596059114e-05, + "loss": 0.8545, + "step": 117 + }, + { + "epoch": 0.02, + "learning_rate": 1.1625615763546799e-05, + "loss": 0.9116, + "step": 118 + }, + { + "epoch": 0.02, + "learning_rate": 1.1724137931034483e-05, + "loss": 0.8667, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 1.182266009852217e-05, + "loss": 0.8608, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 1.1921182266009855e-05, + "loss": 0.8335, + "step": 121 + }, + { + "epoch": 0.02, + "learning_rate": 1.201970443349754e-05, + "loss": 0.8306, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 1.2118226600985224e-05, + "loss": 0.7983, + "step": 123 + }, + { + "epoch": 0.02, + "learning_rate": 1.2216748768472909e-05, + "loss": 0.7915, + "step": 124 + }, + { + "epoch": 0.02, + "learning_rate": 1.2315270935960592e-05, + "loss": 0.8403, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 1.2413793103448277e-05, + "loss": 0.2775, + "step": 126 + }, + { + "epoch": 0.02, + "learning_rate": 1.2512315270935961e-05, + "loss": 0.8345, + "step": 127 + }, + { + "epoch": 0.02, + "learning_rate": 1.2610837438423646e-05, + "loss": 0.895, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 1.2709359605911331e-05, + "loss": 0.8975, + "step": 129 + }, + { + "epoch": 0.02, + "learning_rate": 1.2807881773399016e-05, + "loss": 0.8418, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 1.29064039408867e-05, + "loss": 0.8579, + "step": 131 + }, + { + "epoch": 0.02, + "learning_rate": 1.3004926108374385e-05, + "loss": 0.8652, + "step": 132 + }, + { + "epoch": 0.02, + "learning_rate": 1.310344827586207e-05, + "loss": 0.8301, + "step": 133 + }, + { + "epoch": 0.02, + "learning_rate": 1.3201970443349755e-05, + "loss": 0.8271, + "step": 134 + }, + { + "epoch": 0.02, + "learning_rate": 1.330049261083744e-05, + "loss": 0.8521, + "step": 135 + }, + { + "epoch": 0.02, + "learning_rate": 1.3399014778325124e-05, + "loss": 0.8066, + "step": 136 + }, + { + "epoch": 0.02, + "learning_rate": 1.3497536945812807e-05, + "loss": 0.8389, + "step": 137 + }, + { + "epoch": 0.02, + "learning_rate": 1.3596059113300492e-05, + "loss": 0.8823, + "step": 138 + }, + { + "epoch": 0.02, + "learning_rate": 1.369458128078818e-05, + "loss": 0.3254, + "step": 139 + }, + { + "epoch": 0.02, + "learning_rate": 1.3793103448275863e-05, + "loss": 0.8848, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 1.3891625615763548e-05, + "loss": 0.8013, + "step": 141 + }, + { + "epoch": 0.02, + "learning_rate": 1.3990147783251233e-05, + "loss": 0.8589, + "step": 142 + }, + { + "epoch": 0.02, + "learning_rate": 1.4088669950738918e-05, + "loss": 0.814, + "step": 143 + }, + { + "epoch": 0.02, + "learning_rate": 1.4187192118226602e-05, + "loss": 0.8579, + "step": 144 + }, + { + "epoch": 0.02, + "learning_rate": 1.4285714285714287e-05, + "loss": 0.8809, + "step": 145 + }, + { + "epoch": 0.02, + "learning_rate": 1.4384236453201972e-05, + "loss": 0.8716, + "step": 146 + }, + { + "epoch": 0.02, + "learning_rate": 1.4482758620689657e-05, + "loss": 0.874, + "step": 147 + }, + { + "epoch": 0.02, + "learning_rate": 1.4581280788177341e-05, + "loss": 0.8335, + "step": 148 + }, + { + "epoch": 0.02, + "learning_rate": 1.4679802955665026e-05, + "loss": 0.9067, + "step": 149 + }, + { + "epoch": 0.02, + "learning_rate": 1.4778325123152711e-05, + "loss": 0.8496, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 1.4876847290640396e-05, + "loss": 0.8994, + "step": 151 + }, + { + "epoch": 0.02, + "learning_rate": 1.4975369458128079e-05, + "loss": 0.9229, + "step": 152 + }, + { + "epoch": 0.02, + "learning_rate": 1.5073891625615764e-05, + "loss": 0.8159, + "step": 153 + }, + { + "epoch": 0.02, + "learning_rate": 1.5172413793103448e-05, + "loss": 0.8315, + "step": 154 + }, + { + "epoch": 0.02, + "learning_rate": 1.5270935960591133e-05, + "loss": 0.8521, + "step": 155 + }, + { + "epoch": 0.02, + "learning_rate": 1.536945812807882e-05, + "loss": 0.8765, + "step": 156 + }, + { + "epoch": 0.02, + "learning_rate": 1.5467980295566506e-05, + "loss": 0.8843, + "step": 157 + }, + { + "epoch": 0.02, + "learning_rate": 1.556650246305419e-05, + "loss": 0.8828, + "step": 158 + }, + { + "epoch": 0.02, + "learning_rate": 1.5665024630541875e-05, + "loss": 0.855, + "step": 159 + }, + { + "epoch": 0.02, + "learning_rate": 1.576354679802956e-05, + "loss": 0.8345, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 1.586206896551724e-05, + "loss": 0.8633, + "step": 161 + }, + { + "epoch": 0.02, + "learning_rate": 1.5960591133004928e-05, + "loss": 0.7974, + "step": 162 + }, + { + "epoch": 0.02, + "learning_rate": 1.605911330049261e-05, + "loss": 0.8433, + "step": 163 + }, + { + "epoch": 0.02, + "learning_rate": 1.6157635467980298e-05, + "loss": 0.877, + "step": 164 + }, + { + "epoch": 0.02, + "learning_rate": 1.625615763546798e-05, + "loss": 0.8423, + "step": 165 + }, + { + "epoch": 0.02, + "learning_rate": 1.6354679802955667e-05, + "loss": 0.8159, + "step": 166 + }, + { + "epoch": 0.02, + "learning_rate": 1.645320197044335e-05, + "loss": 0.855, + "step": 167 + }, + { + "epoch": 0.02, + "learning_rate": 1.6551724137931037e-05, + "loss": 0.8423, + "step": 168 + }, + { + "epoch": 0.02, + "learning_rate": 1.665024630541872e-05, + "loss": 0.9058, + "step": 169 + }, + { + "epoch": 0.03, + "learning_rate": 1.6748768472906406e-05, + "loss": 0.9023, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 1.684729064039409e-05, + "loss": 0.8828, + "step": 171 + }, + { + "epoch": 0.03, + "learning_rate": 1.6945812807881776e-05, + "loss": 0.8281, + "step": 172 + }, + { + "epoch": 0.03, + "learning_rate": 1.704433497536946e-05, + "loss": 0.8481, + "step": 173 + }, + { + "epoch": 0.03, + "learning_rate": 1.7142857142857142e-05, + "loss": 0.8569, + "step": 174 + }, + { + "epoch": 0.03, + "learning_rate": 1.7241379310344828e-05, + "loss": 0.9287, + "step": 175 + }, + { + "epoch": 0.03, + "learning_rate": 1.7339901477832515e-05, + "loss": 0.8335, + "step": 176 + }, + { + "epoch": 0.03, + "learning_rate": 1.7438423645320198e-05, + "loss": 0.8232, + "step": 177 + }, + { + "epoch": 0.03, + "learning_rate": 1.7536945812807884e-05, + "loss": 0.877, + "step": 178 + }, + { + "epoch": 0.03, + "learning_rate": 1.7635467980295567e-05, + "loss": 0.8188, + "step": 179 + }, + { + "epoch": 0.03, + "learning_rate": 1.7733990147783254e-05, + "loss": 0.8628, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 1.7832512315270937e-05, + "loss": 0.8096, + "step": 181 + }, + { + "epoch": 0.03, + "learning_rate": 1.7931034482758623e-05, + "loss": 0.8589, + "step": 182 + }, + { + "epoch": 0.03, + "learning_rate": 1.8029556650246306e-05, + "loss": 0.8477, + "step": 183 + }, + { + "epoch": 0.03, + "learning_rate": 1.8128078817733993e-05, + "loss": 0.8384, + "step": 184 + }, + { + "epoch": 0.03, + "learning_rate": 1.8226600985221676e-05, + "loss": 0.9077, + "step": 185 + }, + { + "epoch": 0.03, + "learning_rate": 1.8325123152709362e-05, + "loss": 0.8223, + "step": 186 + }, + { + "epoch": 0.03, + "learning_rate": 1.8423645320197045e-05, + "loss": 0.8306, + "step": 187 + }, + { + "epoch": 0.03, + "learning_rate": 1.852216748768473e-05, + "loss": 0.9048, + "step": 188 + }, + { + "epoch": 0.03, + "learning_rate": 1.8620689655172415e-05, + "loss": 0.8608, + "step": 189 + }, + { + "epoch": 0.03, + "learning_rate": 1.8719211822660098e-05, + "loss": 0.8672, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 1.8817733990147784e-05, + "loss": 0.7964, + "step": 191 + }, + { + "epoch": 0.03, + "learning_rate": 1.8916256157635468e-05, + "loss": 0.8276, + "step": 192 + }, + { + "epoch": 0.03, + "learning_rate": 1.9014778325123154e-05, + "loss": 0.8428, + "step": 193 + }, + { + "epoch": 0.03, + "learning_rate": 1.911330049261084e-05, + "loss": 0.832, + "step": 194 + }, + { + "epoch": 0.03, + "learning_rate": 1.9211822660098524e-05, + "loss": 0.9175, + "step": 195 + }, + { + "epoch": 0.03, + "learning_rate": 1.931034482758621e-05, + "loss": 0.7827, + "step": 196 + }, + { + "epoch": 0.03, + "learning_rate": 1.9408866995073893e-05, + "loss": 0.8945, + "step": 197 + }, + { + "epoch": 0.03, + "learning_rate": 1.950738916256158e-05, + "loss": 0.9243, + "step": 198 + }, + { + "epoch": 0.03, + "learning_rate": 1.9605911330049263e-05, + "loss": 0.8804, + "step": 199 + }, + { + "epoch": 0.03, + "learning_rate": 1.970443349753695e-05, + "loss": 0.9077, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 1.9802955665024632e-05, + "loss": 0.8647, + "step": 201 + }, + { + "epoch": 0.03, + "learning_rate": 1.990147783251232e-05, + "loss": 0.9092, + "step": 202 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 0.9106, + "step": 203 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999998852917962e-05, + "loss": 0.9341, + "step": 204 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999995411672108e-05, + "loss": 0.894, + "step": 205 + }, + { + "epoch": 0.03, + "learning_rate": 1.999998967626323e-05, + "loss": 0.8604, + "step": 206 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999981646692643e-05, + "loss": 0.8872, + "step": 207 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999971322962186e-05, + "loss": 0.895, + "step": 208 + }, + { + "epoch": 0.03, + "learning_rate": 1.999995870507423e-05, + "loss": 0.8555, + "step": 209 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999943793031672e-05, + "loss": 0.8506, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 1.999992658683793e-05, + "loss": 0.8882, + "step": 211 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999907086496952e-05, + "loss": 0.8418, + "step": 212 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999885292013213e-05, + "loss": 0.8779, + "step": 213 + }, + { + "epoch": 0.03, + "learning_rate": 1.999986120339171e-05, + "loss": 0.8315, + "step": 214 + }, + { + "epoch": 0.03, + "learning_rate": 1.999983482063797e-05, + "loss": 0.7949, + "step": 215 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999806143758053e-05, + "loss": 0.9224, + "step": 216 + }, + { + "epoch": 0.03, + "learning_rate": 1.999977517275853e-05, + "loss": 0.8643, + "step": 217 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999741907646506e-05, + "loss": 0.8296, + "step": 218 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999706348429616e-05, + "loss": 0.8276, + "step": 219 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999668495116016e-05, + "loss": 0.7744, + "step": 220 + }, + { + "epoch": 0.03, + "learning_rate": 1.999962834771439e-05, + "loss": 0.8706, + "step": 221 + }, + { + "epoch": 0.03, + "learning_rate": 1.999958590623395e-05, + "loss": 0.833, + "step": 222 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999541170684433e-05, + "loss": 0.8477, + "step": 223 + }, + { + "epoch": 0.03, + "learning_rate": 1.99994941410761e-05, + "loss": 0.8784, + "step": 224 + }, + { + "epoch": 0.03, + "learning_rate": 1.999944481741974e-05, + "loss": 0.8081, + "step": 225 + }, + { + "epoch": 0.03, + "learning_rate": 1.999939319972667e-05, + "loss": 0.897, + "step": 226 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999339288008736e-05, + "loss": 0.8481, + "step": 227 + }, + { + "epoch": 0.03, + "learning_rate": 1.99992830822783e-05, + "loss": 0.9092, + "step": 228 + }, + { + "epoch": 0.03, + "learning_rate": 1.999922458254826e-05, + "loss": 0.8784, + "step": 229 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999163788832035e-05, + "loss": 0.8774, + "step": 230 + }, + { + "epoch": 0.03, + "learning_rate": 1.999910070114357e-05, + "loss": 0.9155, + "step": 231 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999035319497343e-05, + "loss": 0.8872, + "step": 232 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998967643908354e-05, + "loss": 0.8589, + "step": 233 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998897674392123e-05, + "loss": 0.9795, + "step": 234 + }, + { + "epoch": 0.03, + "learning_rate": 1.9998825410964706e-05, + "loss": 0.8188, + "step": 235 + }, + { + "epoch": 0.03, + "learning_rate": 1.999875085364268e-05, + "loss": 0.8882, + "step": 236 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998674002443156e-05, + "loss": 0.8794, + "step": 237 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998594857383756e-05, + "loss": 0.8618, + "step": 238 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998513418482642e-05, + "loss": 0.8618, + "step": 239 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998429685758495e-05, + "loss": 0.8755, + "step": 240 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998343659230526e-05, + "loss": 0.3245, + "step": 241 + }, + { + "epoch": 0.04, + "learning_rate": 1.999825533891847e-05, + "loss": 0.877, + "step": 242 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998164724842593e-05, + "loss": 0.8506, + "step": 243 + }, + { + "epoch": 0.04, + "learning_rate": 1.999807181702368e-05, + "loss": 0.8394, + "step": 244 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997976615483042e-05, + "loss": 0.2776, + "step": 245 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997879120242527e-05, + "loss": 0.8818, + "step": 246 + }, + { + "epoch": 0.04, + "learning_rate": 1.99977793313245e-05, + "loss": 0.8975, + "step": 247 + }, + { + "epoch": 0.04, + "learning_rate": 1.999767724875185e-05, + "loss": 0.8794, + "step": 248 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997572872548e-05, + "loss": 0.8833, + "step": 249 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997466202736895e-05, + "loss": 0.8857, + "step": 250 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997357239343008e-05, + "loss": 0.8638, + "step": 251 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997245982391335e-05, + "loss": 0.9189, + "step": 252 + }, + { + "epoch": 0.04, + "learning_rate": 1.99971324319074e-05, + "loss": 0.856, + "step": 253 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997016587917256e-05, + "loss": 0.874, + "step": 254 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996898450447476e-05, + "loss": 0.8291, + "step": 255 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996778019525164e-05, + "loss": 0.9268, + "step": 256 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996655295177953e-05, + "loss": 0.8481, + "step": 257 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996530277433993e-05, + "loss": 0.9272, + "step": 258 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996402966321962e-05, + "loss": 0.8418, + "step": 259 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996273361871076e-05, + "loss": 0.3203, + "step": 260 + }, + { + "epoch": 0.04, + "learning_rate": 1.999614146411106e-05, + "loss": 0.9077, + "step": 261 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996007273072183e-05, + "loss": 0.8496, + "step": 262 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995870788785223e-05, + "loss": 0.8013, + "step": 263 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995732011281493e-05, + "loss": 0.8809, + "step": 264 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995590940592833e-05, + "loss": 0.8032, + "step": 265 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995447576751605e-05, + "loss": 0.3406, + "step": 266 + }, + { + "epoch": 0.04, + "learning_rate": 1.99953019197907e-05, + "loss": 0.9082, + "step": 267 + }, + { + "epoch": 0.04, + "learning_rate": 1.999515396974353e-05, + "loss": 0.8906, + "step": 268 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995003726644045e-05, + "loss": 0.8516, + "step": 269 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994851190526712e-05, + "loss": 0.835, + "step": 270 + }, + { + "epoch": 0.04, + "learning_rate": 1.999469636142652e-05, + "loss": 0.8452, + "step": 271 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994539239378988e-05, + "loss": 0.8101, + "step": 272 + }, + { + "epoch": 0.04, + "learning_rate": 1.999437982442017e-05, + "loss": 0.8955, + "step": 273 + }, + { + "epoch": 0.04, + "learning_rate": 1.9994218116586633e-05, + "loss": 0.8916, + "step": 274 + }, + { + "epoch": 0.04, + "learning_rate": 1.999405411591548e-05, + "loss": 0.79, + "step": 275 + }, + { + "epoch": 0.04, + "learning_rate": 1.999388782244433e-05, + "loss": 0.8799, + "step": 276 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993719236211336e-05, + "loss": 0.8794, + "step": 277 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993548357255172e-05, + "loss": 0.8149, + "step": 278 + }, + { + "epoch": 0.04, + "learning_rate": 1.999337518561505e-05, + "loss": 0.8838, + "step": 279 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993199721330684e-05, + "loss": 0.8652, + "step": 280 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993021964442336e-05, + "loss": 0.8853, + "step": 281 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992841914990792e-05, + "loss": 0.9355, + "step": 282 + }, + { + "epoch": 0.04, + "learning_rate": 1.999265957301735e-05, + "loss": 0.8936, + "step": 283 + }, + { + "epoch": 0.04, + "learning_rate": 1.999247493856384e-05, + "loss": 0.2738, + "step": 284 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992288011672628e-05, + "loss": 0.8428, + "step": 285 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992098792386595e-05, + "loss": 0.9155, + "step": 286 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991907280749148e-05, + "loss": 0.8354, + "step": 287 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991713476804227e-05, + "loss": 0.9146, + "step": 288 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991517380596294e-05, + "loss": 0.8823, + "step": 289 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991318992170334e-05, + "loss": 0.8926, + "step": 290 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991118311571862e-05, + "loss": 0.8774, + "step": 291 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990915338846918e-05, + "loss": 0.9404, + "step": 292 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990710074042066e-05, + "loss": 0.8799, + "step": 293 + }, + { + "epoch": 0.04, + "learning_rate": 1.99905025172044e-05, + "loss": 0.3264, + "step": 294 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990292668381527e-05, + "loss": 0.8174, + "step": 295 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990080527621606e-05, + "loss": 0.8843, + "step": 296 + }, + { + "epoch": 0.04, + "learning_rate": 1.998986609497329e-05, + "loss": 0.8203, + "step": 297 + }, + { + "epoch": 0.04, + "learning_rate": 1.9989649370485784e-05, + "loss": 0.8618, + "step": 298 + }, + { + "epoch": 0.04, + "learning_rate": 1.9989430354208803e-05, + "loss": 0.8818, + "step": 299 + }, + { + "epoch": 0.04, + "learning_rate": 1.9989209046192596e-05, + "loss": 0.9019, + "step": 300 + }, + { + "epoch": 0.04, + "learning_rate": 1.998898544648793e-05, + "loss": 0.8589, + "step": 301 + }, + { + "epoch": 0.04, + "learning_rate": 1.9988759555146107e-05, + "loss": 0.897, + "step": 302 + }, + { + "epoch": 0.04, + "learning_rate": 1.998853137221895e-05, + "loss": 0.8447, + "step": 303 + }, + { + "epoch": 0.04, + "learning_rate": 1.9988300897758802e-05, + "loss": 0.8633, + "step": 304 + }, + { + "epoch": 0.05, + "learning_rate": 1.9988068131818545e-05, + "loss": 0.8726, + "step": 305 + }, + { + "epoch": 0.05, + "learning_rate": 1.998783307445158e-05, + "loss": 0.8584, + "step": 306 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987595725711823e-05, + "loss": 0.8198, + "step": 307 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987356085653738e-05, + "loss": 0.9429, + "step": 308 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987114154332292e-05, + "loss": 0.8638, + "step": 309 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986869931802993e-05, + "loss": 0.9043, + "step": 310 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986623418121872e-05, + "loss": 0.8403, + "step": 311 + }, + { + "epoch": 0.05, + "learning_rate": 1.998637461334548e-05, + "loss": 0.896, + "step": 312 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986123517530894e-05, + "loss": 0.8735, + "step": 313 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985870130735726e-05, + "loss": 0.3135, + "step": 314 + }, + { + "epoch": 0.05, + "learning_rate": 1.99856144530181e-05, + "loss": 0.875, + "step": 315 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985356484436682e-05, + "loss": 0.8672, + "step": 316 + }, + { + "epoch": 0.05, + "learning_rate": 1.998509622505065e-05, + "loss": 0.874, + "step": 317 + }, + { + "epoch": 0.05, + "learning_rate": 1.9984833674919707e-05, + "loss": 0.9321, + "step": 318 + }, + { + "epoch": 0.05, + "learning_rate": 1.998456883410409e-05, + "loss": 0.8564, + "step": 319 + }, + { + "epoch": 0.05, + "learning_rate": 1.9984301702664557e-05, + "loss": 0.895, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 1.9984032280662393e-05, + "loss": 0.8999, + "step": 321 + }, + { + "epoch": 0.05, + "learning_rate": 1.998376056815941e-05, + "loss": 0.7822, + "step": 322 + }, + { + "epoch": 0.05, + "learning_rate": 1.998348656521794e-05, + "loss": 0.8813, + "step": 323 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983210271900845e-05, + "loss": 0.9331, + "step": 324 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982931688271508e-05, + "loss": 0.8721, + "step": 325 + }, + { + "epoch": 0.05, + "learning_rate": 1.998265081439385e-05, + "loss": 0.8599, + "step": 326 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982367650332297e-05, + "loss": 0.8716, + "step": 327 + }, + { + "epoch": 0.05, + "learning_rate": 1.998208219615182e-05, + "loss": 0.8955, + "step": 328 + }, + { + "epoch": 0.05, + "learning_rate": 1.99817944519179e-05, + "loss": 0.8774, + "step": 329 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981504417696557e-05, + "loss": 0.8403, + "step": 330 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981212093554325e-05, + "loss": 0.8203, + "step": 331 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980917479558268e-05, + "loss": 0.8843, + "step": 332 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980620575775977e-05, + "loss": 0.9019, + "step": 333 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980321382275568e-05, + "loss": 0.8394, + "step": 334 + }, + { + "epoch": 0.05, + "learning_rate": 1.9980019899125674e-05, + "loss": 0.9087, + "step": 335 + }, + { + "epoch": 0.05, + "learning_rate": 1.997971612639547e-05, + "loss": 0.7954, + "step": 336 + }, + { + "epoch": 0.05, + "learning_rate": 1.997941006415464e-05, + "loss": 0.8413, + "step": 337 + }, + { + "epoch": 0.05, + "learning_rate": 1.99791017124734e-05, + "loss": 0.8965, + "step": 338 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978791071422494e-05, + "loss": 0.9009, + "step": 339 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978478141073183e-05, + "loss": 0.8901, + "step": 340 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978162921497268e-05, + "loss": 0.9341, + "step": 341 + }, + { + "epoch": 0.05, + "learning_rate": 1.9977845412767053e-05, + "loss": 0.8477, + "step": 342 + }, + { + "epoch": 0.05, + "learning_rate": 1.9977525614955388e-05, + "loss": 0.9106, + "step": 343 + }, + { + "epoch": 0.05, + "learning_rate": 1.9977203528135635e-05, + "loss": 0.9248, + "step": 344 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976879152381692e-05, + "loss": 0.8491, + "step": 345 + }, + { + "epoch": 0.05, + "learning_rate": 1.9976552487767975e-05, + "loss": 0.8096, + "step": 346 + }, + { + "epoch": 0.05, + "learning_rate": 1.997622353436942e-05, + "loss": 0.8115, + "step": 347 + }, + { + "epoch": 0.05, + "learning_rate": 1.99758922922615e-05, + "loss": 0.9131, + "step": 348 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975558761520205e-05, + "loss": 0.8984, + "step": 349 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975222942222054e-05, + "loss": 0.9204, + "step": 350 + }, + { + "epoch": 0.05, + "learning_rate": 1.997488483444409e-05, + "loss": 0.9321, + "step": 351 + }, + { + "epoch": 0.05, + "learning_rate": 1.997454443826388e-05, + "loss": 0.8989, + "step": 352 + }, + { + "epoch": 0.05, + "learning_rate": 1.997420175375951e-05, + "loss": 0.8818, + "step": 353 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973856781009607e-05, + "loss": 0.8882, + "step": 354 + }, + { + "epoch": 0.05, + "learning_rate": 1.997350952009331e-05, + "loss": 0.873, + "step": 355 + }, + { + "epoch": 0.05, + "learning_rate": 1.9973159971090285e-05, + "loss": 0.3008, + "step": 356 + }, + { + "epoch": 0.05, + "learning_rate": 1.9972808134080726e-05, + "loss": 0.9346, + "step": 357 + }, + { + "epoch": 0.05, + "learning_rate": 1.997245400914535e-05, + "loss": 0.8237, + "step": 358 + }, + { + "epoch": 0.05, + "learning_rate": 1.9972097596365395e-05, + "loss": 0.8608, + "step": 359 + }, + { + "epoch": 0.05, + "learning_rate": 1.9971738895822632e-05, + "loss": 0.8833, + "step": 360 + }, + { + "epoch": 0.05, + "learning_rate": 1.9971377907599354e-05, + "loss": 0.8257, + "step": 361 + }, + { + "epoch": 0.05, + "learning_rate": 1.9971014631778376e-05, + "loss": 0.9102, + "step": 362 + }, + { + "epoch": 0.05, + "learning_rate": 1.997064906844304e-05, + "loss": 0.8496, + "step": 363 + }, + { + "epoch": 0.05, + "learning_rate": 1.9970281217677207e-05, + "loss": 0.8906, + "step": 364 + }, + { + "epoch": 0.05, + "learning_rate": 1.9969911079565274e-05, + "loss": 0.8481, + "step": 365 + }, + { + "epoch": 0.05, + "learning_rate": 1.9969538654192158e-05, + "loss": 0.9092, + "step": 366 + }, + { + "epoch": 0.05, + "learning_rate": 1.9969163941643296e-05, + "loss": 0.8765, + "step": 367 + }, + { + "epoch": 0.05, + "learning_rate": 1.996878694200465e-05, + "loss": 0.9229, + "step": 368 + }, + { + "epoch": 0.05, + "learning_rate": 1.9968407655362716e-05, + "loss": 0.916, + "step": 369 + }, + { + "epoch": 0.05, + "learning_rate": 1.9968026081804508e-05, + "loss": 0.8369, + "step": 370 + }, + { + "epoch": 0.05, + "learning_rate": 1.996764222141756e-05, + "loss": 0.8848, + "step": 371 + }, + { + "epoch": 0.06, + "learning_rate": 1.9967256074289944e-05, + "loss": 0.8867, + "step": 372 + }, + { + "epoch": 0.06, + "learning_rate": 1.996686764051024e-05, + "loss": 0.8291, + "step": 373 + }, + { + "epoch": 0.06, + "learning_rate": 1.9966476920167568e-05, + "loss": 0.9209, + "step": 374 + }, + { + "epoch": 0.06, + "learning_rate": 1.9966083913351563e-05, + "loss": 0.8833, + "step": 375 + }, + { + "epoch": 0.06, + "learning_rate": 1.9965688620152382e-05, + "loss": 0.9253, + "step": 376 + }, + { + "epoch": 0.06, + "learning_rate": 1.996529104066072e-05, + "loss": 0.8623, + "step": 377 + }, + { + "epoch": 0.06, + "learning_rate": 1.9964891174967786e-05, + "loss": 0.9282, + "step": 378 + }, + { + "epoch": 0.06, + "learning_rate": 1.9964489023165313e-05, + "loss": 0.876, + "step": 379 + }, + { + "epoch": 0.06, + "learning_rate": 1.996408458534556e-05, + "loss": 0.8042, + "step": 380 + }, + { + "epoch": 0.06, + "learning_rate": 1.996367786160132e-05, + "loss": 0.8154, + "step": 381 + }, + { + "epoch": 0.06, + "learning_rate": 1.9963268852025893e-05, + "loss": 0.9121, + "step": 382 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962857556713117e-05, + "loss": 0.8438, + "step": 383 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962443975757352e-05, + "loss": 0.8628, + "step": 384 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962028109253474e-05, + "loss": 0.897, + "step": 385 + }, + { + "epoch": 0.06, + "learning_rate": 1.996160995729689e-05, + "loss": 0.8599, + "step": 386 + }, + { + "epoch": 0.06, + "learning_rate": 1.996118951998354e-05, + "loss": 0.8633, + "step": 387 + }, + { + "epoch": 0.06, + "learning_rate": 1.996076679740987e-05, + "loss": 0.8721, + "step": 388 + }, + { + "epoch": 0.06, + "learning_rate": 1.9960341789672863e-05, + "loss": 0.8472, + "step": 389 + }, + { + "epoch": 0.06, + "learning_rate": 1.995991449687002e-05, + "loss": 0.9287, + "step": 390 + }, + { + "epoch": 0.06, + "learning_rate": 1.9959484919099375e-05, + "loss": 0.8594, + "step": 391 + }, + { + "epoch": 0.06, + "learning_rate": 1.9959053056459474e-05, + "loss": 0.9004, + "step": 392 + }, + { + "epoch": 0.06, + "learning_rate": 1.9958618909049398e-05, + "loss": 0.8735, + "step": 393 + }, + { + "epoch": 0.06, + "learning_rate": 1.995818247696874e-05, + "loss": 0.8389, + "step": 394 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957743760317636e-05, + "loss": 0.894, + "step": 395 + }, + { + "epoch": 0.06, + "learning_rate": 1.9957302759196727e-05, + "loss": 0.8857, + "step": 396 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956859473707187e-05, + "loss": 0.833, + "step": 397 + }, + { + "epoch": 0.06, + "learning_rate": 1.9956413903950715e-05, + "loss": 0.875, + "step": 398 + }, + { + "epoch": 0.06, + "learning_rate": 1.995596605002953e-05, + "loss": 0.8403, + "step": 399 + }, + { + "epoch": 0.06, + "learning_rate": 1.995551591204638e-05, + "loss": 0.833, + "step": 400 + }, + { + "epoch": 0.06, + "learning_rate": 1.9955063490104526e-05, + "loss": 0.8447, + "step": 401 + }, + { + "epoch": 0.06, + "learning_rate": 1.995460878430777e-05, + "loss": 0.8628, + "step": 402 + }, + { + "epoch": 0.06, + "learning_rate": 1.9954151794760425e-05, + "loss": 0.8789, + "step": 403 + }, + { + "epoch": 0.06, + "learning_rate": 1.9953692521567334e-05, + "loss": 0.3088, + "step": 404 + }, + { + "epoch": 0.06, + "learning_rate": 1.9953230964833857e-05, + "loss": 0.8081, + "step": 405 + }, + { + "epoch": 0.06, + "learning_rate": 1.9952767124665892e-05, + "loss": 0.8525, + "step": 406 + }, + { + "epoch": 0.06, + "learning_rate": 1.9952301001169842e-05, + "loss": 0.9282, + "step": 407 + }, + { + "epoch": 0.06, + "learning_rate": 1.995183259445265e-05, + "loss": 0.3341, + "step": 408 + }, + { + "epoch": 0.06, + "learning_rate": 1.995136190462177e-05, + "loss": 0.8745, + "step": 409 + }, + { + "epoch": 0.06, + "learning_rate": 1.995088893178519e-05, + "loss": 0.8818, + "step": 410 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950413676051415e-05, + "loss": 0.8931, + "step": 411 + }, + { + "epoch": 0.06, + "learning_rate": 1.9949936137529482e-05, + "loss": 0.8843, + "step": 412 + }, + { + "epoch": 0.06, + "learning_rate": 1.9949456316328942e-05, + "loss": 0.9102, + "step": 413 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948974212559873e-05, + "loss": 0.875, + "step": 414 + }, + { + "epoch": 0.06, + "learning_rate": 1.994848982633288e-05, + "loss": 0.8428, + "step": 415 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948003157759088e-05, + "loss": 0.854, + "step": 416 + }, + { + "epoch": 0.06, + "learning_rate": 1.9947514206950146e-05, + "loss": 0.8784, + "step": 417 + }, + { + "epoch": 0.06, + "learning_rate": 1.994702297401823e-05, + "loss": 0.8359, + "step": 418 + }, + { + "epoch": 0.06, + "learning_rate": 1.9946529459076034e-05, + "loss": 0.8691, + "step": 419 + }, + { + "epoch": 0.06, + "learning_rate": 1.9946033662236778e-05, + "loss": 0.894, + "step": 420 + }, + { + "epoch": 0.06, + "learning_rate": 1.994553558361421e-05, + "loss": 0.8521, + "step": 421 + }, + { + "epoch": 0.06, + "learning_rate": 1.9945035223322593e-05, + "loss": 0.8652, + "step": 422 + }, + { + "epoch": 0.06, + "learning_rate": 1.994453258147672e-05, + "loss": 0.8818, + "step": 423 + }, + { + "epoch": 0.06, + "learning_rate": 1.9944027658191903e-05, + "loss": 0.2729, + "step": 424 + }, + { + "epoch": 0.06, + "learning_rate": 1.9943520453583986e-05, + "loss": 0.874, + "step": 425 + }, + { + "epoch": 0.06, + "learning_rate": 1.9943010967769324e-05, + "loss": 0.2968, + "step": 426 + }, + { + "epoch": 0.06, + "learning_rate": 1.9942499200864805e-05, + "loss": 0.897, + "step": 427 + }, + { + "epoch": 0.06, + "learning_rate": 1.9941985152987834e-05, + "loss": 0.9253, + "step": 428 + }, + { + "epoch": 0.06, + "learning_rate": 1.994146882425634e-05, + "loss": 0.8989, + "step": 429 + }, + { + "epoch": 0.06, + "learning_rate": 1.9940950214788783e-05, + "loss": 0.8911, + "step": 430 + }, + { + "epoch": 0.06, + "learning_rate": 1.9940429324704137e-05, + "loss": 0.8911, + "step": 431 + }, + { + "epoch": 0.06, + "learning_rate": 1.9939906154121902e-05, + "loss": 0.9028, + "step": 432 + }, + { + "epoch": 0.06, + "learning_rate": 1.9939380703162104e-05, + "loss": 0.7974, + "step": 433 + }, + { + "epoch": 0.06, + "learning_rate": 1.993885297194529e-05, + "loss": 0.3132, + "step": 434 + }, + { + "epoch": 0.06, + "learning_rate": 1.9938322960592532e-05, + "loss": 0.9512, + "step": 435 + }, + { + "epoch": 0.06, + "learning_rate": 1.9937790669225417e-05, + "loss": 0.876, + "step": 436 + }, + { + "epoch": 0.06, + "learning_rate": 1.9937256097966068e-05, + "loss": 0.8467, + "step": 437 + }, + { + "epoch": 0.06, + "learning_rate": 1.9936719246937118e-05, + "loss": 0.834, + "step": 438 + }, + { + "epoch": 0.06, + "learning_rate": 1.9936180116261736e-05, + "loss": 0.9072, + "step": 439 + }, + { + "epoch": 0.07, + "learning_rate": 1.9935638706063605e-05, + "loss": 0.8828, + "step": 440 + }, + { + "epoch": 0.07, + "learning_rate": 1.993509501646693e-05, + "loss": 0.9473, + "step": 441 + }, + { + "epoch": 0.07, + "learning_rate": 1.993454904759645e-05, + "loss": 0.9697, + "step": 442 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934000799577414e-05, + "loss": 0.8794, + "step": 443 + }, + { + "epoch": 0.07, + "learning_rate": 1.9933450272535597e-05, + "loss": 0.8843, + "step": 444 + }, + { + "epoch": 0.07, + "learning_rate": 1.99328974665973e-05, + "loss": 0.8369, + "step": 445 + }, + { + "epoch": 0.07, + "learning_rate": 1.993234238188935e-05, + "loss": 0.918, + "step": 446 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931785018539088e-05, + "loss": 0.9048, + "step": 447 + }, + { + "epoch": 0.07, + "learning_rate": 1.9931225376674388e-05, + "loss": 0.9136, + "step": 448 + }, + { + "epoch": 0.07, + "learning_rate": 1.9930663456423633e-05, + "loss": 0.8271, + "step": 449 + }, + { + "epoch": 0.07, + "learning_rate": 1.9930099257915744e-05, + "loss": 0.8931, + "step": 450 + }, + { + "epoch": 0.07, + "learning_rate": 1.9929532781280148e-05, + "loss": 0.8652, + "step": 451 + }, + { + "epoch": 0.07, + "learning_rate": 1.9928964026646816e-05, + "loss": 0.938, + "step": 452 + }, + { + "epoch": 0.07, + "learning_rate": 1.9928392994146228e-05, + "loss": 0.8853, + "step": 453 + }, + { + "epoch": 0.07, + "learning_rate": 1.992781968390938e-05, + "loss": 0.856, + "step": 454 + }, + { + "epoch": 0.07, + "learning_rate": 1.99272440960678e-05, + "loss": 0.9199, + "step": 455 + }, + { + "epoch": 0.07, + "learning_rate": 1.992666623075354e-05, + "loss": 0.8906, + "step": 456 + }, + { + "epoch": 0.07, + "learning_rate": 1.9926086088099176e-05, + "loss": 0.9282, + "step": 457 + }, + { + "epoch": 0.07, + "learning_rate": 1.9925503668237796e-05, + "loss": 0.894, + "step": 458 + }, + { + "epoch": 0.07, + "learning_rate": 1.9924918971303017e-05, + "loss": 0.8809, + "step": 459 + }, + { + "epoch": 0.07, + "learning_rate": 1.9924331997428983e-05, + "loss": 0.8638, + "step": 460 + }, + { + "epoch": 0.07, + "learning_rate": 1.992374274675035e-05, + "loss": 0.9131, + "step": 461 + }, + { + "epoch": 0.07, + "learning_rate": 1.9923151219402308e-05, + "loss": 0.9229, + "step": 462 + }, + { + "epoch": 0.07, + "learning_rate": 1.9922557415520557e-05, + "loss": 0.8472, + "step": 463 + }, + { + "epoch": 0.07, + "learning_rate": 1.9921961335241326e-05, + "loss": 0.8804, + "step": 464 + }, + { + "epoch": 0.07, + "learning_rate": 1.992136297870137e-05, + "loss": 0.9624, + "step": 465 + }, + { + "epoch": 0.07, + "learning_rate": 1.992076234603796e-05, + "loss": 0.8574, + "step": 466 + }, + { + "epoch": 0.07, + "learning_rate": 1.992015943738889e-05, + "loss": 0.9253, + "step": 467 + }, + { + "epoch": 0.07, + "learning_rate": 1.9919554252892473e-05, + "loss": 0.9136, + "step": 468 + }, + { + "epoch": 0.07, + "learning_rate": 1.9918946792687553e-05, + "loss": 0.9229, + "step": 469 + }, + { + "epoch": 0.07, + "learning_rate": 1.9918337056913495e-05, + "loss": 0.8999, + "step": 470 + }, + { + "epoch": 0.07, + "learning_rate": 1.9917725045710176e-05, + "loss": 0.8433, + "step": 471 + }, + { + "epoch": 0.07, + "learning_rate": 1.9917110759218003e-05, + "loss": 0.9307, + "step": 472 + }, + { + "epoch": 0.07, + "learning_rate": 1.9916494197577904e-05, + "loss": 0.3005, + "step": 473 + }, + { + "epoch": 0.07, + "learning_rate": 1.991587536093133e-05, + "loss": 0.8569, + "step": 474 + }, + { + "epoch": 0.07, + "learning_rate": 1.9915254249420245e-05, + "loss": 0.9663, + "step": 475 + }, + { + "epoch": 0.07, + "learning_rate": 1.9914630863187156e-05, + "loss": 0.8447, + "step": 476 + }, + { + "epoch": 0.07, + "learning_rate": 1.9914005202375063e-05, + "loss": 0.8584, + "step": 477 + }, + { + "epoch": 0.07, + "learning_rate": 1.9913377267127515e-05, + "loss": 0.8345, + "step": 478 + }, + { + "epoch": 0.07, + "learning_rate": 1.9912747057588562e-05, + "loss": 0.8604, + "step": 479 + }, + { + "epoch": 0.07, + "learning_rate": 1.991211457390279e-05, + "loss": 0.3125, + "step": 480 + }, + { + "epoch": 0.07, + "learning_rate": 1.9911479816215297e-05, + "loss": 0.8813, + "step": 481 + }, + { + "epoch": 0.07, + "learning_rate": 1.9910842784671706e-05, + "loss": 0.873, + "step": 482 + }, + { + "epoch": 0.07, + "learning_rate": 1.991020347941817e-05, + "loss": 0.8721, + "step": 483 + }, + { + "epoch": 0.07, + "learning_rate": 1.990956190060135e-05, + "loss": 0.8901, + "step": 484 + }, + { + "epoch": 0.07, + "learning_rate": 1.9908918048368435e-05, + "loss": 0.8975, + "step": 485 + }, + { + "epoch": 0.07, + "learning_rate": 1.9908271922867137e-05, + "loss": 0.853, + "step": 486 + }, + { + "epoch": 0.07, + "learning_rate": 1.9907623524245684e-05, + "loss": 0.9331, + "step": 487 + }, + { + "epoch": 0.07, + "learning_rate": 1.990697285265284e-05, + "loss": 0.8735, + "step": 488 + }, + { + "epoch": 0.07, + "learning_rate": 1.9906319908237866e-05, + "loss": 0.8828, + "step": 489 + }, + { + "epoch": 0.07, + "learning_rate": 1.9905664691150567e-05, + "loss": 0.8779, + "step": 490 + }, + { + "epoch": 0.07, + "learning_rate": 1.9905007201541253e-05, + "loss": 0.8818, + "step": 491 + }, + { + "epoch": 0.07, + "learning_rate": 1.9904347439560772e-05, + "loss": 0.8696, + "step": 492 + }, + { + "epoch": 0.07, + "learning_rate": 1.9903685405360478e-05, + "loss": 0.8652, + "step": 493 + }, + { + "epoch": 0.07, + "learning_rate": 1.9903021099092257e-05, + "loss": 0.9502, + "step": 494 + }, + { + "epoch": 0.07, + "learning_rate": 1.9902354520908507e-05, + "loss": 0.8569, + "step": 495 + }, + { + "epoch": 0.07, + "learning_rate": 1.9901685670962155e-05, + "loss": 0.9121, + "step": 496 + }, + { + "epoch": 0.07, + "learning_rate": 1.9901014549406647e-05, + "loss": 0.9053, + "step": 497 + }, + { + "epoch": 0.07, + "learning_rate": 1.9900341156395942e-05, + "loss": 0.8018, + "step": 498 + }, + { + "epoch": 0.07, + "learning_rate": 1.9899665492084536e-05, + "loss": 0.8638, + "step": 499 + }, + { + "epoch": 0.07, + "learning_rate": 1.9898987556627435e-05, + "loss": 0.9219, + "step": 500 + }, + { + "epoch": 0.07, + "learning_rate": 1.989830735018017e-05, + "loss": 0.8301, + "step": 501 + }, + { + "epoch": 0.07, + "learning_rate": 1.9897624872898785e-05, + "loss": 0.8428, + "step": 502 + }, + { + "epoch": 0.07, + "learning_rate": 1.9896940124939862e-05, + "loss": 0.8438, + "step": 503 + }, + { + "epoch": 0.07, + "learning_rate": 1.9896253106460484e-05, + "loss": 0.894, + "step": 504 + }, + { + "epoch": 0.07, + "learning_rate": 1.9895563817618266e-05, + "loss": 0.8892, + "step": 505 + }, + { + "epoch": 0.07, + "learning_rate": 1.9894872258571344e-05, + "loss": 0.9521, + "step": 506 + }, + { + "epoch": 0.07, + "learning_rate": 1.9894178429478376e-05, + "loss": 0.8672, + "step": 507 + }, + { + "epoch": 0.08, + "learning_rate": 1.9893482330498533e-05, + "loss": 0.8945, + "step": 508 + }, + { + "epoch": 0.08, + "learning_rate": 1.9892783961791516e-05, + "loss": 0.8926, + "step": 509 + }, + { + "epoch": 0.08, + "learning_rate": 1.9892083323517535e-05, + "loss": 0.8115, + "step": 510 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891380415837333e-05, + "loss": 0.8906, + "step": 511 + }, + { + "epoch": 0.08, + "learning_rate": 1.9890675238912172e-05, + "loss": 0.9106, + "step": 512 + }, + { + "epoch": 0.08, + "learning_rate": 1.9889967792903822e-05, + "loss": 0.8711, + "step": 513 + }, + { + "epoch": 0.08, + "learning_rate": 1.9889258077974588e-05, + "loss": 0.9136, + "step": 514 + }, + { + "epoch": 0.08, + "learning_rate": 1.9888546094287293e-05, + "loss": 0.8271, + "step": 515 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887831842005276e-05, + "loss": 0.8555, + "step": 516 + }, + { + "epoch": 0.08, + "learning_rate": 1.9887115321292393e-05, + "loss": 0.9297, + "step": 517 + }, + { + "epoch": 0.08, + "learning_rate": 1.9886396532313033e-05, + "loss": 0.9194, + "step": 518 + }, + { + "epoch": 0.08, + "learning_rate": 1.9885675475232094e-05, + "loss": 0.8867, + "step": 519 + }, + { + "epoch": 0.08, + "learning_rate": 1.9884952150214997e-05, + "loss": 0.9463, + "step": 520 + }, + { + "epoch": 0.08, + "learning_rate": 1.9884226557427686e-05, + "loss": 0.8843, + "step": 521 + }, + { + "epoch": 0.08, + "learning_rate": 1.9883498697036624e-05, + "loss": 0.7993, + "step": 522 + }, + { + "epoch": 0.08, + "learning_rate": 1.9882768569208798e-05, + "loss": 0.9111, + "step": 523 + }, + { + "epoch": 0.08, + "learning_rate": 1.9882036174111707e-05, + "loss": 0.3484, + "step": 524 + }, + { + "epoch": 0.08, + "learning_rate": 1.9881301511913372e-05, + "loss": 0.8433, + "step": 525 + }, + { + "epoch": 0.08, + "learning_rate": 1.9880564582782346e-05, + "loss": 0.8638, + "step": 526 + }, + { + "epoch": 0.08, + "learning_rate": 1.987982538688768e-05, + "loss": 0.8477, + "step": 527 + }, + { + "epoch": 0.08, + "learning_rate": 1.987908392439897e-05, + "loss": 0.9292, + "step": 528 + }, + { + "epoch": 0.08, + "learning_rate": 1.987834019548631e-05, + "loss": 0.9355, + "step": 529 + }, + { + "epoch": 0.08, + "learning_rate": 1.987759420032033e-05, + "loss": 0.8213, + "step": 530 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876845939072166e-05, + "loss": 0.8398, + "step": 531 + }, + { + "epoch": 0.08, + "learning_rate": 1.9876095411913492e-05, + "loss": 0.9229, + "step": 532 + }, + { + "epoch": 0.08, + "learning_rate": 1.9875342619016483e-05, + "loss": 0.9331, + "step": 533 + }, + { + "epoch": 0.08, + "learning_rate": 1.9874587560553844e-05, + "loss": 0.8818, + "step": 534 + }, + { + "epoch": 0.08, + "learning_rate": 1.9873830236698798e-05, + "loss": 0.8828, + "step": 535 + }, + { + "epoch": 0.08, + "learning_rate": 1.987307064762509e-05, + "loss": 0.9038, + "step": 536 + }, + { + "epoch": 0.08, + "learning_rate": 1.9872308793506982e-05, + "loss": 0.8535, + "step": 537 + }, + { + "epoch": 0.08, + "learning_rate": 1.9871544674519246e-05, + "loss": 0.9277, + "step": 538 + }, + { + "epoch": 0.08, + "learning_rate": 1.9870778290837198e-05, + "loss": 0.9097, + "step": 539 + }, + { + "epoch": 0.08, + "learning_rate": 1.9870009642636652e-05, + "loss": 0.811, + "step": 540 + }, + { + "epoch": 0.08, + "learning_rate": 1.986923873009395e-05, + "loss": 0.8872, + "step": 541 + }, + { + "epoch": 0.08, + "learning_rate": 1.9868465553385946e-05, + "loss": 0.8628, + "step": 542 + }, + { + "epoch": 0.08, + "learning_rate": 1.986769011269003e-05, + "loss": 0.876, + "step": 543 + }, + { + "epoch": 0.08, + "learning_rate": 1.9866912408184094e-05, + "loss": 0.9429, + "step": 544 + }, + { + "epoch": 0.08, + "learning_rate": 1.9866132440046556e-05, + "loss": 0.8887, + "step": 545 + }, + { + "epoch": 0.08, + "learning_rate": 1.9865350208456354e-05, + "loss": 0.8975, + "step": 546 + }, + { + "epoch": 0.08, + "learning_rate": 1.9864565713592946e-05, + "loss": 0.874, + "step": 547 + }, + { + "epoch": 0.08, + "learning_rate": 1.9863778955636308e-05, + "loss": 0.9092, + "step": 548 + }, + { + "epoch": 0.08, + "learning_rate": 1.9862989934766935e-05, + "loss": 0.8818, + "step": 549 + }, + { + "epoch": 0.08, + "learning_rate": 1.986219865116584e-05, + "loss": 0.8735, + "step": 550 + }, + { + "epoch": 0.08, + "learning_rate": 1.9861405105014558e-05, + "loss": 0.8198, + "step": 551 + }, + { + "epoch": 0.08, + "learning_rate": 1.986060929649514e-05, + "loss": 0.8198, + "step": 552 + }, + { + "epoch": 0.08, + "learning_rate": 1.9859811225790164e-05, + "loss": 0.8901, + "step": 553 + }, + { + "epoch": 0.08, + "learning_rate": 1.9859010893082708e-05, + "loss": 0.9009, + "step": 554 + }, + { + "epoch": 0.08, + "learning_rate": 1.9858208298556394e-05, + "loss": 0.8735, + "step": 555 + }, + { + "epoch": 0.08, + "learning_rate": 1.9857403442395343e-05, + "loss": 0.8892, + "step": 556 + }, + { + "epoch": 0.08, + "learning_rate": 1.98565963247842e-05, + "loss": 0.8481, + "step": 557 + }, + { + "epoch": 0.08, + "learning_rate": 1.9855786945908142e-05, + "loss": 0.8735, + "step": 558 + }, + { + "epoch": 0.08, + "learning_rate": 1.9854975305952844e-05, + "loss": 0.7729, + "step": 559 + }, + { + "epoch": 0.08, + "learning_rate": 1.9854161405104512e-05, + "loss": 0.8389, + "step": 560 + }, + { + "epoch": 0.08, + "learning_rate": 1.9853345243549865e-05, + "loss": 0.9175, + "step": 561 + }, + { + "epoch": 0.08, + "learning_rate": 1.9852526821476155e-05, + "loss": 0.9155, + "step": 562 + }, + { + "epoch": 0.08, + "learning_rate": 1.985170613907113e-05, + "loss": 0.8438, + "step": 563 + }, + { + "epoch": 0.08, + "learning_rate": 1.9850883196523072e-05, + "loss": 0.8301, + "step": 564 + }, + { + "epoch": 0.08, + "learning_rate": 1.9850057994020777e-05, + "loss": 0.9258, + "step": 565 + }, + { + "epoch": 0.08, + "learning_rate": 1.984923053175356e-05, + "loss": 0.916, + "step": 566 + }, + { + "epoch": 0.08, + "learning_rate": 1.9848400809911255e-05, + "loss": 0.8818, + "step": 567 + }, + { + "epoch": 0.08, + "learning_rate": 1.9847568828684217e-05, + "loss": 0.3037, + "step": 568 + }, + { + "epoch": 0.08, + "learning_rate": 1.9846734588263312e-05, + "loss": 0.8877, + "step": 569 + }, + { + "epoch": 0.08, + "learning_rate": 1.9845898088839926e-05, + "loss": 0.3188, + "step": 570 + }, + { + "epoch": 0.08, + "learning_rate": 1.9845059330605974e-05, + "loss": 0.8843, + "step": 571 + }, + { + "epoch": 0.08, + "learning_rate": 1.984421831375387e-05, + "loss": 0.8281, + "step": 572 + }, + { + "epoch": 0.08, + "learning_rate": 1.9843375038476565e-05, + "loss": 0.8164, + "step": 573 + }, + { + "epoch": 0.08, + "learning_rate": 1.9842529504967522e-05, + "loss": 0.8784, + "step": 574 + }, + { + "epoch": 0.09, + "learning_rate": 1.984168171342071e-05, + "loss": 0.9058, + "step": 575 + }, + { + "epoch": 0.09, + "learning_rate": 1.984083166403064e-05, + "loss": 0.8535, + "step": 576 + }, + { + "epoch": 0.09, + "learning_rate": 1.9839979356992318e-05, + "loss": 0.8252, + "step": 577 + }, + { + "epoch": 0.09, + "learning_rate": 1.9839124792501275e-05, + "loss": 0.8999, + "step": 578 + }, + { + "epoch": 0.09, + "learning_rate": 1.983826797075357e-05, + "loss": 0.8921, + "step": 579 + }, + { + "epoch": 0.09, + "learning_rate": 1.9837408891945768e-05, + "loss": 0.8921, + "step": 580 + }, + { + "epoch": 0.09, + "learning_rate": 1.9836547556274954e-05, + "loss": 0.8867, + "step": 581 + }, + { + "epoch": 0.09, + "learning_rate": 1.9835683963938734e-05, + "loss": 0.8662, + "step": 582 + }, + { + "epoch": 0.09, + "learning_rate": 1.9834818115135235e-05, + "loss": 0.894, + "step": 583 + }, + { + "epoch": 0.09, + "learning_rate": 1.9833950010063087e-05, + "loss": 0.8921, + "step": 584 + }, + { + "epoch": 0.09, + "learning_rate": 1.983307964892146e-05, + "loss": 0.8306, + "step": 585 + }, + { + "epoch": 0.09, + "learning_rate": 1.9832207031910017e-05, + "loss": 0.8198, + "step": 586 + }, + { + "epoch": 0.09, + "learning_rate": 1.983133215922896e-05, + "loss": 0.8379, + "step": 587 + }, + { + "epoch": 0.09, + "learning_rate": 1.9830455031078994e-05, + "loss": 0.8667, + "step": 588 + }, + { + "epoch": 0.09, + "learning_rate": 1.9829575647661343e-05, + "loss": 0.9136, + "step": 589 + }, + { + "epoch": 0.09, + "learning_rate": 1.9828694009177764e-05, + "loss": 0.9121, + "step": 590 + }, + { + "epoch": 0.09, + "learning_rate": 1.982781011583051e-05, + "loss": 0.8794, + "step": 591 + }, + { + "epoch": 0.09, + "learning_rate": 1.982692396782236e-05, + "loss": 0.894, + "step": 592 + }, + { + "epoch": 0.09, + "learning_rate": 1.9826035565356615e-05, + "loss": 0.8396, + "step": 593 + }, + { + "epoch": 0.09, + "learning_rate": 1.982514490863709e-05, + "loss": 0.8789, + "step": 594 + }, + { + "epoch": 0.09, + "learning_rate": 1.982425199786811e-05, + "loss": 0.8662, + "step": 595 + }, + { + "epoch": 0.09, + "learning_rate": 1.9823356833254534e-05, + "loss": 0.8501, + "step": 596 + }, + { + "epoch": 0.09, + "learning_rate": 1.9822459415001712e-05, + "loss": 0.8521, + "step": 597 + }, + { + "epoch": 0.09, + "learning_rate": 1.9821559743315543e-05, + "loss": 0.9131, + "step": 598 + }, + { + "epoch": 0.09, + "learning_rate": 1.9820657818402414e-05, + "loss": 0.8848, + "step": 599 + }, + { + "epoch": 0.09, + "learning_rate": 1.981975364046925e-05, + "loss": 0.8608, + "step": 600 + }, + { + "epoch": 0.09, + "learning_rate": 1.9818847209723477e-05, + "loss": 0.9194, + "step": 601 + }, + { + "epoch": 0.09, + "learning_rate": 1.981793852637305e-05, + "loss": 0.791, + "step": 602 + }, + { + "epoch": 0.09, + "learning_rate": 1.9817027590626436e-05, + "loss": 0.7981, + "step": 603 + }, + { + "epoch": 0.09, + "learning_rate": 1.9816114402692612e-05, + "loss": 0.8911, + "step": 604 + }, + { + "epoch": 0.09, + "learning_rate": 1.9815198962781088e-05, + "loss": 0.8965, + "step": 605 + }, + { + "epoch": 0.09, + "learning_rate": 1.9814281271101876e-05, + "loss": 0.8936, + "step": 606 + }, + { + "epoch": 0.09, + "learning_rate": 1.9813361327865507e-05, + "loss": 0.9097, + "step": 607 + }, + { + "epoch": 0.09, + "learning_rate": 1.9812439133283033e-05, + "loss": 0.8638, + "step": 608 + }, + { + "epoch": 0.09, + "learning_rate": 1.981151468756603e-05, + "loss": 0.8765, + "step": 609 + }, + { + "epoch": 0.09, + "learning_rate": 1.9810587990926563e-05, + "loss": 0.876, + "step": 610 + }, + { + "epoch": 0.09, + "learning_rate": 1.9809659043577242e-05, + "loss": 0.8315, + "step": 611 + }, + { + "epoch": 0.09, + "learning_rate": 1.9808727845731182e-05, + "loss": 0.8418, + "step": 612 + }, + { + "epoch": 0.09, + "learning_rate": 1.980779439760202e-05, + "loss": 0.8745, + "step": 613 + }, + { + "epoch": 0.09, + "learning_rate": 1.9806858699403894e-05, + "loss": 0.8584, + "step": 614 + }, + { + "epoch": 0.09, + "learning_rate": 1.9805920751351476e-05, + "loss": 0.9448, + "step": 615 + }, + { + "epoch": 0.09, + "learning_rate": 1.980498055365994e-05, + "loss": 0.8896, + "step": 616 + }, + { + "epoch": 0.09, + "learning_rate": 1.980403810654499e-05, + "loss": 0.8838, + "step": 617 + }, + { + "epoch": 0.09, + "learning_rate": 1.9803093410222838e-05, + "loss": 0.8501, + "step": 618 + }, + { + "epoch": 0.09, + "learning_rate": 1.980214646491021e-05, + "loss": 0.8457, + "step": 619 + }, + { + "epoch": 0.09, + "learning_rate": 1.9801197270824346e-05, + "loss": 0.9307, + "step": 620 + }, + { + "epoch": 0.09, + "learning_rate": 1.9800245828183015e-05, + "loss": 0.8799, + "step": 621 + }, + { + "epoch": 0.09, + "learning_rate": 1.9799292137204494e-05, + "loss": 0.915, + "step": 622 + }, + { + "epoch": 0.09, + "learning_rate": 1.9798336198107567e-05, + "loss": 0.9053, + "step": 623 + }, + { + "epoch": 0.09, + "learning_rate": 1.979737801111155e-05, + "loss": 0.896, + "step": 624 + }, + { + "epoch": 0.09, + "learning_rate": 1.9796417576436264e-05, + "loss": 0.8853, + "step": 625 + }, + { + "epoch": 0.09, + "learning_rate": 1.979545489430205e-05, + "loss": 0.8477, + "step": 626 + }, + { + "epoch": 0.09, + "learning_rate": 1.9794489964929757e-05, + "loss": 0.9175, + "step": 627 + }, + { + "epoch": 0.09, + "learning_rate": 1.979352278854076e-05, + "loss": 0.8984, + "step": 628 + }, + { + "epoch": 0.09, + "learning_rate": 1.979255336535695e-05, + "loss": 0.9541, + "step": 629 + }, + { + "epoch": 0.09, + "learning_rate": 1.9791581695600722e-05, + "loss": 0.9165, + "step": 630 + }, + { + "epoch": 0.09, + "learning_rate": 1.979060777949499e-05, + "loss": 0.9688, + "step": 631 + }, + { + "epoch": 0.09, + "learning_rate": 1.9789631617263198e-05, + "loss": 0.9297, + "step": 632 + }, + { + "epoch": 0.09, + "learning_rate": 1.9788653209129284e-05, + "loss": 0.8818, + "step": 633 + }, + { + "epoch": 0.09, + "learning_rate": 1.9787672555317714e-05, + "loss": 0.8955, + "step": 634 + }, + { + "epoch": 0.09, + "learning_rate": 1.9786689656053467e-05, + "loss": 0.8486, + "step": 635 + }, + { + "epoch": 0.09, + "learning_rate": 1.9785704511562032e-05, + "loss": 0.8447, + "step": 636 + }, + { + "epoch": 0.09, + "learning_rate": 1.9784717122069425e-05, + "loss": 0.8618, + "step": 637 + }, + { + "epoch": 0.09, + "learning_rate": 1.978372748780216e-05, + "loss": 0.9434, + "step": 638 + }, + { + "epoch": 0.09, + "learning_rate": 1.978273560898728e-05, + "loss": 0.8662, + "step": 639 + }, + { + "epoch": 0.09, + "learning_rate": 1.9781741485852338e-05, + "loss": 0.9272, + "step": 640 + }, + { + "epoch": 0.09, + "learning_rate": 1.9780745118625407e-05, + "loss": 0.8687, + "step": 641 + }, + { + "epoch": 0.09, + "learning_rate": 1.977974650753506e-05, + "loss": 0.8857, + "step": 642 + }, + { + "epoch": 0.1, + "learning_rate": 1.9778745652810404e-05, + "loss": 0.8535, + "step": 643 + }, + { + "epoch": 0.1, + "learning_rate": 1.9777742554681044e-05, + "loss": 0.8745, + "step": 644 + }, + { + "epoch": 0.1, + "learning_rate": 1.9776737213377114e-05, + "loss": 0.8745, + "step": 645 + }, + { + "epoch": 0.1, + "learning_rate": 1.9775729629129253e-05, + "loss": 0.3535, + "step": 646 + }, + { + "epoch": 0.1, + "learning_rate": 1.9774719802168615e-05, + "loss": 0.8638, + "step": 647 + }, + { + "epoch": 0.1, + "learning_rate": 1.9773707732726873e-05, + "loss": 0.8047, + "step": 648 + }, + { + "epoch": 0.1, + "learning_rate": 1.9772693421036214e-05, + "loss": 0.8906, + "step": 649 + }, + { + "epoch": 0.1, + "learning_rate": 1.9771676867329334e-05, + "loss": 0.9399, + "step": 650 + }, + { + "epoch": 0.1, + "learning_rate": 1.9770658071839448e-05, + "loss": 0.8291, + "step": 651 + }, + { + "epoch": 0.1, + "learning_rate": 1.9769637034800287e-05, + "loss": 0.8838, + "step": 652 + }, + { + "epoch": 0.1, + "learning_rate": 1.9768613756446092e-05, + "loss": 0.9004, + "step": 653 + }, + { + "epoch": 0.1, + "learning_rate": 1.976758823701162e-05, + "loss": 0.9263, + "step": 654 + }, + { + "epoch": 0.1, + "learning_rate": 1.976656047673214e-05, + "loss": 0.9531, + "step": 655 + }, + { + "epoch": 0.1, + "learning_rate": 1.9765530475843443e-05, + "loss": 0.8569, + "step": 656 + }, + { + "epoch": 0.1, + "learning_rate": 1.9764498234581822e-05, + "loss": 0.8447, + "step": 657 + }, + { + "epoch": 0.1, + "learning_rate": 1.9763463753184092e-05, + "loss": 0.8799, + "step": 658 + }, + { + "epoch": 0.1, + "learning_rate": 1.9762427031887578e-05, + "loss": 0.9268, + "step": 659 + }, + { + "epoch": 0.1, + "learning_rate": 1.9761388070930126e-05, + "loss": 0.9248, + "step": 660 + }, + { + "epoch": 0.1, + "learning_rate": 1.9760346870550086e-05, + "loss": 0.8335, + "step": 661 + }, + { + "epoch": 0.1, + "learning_rate": 1.9759303430986332e-05, + "loss": 0.8936, + "step": 662 + }, + { + "epoch": 0.1, + "learning_rate": 1.975825775247824e-05, + "loss": 0.8442, + "step": 663 + }, + { + "epoch": 0.1, + "learning_rate": 1.9757209835265704e-05, + "loss": 0.9058, + "step": 664 + }, + { + "epoch": 0.1, + "learning_rate": 1.9756159679589143e-05, + "loss": 0.9673, + "step": 665 + }, + { + "epoch": 0.1, + "learning_rate": 1.975510728568947e-05, + "loss": 0.9224, + "step": 666 + }, + { + "epoch": 0.1, + "learning_rate": 1.975405265380813e-05, + "loss": 0.9072, + "step": 667 + }, + { + "epoch": 0.1, + "learning_rate": 1.975299578418707e-05, + "loss": 0.8853, + "step": 668 + }, + { + "epoch": 0.1, + "learning_rate": 1.9751936677068747e-05, + "loss": 0.8892, + "step": 669 + }, + { + "epoch": 0.1, + "learning_rate": 1.9750875332696143e-05, + "loss": 0.8813, + "step": 670 + }, + { + "epoch": 0.1, + "learning_rate": 1.974981175131275e-05, + "loss": 0.8447, + "step": 671 + }, + { + "epoch": 0.1, + "learning_rate": 1.974874593316257e-05, + "loss": 0.8853, + "step": 672 + }, + { + "epoch": 0.1, + "learning_rate": 1.9747677878490116e-05, + "loss": 0.9526, + "step": 673 + }, + { + "epoch": 0.1, + "learning_rate": 1.9746607587540417e-05, + "loss": 0.9614, + "step": 674 + }, + { + "epoch": 0.1, + "learning_rate": 1.9745535060559015e-05, + "loss": 0.9111, + "step": 675 + }, + { + "epoch": 0.1, + "learning_rate": 1.974446029779197e-05, + "loss": 0.8994, + "step": 676 + }, + { + "epoch": 0.1, + "learning_rate": 1.974338329948585e-05, + "loss": 0.9272, + "step": 677 + }, + { + "epoch": 0.1, + "learning_rate": 1.9742304065887732e-05, + "loss": 0.9307, + "step": 678 + }, + { + "epoch": 0.1, + "learning_rate": 1.974122259724521e-05, + "loss": 0.8735, + "step": 679 + }, + { + "epoch": 0.1, + "learning_rate": 1.9740138893806393e-05, + "loss": 0.9033, + "step": 680 + }, + { + "epoch": 0.1, + "learning_rate": 1.97390529558199e-05, + "loss": 0.9072, + "step": 681 + }, + { + "epoch": 0.1, + "learning_rate": 1.9737964783534863e-05, + "loss": 0.8809, + "step": 682 + }, + { + "epoch": 0.1, + "learning_rate": 1.973687437720093e-05, + "loss": 0.873, + "step": 683 + }, + { + "epoch": 0.1, + "learning_rate": 1.973578173706825e-05, + "loss": 0.9004, + "step": 684 + }, + { + "epoch": 0.1, + "learning_rate": 1.9734686863387494e-05, + "loss": 0.9424, + "step": 685 + }, + { + "epoch": 0.1, + "learning_rate": 1.973358975640985e-05, + "loss": 0.8745, + "step": 686 + }, + { + "epoch": 0.1, + "learning_rate": 1.973249041638701e-05, + "loss": 0.9136, + "step": 687 + }, + { + "epoch": 0.1, + "learning_rate": 1.973138884357118e-05, + "loss": 0.8682, + "step": 688 + }, + { + "epoch": 0.1, + "learning_rate": 1.973028503821508e-05, + "loss": 0.9336, + "step": 689 + }, + { + "epoch": 0.1, + "learning_rate": 1.9729179000571937e-05, + "loss": 0.8252, + "step": 690 + }, + { + "epoch": 0.1, + "learning_rate": 1.9728070730895497e-05, + "loss": 0.8691, + "step": 691 + }, + { + "epoch": 0.1, + "learning_rate": 1.9726960229440016e-05, + "loss": 0.8867, + "step": 692 + }, + { + "epoch": 0.1, + "learning_rate": 1.9725847496460256e-05, + "loss": 0.9126, + "step": 693 + }, + { + "epoch": 0.1, + "learning_rate": 1.9724732532211508e-05, + "loss": 0.3218, + "step": 694 + }, + { + "epoch": 0.1, + "learning_rate": 1.9723615336949554e-05, + "loss": 0.8496, + "step": 695 + }, + { + "epoch": 0.1, + "learning_rate": 1.9722495910930693e-05, + "loss": 0.8979, + "step": 696 + }, + { + "epoch": 0.1, + "learning_rate": 1.9721374254411753e-05, + "loss": 0.9097, + "step": 697 + }, + { + "epoch": 0.1, + "learning_rate": 1.9720250367650048e-05, + "loss": 0.9102, + "step": 698 + }, + { + "epoch": 0.1, + "learning_rate": 1.9719124250903422e-05, + "loss": 0.9146, + "step": 699 + }, + { + "epoch": 0.1, + "learning_rate": 1.9717995904430224e-05, + "loss": 0.9004, + "step": 700 + }, + { + "epoch": 0.1, + "learning_rate": 1.9716865328489313e-05, + "loss": 0.8208, + "step": 701 + }, + { + "epoch": 0.1, + "learning_rate": 1.9715732523340065e-05, + "loss": 0.8667, + "step": 702 + }, + { + "epoch": 0.1, + "learning_rate": 1.9714597489242366e-05, + "loss": 0.9048, + "step": 703 + }, + { + "epoch": 0.1, + "learning_rate": 1.9713460226456604e-05, + "loss": 0.9004, + "step": 704 + }, + { + "epoch": 0.1, + "learning_rate": 1.9712320735243686e-05, + "loss": 0.3115, + "step": 705 + }, + { + "epoch": 0.1, + "learning_rate": 1.971117901586504e-05, + "loss": 0.9116, + "step": 706 + }, + { + "epoch": 0.1, + "learning_rate": 1.9710035068582586e-05, + "loss": 0.8735, + "step": 707 + }, + { + "epoch": 0.1, + "learning_rate": 1.9708888893658768e-05, + "loss": 0.8989, + "step": 708 + }, + { + "epoch": 0.1, + "learning_rate": 1.9707740491356535e-05, + "loss": 0.854, + "step": 709 + }, + { + "epoch": 0.1, + "learning_rate": 1.970658986193935e-05, + "loss": 0.8721, + "step": 710 + }, + { + "epoch": 0.11, + "learning_rate": 1.9705437005671188e-05, + "loss": 0.8394, + "step": 711 + }, + { + "epoch": 0.11, + "learning_rate": 1.9704281922816533e-05, + "loss": 0.8735, + "step": 712 + }, + { + "epoch": 0.11, + "learning_rate": 1.9703124613640378e-05, + "loss": 0.939, + "step": 713 + }, + { + "epoch": 0.11, + "learning_rate": 1.970196507840823e-05, + "loss": 0.8755, + "step": 714 + }, + { + "epoch": 0.11, + "learning_rate": 1.9700803317386106e-05, + "loss": 0.9131, + "step": 715 + }, + { + "epoch": 0.11, + "learning_rate": 1.9699639330840532e-05, + "loss": 0.9253, + "step": 716 + }, + { + "epoch": 0.11, + "learning_rate": 1.9698473119038545e-05, + "loss": 0.9028, + "step": 717 + }, + { + "epoch": 0.11, + "learning_rate": 1.9697304682247695e-05, + "loss": 0.9316, + "step": 718 + }, + { + "epoch": 0.11, + "learning_rate": 1.9696134020736036e-05, + "loss": 0.9102, + "step": 719 + }, + { + "epoch": 0.11, + "learning_rate": 1.9694961134772148e-05, + "loss": 0.8848, + "step": 720 + }, + { + "epoch": 0.11, + "learning_rate": 1.9693786024625097e-05, + "loss": 0.8853, + "step": 721 + }, + { + "epoch": 0.11, + "learning_rate": 1.969260869056448e-05, + "loss": 0.9312, + "step": 722 + }, + { + "epoch": 0.11, + "learning_rate": 1.9691429132860396e-05, + "loss": 0.9194, + "step": 723 + }, + { + "epoch": 0.11, + "learning_rate": 1.969024735178345e-05, + "loss": 0.9443, + "step": 724 + }, + { + "epoch": 0.11, + "learning_rate": 1.968906334760477e-05, + "loss": 0.8525, + "step": 725 + }, + { + "epoch": 0.11, + "learning_rate": 1.9687877120595983e-05, + "loss": 0.8745, + "step": 726 + }, + { + "epoch": 0.11, + "learning_rate": 1.9686688671029224e-05, + "loss": 0.9351, + "step": 727 + }, + { + "epoch": 0.11, + "learning_rate": 1.968549799917715e-05, + "loss": 0.918, + "step": 728 + }, + { + "epoch": 0.11, + "learning_rate": 1.968430510531291e-05, + "loss": 0.9229, + "step": 729 + }, + { + "epoch": 0.11, + "learning_rate": 1.968310998971019e-05, + "loss": 0.8677, + "step": 730 + }, + { + "epoch": 0.11, + "learning_rate": 1.9681912652643156e-05, + "loss": 0.8706, + "step": 731 + }, + { + "epoch": 0.11, + "learning_rate": 1.9680713094386503e-05, + "loss": 0.8047, + "step": 732 + }, + { + "epoch": 0.11, + "learning_rate": 1.9679511315215424e-05, + "loss": 0.8257, + "step": 733 + }, + { + "epoch": 0.11, + "learning_rate": 1.967830731540563e-05, + "loss": 0.325, + "step": 734 + }, + { + "epoch": 0.11, + "learning_rate": 1.9677101095233342e-05, + "loss": 0.834, + "step": 735 + }, + { + "epoch": 0.11, + "learning_rate": 1.9675892654975278e-05, + "loss": 0.9321, + "step": 736 + }, + { + "epoch": 0.11, + "learning_rate": 1.9674681994908682e-05, + "loss": 0.8574, + "step": 737 + }, + { + "epoch": 0.11, + "learning_rate": 1.9673469115311297e-05, + "loss": 0.8667, + "step": 738 + }, + { + "epoch": 0.11, + "learning_rate": 1.9672254016461373e-05, + "loss": 0.8521, + "step": 739 + }, + { + "epoch": 0.11, + "learning_rate": 1.967103669863768e-05, + "loss": 0.8657, + "step": 740 + }, + { + "epoch": 0.11, + "learning_rate": 1.966981716211949e-05, + "loss": 0.8252, + "step": 741 + }, + { + "epoch": 0.11, + "learning_rate": 1.966859540718658e-05, + "loss": 0.9727, + "step": 742 + }, + { + "epoch": 0.11, + "learning_rate": 1.9667371434119244e-05, + "loss": 0.9014, + "step": 743 + }, + { + "epoch": 0.11, + "learning_rate": 1.966614524319828e-05, + "loss": 0.8193, + "step": 744 + }, + { + "epoch": 0.11, + "learning_rate": 1.9664916834704995e-05, + "loss": 0.8501, + "step": 745 + }, + { + "epoch": 0.11, + "learning_rate": 1.966368620892121e-05, + "loss": 0.9023, + "step": 746 + }, + { + "epoch": 0.11, + "learning_rate": 1.966245336612925e-05, + "loss": 0.8926, + "step": 747 + }, + { + "epoch": 0.11, + "learning_rate": 1.9661218306611947e-05, + "loss": 0.8398, + "step": 748 + }, + { + "epoch": 0.11, + "learning_rate": 1.9659981030652648e-05, + "loss": 0.9136, + "step": 749 + }, + { + "epoch": 0.11, + "learning_rate": 1.96587415385352e-05, + "loss": 0.8359, + "step": 750 + }, + { + "epoch": 0.11, + "learning_rate": 1.9657499830543964e-05, + "loss": 0.9136, + "step": 751 + }, + { + "epoch": 0.11, + "learning_rate": 1.9656255906963812e-05, + "loss": 0.8589, + "step": 752 + }, + { + "epoch": 0.11, + "learning_rate": 1.965500976808011e-05, + "loss": 0.8989, + "step": 753 + }, + { + "epoch": 0.11, + "learning_rate": 1.9653761414178753e-05, + "loss": 0.8618, + "step": 754 + }, + { + "epoch": 0.11, + "learning_rate": 1.9652510845546133e-05, + "loss": 0.3169, + "step": 755 + }, + { + "epoch": 0.11, + "learning_rate": 1.965125806246915e-05, + "loss": 0.8799, + "step": 756 + }, + { + "epoch": 0.11, + "learning_rate": 1.9650003065235206e-05, + "loss": 0.9062, + "step": 757 + }, + { + "epoch": 0.11, + "learning_rate": 1.9648745854132225e-05, + "loss": 0.8506, + "step": 758 + }, + { + "epoch": 0.11, + "learning_rate": 1.9647486429448635e-05, + "loss": 0.8823, + "step": 759 + }, + { + "epoch": 0.11, + "learning_rate": 1.964622479147336e-05, + "loss": 0.9185, + "step": 760 + }, + { + "epoch": 0.11, + "learning_rate": 1.9644960940495846e-05, + "loss": 0.8311, + "step": 761 + }, + { + "epoch": 0.11, + "learning_rate": 1.964369487680604e-05, + "loss": 0.9004, + "step": 762 + }, + { + "epoch": 0.11, + "learning_rate": 1.9642426600694395e-05, + "loss": 0.8579, + "step": 763 + }, + { + "epoch": 0.11, + "learning_rate": 1.964115611245188e-05, + "loss": 0.8296, + "step": 764 + }, + { + "epoch": 0.11, + "learning_rate": 1.9639883412369962e-05, + "loss": 0.8755, + "step": 765 + }, + { + "epoch": 0.11, + "learning_rate": 1.963860850074062e-05, + "loss": 0.8257, + "step": 766 + }, + { + "epoch": 0.11, + "learning_rate": 1.9637331377856337e-05, + "loss": 0.7979, + "step": 767 + }, + { + "epoch": 0.11, + "learning_rate": 1.9636052044010113e-05, + "loss": 0.8428, + "step": 768 + }, + { + "epoch": 0.11, + "learning_rate": 1.9634770499495443e-05, + "loss": 0.873, + "step": 769 + }, + { + "epoch": 0.11, + "learning_rate": 1.963348674460633e-05, + "loss": 0.9165, + "step": 770 + }, + { + "epoch": 0.11, + "learning_rate": 1.9632200779637302e-05, + "loss": 0.8799, + "step": 771 + }, + { + "epoch": 0.11, + "learning_rate": 1.9630912604883363e-05, + "loss": 0.8999, + "step": 772 + }, + { + "epoch": 0.11, + "learning_rate": 1.9629622220640058e-05, + "loss": 0.8525, + "step": 773 + }, + { + "epoch": 0.11, + "learning_rate": 1.962832962720341e-05, + "loss": 0.873, + "step": 774 + }, + { + "epoch": 0.11, + "learning_rate": 1.9627034824869968e-05, + "loss": 0.8491, + "step": 775 + }, + { + "epoch": 0.11, + "learning_rate": 1.9625737813936777e-05, + "loss": 0.9199, + "step": 776 + }, + { + "epoch": 0.11, + "learning_rate": 1.9624438594701397e-05, + "loss": 0.8726, + "step": 777 + }, + { + "epoch": 0.12, + "learning_rate": 1.9623137167461886e-05, + "loss": 0.8354, + "step": 778 + }, + { + "epoch": 0.12, + "learning_rate": 1.9621833532516813e-05, + "loss": 0.853, + "step": 779 + }, + { + "epoch": 0.12, + "learning_rate": 1.9620527690165258e-05, + "loss": 0.8896, + "step": 780 + }, + { + "epoch": 0.12, + "learning_rate": 1.9619219640706796e-05, + "loss": 0.9053, + "step": 781 + }, + { + "epoch": 0.12, + "learning_rate": 1.961790938444152e-05, + "loss": 0.9043, + "step": 782 + }, + { + "epoch": 0.12, + "learning_rate": 1.9616596921670025e-05, + "loss": 0.8271, + "step": 783 + }, + { + "epoch": 0.12, + "learning_rate": 1.9615282252693407e-05, + "loss": 0.9106, + "step": 784 + }, + { + "epoch": 0.12, + "learning_rate": 1.9613965377813273e-05, + "loss": 0.8628, + "step": 785 + }, + { + "epoch": 0.12, + "learning_rate": 1.9612646297331738e-05, + "loss": 0.8059, + "step": 786 + }, + { + "epoch": 0.12, + "learning_rate": 1.961132501155142e-05, + "loss": 0.8628, + "step": 787 + }, + { + "epoch": 0.12, + "learning_rate": 1.961000152077545e-05, + "loss": 0.9146, + "step": 788 + }, + { + "epoch": 0.12, + "learning_rate": 1.9608675825307442e-05, + "loss": 0.8911, + "step": 789 + }, + { + "epoch": 0.12, + "learning_rate": 1.960734792545155e-05, + "loss": 0.8618, + "step": 790 + }, + { + "epoch": 0.12, + "learning_rate": 1.9606017821512405e-05, + "loss": 0.9229, + "step": 791 + }, + { + "epoch": 0.12, + "learning_rate": 1.9604685513795157e-05, + "loss": 0.8516, + "step": 792 + }, + { + "epoch": 0.12, + "learning_rate": 1.9603351002605465e-05, + "loss": 0.8643, + "step": 793 + }, + { + "epoch": 0.12, + "learning_rate": 1.960201428824948e-05, + "loss": 0.8599, + "step": 794 + }, + { + "epoch": 0.12, + "learning_rate": 1.960067537103387e-05, + "loss": 0.9072, + "step": 795 + }, + { + "epoch": 0.12, + "learning_rate": 1.9599334251265805e-05, + "loss": 0.8799, + "step": 796 + }, + { + "epoch": 0.12, + "learning_rate": 1.959799092925296e-05, + "loss": 0.9243, + "step": 797 + }, + { + "epoch": 0.12, + "learning_rate": 1.9596645405303508e-05, + "loss": 0.3169, + "step": 798 + }, + { + "epoch": 0.12, + "learning_rate": 1.959529767972615e-05, + "loss": 0.9116, + "step": 799 + }, + { + "epoch": 0.12, + "learning_rate": 1.9593947752830057e-05, + "loss": 0.9111, + "step": 800 + }, + { + "epoch": 0.12, + "learning_rate": 1.959259562492494e-05, + "loss": 0.8535, + "step": 801 + }, + { + "epoch": 0.12, + "learning_rate": 1.959124129632099e-05, + "loss": 0.8242, + "step": 802 + }, + { + "epoch": 0.12, + "learning_rate": 1.958988476732892e-05, + "loss": 0.916, + "step": 803 + }, + { + "epoch": 0.12, + "learning_rate": 1.958852603825993e-05, + "loss": 0.9634, + "step": 804 + }, + { + "epoch": 0.12, + "learning_rate": 1.9587165109425746e-05, + "loss": 0.8721, + "step": 805 + }, + { + "epoch": 0.12, + "learning_rate": 1.9585801981138575e-05, + "loss": 0.8682, + "step": 806 + }, + { + "epoch": 0.12, + "learning_rate": 1.9584436653711152e-05, + "loss": 0.8423, + "step": 807 + }, + { + "epoch": 0.12, + "learning_rate": 1.95830691274567e-05, + "loss": 0.9048, + "step": 808 + }, + { + "epoch": 0.12, + "learning_rate": 1.9581699402688956e-05, + "loss": 0.8506, + "step": 809 + }, + { + "epoch": 0.12, + "learning_rate": 1.958032747972215e-05, + "loss": 0.856, + "step": 810 + }, + { + "epoch": 0.12, + "learning_rate": 1.9578953358871032e-05, + "loss": 0.8301, + "step": 811 + }, + { + "epoch": 0.12, + "learning_rate": 1.9577577040450842e-05, + "loss": 0.7944, + "step": 812 + }, + { + "epoch": 0.12, + "learning_rate": 1.9576198524777333e-05, + "loss": 0.8545, + "step": 813 + }, + { + "epoch": 0.12, + "learning_rate": 1.9574817812166758e-05, + "loss": 0.8662, + "step": 814 + }, + { + "epoch": 0.12, + "learning_rate": 1.9573434902935876e-05, + "loss": 0.8779, + "step": 815 + }, + { + "epoch": 0.12, + "learning_rate": 1.9572049797401945e-05, + "loss": 0.8657, + "step": 816 + }, + { + "epoch": 0.12, + "learning_rate": 1.957066249588274e-05, + "loss": 0.9087, + "step": 817 + }, + { + "epoch": 0.12, + "learning_rate": 1.9569272998696517e-05, + "loss": 0.8779, + "step": 818 + }, + { + "epoch": 0.12, + "learning_rate": 1.9567881306162065e-05, + "loss": 0.8892, + "step": 819 + }, + { + "epoch": 0.12, + "learning_rate": 1.9566487418598652e-05, + "loss": 0.8179, + "step": 820 + }, + { + "epoch": 0.12, + "learning_rate": 1.9565091336326055e-05, + "loss": 0.8354, + "step": 821 + }, + { + "epoch": 0.12, + "learning_rate": 1.9563693059664572e-05, + "loss": 0.8818, + "step": 822 + }, + { + "epoch": 0.12, + "learning_rate": 1.9562292588934973e-05, + "loss": 0.9204, + "step": 823 + }, + { + "epoch": 0.12, + "learning_rate": 1.9560889924458566e-05, + "loss": 0.7793, + "step": 824 + }, + { + "epoch": 0.12, + "learning_rate": 1.9559485066557132e-05, + "loss": 0.7905, + "step": 825 + }, + { + "epoch": 0.12, + "learning_rate": 1.9558078015552973e-05, + "loss": 0.8774, + "step": 826 + }, + { + "epoch": 0.12, + "learning_rate": 1.9556668771768894e-05, + "loss": 0.8623, + "step": 827 + }, + { + "epoch": 0.12, + "learning_rate": 1.9555257335528193e-05, + "loss": 0.3352, + "step": 828 + }, + { + "epoch": 0.12, + "learning_rate": 1.9553843707154682e-05, + "loss": 0.874, + "step": 829 + }, + { + "epoch": 0.12, + "learning_rate": 1.9552427886972665e-05, + "loss": 0.8921, + "step": 830 + }, + { + "epoch": 0.12, + "learning_rate": 1.9551009875306955e-05, + "loss": 0.8535, + "step": 831 + }, + { + "epoch": 0.12, + "learning_rate": 1.954958967248287e-05, + "loss": 0.853, + "step": 832 + }, + { + "epoch": 0.12, + "learning_rate": 1.9548167278826224e-05, + "loss": 0.873, + "step": 833 + }, + { + "epoch": 0.12, + "learning_rate": 1.9546742694663343e-05, + "loss": 0.8911, + "step": 834 + }, + { + "epoch": 0.12, + "learning_rate": 1.9545315920321045e-05, + "loss": 0.8223, + "step": 835 + }, + { + "epoch": 0.12, + "learning_rate": 1.954388695612666e-05, + "loss": 0.8408, + "step": 836 + }, + { + "epoch": 0.12, + "learning_rate": 1.954245580240801e-05, + "loss": 0.8628, + "step": 837 + }, + { + "epoch": 0.12, + "learning_rate": 1.954102245949343e-05, + "loss": 0.7507, + "step": 838 + }, + { + "epoch": 0.12, + "learning_rate": 1.953958692771175e-05, + "loss": 0.9194, + "step": 839 + }, + { + "epoch": 0.12, + "learning_rate": 1.9538149207392306e-05, + "loss": 0.8525, + "step": 840 + }, + { + "epoch": 0.12, + "learning_rate": 1.9536709298864937e-05, + "loss": 0.9116, + "step": 841 + }, + { + "epoch": 0.12, + "learning_rate": 1.9535267202459972e-05, + "loss": 0.854, + "step": 842 + }, + { + "epoch": 0.12, + "learning_rate": 1.9533822918508263e-05, + "loss": 0.3035, + "step": 843 + }, + { + "epoch": 0.12, + "learning_rate": 1.9532376447341143e-05, + "loss": 0.8403, + "step": 844 + }, + { + "epoch": 0.12, + "learning_rate": 1.9530927789290467e-05, + "loss": 0.8252, + "step": 845 + }, + { + "epoch": 0.13, + "learning_rate": 1.952947694468857e-05, + "loss": 0.9106, + "step": 846 + }, + { + "epoch": 0.13, + "learning_rate": 1.9528023913868305e-05, + "loss": 0.8462, + "step": 847 + }, + { + "epoch": 0.13, + "learning_rate": 1.952656869716302e-05, + "loss": 0.8784, + "step": 848 + }, + { + "epoch": 0.13, + "learning_rate": 1.9525111294906566e-05, + "loss": 0.8481, + "step": 849 + }, + { + "epoch": 0.13, + "learning_rate": 1.9523651707433295e-05, + "loss": 0.9019, + "step": 850 + }, + { + "epoch": 0.13, + "learning_rate": 1.9522189935078062e-05, + "loss": 0.897, + "step": 851 + }, + { + "epoch": 0.13, + "learning_rate": 1.952072597817622e-05, + "loss": 0.8848, + "step": 852 + }, + { + "epoch": 0.13, + "learning_rate": 1.951925983706362e-05, + "loss": 0.8398, + "step": 853 + }, + { + "epoch": 0.13, + "learning_rate": 1.9517791512076628e-05, + "loss": 0.8428, + "step": 854 + }, + { + "epoch": 0.13, + "learning_rate": 1.9516321003552096e-05, + "loss": 0.8979, + "step": 855 + }, + { + "epoch": 0.13, + "learning_rate": 1.9514848311827383e-05, + "loss": 0.8677, + "step": 856 + }, + { + "epoch": 0.13, + "learning_rate": 1.951337343724035e-05, + "loss": 0.8271, + "step": 857 + }, + { + "epoch": 0.13, + "learning_rate": 1.9511896380129357e-05, + "loss": 0.8496, + "step": 858 + }, + { + "epoch": 0.13, + "learning_rate": 1.9510417140833266e-05, + "loss": 0.8867, + "step": 859 + }, + { + "epoch": 0.13, + "learning_rate": 1.9508935719691438e-05, + "loss": 0.8745, + "step": 860 + }, + { + "epoch": 0.13, + "learning_rate": 1.9507452117043736e-05, + "loss": 0.3181, + "step": 861 + }, + { + "epoch": 0.13, + "learning_rate": 1.950596633323052e-05, + "loss": 0.3424, + "step": 862 + }, + { + "epoch": 0.13, + "learning_rate": 1.9504478368592658e-05, + "loss": 0.8115, + "step": 863 + }, + { + "epoch": 0.13, + "learning_rate": 1.9502988223471506e-05, + "loss": 0.9004, + "step": 864 + }, + { + "epoch": 0.13, + "learning_rate": 1.950149589820894e-05, + "loss": 0.833, + "step": 865 + }, + { + "epoch": 0.13, + "learning_rate": 1.950000139314731e-05, + "loss": 0.8892, + "step": 866 + }, + { + "epoch": 0.13, + "learning_rate": 1.9498504708629486e-05, + "loss": 0.8257, + "step": 867 + }, + { + "epoch": 0.13, + "learning_rate": 1.9497005844998835e-05, + "loss": 0.8652, + "step": 868 + }, + { + "epoch": 0.13, + "learning_rate": 1.949550480259922e-05, + "loss": 0.8301, + "step": 869 + }, + { + "epoch": 0.13, + "learning_rate": 1.9494001581775e-05, + "loss": 0.9253, + "step": 870 + }, + { + "epoch": 0.13, + "learning_rate": 1.949249618287104e-05, + "loss": 0.8687, + "step": 871 + }, + { + "epoch": 0.13, + "learning_rate": 1.949098860623271e-05, + "loss": 0.8389, + "step": 872 + }, + { + "epoch": 0.13, + "learning_rate": 1.9489478852205862e-05, + "loss": 0.8481, + "step": 873 + }, + { + "epoch": 0.13, + "learning_rate": 1.9487966921136865e-05, + "loss": 0.9106, + "step": 874 + }, + { + "epoch": 0.13, + "learning_rate": 1.9486452813372586e-05, + "loss": 0.8066, + "step": 875 + }, + { + "epoch": 0.13, + "learning_rate": 1.9484936529260374e-05, + "loss": 0.8447, + "step": 876 + }, + { + "epoch": 0.13, + "learning_rate": 1.94834180691481e-05, + "loss": 0.8291, + "step": 877 + }, + { + "epoch": 0.13, + "learning_rate": 1.9481897433384115e-05, + "loss": 0.8682, + "step": 878 + }, + { + "epoch": 0.13, + "learning_rate": 1.948037462231728e-05, + "loss": 0.3254, + "step": 879 + }, + { + "epoch": 0.13, + "learning_rate": 1.947884963629696e-05, + "loss": 0.3164, + "step": 880 + }, + { + "epoch": 0.13, + "learning_rate": 1.9477322475673003e-05, + "loss": 0.8208, + "step": 881 + }, + { + "epoch": 0.13, + "learning_rate": 1.947579314079577e-05, + "loss": 0.8408, + "step": 882 + }, + { + "epoch": 0.13, + "learning_rate": 1.947426163201611e-05, + "loss": 0.876, + "step": 883 + }, + { + "epoch": 0.13, + "learning_rate": 1.9472727949685383e-05, + "loss": 0.9121, + "step": 884 + }, + { + "epoch": 0.13, + "learning_rate": 1.9471192094155436e-05, + "loss": 0.8906, + "step": 885 + }, + { + "epoch": 0.13, + "learning_rate": 1.9469654065778622e-05, + "loss": 0.9097, + "step": 886 + }, + { + "epoch": 0.13, + "learning_rate": 1.946811386490779e-05, + "loss": 0.8848, + "step": 887 + }, + { + "epoch": 0.13, + "learning_rate": 1.946657149189629e-05, + "loss": 0.8179, + "step": 888 + }, + { + "epoch": 0.13, + "learning_rate": 1.946502694709796e-05, + "loss": 0.8022, + "step": 889 + }, + { + "epoch": 0.13, + "learning_rate": 1.9463480230867147e-05, + "loss": 0.8833, + "step": 890 + }, + { + "epoch": 0.13, + "learning_rate": 1.9461931343558697e-05, + "loss": 0.854, + "step": 891 + }, + { + "epoch": 0.13, + "learning_rate": 1.9460380285527945e-05, + "loss": 0.8867, + "step": 892 + }, + { + "epoch": 0.13, + "learning_rate": 1.9458827057130734e-05, + "loss": 0.8716, + "step": 893 + }, + { + "epoch": 0.13, + "learning_rate": 1.9457271658723396e-05, + "loss": 0.9043, + "step": 894 + }, + { + "epoch": 0.13, + "learning_rate": 1.9455714090662767e-05, + "loss": 0.3218, + "step": 895 + }, + { + "epoch": 0.13, + "learning_rate": 1.945415435330618e-05, + "loss": 0.8398, + "step": 896 + }, + { + "epoch": 0.13, + "learning_rate": 1.945259244701146e-05, + "loss": 0.7881, + "step": 897 + }, + { + "epoch": 0.13, + "learning_rate": 1.9451028372136936e-05, + "loss": 0.3018, + "step": 898 + }, + { + "epoch": 0.13, + "learning_rate": 1.9449462129041438e-05, + "loss": 0.814, + "step": 899 + }, + { + "epoch": 0.13, + "learning_rate": 1.9447893718084275e-05, + "loss": 0.8506, + "step": 900 + }, + { + "epoch": 0.13, + "learning_rate": 1.944632313962528e-05, + "loss": 0.8462, + "step": 901 + }, + { + "epoch": 0.13, + "learning_rate": 1.9444750394024762e-05, + "loss": 0.8984, + "step": 902 + }, + { + "epoch": 0.13, + "learning_rate": 1.9443175481643536e-05, + "loss": 0.833, + "step": 903 + }, + { + "epoch": 0.13, + "learning_rate": 1.944159840284291e-05, + "loss": 0.9243, + "step": 904 + }, + { + "epoch": 0.13, + "learning_rate": 1.94400191579847e-05, + "loss": 0.7993, + "step": 905 + }, + { + "epoch": 0.13, + "learning_rate": 1.94384377474312e-05, + "loss": 0.7703, + "step": 906 + }, + { + "epoch": 0.13, + "learning_rate": 1.943685417154522e-05, + "loss": 0.8701, + "step": 907 + }, + { + "epoch": 0.13, + "learning_rate": 1.9435268430690056e-05, + "loss": 0.856, + "step": 908 + }, + { + "epoch": 0.13, + "learning_rate": 1.94336805252295e-05, + "loss": 0.9292, + "step": 909 + }, + { + "epoch": 0.13, + "learning_rate": 1.9432090455527847e-05, + "loss": 0.897, + "step": 910 + }, + { + "epoch": 0.13, + "learning_rate": 1.9430498221949884e-05, + "loss": 0.855, + "step": 911 + }, + { + "epoch": 0.13, + "learning_rate": 1.9428903824860895e-05, + "loss": 0.8613, + "step": 912 + }, + { + "epoch": 0.14, + "learning_rate": 1.942730726462666e-05, + "loss": 0.9023, + "step": 913 + }, + { + "epoch": 0.14, + "learning_rate": 1.9425708541613457e-05, + "loss": 0.8921, + "step": 914 + }, + { + "epoch": 0.14, + "learning_rate": 1.942410765618806e-05, + "loss": 0.8423, + "step": 915 + }, + { + "epoch": 0.14, + "learning_rate": 1.9422504608717737e-05, + "loss": 0.8936, + "step": 916 + }, + { + "epoch": 0.14, + "learning_rate": 1.942089939957026e-05, + "loss": 0.8364, + "step": 917 + }, + { + "epoch": 0.14, + "learning_rate": 1.9419292029113878e-05, + "loss": 0.8779, + "step": 918 + }, + { + "epoch": 0.14, + "learning_rate": 1.9417682497717356e-05, + "loss": 0.8916, + "step": 919 + }, + { + "epoch": 0.14, + "learning_rate": 1.9416070805749947e-05, + "loss": 0.917, + "step": 920 + }, + { + "epoch": 0.14, + "learning_rate": 1.9414456953581396e-05, + "loss": 0.8301, + "step": 921 + }, + { + "epoch": 0.14, + "learning_rate": 1.941284094158195e-05, + "loss": 0.8755, + "step": 922 + }, + { + "epoch": 0.14, + "learning_rate": 1.941122277012235e-05, + "loss": 0.8994, + "step": 923 + }, + { + "epoch": 0.14, + "learning_rate": 1.940960243957383e-05, + "loss": 0.9229, + "step": 924 + }, + { + "epoch": 0.14, + "learning_rate": 1.940797995030812e-05, + "loss": 0.8818, + "step": 925 + }, + { + "epoch": 0.14, + "learning_rate": 1.940635530269744e-05, + "loss": 0.8975, + "step": 926 + }, + { + "epoch": 0.14, + "learning_rate": 1.9404728497114523e-05, + "loss": 0.8164, + "step": 927 + }, + { + "epoch": 0.14, + "learning_rate": 1.9403099533932573e-05, + "loss": 0.8755, + "step": 928 + }, + { + "epoch": 0.14, + "learning_rate": 1.9401468413525306e-05, + "loss": 0.8726, + "step": 929 + }, + { + "epoch": 0.14, + "learning_rate": 1.939983513626693e-05, + "loss": 0.873, + "step": 930 + }, + { + "epoch": 0.14, + "learning_rate": 1.9398199702532143e-05, + "loss": 0.8594, + "step": 931 + }, + { + "epoch": 0.14, + "learning_rate": 1.9396562112696137e-05, + "loss": 0.8643, + "step": 932 + }, + { + "epoch": 0.14, + "learning_rate": 1.939492236713461e-05, + "loss": 0.8413, + "step": 933 + }, + { + "epoch": 0.14, + "learning_rate": 1.9393280466223737e-05, + "loss": 0.8755, + "step": 934 + }, + { + "epoch": 0.14, + "learning_rate": 1.9391636410340206e-05, + "loss": 0.8242, + "step": 935 + }, + { + "epoch": 0.14, + "learning_rate": 1.9389990199861186e-05, + "loss": 0.8955, + "step": 936 + }, + { + "epoch": 0.14, + "learning_rate": 1.9388341835164346e-05, + "loss": 0.9155, + "step": 937 + }, + { + "epoch": 0.14, + "learning_rate": 1.9386691316627845e-05, + "loss": 0.8555, + "step": 938 + }, + { + "epoch": 0.14, + "learning_rate": 1.938503864463034e-05, + "loss": 0.9229, + "step": 939 + }, + { + "epoch": 0.14, + "learning_rate": 1.938338381955099e-05, + "loss": 0.8804, + "step": 940 + }, + { + "epoch": 0.14, + "learning_rate": 1.9381726841769422e-05, + "loss": 0.9937, + "step": 941 + }, + { + "epoch": 0.14, + "learning_rate": 1.9380067711665787e-05, + "loss": 0.8633, + "step": 942 + }, + { + "epoch": 0.14, + "learning_rate": 1.9378406429620712e-05, + "loss": 0.9023, + "step": 943 + }, + { + "epoch": 0.14, + "learning_rate": 1.9376742996015325e-05, + "loss": 0.8501, + "step": 944 + }, + { + "epoch": 0.14, + "learning_rate": 1.937507741123124e-05, + "loss": 0.8628, + "step": 945 + }, + { + "epoch": 0.14, + "learning_rate": 1.9373409675650573e-05, + "loss": 0.8789, + "step": 946 + }, + { + "epoch": 0.14, + "learning_rate": 1.937173978965593e-05, + "loss": 0.8105, + "step": 947 + }, + { + "epoch": 0.14, + "learning_rate": 1.9370067753630413e-05, + "loss": 0.8306, + "step": 948 + }, + { + "epoch": 0.14, + "learning_rate": 1.9368393567957607e-05, + "loss": 0.9316, + "step": 949 + }, + { + "epoch": 0.14, + "learning_rate": 1.93667172330216e-05, + "loss": 0.9072, + "step": 950 + }, + { + "epoch": 0.14, + "learning_rate": 1.936503874920698e-05, + "loss": 0.7761, + "step": 951 + }, + { + "epoch": 0.14, + "learning_rate": 1.9363358116898804e-05, + "loss": 0.8535, + "step": 952 + }, + { + "epoch": 0.14, + "learning_rate": 1.9361675336482652e-05, + "loss": 0.918, + "step": 953 + }, + { + "epoch": 0.14, + "learning_rate": 1.9359990408344565e-05, + "loss": 0.9087, + "step": 954 + }, + { + "epoch": 0.14, + "learning_rate": 1.9358303332871106e-05, + "loss": 0.8164, + "step": 955 + }, + { + "epoch": 0.14, + "learning_rate": 1.9356614110449314e-05, + "loss": 0.8325, + "step": 956 + }, + { + "epoch": 0.14, + "learning_rate": 1.9354922741466723e-05, + "loss": 0.9204, + "step": 957 + }, + { + "epoch": 0.14, + "learning_rate": 1.935322922631136e-05, + "loss": 0.8687, + "step": 958 + }, + { + "epoch": 0.14, + "learning_rate": 1.9351533565371747e-05, + "loss": 0.8442, + "step": 959 + }, + { + "epoch": 0.14, + "learning_rate": 1.9349835759036898e-05, + "loss": 0.8467, + "step": 960 + }, + { + "epoch": 0.14, + "learning_rate": 1.9348135807696315e-05, + "loss": 0.8462, + "step": 961 + }, + { + "epoch": 0.14, + "learning_rate": 1.934643371173999e-05, + "loss": 0.9224, + "step": 962 + }, + { + "epoch": 0.14, + "learning_rate": 1.9344729471558425e-05, + "loss": 0.8867, + "step": 963 + }, + { + "epoch": 0.14, + "learning_rate": 1.934302308754259e-05, + "loss": 0.8057, + "step": 964 + }, + { + "epoch": 0.14, + "learning_rate": 1.934131456008396e-05, + "loss": 0.337, + "step": 965 + }, + { + "epoch": 0.14, + "learning_rate": 1.9339603889574498e-05, + "loss": 0.8721, + "step": 966 + }, + { + "epoch": 0.14, + "learning_rate": 1.9337891076406667e-05, + "loss": 0.8218, + "step": 967 + }, + { + "epoch": 0.14, + "learning_rate": 1.9336176120973405e-05, + "loss": 0.8545, + "step": 968 + }, + { + "epoch": 0.14, + "learning_rate": 1.9334459023668154e-05, + "loss": 0.8306, + "step": 969 + }, + { + "epoch": 0.14, + "learning_rate": 1.9332739784884844e-05, + "loss": 0.8218, + "step": 970 + }, + { + "epoch": 0.14, + "learning_rate": 1.9331018405017902e-05, + "loss": 0.9946, + "step": 971 + }, + { + "epoch": 0.14, + "learning_rate": 1.9329294884462234e-05, + "loss": 0.9102, + "step": 972 + }, + { + "epoch": 0.14, + "learning_rate": 1.932756922361325e-05, + "loss": 0.8511, + "step": 973 + }, + { + "epoch": 0.14, + "learning_rate": 1.9325841422866833e-05, + "loss": 0.8804, + "step": 974 + }, + { + "epoch": 0.14, + "learning_rate": 1.9324111482619387e-05, + "loss": 0.8711, + "step": 975 + }, + { + "epoch": 0.14, + "learning_rate": 1.9322379403267774e-05, + "loss": 0.9482, + "step": 976 + }, + { + "epoch": 0.14, + "learning_rate": 1.9320645185209364e-05, + "loss": 0.8633, + "step": 977 + }, + { + "epoch": 0.14, + "learning_rate": 1.931890882884202e-05, + "loss": 0.9072, + "step": 978 + }, + { + "epoch": 0.14, + "learning_rate": 1.931717033456409e-05, + "loss": 0.8574, + "step": 979 + }, + { + "epoch": 0.14, + "learning_rate": 1.9315429702774408e-05, + "loss": 0.8306, + "step": 980 + }, + { + "epoch": 0.15, + "learning_rate": 1.9313686933872303e-05, + "loss": 0.8015, + "step": 981 + }, + { + "epoch": 0.15, + "learning_rate": 1.9311942028257604e-05, + "loss": 0.8564, + "step": 982 + }, + { + "epoch": 0.15, + "learning_rate": 1.9310194986330616e-05, + "loss": 0.8315, + "step": 983 + }, + { + "epoch": 0.15, + "learning_rate": 1.9308445808492134e-05, + "loss": 0.8452, + "step": 984 + }, + { + "epoch": 0.15, + "learning_rate": 1.9306694495143453e-05, + "loss": 0.9751, + "step": 985 + }, + { + "epoch": 0.15, + "learning_rate": 1.9304941046686356e-05, + "loss": 0.8525, + "step": 986 + }, + { + "epoch": 0.15, + "learning_rate": 1.9303185463523108e-05, + "loss": 0.8262, + "step": 987 + }, + { + "epoch": 0.15, + "learning_rate": 1.930142774605647e-05, + "loss": 0.8237, + "step": 988 + }, + { + "epoch": 0.15, + "learning_rate": 1.9299667894689695e-05, + "loss": 0.8906, + "step": 989 + }, + { + "epoch": 0.15, + "learning_rate": 1.9297905909826512e-05, + "loss": 0.833, + "step": 990 + }, + { + "epoch": 0.15, + "learning_rate": 1.9296141791871155e-05, + "loss": 0.8257, + "step": 991 + }, + { + "epoch": 0.15, + "learning_rate": 1.9294375541228345e-05, + "loss": 0.8774, + "step": 992 + }, + { + "epoch": 0.15, + "learning_rate": 1.9292607158303283e-05, + "loss": 0.895, + "step": 993 + }, + { + "epoch": 0.15, + "learning_rate": 1.929083664350167e-05, + "loss": 0.8901, + "step": 994 + }, + { + "epoch": 0.15, + "learning_rate": 1.928906399722969e-05, + "loss": 0.8794, + "step": 995 + }, + { + "epoch": 0.15, + "learning_rate": 1.9287289219894013e-05, + "loss": 0.9004, + "step": 996 + }, + { + "epoch": 0.15, + "learning_rate": 1.9285512311901807e-05, + "loss": 0.8369, + "step": 997 + }, + { + "epoch": 0.15, + "learning_rate": 1.928373327366072e-05, + "loss": 0.8975, + "step": 998 + }, + { + "epoch": 0.15, + "learning_rate": 1.9281952105578897e-05, + "loss": 0.9014, + "step": 999 + }, + { + "epoch": 0.15, + "learning_rate": 1.9280168808064964e-05, + "loss": 0.9302, + "step": 1000 + }, + { + "epoch": 0.15, + "learning_rate": 1.9278383381528036e-05, + "loss": 0.874, + "step": 1001 + }, + { + "epoch": 0.15, + "learning_rate": 1.9276595826377728e-05, + "loss": 0.896, + "step": 1002 + }, + { + "epoch": 0.15, + "learning_rate": 1.9274806143024126e-05, + "loss": 0.9053, + "step": 1003 + }, + { + "epoch": 0.15, + "learning_rate": 1.9273014331877814e-05, + "loss": 0.8989, + "step": 1004 + }, + { + "epoch": 0.15, + "learning_rate": 1.9271220393349867e-05, + "loss": 0.8555, + "step": 1005 + }, + { + "epoch": 0.15, + "learning_rate": 1.9269424327851842e-05, + "loss": 0.813, + "step": 1006 + }, + { + "epoch": 0.15, + "learning_rate": 1.9267626135795786e-05, + "loss": 0.8433, + "step": 1007 + }, + { + "epoch": 0.15, + "learning_rate": 1.9265825817594232e-05, + "loss": 0.9062, + "step": 1008 + }, + { + "epoch": 0.15, + "learning_rate": 1.9264023373660203e-05, + "loss": 0.9136, + "step": 1009 + }, + { + "epoch": 0.15, + "learning_rate": 1.926221880440721e-05, + "loss": 0.8633, + "step": 1010 + }, + { + "epoch": 0.15, + "learning_rate": 1.9260412110249248e-05, + "loss": 0.8853, + "step": 1011 + }, + { + "epoch": 0.15, + "learning_rate": 1.925860329160081e-05, + "loss": 0.3635, + "step": 1012 + }, + { + "epoch": 0.15, + "learning_rate": 1.9256792348876862e-05, + "loss": 0.8784, + "step": 1013 + }, + { + "epoch": 0.15, + "learning_rate": 1.9254979282492864e-05, + "loss": 0.9209, + "step": 1014 + }, + { + "epoch": 0.15, + "learning_rate": 1.9253164092864768e-05, + "loss": 0.853, + "step": 1015 + }, + { + "epoch": 0.15, + "learning_rate": 1.9251346780409004e-05, + "loss": 0.9985, + "step": 1016 + }, + { + "epoch": 0.15, + "learning_rate": 1.9249527345542493e-05, + "loss": 0.9028, + "step": 1017 + }, + { + "epoch": 0.15, + "learning_rate": 1.9247705788682646e-05, + "loss": 0.8779, + "step": 1018 + }, + { + "epoch": 0.15, + "learning_rate": 1.9245882110247354e-05, + "loss": 0.8467, + "step": 1019 + }, + { + "epoch": 0.15, + "learning_rate": 1.9244056310655006e-05, + "loss": 0.8765, + "step": 1020 + }, + { + "epoch": 0.15, + "learning_rate": 1.924222839032446e-05, + "loss": 0.9126, + "step": 1021 + }, + { + "epoch": 0.15, + "learning_rate": 1.9240398349675083e-05, + "loss": 0.8237, + "step": 1022 + }, + { + "epoch": 0.15, + "learning_rate": 1.923856618912671e-05, + "loss": 0.9023, + "step": 1023 + }, + { + "epoch": 0.15, + "learning_rate": 1.9236731909099666e-05, + "loss": 1.0049, + "step": 1024 + }, + { + "epoch": 0.15, + "learning_rate": 1.9234895510014767e-05, + "loss": 0.9854, + "step": 1025 + }, + { + "epoch": 0.15, + "learning_rate": 1.9233056992293314e-05, + "loss": 0.8652, + "step": 1026 + }, + { + "epoch": 0.15, + "learning_rate": 1.923121635635709e-05, + "loss": 0.8486, + "step": 1027 + }, + { + "epoch": 0.15, + "learning_rate": 1.9229373602628375e-05, + "loss": 0.8955, + "step": 1028 + }, + { + "epoch": 0.15, + "learning_rate": 1.922752873152992e-05, + "loss": 0.8848, + "step": 1029 + }, + { + "epoch": 0.15, + "learning_rate": 1.922568174348497e-05, + "loss": 0.8257, + "step": 1030 + }, + { + "epoch": 0.15, + "learning_rate": 1.922383263891726e-05, + "loss": 0.9136, + "step": 1031 + }, + { + "epoch": 0.15, + "learning_rate": 1.9221981418250993e-05, + "loss": 0.9258, + "step": 1032 + }, + { + "epoch": 0.15, + "learning_rate": 1.922012808191088e-05, + "loss": 0.8931, + "step": 1033 + }, + { + "epoch": 0.15, + "learning_rate": 1.9218272630322104e-05, + "loss": 0.8813, + "step": 1034 + }, + { + "epoch": 0.15, + "learning_rate": 1.9216415063910332e-05, + "loss": 0.8979, + "step": 1035 + }, + { + "epoch": 0.15, + "learning_rate": 1.9214555383101724e-05, + "loss": 0.8545, + "step": 1036 + }, + { + "epoch": 0.15, + "learning_rate": 1.9212693588322923e-05, + "loss": 0.8496, + "step": 1037 + }, + { + "epoch": 0.15, + "learning_rate": 1.9210829680001047e-05, + "loss": 0.938, + "step": 1038 + }, + { + "epoch": 0.15, + "learning_rate": 1.920896365856372e-05, + "loss": 0.8936, + "step": 1039 + }, + { + "epoch": 0.15, + "learning_rate": 1.9207095524439027e-05, + "loss": 0.8965, + "step": 1040 + }, + { + "epoch": 0.15, + "learning_rate": 1.9205225278055555e-05, + "loss": 0.8938, + "step": 1041 + }, + { + "epoch": 0.15, + "learning_rate": 1.9203352919842366e-05, + "loss": 0.8774, + "step": 1042 + }, + { + "epoch": 0.15, + "learning_rate": 1.9201478450229012e-05, + "loss": 0.8281, + "step": 1043 + }, + { + "epoch": 0.15, + "learning_rate": 1.919960186964552e-05, + "loss": 0.9233, + "step": 1044 + }, + { + "epoch": 0.15, + "learning_rate": 1.919772317852242e-05, + "loss": 0.8364, + "step": 1045 + }, + { + "epoch": 0.15, + "learning_rate": 1.919584237729071e-05, + "loss": 0.9673, + "step": 1046 + }, + { + "epoch": 0.15, + "learning_rate": 1.9193959466381868e-05, + "loss": 0.9351, + "step": 1047 + }, + { + "epoch": 0.15, + "learning_rate": 1.9192074446227878e-05, + "loss": 0.7847, + "step": 1048 + }, + { + "epoch": 0.16, + "learning_rate": 1.9190187317261184e-05, + "loss": 0.8652, + "step": 1049 + }, + { + "epoch": 0.16, + "learning_rate": 1.918829807991473e-05, + "loss": 0.874, + "step": 1050 + }, + { + "epoch": 0.16, + "learning_rate": 1.9186406734621937e-05, + "loss": 0.854, + "step": 1051 + }, + { + "epoch": 0.16, + "learning_rate": 1.918451328181671e-05, + "loss": 0.9189, + "step": 1052 + }, + { + "epoch": 0.16, + "learning_rate": 1.918261772193344e-05, + "loss": 0.9072, + "step": 1053 + }, + { + "epoch": 0.16, + "learning_rate": 1.9180720055407e-05, + "loss": 0.8389, + "step": 1054 + }, + { + "epoch": 0.16, + "learning_rate": 1.917882028267274e-05, + "loss": 0.8677, + "step": 1055 + }, + { + "epoch": 0.16, + "learning_rate": 1.9176918404166506e-05, + "loss": 0.8628, + "step": 1056 + }, + { + "epoch": 0.16, + "learning_rate": 1.9175014420324613e-05, + "loss": 0.7964, + "step": 1057 + }, + { + "epoch": 0.16, + "learning_rate": 1.9173108331583874e-05, + "loss": 0.9087, + "step": 1058 + }, + { + "epoch": 0.16, + "learning_rate": 1.9171200138381572e-05, + "loss": 0.3232, + "step": 1059 + }, + { + "epoch": 0.16, + "learning_rate": 1.916928984115548e-05, + "loss": 0.8008, + "step": 1060 + }, + { + "epoch": 0.16, + "learning_rate": 1.916737744034385e-05, + "loss": 0.8574, + "step": 1061 + }, + { + "epoch": 0.16, + "learning_rate": 1.916546293638542e-05, + "loss": 0.8921, + "step": 1062 + }, + { + "epoch": 0.16, + "learning_rate": 1.9163546329719406e-05, + "loss": 0.8838, + "step": 1063 + }, + { + "epoch": 0.16, + "learning_rate": 1.916162762078551e-05, + "loss": 0.9072, + "step": 1064 + }, + { + "epoch": 0.16, + "learning_rate": 1.9159706810023918e-05, + "loss": 0.9243, + "step": 1065 + }, + { + "epoch": 0.16, + "learning_rate": 1.9157783897875292e-05, + "loss": 0.2986, + "step": 1066 + }, + { + "epoch": 0.16, + "learning_rate": 1.9155858884780777e-05, + "loss": 0.8691, + "step": 1067 + }, + { + "epoch": 0.16, + "learning_rate": 1.9153931771182012e-05, + "loss": 0.8916, + "step": 1068 + }, + { + "epoch": 0.16, + "learning_rate": 1.91520025575211e-05, + "loss": 0.8052, + "step": 1069 + }, + { + "epoch": 0.16, + "learning_rate": 1.9150071244240638e-05, + "loss": 0.8901, + "step": 1070 + }, + { + "epoch": 0.16, + "learning_rate": 1.91481378317837e-05, + "loss": 0.9097, + "step": 1071 + }, + { + "epoch": 0.16, + "learning_rate": 1.914620232059384e-05, + "loss": 0.8975, + "step": 1072 + }, + { + "epoch": 0.16, + "learning_rate": 1.9144264711115104e-05, + "loss": 0.8555, + "step": 1073 + }, + { + "epoch": 0.16, + "learning_rate": 1.9142325003792004e-05, + "loss": 0.8491, + "step": 1074 + }, + { + "epoch": 0.16, + "learning_rate": 1.9140383199069542e-05, + "loss": 0.8794, + "step": 1075 + }, + { + "epoch": 0.16, + "learning_rate": 1.91384392973932e-05, + "loss": 0.877, + "step": 1076 + }, + { + "epoch": 0.16, + "learning_rate": 1.9136493299208944e-05, + "loss": 0.9429, + "step": 1077 + }, + { + "epoch": 0.16, + "learning_rate": 1.9134545204963214e-05, + "loss": 0.8398, + "step": 1078 + }, + { + "epoch": 0.16, + "learning_rate": 1.9132595015102936e-05, + "loss": 0.8867, + "step": 1079 + }, + { + "epoch": 0.16, + "learning_rate": 1.9130642730075516e-05, + "loss": 0.8745, + "step": 1080 + }, + { + "epoch": 0.16, + "learning_rate": 1.912868835032884e-05, + "loss": 0.9214, + "step": 1081 + }, + { + "epoch": 0.16, + "learning_rate": 1.9126731876311274e-05, + "loss": 0.8574, + "step": 1082 + }, + { + "epoch": 0.16, + "learning_rate": 1.9124773308471666e-05, + "loss": 0.8823, + "step": 1083 + }, + { + "epoch": 0.16, + "learning_rate": 1.9122812647259348e-05, + "loss": 0.8179, + "step": 1084 + }, + { + "epoch": 0.16, + "learning_rate": 1.912084989312412e-05, + "loss": 0.8599, + "step": 1085 + }, + { + "epoch": 0.16, + "learning_rate": 1.9118885046516274e-05, + "loss": 0.9868, + "step": 1086 + }, + { + "epoch": 0.16, + "learning_rate": 1.911691810788658e-05, + "loss": 0.9058, + "step": 1087 + }, + { + "epoch": 0.16, + "learning_rate": 1.9114949077686277e-05, + "loss": 0.3354, + "step": 1088 + }, + { + "epoch": 0.16, + "learning_rate": 1.9112977956367105e-05, + "loss": 0.9995, + "step": 1089 + }, + { + "epoch": 0.16, + "learning_rate": 1.9111004744381266e-05, + "loss": 0.8813, + "step": 1090 + }, + { + "epoch": 0.16, + "learning_rate": 1.9109029442181443e-05, + "loss": 0.8721, + "step": 1091 + }, + { + "epoch": 0.16, + "learning_rate": 1.9107052050220808e-05, + "loss": 0.8604, + "step": 1092 + }, + { + "epoch": 0.16, + "learning_rate": 1.9105072568953008e-05, + "loss": 0.3379, + "step": 1093 + }, + { + "epoch": 0.16, + "learning_rate": 1.9103090998832167e-05, + "loss": 0.9082, + "step": 1094 + }, + { + "epoch": 0.16, + "learning_rate": 1.910110734031289e-05, + "loss": 0.8369, + "step": 1095 + }, + { + "epoch": 0.16, + "learning_rate": 1.9099121593850255e-05, + "loss": 0.9043, + "step": 1096 + }, + { + "epoch": 0.16, + "learning_rate": 1.9097133759899838e-05, + "loss": 0.8872, + "step": 1097 + }, + { + "epoch": 0.16, + "learning_rate": 1.9095143838917667e-05, + "loss": 0.9126, + "step": 1098 + }, + { + "epoch": 0.16, + "learning_rate": 1.9093151831360268e-05, + "loss": 0.8403, + "step": 1099 + }, + { + "epoch": 0.16, + "learning_rate": 1.9091157737684643e-05, + "loss": 0.8994, + "step": 1100 + }, + { + "epoch": 0.16, + "learning_rate": 1.9089161558348266e-05, + "loss": 0.9492, + "step": 1101 + }, + { + "epoch": 0.16, + "learning_rate": 1.9087163293809093e-05, + "loss": 0.8862, + "step": 1102 + }, + { + "epoch": 0.16, + "learning_rate": 1.9085162944525563e-05, + "loss": 1.0015, + "step": 1103 + }, + { + "epoch": 0.16, + "learning_rate": 1.9083160510956584e-05, + "loss": 0.9419, + "step": 1104 + }, + { + "epoch": 0.16, + "learning_rate": 1.908115599356155e-05, + "loss": 1.0, + "step": 1105 + }, + { + "epoch": 0.16, + "learning_rate": 1.907914939280033e-05, + "loss": 0.9478, + "step": 1106 + }, + { + "epoch": 0.16, + "learning_rate": 1.907714070913327e-05, + "loss": 0.915, + "step": 1107 + }, + { + "epoch": 0.16, + "learning_rate": 1.907512994302119e-05, + "loss": 0.9927, + "step": 1108 + }, + { + "epoch": 0.16, + "learning_rate": 1.9073117094925405e-05, + "loss": 0.9517, + "step": 1109 + }, + { + "epoch": 0.16, + "learning_rate": 1.907110216530769e-05, + "loss": 0.9722, + "step": 1110 + }, + { + "epoch": 0.16, + "learning_rate": 1.9069085154630295e-05, + "loss": 0.938, + "step": 1111 + }, + { + "epoch": 0.16, + "learning_rate": 1.9067066063355967e-05, + "loss": 0.978, + "step": 1112 + }, + { + "epoch": 0.16, + "learning_rate": 1.906504489194791e-05, + "loss": 0.9497, + "step": 1113 + }, + { + "epoch": 0.16, + "learning_rate": 1.906302164086982e-05, + "loss": 0.9336, + "step": 1114 + }, + { + "epoch": 0.16, + "learning_rate": 1.9060996310585862e-05, + "loss": 0.9487, + "step": 1115 + }, + { + "epoch": 0.17, + "learning_rate": 1.9058968901560677e-05, + "loss": 1.0186, + "step": 1116 + }, + { + "epoch": 0.17, + "learning_rate": 1.905693941425939e-05, + "loss": 0.9331, + "step": 1117 + }, + { + "epoch": 0.17, + "learning_rate": 1.9054907849147597e-05, + "loss": 0.9468, + "step": 1118 + }, + { + "epoch": 0.17, + "learning_rate": 1.905287420669137e-05, + "loss": 0.8721, + "step": 1119 + }, + { + "epoch": 0.17, + "learning_rate": 1.9050838487357267e-05, + "loss": 0.9385, + "step": 1120 + }, + { + "epoch": 0.17, + "learning_rate": 1.9048800691612305e-05, + "loss": 0.917, + "step": 1121 + }, + { + "epoch": 0.17, + "learning_rate": 1.9046760819924e-05, + "loss": 0.3174, + "step": 1122 + }, + { + "epoch": 0.17, + "learning_rate": 1.904471887276032e-05, + "loss": 0.9097, + "step": 1123 + }, + { + "epoch": 0.17, + "learning_rate": 1.9042674850589732e-05, + "loss": 0.9351, + "step": 1124 + }, + { + "epoch": 0.17, + "learning_rate": 1.9040628753881155e-05, + "loss": 0.895, + "step": 1125 + }, + { + "epoch": 0.17, + "learning_rate": 1.9038580583104013e-05, + "loss": 0.8037, + "step": 1126 + }, + { + "epoch": 0.17, + "learning_rate": 1.903653033872818e-05, + "loss": 0.3344, + "step": 1127 + }, + { + "epoch": 0.17, + "learning_rate": 1.9034478021224015e-05, + "loss": 0.8384, + "step": 1128 + }, + { + "epoch": 0.17, + "learning_rate": 1.903242363106236e-05, + "loss": 0.8613, + "step": 1129 + }, + { + "epoch": 0.17, + "learning_rate": 1.903036716871452e-05, + "loss": 0.8721, + "step": 1130 + }, + { + "epoch": 0.17, + "learning_rate": 1.902830863465228e-05, + "loss": 0.3223, + "step": 1131 + }, + { + "epoch": 0.17, + "learning_rate": 1.9026248029347908e-05, + "loss": 0.3062, + "step": 1132 + }, + { + "epoch": 0.17, + "learning_rate": 1.9024185353274138e-05, + "loss": 0.9019, + "step": 1133 + }, + { + "epoch": 0.17, + "learning_rate": 1.902212060690418e-05, + "loss": 0.8506, + "step": 1134 + }, + { + "epoch": 0.17, + "learning_rate": 1.902005379071172e-05, + "loss": 0.9004, + "step": 1135 + }, + { + "epoch": 0.17, + "learning_rate": 1.9017984905170923e-05, + "loss": 0.8755, + "step": 1136 + }, + { + "epoch": 0.17, + "learning_rate": 1.9015913950756425e-05, + "loss": 0.9067, + "step": 1137 + }, + { + "epoch": 0.17, + "learning_rate": 1.9013840927943334e-05, + "loss": 0.8564, + "step": 1138 + }, + { + "epoch": 0.17, + "learning_rate": 1.9011765837207237e-05, + "loss": 0.8589, + "step": 1139 + }, + { + "epoch": 0.17, + "learning_rate": 1.900968867902419e-05, + "loss": 0.8774, + "step": 1140 + }, + { + "epoch": 0.17, + "learning_rate": 1.9007609453870738e-05, + "loss": 0.8638, + "step": 1141 + }, + { + "epoch": 0.17, + "learning_rate": 1.9005528162223878e-05, + "loss": 0.9458, + "step": 1142 + }, + { + "epoch": 0.17, + "learning_rate": 1.9003444804561098e-05, + "loss": 0.8589, + "step": 1143 + }, + { + "epoch": 0.17, + "learning_rate": 1.9001359381360354e-05, + "loss": 0.8789, + "step": 1144 + }, + { + "epoch": 0.17, + "learning_rate": 1.8999271893100074e-05, + "loss": 0.9253, + "step": 1145 + }, + { + "epoch": 0.17, + "learning_rate": 1.8997182340259165e-05, + "loss": 0.9092, + "step": 1146 + }, + { + "epoch": 0.17, + "learning_rate": 1.8995090723317e-05, + "loss": 0.8784, + "step": 1147 + }, + { + "epoch": 0.17, + "learning_rate": 1.8992997042753437e-05, + "loss": 0.8945, + "step": 1148 + }, + { + "epoch": 0.17, + "learning_rate": 1.8990901299048798e-05, + "loss": 0.8901, + "step": 1149 + }, + { + "epoch": 0.17, + "learning_rate": 1.8988803492683875e-05, + "loss": 0.8618, + "step": 1150 + }, + { + "epoch": 0.17, + "learning_rate": 1.8986703624139948e-05, + "loss": 0.9014, + "step": 1151 + }, + { + "epoch": 0.17, + "learning_rate": 1.8984601693898756e-05, + "loss": 0.9326, + "step": 1152 + }, + { + "epoch": 0.17, + "learning_rate": 1.898249770244252e-05, + "loss": 0.8857, + "step": 1153 + }, + { + "epoch": 0.17, + "learning_rate": 1.898039165025393e-05, + "loss": 0.8589, + "step": 1154 + }, + { + "epoch": 0.17, + "learning_rate": 1.897828353781614e-05, + "loss": 0.895, + "step": 1155 + }, + { + "epoch": 0.17, + "learning_rate": 1.89761733656128e-05, + "loss": 0.3257, + "step": 1156 + }, + { + "epoch": 0.17, + "learning_rate": 1.8974061134128008e-05, + "loss": 0.8838, + "step": 1157 + }, + { + "epoch": 0.17, + "learning_rate": 1.8971946843846348e-05, + "loss": 0.9048, + "step": 1158 + }, + { + "epoch": 0.17, + "learning_rate": 1.896983049525287e-05, + "loss": 0.8613, + "step": 1159 + }, + { + "epoch": 0.17, + "learning_rate": 1.89677120888331e-05, + "loss": 0.8872, + "step": 1160 + }, + { + "epoch": 0.17, + "learning_rate": 1.896559162507304e-05, + "loss": 0.9282, + "step": 1161 + }, + { + "epoch": 0.17, + "learning_rate": 1.8963469104459157e-05, + "loss": 0.896, + "step": 1162 + }, + { + "epoch": 0.17, + "learning_rate": 1.896134452747839e-05, + "loss": 0.8604, + "step": 1163 + }, + { + "epoch": 0.17, + "learning_rate": 1.8959217894618146e-05, + "loss": 0.8555, + "step": 1164 + }, + { + "epoch": 0.17, + "learning_rate": 1.8957089206366325e-05, + "loss": 0.8813, + "step": 1165 + }, + { + "epoch": 0.17, + "learning_rate": 1.895495846321127e-05, + "loss": 0.8706, + "step": 1166 + }, + { + "epoch": 0.17, + "learning_rate": 1.8952825665641808e-05, + "loss": 0.8823, + "step": 1167 + }, + { + "epoch": 0.17, + "learning_rate": 1.895069081414725e-05, + "loss": 0.9097, + "step": 1168 + }, + { + "epoch": 0.17, + "learning_rate": 1.8948553909217354e-05, + "loss": 0.2866, + "step": 1169 + }, + { + "epoch": 0.17, + "learning_rate": 1.8946414951342368e-05, + "loss": 0.8677, + "step": 1170 + }, + { + "epoch": 0.17, + "learning_rate": 1.8944273941012998e-05, + "loss": 0.8623, + "step": 1171 + }, + { + "epoch": 0.17, + "learning_rate": 1.8942130878720434e-05, + "loss": 0.9229, + "step": 1172 + }, + { + "epoch": 0.17, + "learning_rate": 1.8939985764956323e-05, + "loss": 0.8813, + "step": 1173 + }, + { + "epoch": 0.17, + "learning_rate": 1.8937838600212792e-05, + "loss": 0.8667, + "step": 1174 + }, + { + "epoch": 0.17, + "learning_rate": 1.8935689384982433e-05, + "loss": 0.8608, + "step": 1175 + }, + { + "epoch": 0.17, + "learning_rate": 1.893353811975832e-05, + "loss": 0.8652, + "step": 1176 + }, + { + "epoch": 0.17, + "learning_rate": 1.893138480503398e-05, + "loss": 0.8892, + "step": 1177 + }, + { + "epoch": 0.17, + "learning_rate": 1.892922944130342e-05, + "loss": 0.9312, + "step": 1178 + }, + { + "epoch": 0.17, + "learning_rate": 1.892707202906112e-05, + "loss": 0.917, + "step": 1179 + }, + { + "epoch": 0.17, + "learning_rate": 1.8924912568802023e-05, + "loss": 0.9028, + "step": 1180 + }, + { + "epoch": 0.17, + "learning_rate": 1.8922751061021545e-05, + "loss": 0.8379, + "step": 1181 + }, + { + "epoch": 0.17, + "learning_rate": 1.8920587506215567e-05, + "loss": 0.8022, + "step": 1182 + }, + { + "epoch": 0.17, + "learning_rate": 1.891842190488045e-05, + "loss": 0.8618, + "step": 1183 + }, + { + "epoch": 0.18, + "learning_rate": 1.8916254257513017e-05, + "loss": 0.8589, + "step": 1184 + }, + { + "epoch": 0.18, + "learning_rate": 1.8914084564610565e-05, + "loss": 0.8721, + "step": 1185 + }, + { + "epoch": 0.18, + "learning_rate": 1.8911912826670848e-05, + "loss": 0.9243, + "step": 1186 + }, + { + "epoch": 0.18, + "learning_rate": 1.8909739044192107e-05, + "loss": 0.8452, + "step": 1187 + }, + { + "epoch": 0.18, + "learning_rate": 1.8907563217673038e-05, + "loss": 0.8599, + "step": 1188 + }, + { + "epoch": 0.18, + "learning_rate": 1.8905385347612814e-05, + "loss": 0.8774, + "step": 1189 + }, + { + "epoch": 0.18, + "learning_rate": 1.8903205434511072e-05, + "loss": 0.9326, + "step": 1190 + }, + { + "epoch": 0.18, + "learning_rate": 1.8901023478867926e-05, + "loss": 0.9209, + "step": 1191 + }, + { + "epoch": 0.18, + "learning_rate": 1.8898839481183943e-05, + "loss": 0.8823, + "step": 1192 + }, + { + "epoch": 0.18, + "learning_rate": 1.8896653441960175e-05, + "loss": 0.8882, + "step": 1193 + }, + { + "epoch": 0.18, + "learning_rate": 1.889446536169813e-05, + "loss": 0.8516, + "step": 1194 + }, + { + "epoch": 0.18, + "learning_rate": 1.88922752408998e-05, + "loss": 0.814, + "step": 1195 + }, + { + "epoch": 0.18, + "learning_rate": 1.889008308006762e-05, + "loss": 0.8809, + "step": 1196 + }, + { + "epoch": 0.18, + "learning_rate": 1.888788887970452e-05, + "loss": 0.8643, + "step": 1197 + }, + { + "epoch": 0.18, + "learning_rate": 1.8885692640313875e-05, + "loss": 0.8237, + "step": 1198 + }, + { + "epoch": 0.18, + "learning_rate": 1.8883494362399547e-05, + "loss": 0.8828, + "step": 1199 + }, + { + "epoch": 0.18, + "learning_rate": 1.888129404646585e-05, + "loss": 0.8184, + "step": 1200 + }, + { + "epoch": 0.18, + "learning_rate": 1.8879091693017582e-05, + "loss": 0.8804, + "step": 1201 + }, + { + "epoch": 0.18, + "learning_rate": 1.8876887302559992e-05, + "loss": 0.8877, + "step": 1202 + }, + { + "epoch": 0.18, + "learning_rate": 1.88746808755988e-05, + "loss": 0.9463, + "step": 1203 + }, + { + "epoch": 0.18, + "learning_rate": 1.8872472412640207e-05, + "loss": 0.9243, + "step": 1204 + }, + { + "epoch": 0.18, + "learning_rate": 1.887026191419086e-05, + "loss": 0.9272, + "step": 1205 + }, + { + "epoch": 0.18, + "learning_rate": 1.8868049380757895e-05, + "loss": 0.9209, + "step": 1206 + }, + { + "epoch": 0.18, + "learning_rate": 1.886583481284889e-05, + "loss": 0.8584, + "step": 1207 + }, + { + "epoch": 0.18, + "learning_rate": 1.8863618210971912e-05, + "loss": 0.8372, + "step": 1208 + }, + { + "epoch": 0.18, + "learning_rate": 1.8861399575635486e-05, + "loss": 0.9062, + "step": 1209 + }, + { + "epoch": 0.18, + "learning_rate": 1.8859178907348602e-05, + "loss": 0.8145, + "step": 1210 + }, + { + "epoch": 0.18, + "learning_rate": 1.8856956206620717e-05, + "loss": 0.3381, + "step": 1211 + }, + { + "epoch": 0.18, + "learning_rate": 1.885473147396175e-05, + "loss": 0.3209, + "step": 1212 + }, + { + "epoch": 0.18, + "learning_rate": 1.88525047098821e-05, + "loss": 0.8613, + "step": 1213 + }, + { + "epoch": 0.18, + "learning_rate": 1.8850275914892622e-05, + "loss": 0.8691, + "step": 1214 + }, + { + "epoch": 0.18, + "learning_rate": 1.8848045089504633e-05, + "loss": 0.8398, + "step": 1215 + }, + { + "epoch": 0.18, + "learning_rate": 1.8845812234229924e-05, + "loss": 0.8691, + "step": 1216 + }, + { + "epoch": 0.18, + "learning_rate": 1.8843577349580747e-05, + "loss": 0.8604, + "step": 1217 + }, + { + "epoch": 0.18, + "learning_rate": 1.8841340436069825e-05, + "loss": 0.8726, + "step": 1218 + }, + { + "epoch": 0.18, + "learning_rate": 1.8839101494210338e-05, + "loss": 0.8916, + "step": 1219 + }, + { + "epoch": 0.18, + "learning_rate": 1.883686052451594e-05, + "loss": 0.9453, + "step": 1220 + }, + { + "epoch": 0.18, + "learning_rate": 1.8834617527500743e-05, + "loss": 0.897, + "step": 1221 + }, + { + "epoch": 0.18, + "learning_rate": 1.883237250367933e-05, + "loss": 0.8501, + "step": 1222 + }, + { + "epoch": 0.18, + "learning_rate": 1.8830125453566742e-05, + "loss": 0.8599, + "step": 1223 + }, + { + "epoch": 0.18, + "learning_rate": 1.8827876377678494e-05, + "loss": 0.855, + "step": 1224 + }, + { + "epoch": 0.18, + "learning_rate": 1.8825625276530558e-05, + "loss": 0.8984, + "step": 1225 + }, + { + "epoch": 0.18, + "learning_rate": 1.8823372150639375e-05, + "loss": 0.8516, + "step": 1226 + }, + { + "epoch": 0.18, + "learning_rate": 1.882111700052185e-05, + "loss": 0.8545, + "step": 1227 + }, + { + "epoch": 0.18, + "learning_rate": 1.881885982669535e-05, + "loss": 0.7876, + "step": 1228 + }, + { + "epoch": 0.18, + "learning_rate": 1.8816600629677705e-05, + "loss": 0.8647, + "step": 1229 + }, + { + "epoch": 0.18, + "learning_rate": 1.8814339409987217e-05, + "loss": 0.8774, + "step": 1230 + }, + { + "epoch": 0.18, + "learning_rate": 1.8812076168142647e-05, + "loss": 0.8394, + "step": 1231 + }, + { + "epoch": 0.18, + "learning_rate": 1.880981090466321e-05, + "loss": 0.8174, + "step": 1232 + }, + { + "epoch": 0.18, + "learning_rate": 1.8807543620068606e-05, + "loss": 0.8408, + "step": 1233 + }, + { + "epoch": 0.18, + "learning_rate": 1.8805274314878986e-05, + "loss": 0.8872, + "step": 1234 + }, + { + "epoch": 0.18, + "learning_rate": 1.880300298961496e-05, + "loss": 0.8462, + "step": 1235 + }, + { + "epoch": 0.18, + "learning_rate": 1.880072964479761e-05, + "loss": 0.353, + "step": 1236 + }, + { + "epoch": 0.18, + "learning_rate": 1.8798454280948483e-05, + "loss": 0.8188, + "step": 1237 + }, + { + "epoch": 0.18, + "learning_rate": 1.8796176898589575e-05, + "loss": 0.8354, + "step": 1238 + }, + { + "epoch": 0.18, + "learning_rate": 1.879389749824336e-05, + "loss": 0.8843, + "step": 1239 + }, + { + "epoch": 0.18, + "learning_rate": 1.8791616080432777e-05, + "loss": 0.2764, + "step": 1240 + }, + { + "epoch": 0.18, + "learning_rate": 1.8789332645681212e-05, + "loss": 0.8384, + "step": 1241 + }, + { + "epoch": 0.18, + "learning_rate": 1.8787047194512524e-05, + "loss": 0.9053, + "step": 1242 + }, + { + "epoch": 0.18, + "learning_rate": 1.8784759727451033e-05, + "loss": 0.3406, + "step": 1243 + }, + { + "epoch": 0.18, + "learning_rate": 1.878247024502152e-05, + "loss": 0.9492, + "step": 1244 + }, + { + "epoch": 0.18, + "learning_rate": 1.8780178747749237e-05, + "loss": 0.8701, + "step": 1245 + }, + { + "epoch": 0.18, + "learning_rate": 1.877788523615988e-05, + "loss": 0.8726, + "step": 1246 + }, + { + "epoch": 0.18, + "learning_rate": 1.8775589710779627e-05, + "loss": 0.896, + "step": 1247 + }, + { + "epoch": 0.18, + "learning_rate": 1.8773292172135106e-05, + "loss": 0.8857, + "step": 1248 + }, + { + "epoch": 0.18, + "learning_rate": 1.8770992620753413e-05, + "loss": 0.8428, + "step": 1249 + }, + { + "epoch": 0.18, + "learning_rate": 1.8768691057162097e-05, + "loss": 0.895, + "step": 1250 + }, + { + "epoch": 0.19, + "learning_rate": 1.8766387481889178e-05, + "loss": 0.8936, + "step": 1251 + }, + { + "epoch": 0.19, + "learning_rate": 1.8764081895463137e-05, + "loss": 0.7729, + "step": 1252 + }, + { + "epoch": 0.19, + "learning_rate": 1.8761774298412905e-05, + "loss": 0.8857, + "step": 1253 + }, + { + "epoch": 0.19, + "learning_rate": 1.875946469126789e-05, + "loss": 0.8262, + "step": 1254 + }, + { + "epoch": 0.19, + "learning_rate": 1.8757153074557953e-05, + "loss": 0.8262, + "step": 1255 + }, + { + "epoch": 0.19, + "learning_rate": 1.875483944881341e-05, + "loss": 0.873, + "step": 1256 + }, + { + "epoch": 0.19, + "learning_rate": 1.8752523814565053e-05, + "loss": 0.876, + "step": 1257 + }, + { + "epoch": 0.19, + "learning_rate": 1.8750206172344125e-05, + "loss": 0.875, + "step": 1258 + }, + { + "epoch": 0.19, + "learning_rate": 1.874788652268233e-05, + "loss": 0.8906, + "step": 1259 + }, + { + "epoch": 0.19, + "learning_rate": 1.874556486611183e-05, + "loss": 0.9116, + "step": 1260 + }, + { + "epoch": 0.19, + "learning_rate": 1.8743241203165253e-05, + "loss": 0.8359, + "step": 1261 + }, + { + "epoch": 0.19, + "learning_rate": 1.874091553437569e-05, + "loss": 0.8169, + "step": 1262 + }, + { + "epoch": 0.19, + "learning_rate": 1.8738587860276685e-05, + "loss": 0.8589, + "step": 1263 + }, + { + "epoch": 0.19, + "learning_rate": 1.8736258181402244e-05, + "loss": 0.8594, + "step": 1264 + }, + { + "epoch": 0.19, + "learning_rate": 1.873392649828683e-05, + "loss": 0.8916, + "step": 1265 + }, + { + "epoch": 0.19, + "learning_rate": 1.8731592811465377e-05, + "loss": 0.9092, + "step": 1266 + }, + { + "epoch": 0.19, + "learning_rate": 1.8729257121473262e-05, + "loss": 0.9438, + "step": 1267 + }, + { + "epoch": 0.19, + "learning_rate": 1.872691942884634e-05, + "loss": 0.8242, + "step": 1268 + }, + { + "epoch": 0.19, + "learning_rate": 1.872457973412091e-05, + "loss": 0.874, + "step": 1269 + }, + { + "epoch": 0.19, + "learning_rate": 1.872223803783374e-05, + "loss": 0.8853, + "step": 1270 + }, + { + "epoch": 0.19, + "learning_rate": 1.8719894340522048e-05, + "loss": 0.8032, + "step": 1271 + }, + { + "epoch": 0.19, + "learning_rate": 1.871754864272352e-05, + "loss": 0.8462, + "step": 1272 + }, + { + "epoch": 0.19, + "learning_rate": 1.87152009449763e-05, + "loss": 0.8716, + "step": 1273 + }, + { + "epoch": 0.19, + "learning_rate": 1.8712851247818985e-05, + "loss": 0.8311, + "step": 1274 + }, + { + "epoch": 0.19, + "learning_rate": 1.8710499551790632e-05, + "loss": 0.854, + "step": 1275 + }, + { + "epoch": 0.19, + "learning_rate": 1.8708145857430766e-05, + "loss": 0.8833, + "step": 1276 + }, + { + "epoch": 0.19, + "learning_rate": 1.870579016527936e-05, + "loss": 0.8726, + "step": 1277 + }, + { + "epoch": 0.19, + "learning_rate": 1.8703432475876844e-05, + "loss": 0.8628, + "step": 1278 + }, + { + "epoch": 0.19, + "learning_rate": 1.8701072789764118e-05, + "loss": 0.8535, + "step": 1279 + }, + { + "epoch": 0.19, + "learning_rate": 1.8698711107482522e-05, + "loss": 0.8716, + "step": 1280 + }, + { + "epoch": 0.19, + "learning_rate": 1.869634742957388e-05, + "loss": 0.3582, + "step": 1281 + }, + { + "epoch": 0.19, + "learning_rate": 1.8693981756580442e-05, + "loss": 0.8188, + "step": 1282 + }, + { + "epoch": 0.19, + "learning_rate": 1.8691614089044946e-05, + "loss": 0.8643, + "step": 1283 + }, + { + "epoch": 0.19, + "learning_rate": 1.8689244427510564e-05, + "loss": 0.8872, + "step": 1284 + }, + { + "epoch": 0.19, + "learning_rate": 1.868687277252094e-05, + "loss": 0.8364, + "step": 1285 + }, + { + "epoch": 0.19, + "learning_rate": 1.8684499124620167e-05, + "loss": 0.8311, + "step": 1286 + }, + { + "epoch": 0.19, + "learning_rate": 1.8682123484352804e-05, + "loss": 0.8032, + "step": 1287 + }, + { + "epoch": 0.19, + "learning_rate": 1.867974585226386e-05, + "loss": 0.8179, + "step": 1288 + }, + { + "epoch": 0.19, + "learning_rate": 1.86773662288988e-05, + "loss": 0.9058, + "step": 1289 + }, + { + "epoch": 0.19, + "learning_rate": 1.8674984614803553e-05, + "loss": 0.9097, + "step": 1290 + }, + { + "epoch": 0.19, + "learning_rate": 1.8672601010524497e-05, + "loss": 0.8506, + "step": 1291 + }, + { + "epoch": 0.19, + "learning_rate": 1.8670215416608467e-05, + "loss": 0.9141, + "step": 1292 + }, + { + "epoch": 0.19, + "learning_rate": 1.8667827833602767e-05, + "loss": 0.7959, + "step": 1293 + }, + { + "epoch": 0.19, + "learning_rate": 1.866543826205514e-05, + "loss": 0.8579, + "step": 1294 + }, + { + "epoch": 0.19, + "learning_rate": 1.8663046702513795e-05, + "loss": 0.8374, + "step": 1295 + }, + { + "epoch": 0.19, + "learning_rate": 1.8660653155527396e-05, + "loss": 0.8296, + "step": 1296 + }, + { + "epoch": 0.19, + "learning_rate": 1.865825762164506e-05, + "loss": 0.8271, + "step": 1297 + }, + { + "epoch": 0.19, + "learning_rate": 1.8655860101416362e-05, + "loss": 0.8257, + "step": 1298 + }, + { + "epoch": 0.19, + "learning_rate": 1.8653460595391335e-05, + "loss": 0.8291, + "step": 1299 + }, + { + "epoch": 0.19, + "learning_rate": 1.865105910412046e-05, + "loss": 0.8696, + "step": 1300 + }, + { + "epoch": 0.19, + "learning_rate": 1.8648655628154687e-05, + "loss": 0.9443, + "step": 1301 + }, + { + "epoch": 0.19, + "learning_rate": 1.8646250168045402e-05, + "loss": 0.7661, + "step": 1302 + }, + { + "epoch": 0.19, + "learning_rate": 1.8643842724344468e-05, + "loss": 0.833, + "step": 1303 + }, + { + "epoch": 0.19, + "learning_rate": 1.8641433297604183e-05, + "loss": 0.8208, + "step": 1304 + }, + { + "epoch": 0.19, + "learning_rate": 1.8639021888377313e-05, + "loss": 0.8862, + "step": 1305 + }, + { + "epoch": 0.19, + "learning_rate": 1.8636608497217077e-05, + "loss": 0.8828, + "step": 1306 + }, + { + "epoch": 0.19, + "learning_rate": 1.863419312467714e-05, + "loss": 0.9048, + "step": 1307 + }, + { + "epoch": 0.19, + "learning_rate": 1.863177577131164e-05, + "loss": 0.9473, + "step": 1308 + }, + { + "epoch": 0.19, + "learning_rate": 1.862935643767514e-05, + "loss": 0.8599, + "step": 1309 + }, + { + "epoch": 0.19, + "learning_rate": 1.862693512432269e-05, + "loss": 0.8848, + "step": 1310 + }, + { + "epoch": 0.19, + "learning_rate": 1.862451183180977e-05, + "loss": 0.8706, + "step": 1311 + }, + { + "epoch": 0.19, + "learning_rate": 1.862208656069233e-05, + "loss": 0.8022, + "step": 1312 + }, + { + "epoch": 0.19, + "learning_rate": 1.861965931152676e-05, + "loss": 0.8994, + "step": 1313 + }, + { + "epoch": 0.19, + "learning_rate": 1.861723008486992e-05, + "loss": 0.9263, + "step": 1314 + }, + { + "epoch": 0.19, + "learning_rate": 1.8614798881279107e-05, + "loss": 0.8745, + "step": 1315 + }, + { + "epoch": 0.19, + "learning_rate": 1.8612365701312075e-05, + "loss": 0.8545, + "step": 1316 + }, + { + "epoch": 0.19, + "learning_rate": 1.8609930545527048e-05, + "loss": 0.9302, + "step": 1317 + }, + { + "epoch": 0.19, + "learning_rate": 1.8607493414482683e-05, + "loss": 0.8682, + "step": 1318 + }, + { + "epoch": 0.2, + "learning_rate": 1.8605054308738095e-05, + "loss": 0.9097, + "step": 1319 + }, + { + "epoch": 0.2, + "learning_rate": 1.8602613228852862e-05, + "loss": 0.7866, + "step": 1320 + }, + { + "epoch": 0.2, + "learning_rate": 1.8600170175387004e-05, + "loss": 0.8921, + "step": 1321 + }, + { + "epoch": 0.2, + "learning_rate": 1.8597725148900997e-05, + "loss": 0.8545, + "step": 1322 + }, + { + "epoch": 0.2, + "learning_rate": 1.859527814995577e-05, + "loss": 0.8501, + "step": 1323 + }, + { + "epoch": 0.2, + "learning_rate": 1.859282917911271e-05, + "loss": 0.9102, + "step": 1324 + }, + { + "epoch": 0.2, + "learning_rate": 1.8590378236933642e-05, + "loss": 0.7197, + "step": 1325 + }, + { + "epoch": 0.2, + "learning_rate": 1.8587925323980863e-05, + "loss": 0.8574, + "step": 1326 + }, + { + "epoch": 0.2, + "learning_rate": 1.8585470440817103e-05, + "loss": 0.8647, + "step": 1327 + }, + { + "epoch": 0.2, + "learning_rate": 1.8583013588005553e-05, + "loss": 0.8921, + "step": 1328 + }, + { + "epoch": 0.2, + "learning_rate": 1.858055476610986e-05, + "loss": 0.8613, + "step": 1329 + }, + { + "epoch": 0.2, + "learning_rate": 1.8578093975694116e-05, + "loss": 0.8687, + "step": 1330 + }, + { + "epoch": 0.2, + "learning_rate": 1.8575631217322864e-05, + "loss": 0.8008, + "step": 1331 + }, + { + "epoch": 0.2, + "learning_rate": 1.8573166491561108e-05, + "loss": 0.7446, + "step": 1332 + }, + { + "epoch": 0.2, + "learning_rate": 1.857069979897429e-05, + "loss": 0.8745, + "step": 1333 + }, + { + "epoch": 0.2, + "learning_rate": 1.8568231140128307e-05, + "loss": 0.8481, + "step": 1334 + }, + { + "epoch": 0.2, + "learning_rate": 1.856576051558952e-05, + "loss": 0.8594, + "step": 1335 + }, + { + "epoch": 0.2, + "learning_rate": 1.8563287925924725e-05, + "loss": 0.8711, + "step": 1336 + }, + { + "epoch": 0.2, + "learning_rate": 1.8560813371701174e-05, + "loss": 0.8589, + "step": 1337 + }, + { + "epoch": 0.2, + "learning_rate": 1.8558336853486573e-05, + "loss": 0.792, + "step": 1338 + }, + { + "epoch": 0.2, + "learning_rate": 1.8555858371849075e-05, + "loss": 0.8589, + "step": 1339 + }, + { + "epoch": 0.2, + "learning_rate": 1.8553377927357283e-05, + "loss": 0.8979, + "step": 1340 + }, + { + "epoch": 0.2, + "learning_rate": 1.855089552058025e-05, + "loss": 0.9106, + "step": 1341 + }, + { + "epoch": 0.2, + "learning_rate": 1.8548411152087483e-05, + "loss": 0.856, + "step": 1342 + }, + { + "epoch": 0.2, + "learning_rate": 1.854592482244894e-05, + "loss": 0.8091, + "step": 1343 + }, + { + "epoch": 0.2, + "learning_rate": 1.8543436532235024e-05, + "loss": 0.3523, + "step": 1344 + }, + { + "epoch": 0.2, + "learning_rate": 1.8540946282016587e-05, + "loss": 0.9346, + "step": 1345 + }, + { + "epoch": 0.2, + "learning_rate": 1.8538454072364935e-05, + "loss": 0.8735, + "step": 1346 + }, + { + "epoch": 0.2, + "learning_rate": 1.853595990385182e-05, + "loss": 0.9155, + "step": 1347 + }, + { + "epoch": 0.2, + "learning_rate": 1.853346377704945e-05, + "loss": 0.9131, + "step": 1348 + }, + { + "epoch": 0.2, + "learning_rate": 1.8530965692530472e-05, + "loss": 0.9087, + "step": 1349 + }, + { + "epoch": 0.2, + "learning_rate": 1.852846565086799e-05, + "loss": 0.8374, + "step": 1350 + }, + { + "epoch": 0.2, + "learning_rate": 1.8525963652635556e-05, + "loss": 0.364, + "step": 1351 + }, + { + "epoch": 0.2, + "learning_rate": 1.8523459698407164e-05, + "loss": 0.8457, + "step": 1352 + }, + { + "epoch": 0.2, + "learning_rate": 1.852095378875727e-05, + "loss": 0.8716, + "step": 1353 + }, + { + "epoch": 0.2, + "learning_rate": 1.8518445924260765e-05, + "loss": 0.8154, + "step": 1354 + }, + { + "epoch": 0.2, + "learning_rate": 1.8515936105493e-05, + "loss": 0.792, + "step": 1355 + }, + { + "epoch": 0.2, + "learning_rate": 1.8513424333029757e-05, + "loss": 0.8838, + "step": 1356 + }, + { + "epoch": 0.2, + "learning_rate": 1.8510910607447293e-05, + "loss": 0.8843, + "step": 1357 + }, + { + "epoch": 0.2, + "learning_rate": 1.8508394929322287e-05, + "loss": 0.896, + "step": 1358 + }, + { + "epoch": 0.2, + "learning_rate": 1.8505877299231877e-05, + "loss": 0.8735, + "step": 1359 + }, + { + "epoch": 0.2, + "learning_rate": 1.8503357717753658e-05, + "loss": 0.9038, + "step": 1360 + }, + { + "epoch": 0.2, + "learning_rate": 1.8500836185465652e-05, + "loss": 0.8174, + "step": 1361 + }, + { + "epoch": 0.2, + "learning_rate": 1.849831270294635e-05, + "loss": 0.8711, + "step": 1362 + }, + { + "epoch": 0.2, + "learning_rate": 1.8495787270774676e-05, + "loss": 0.8809, + "step": 1363 + }, + { + "epoch": 0.2, + "learning_rate": 1.849325988953e-05, + "loss": 0.8501, + "step": 1364 + }, + { + "epoch": 0.2, + "learning_rate": 1.8490730559792153e-05, + "loss": 0.3105, + "step": 1365 + }, + { + "epoch": 0.2, + "learning_rate": 1.84881992821414e-05, + "loss": 0.8911, + "step": 1366 + }, + { + "epoch": 0.2, + "learning_rate": 1.848566605715846e-05, + "loss": 0.8203, + "step": 1367 + }, + { + "epoch": 0.2, + "learning_rate": 1.8483130885424493e-05, + "loss": 0.343, + "step": 1368 + }, + { + "epoch": 0.2, + "learning_rate": 1.8480593767521116e-05, + "loss": 0.854, + "step": 1369 + }, + { + "epoch": 0.2, + "learning_rate": 1.8478054704030376e-05, + "loss": 0.8433, + "step": 1370 + }, + { + "epoch": 0.2, + "learning_rate": 1.8475513695534784e-05, + "loss": 0.8857, + "step": 1371 + }, + { + "epoch": 0.2, + "learning_rate": 1.8472970742617284e-05, + "loss": 0.9023, + "step": 1372 + }, + { + "epoch": 0.2, + "learning_rate": 1.8470425845861274e-05, + "loss": 0.8911, + "step": 1373 + }, + { + "epoch": 0.2, + "learning_rate": 1.8467879005850595e-05, + "loss": 0.9204, + "step": 1374 + }, + { + "epoch": 0.2, + "learning_rate": 1.8465330223169532e-05, + "loss": 0.9023, + "step": 1375 + }, + { + "epoch": 0.2, + "learning_rate": 1.8462779498402817e-05, + "loss": 0.8379, + "step": 1376 + }, + { + "epoch": 0.2, + "learning_rate": 1.846022683213563e-05, + "loss": 0.8838, + "step": 1377 + }, + { + "epoch": 0.2, + "learning_rate": 1.8457672224953595e-05, + "loss": 0.2905, + "step": 1378 + }, + { + "epoch": 0.2, + "learning_rate": 1.8455115677442782e-05, + "loss": 0.8184, + "step": 1379 + }, + { + "epoch": 0.2, + "learning_rate": 1.84525571901897e-05, + "loss": 0.8633, + "step": 1380 + }, + { + "epoch": 0.2, + "learning_rate": 1.844999676378131e-05, + "loss": 0.8857, + "step": 1381 + }, + { + "epoch": 0.2, + "learning_rate": 1.8447434398805018e-05, + "loss": 0.8721, + "step": 1382 + }, + { + "epoch": 0.2, + "learning_rate": 1.844487009584867e-05, + "loss": 0.9297, + "step": 1383 + }, + { + "epoch": 0.2, + "learning_rate": 1.8442303855500562e-05, + "loss": 0.8252, + "step": 1384 + }, + { + "epoch": 0.2, + "learning_rate": 1.843973567834943e-05, + "loss": 0.8838, + "step": 1385 + }, + { + "epoch": 0.2, + "learning_rate": 1.8437165564984455e-05, + "loss": 0.8457, + "step": 1386 + }, + { + "epoch": 0.21, + "learning_rate": 1.8434593515995265e-05, + "loss": 0.814, + "step": 1387 + }, + { + "epoch": 0.21, + "learning_rate": 1.843201953197193e-05, + "loss": 0.8335, + "step": 1388 + }, + { + "epoch": 0.21, + "learning_rate": 1.8429443613504962e-05, + "loss": 0.8169, + "step": 1389 + }, + { + "epoch": 0.21, + "learning_rate": 1.8426865761185324e-05, + "loss": 0.8232, + "step": 1390 + }, + { + "epoch": 0.21, + "learning_rate": 1.842428597560441e-05, + "loss": 0.8296, + "step": 1391 + }, + { + "epoch": 0.21, + "learning_rate": 1.842170425735407e-05, + "loss": 0.8428, + "step": 1392 + }, + { + "epoch": 0.21, + "learning_rate": 1.841912060702659e-05, + "loss": 0.8887, + "step": 1393 + }, + { + "epoch": 0.21, + "learning_rate": 1.841653502521471e-05, + "loss": 0.8569, + "step": 1394 + }, + { + "epoch": 0.21, + "learning_rate": 1.841394751251159e-05, + "loss": 0.8193, + "step": 1395 + }, + { + "epoch": 0.21, + "learning_rate": 1.8411358069510864e-05, + "loss": 0.8813, + "step": 1396 + }, + { + "epoch": 0.21, + "learning_rate": 1.8408766696806578e-05, + "loss": 0.9092, + "step": 1397 + }, + { + "epoch": 0.21, + "learning_rate": 1.840617339499325e-05, + "loss": 0.897, + "step": 1398 + }, + { + "epoch": 0.21, + "learning_rate": 1.840357816466581e-05, + "loss": 0.8193, + "step": 1399 + }, + { + "epoch": 0.21, + "learning_rate": 1.8400981006419663e-05, + "loss": 0.8223, + "step": 1400 + }, + { + "epoch": 0.21, + "learning_rate": 1.8398381920850626e-05, + "loss": 0.8638, + "step": 1401 + }, + { + "epoch": 0.21, + "learning_rate": 1.8395780908554983e-05, + "loss": 0.8833, + "step": 1402 + }, + { + "epoch": 0.21, + "learning_rate": 1.839317797012944e-05, + "loss": 0.875, + "step": 1403 + }, + { + "epoch": 0.21, + "learning_rate": 1.8390573106171158e-05, + "loss": 0.8511, + "step": 1404 + }, + { + "epoch": 0.21, + "learning_rate": 1.8387966317277733e-05, + "loss": 0.8184, + "step": 1405 + }, + { + "epoch": 0.21, + "learning_rate": 1.838535760404721e-05, + "loss": 0.8398, + "step": 1406 + }, + { + "epoch": 0.21, + "learning_rate": 1.8382746967078063e-05, + "loss": 0.8281, + "step": 1407 + }, + { + "epoch": 0.21, + "learning_rate": 1.8380134406969218e-05, + "loss": 0.916, + "step": 1408 + }, + { + "epoch": 0.21, + "learning_rate": 1.8377519924320045e-05, + "loss": 0.8433, + "step": 1409 + }, + { + "epoch": 0.21, + "learning_rate": 1.8374903519730347e-05, + "loss": 0.7871, + "step": 1410 + }, + { + "epoch": 0.21, + "learning_rate": 1.837228519380036e-05, + "loss": 0.8486, + "step": 1411 + }, + { + "epoch": 0.21, + "learning_rate": 1.8369664947130787e-05, + "loss": 0.8521, + "step": 1412 + }, + { + "epoch": 0.21, + "learning_rate": 1.8367042780322744e-05, + "loss": 0.8306, + "step": 1413 + }, + { + "epoch": 0.21, + "learning_rate": 1.8364418693977803e-05, + "loss": 0.8989, + "step": 1414 + }, + { + "epoch": 0.21, + "learning_rate": 1.8361792688697972e-05, + "loss": 0.8096, + "step": 1415 + }, + { + "epoch": 0.21, + "learning_rate": 1.8359164765085698e-05, + "loss": 0.8345, + "step": 1416 + }, + { + "epoch": 0.21, + "learning_rate": 1.8356534923743875e-05, + "loss": 0.8965, + "step": 1417 + }, + { + "epoch": 0.21, + "learning_rate": 1.8353903165275825e-05, + "loss": 0.8662, + "step": 1418 + }, + { + "epoch": 0.21, + "learning_rate": 1.8351269490285323e-05, + "loss": 0.8569, + "step": 1419 + }, + { + "epoch": 0.21, + "learning_rate": 1.8348633899376567e-05, + "loss": 0.8179, + "step": 1420 + }, + { + "epoch": 0.21, + "learning_rate": 1.834599639315422e-05, + "loss": 0.8916, + "step": 1421 + }, + { + "epoch": 0.21, + "learning_rate": 1.8343356972223357e-05, + "loss": 0.8462, + "step": 1422 + }, + { + "epoch": 0.21, + "learning_rate": 1.834071563718951e-05, + "loss": 0.8857, + "step": 1423 + }, + { + "epoch": 0.21, + "learning_rate": 1.8338072388658642e-05, + "loss": 0.7886, + "step": 1424 + }, + { + "epoch": 0.21, + "learning_rate": 1.833542722723716e-05, + "loss": 0.9185, + "step": 1425 + }, + { + "epoch": 0.21, + "learning_rate": 1.8332780153531905e-05, + "loss": 0.8921, + "step": 1426 + }, + { + "epoch": 0.21, + "learning_rate": 1.833013116815016e-05, + "loss": 0.8926, + "step": 1427 + }, + { + "epoch": 0.21, + "learning_rate": 1.8327480271699647e-05, + "loss": 0.8638, + "step": 1428 + }, + { + "epoch": 0.21, + "learning_rate": 1.8324827464788525e-05, + "loss": 0.8848, + "step": 1429 + }, + { + "epoch": 0.21, + "learning_rate": 1.8322172748025386e-05, + "loss": 0.8462, + "step": 1430 + }, + { + "epoch": 0.21, + "learning_rate": 1.8319516122019274e-05, + "loss": 0.8423, + "step": 1431 + }, + { + "epoch": 0.21, + "learning_rate": 1.8316857587379656e-05, + "loss": 0.8906, + "step": 1432 + }, + { + "epoch": 0.21, + "learning_rate": 1.831419714471645e-05, + "loss": 0.8511, + "step": 1433 + }, + { + "epoch": 0.21, + "learning_rate": 1.831153479464e-05, + "loss": 0.8525, + "step": 1434 + }, + { + "epoch": 0.21, + "learning_rate": 1.8308870537761094e-05, + "loss": 0.8687, + "step": 1435 + }, + { + "epoch": 0.21, + "learning_rate": 1.8306204374690955e-05, + "loss": 0.9082, + "step": 1436 + }, + { + "epoch": 0.21, + "learning_rate": 1.830353630604125e-05, + "loss": 0.873, + "step": 1437 + }, + { + "epoch": 0.21, + "learning_rate": 1.830086633242407e-05, + "loss": 0.8379, + "step": 1438 + }, + { + "epoch": 0.21, + "learning_rate": 1.8298194454451957e-05, + "loss": 0.8428, + "step": 1439 + }, + { + "epoch": 0.21, + "learning_rate": 1.829552067273788e-05, + "loss": 0.8711, + "step": 1440 + }, + { + "epoch": 0.21, + "learning_rate": 1.8292844987895247e-05, + "loss": 0.9595, + "step": 1441 + }, + { + "epoch": 0.21, + "learning_rate": 1.829016740053791e-05, + "loss": 0.8354, + "step": 1442 + }, + { + "epoch": 0.21, + "learning_rate": 1.8287487911280147e-05, + "loss": 0.8467, + "step": 1443 + }, + { + "epoch": 0.21, + "learning_rate": 1.8284806520736677e-05, + "loss": 0.7793, + "step": 1444 + }, + { + "epoch": 0.21, + "learning_rate": 1.8282123229522654e-05, + "loss": 0.874, + "step": 1445 + }, + { + "epoch": 0.21, + "learning_rate": 1.8279438038253675e-05, + "loss": 0.9229, + "step": 1446 + }, + { + "epoch": 0.21, + "learning_rate": 1.827675094754576e-05, + "loss": 0.8286, + "step": 1447 + }, + { + "epoch": 0.21, + "learning_rate": 1.8274061958015377e-05, + "loss": 0.8369, + "step": 1448 + }, + { + "epoch": 0.21, + "learning_rate": 1.8271371070279418e-05, + "loss": 0.8901, + "step": 1449 + }, + { + "epoch": 0.21, + "learning_rate": 1.8268678284955222e-05, + "loss": 0.874, + "step": 1450 + }, + { + "epoch": 0.21, + "learning_rate": 1.826598360266056e-05, + "loss": 0.8696, + "step": 1451 + }, + { + "epoch": 0.21, + "learning_rate": 1.8263287024013628e-05, + "loss": 0.854, + "step": 1452 + }, + { + "epoch": 0.21, + "learning_rate": 1.8260588549633072e-05, + "loss": 0.8613, + "step": 1453 + }, + { + "epoch": 0.22, + "learning_rate": 1.8257888180137966e-05, + "loss": 0.8784, + "step": 1454 + }, + { + "epoch": 0.22, + "learning_rate": 1.8255185916147817e-05, + "loss": 0.8813, + "step": 1455 + }, + { + "epoch": 0.22, + "learning_rate": 1.8252481758282573e-05, + "loss": 0.854, + "step": 1456 + }, + { + "epoch": 0.22, + "learning_rate": 1.82497757071626e-05, + "loss": 0.8564, + "step": 1457 + }, + { + "epoch": 0.22, + "learning_rate": 1.824706776340873e-05, + "loss": 0.8989, + "step": 1458 + }, + { + "epoch": 0.22, + "learning_rate": 1.824435792764219e-05, + "loss": 0.8525, + "step": 1459 + }, + { + "epoch": 0.22, + "learning_rate": 1.8241646200484676e-05, + "loss": 0.8345, + "step": 1460 + }, + { + "epoch": 0.22, + "learning_rate": 1.8238932582558294e-05, + "loss": 0.8364, + "step": 1461 + }, + { + "epoch": 0.22, + "learning_rate": 1.823621707448559e-05, + "loss": 0.8086, + "step": 1462 + }, + { + "epoch": 0.22, + "learning_rate": 1.8233499676889556e-05, + "loss": 0.833, + "step": 1463 + }, + { + "epoch": 0.22, + "learning_rate": 1.8230780390393606e-05, + "loss": 0.9136, + "step": 1464 + }, + { + "epoch": 0.22, + "learning_rate": 1.822805921562158e-05, + "loss": 0.8892, + "step": 1465 + }, + { + "epoch": 0.22, + "learning_rate": 1.8225336153197765e-05, + "loss": 0.8716, + "step": 1466 + }, + { + "epoch": 0.22, + "learning_rate": 1.822261120374688e-05, + "loss": 0.8076, + "step": 1467 + }, + { + "epoch": 0.22, + "learning_rate": 1.821988436789407e-05, + "loss": 0.8701, + "step": 1468 + }, + { + "epoch": 0.22, + "learning_rate": 1.8217155646264915e-05, + "loss": 0.9385, + "step": 1469 + }, + { + "epoch": 0.22, + "learning_rate": 1.8214425039485428e-05, + "loss": 0.8809, + "step": 1470 + }, + { + "epoch": 0.22, + "learning_rate": 1.821169254818206e-05, + "loss": 0.8481, + "step": 1471 + }, + { + "epoch": 0.22, + "learning_rate": 1.8208958172981685e-05, + "loss": 0.8257, + "step": 1472 + }, + { + "epoch": 0.22, + "learning_rate": 1.820622191451161e-05, + "loss": 0.834, + "step": 1473 + }, + { + "epoch": 0.22, + "learning_rate": 1.8203483773399587e-05, + "loss": 0.8301, + "step": 1474 + }, + { + "epoch": 0.22, + "learning_rate": 1.8200743750273783e-05, + "loss": 0.3228, + "step": 1475 + }, + { + "epoch": 0.22, + "learning_rate": 1.8198001845762805e-05, + "loss": 0.8765, + "step": 1476 + }, + { + "epoch": 0.22, + "learning_rate": 1.8195258060495693e-05, + "loss": 0.7993, + "step": 1477 + }, + { + "epoch": 0.22, + "learning_rate": 1.819251239510192e-05, + "loss": 0.895, + "step": 1478 + }, + { + "epoch": 0.22, + "learning_rate": 1.818976485021138e-05, + "loss": 0.8408, + "step": 1479 + }, + { + "epoch": 0.22, + "learning_rate": 1.8187015426454402e-05, + "loss": 0.9082, + "step": 1480 + }, + { + "epoch": 0.22, + "learning_rate": 1.818426412446176e-05, + "loss": 0.8608, + "step": 1481 + }, + { + "epoch": 0.22, + "learning_rate": 1.8181510944864642e-05, + "loss": 0.3131, + "step": 1482 + }, + { + "epoch": 0.22, + "learning_rate": 1.8178755888294673e-05, + "loss": 0.8589, + "step": 1483 + }, + { + "epoch": 0.22, + "learning_rate": 1.8175998955383906e-05, + "loss": 0.8447, + "step": 1484 + }, + { + "epoch": 0.22, + "learning_rate": 1.817324014676483e-05, + "loss": 0.9111, + "step": 1485 + }, + { + "epoch": 0.22, + "learning_rate": 1.8170479463070362e-05, + "loss": 0.8843, + "step": 1486 + }, + { + "epoch": 0.22, + "learning_rate": 1.8167716904933842e-05, + "loss": 0.8506, + "step": 1487 + }, + { + "epoch": 0.22, + "learning_rate": 1.8164952472989047e-05, + "loss": 0.8701, + "step": 1488 + }, + { + "epoch": 0.22, + "learning_rate": 1.816218616787019e-05, + "loss": 0.8716, + "step": 1489 + }, + { + "epoch": 0.22, + "learning_rate": 1.8159417990211904e-05, + "loss": 0.9302, + "step": 1490 + }, + { + "epoch": 0.22, + "learning_rate": 1.815664794064925e-05, + "loss": 0.8643, + "step": 1491 + }, + { + "epoch": 0.22, + "learning_rate": 1.8153876019817725e-05, + "loss": 0.897, + "step": 1492 + }, + { + "epoch": 0.22, + "learning_rate": 1.8151102228353257e-05, + "loss": 0.8633, + "step": 1493 + }, + { + "epoch": 0.22, + "learning_rate": 1.8148326566892197e-05, + "loss": 0.8774, + "step": 1494 + }, + { + "epoch": 0.22, + "learning_rate": 1.8145549036071327e-05, + "loss": 0.8682, + "step": 1495 + }, + { + "epoch": 0.22, + "learning_rate": 1.8142769636527852e-05, + "loss": 0.8569, + "step": 1496 + }, + { + "epoch": 0.22, + "learning_rate": 1.8139988368899423e-05, + "loss": 0.9365, + "step": 1497 + }, + { + "epoch": 0.22, + "learning_rate": 1.81372052338241e-05, + "loss": 0.8638, + "step": 1498 + }, + { + "epoch": 0.22, + "learning_rate": 1.8134420231940384e-05, + "loss": 0.9297, + "step": 1499 + }, + { + "epoch": 0.22, + "learning_rate": 1.81316333638872e-05, + "loss": 0.9189, + "step": 1500 + }, + { + "epoch": 0.22, + "learning_rate": 1.8128844630303896e-05, + "loss": 0.8384, + "step": 1501 + }, + { + "epoch": 0.22, + "learning_rate": 1.812605403183026e-05, + "loss": 0.8149, + "step": 1502 + }, + { + "epoch": 0.22, + "learning_rate": 1.8123261569106502e-05, + "loss": 0.8428, + "step": 1503 + }, + { + "epoch": 0.22, + "learning_rate": 1.812046724277325e-05, + "loss": 0.8647, + "step": 1504 + }, + { + "epoch": 0.22, + "learning_rate": 1.8117671053471576e-05, + "loss": 0.8574, + "step": 1505 + }, + { + "epoch": 0.22, + "learning_rate": 1.811487300184297e-05, + "loss": 0.894, + "step": 1506 + }, + { + "epoch": 0.22, + "learning_rate": 1.811207308852935e-05, + "loss": 0.8286, + "step": 1507 + }, + { + "epoch": 0.22, + "learning_rate": 1.8109271314173062e-05, + "loss": 0.8574, + "step": 1508 + }, + { + "epoch": 0.22, + "learning_rate": 1.8106467679416877e-05, + "loss": 0.877, + "step": 1509 + }, + { + "epoch": 0.22, + "learning_rate": 1.8103662184904e-05, + "loss": 0.8335, + "step": 1510 + }, + { + "epoch": 0.22, + "learning_rate": 1.8100854831278052e-05, + "loss": 0.8247, + "step": 1511 + }, + { + "epoch": 0.22, + "learning_rate": 1.8098045619183092e-05, + "loss": 0.8198, + "step": 1512 + }, + { + "epoch": 0.22, + "learning_rate": 1.8095234549263592e-05, + "loss": 0.8828, + "step": 1513 + }, + { + "epoch": 0.22, + "learning_rate": 1.8092421622164464e-05, + "loss": 0.8813, + "step": 1514 + }, + { + "epoch": 0.22, + "learning_rate": 1.8089606838531034e-05, + "loss": 0.8813, + "step": 1515 + }, + { + "epoch": 0.22, + "learning_rate": 1.8086790199009067e-05, + "loss": 0.873, + "step": 1516 + }, + { + "epoch": 0.22, + "learning_rate": 1.808397170424474e-05, + "loss": 0.8491, + "step": 1517 + }, + { + "epoch": 0.22, + "learning_rate": 1.808115135488467e-05, + "loss": 0.8652, + "step": 1518 + }, + { + "epoch": 0.22, + "learning_rate": 1.8078329151575874e-05, + "loss": 0.8564, + "step": 1519 + }, + { + "epoch": 0.22, + "learning_rate": 1.807550509496583e-05, + "loss": 0.9204, + "step": 1520 + }, + { + "epoch": 0.22, + "learning_rate": 1.8072679185702416e-05, + "loss": 0.9033, + "step": 1521 + }, + { + "epoch": 0.23, + "learning_rate": 1.8069851424433943e-05, + "loss": 0.8726, + "step": 1522 + }, + { + "epoch": 0.23, + "learning_rate": 1.806702181180914e-05, + "loss": 0.8379, + "step": 1523 + }, + { + "epoch": 0.23, + "learning_rate": 1.8064190348477173e-05, + "loss": 0.8599, + "step": 1524 + }, + { + "epoch": 0.23, + "learning_rate": 1.8061357035087627e-05, + "loss": 0.8154, + "step": 1525 + }, + { + "epoch": 0.23, + "learning_rate": 1.8058521872290505e-05, + "loss": 0.9033, + "step": 1526 + }, + { + "epoch": 0.23, + "learning_rate": 1.8055684860736246e-05, + "loss": 0.8872, + "step": 1527 + }, + { + "epoch": 0.23, + "learning_rate": 1.80528460010757e-05, + "loss": 0.8989, + "step": 1528 + }, + { + "epoch": 0.23, + "learning_rate": 1.8050005293960157e-05, + "loss": 0.877, + "step": 1529 + }, + { + "epoch": 0.23, + "learning_rate": 1.8047162740041314e-05, + "loss": 0.8496, + "step": 1530 + }, + { + "epoch": 0.23, + "learning_rate": 1.8044318339971302e-05, + "loss": 0.8911, + "step": 1531 + }, + { + "epoch": 0.23, + "learning_rate": 1.8041472094402676e-05, + "loss": 0.8384, + "step": 1532 + }, + { + "epoch": 0.23, + "learning_rate": 1.8038624003988406e-05, + "loss": 0.8936, + "step": 1533 + }, + { + "epoch": 0.23, + "learning_rate": 1.803577406938189e-05, + "loss": 0.875, + "step": 1534 + }, + { + "epoch": 0.23, + "learning_rate": 1.803292229123696e-05, + "loss": 0.9087, + "step": 1535 + }, + { + "epoch": 0.23, + "learning_rate": 1.803006867020785e-05, + "loss": 0.8247, + "step": 1536 + }, + { + "epoch": 0.23, + "learning_rate": 1.8027213206949232e-05, + "loss": 0.8179, + "step": 1537 + }, + { + "epoch": 0.23, + "learning_rate": 1.8024355902116198e-05, + "loss": 0.7744, + "step": 1538 + }, + { + "epoch": 0.23, + "learning_rate": 1.8021496756364256e-05, + "loss": 0.8442, + "step": 1539 + }, + { + "epoch": 0.23, + "learning_rate": 1.8018635770349343e-05, + "loss": 0.8428, + "step": 1540 + }, + { + "epoch": 0.23, + "learning_rate": 1.8015772944727814e-05, + "loss": 0.856, + "step": 1541 + }, + { + "epoch": 0.23, + "learning_rate": 1.8012908280156452e-05, + "loss": 0.8491, + "step": 1542 + }, + { + "epoch": 0.23, + "learning_rate": 1.801004177729246e-05, + "loss": 0.8721, + "step": 1543 + }, + { + "epoch": 0.23, + "learning_rate": 1.8007173436793453e-05, + "loss": 0.9004, + "step": 1544 + }, + { + "epoch": 0.23, + "learning_rate": 1.800430325931748e-05, + "loss": 0.8574, + "step": 1545 + }, + { + "epoch": 0.23, + "learning_rate": 1.8001431245523008e-05, + "loss": 0.8696, + "step": 1546 + }, + { + "epoch": 0.23, + "learning_rate": 1.7998557396068923e-05, + "loss": 0.8389, + "step": 1547 + }, + { + "epoch": 0.23, + "learning_rate": 1.799568171161453e-05, + "loss": 0.8647, + "step": 1548 + }, + { + "epoch": 0.23, + "learning_rate": 1.7992804192819565e-05, + "loss": 0.9087, + "step": 1549 + }, + { + "epoch": 0.23, + "learning_rate": 1.798992484034417e-05, + "loss": 0.9365, + "step": 1550 + }, + { + "epoch": 0.23, + "learning_rate": 1.798704365484892e-05, + "loss": 0.8521, + "step": 1551 + }, + { + "epoch": 0.23, + "learning_rate": 1.7984160636994808e-05, + "loss": 0.9175, + "step": 1552 + }, + { + "epoch": 0.23, + "learning_rate": 1.7981275787443243e-05, + "loss": 0.8223, + "step": 1553 + }, + { + "epoch": 0.23, + "learning_rate": 1.7978389106856056e-05, + "loss": 0.8442, + "step": 1554 + }, + { + "epoch": 0.23, + "learning_rate": 1.7975500595895503e-05, + "loss": 0.8215, + "step": 1555 + }, + { + "epoch": 0.23, + "learning_rate": 1.7972610255224246e-05, + "loss": 0.8975, + "step": 1556 + }, + { + "epoch": 0.23, + "learning_rate": 1.796971808550539e-05, + "loss": 0.8481, + "step": 1557 + }, + { + "epoch": 0.23, + "learning_rate": 1.7966824087402438e-05, + "loss": 0.9209, + "step": 1558 + }, + { + "epoch": 0.23, + "learning_rate": 1.796392826157932e-05, + "loss": 0.8057, + "step": 1559 + }, + { + "epoch": 0.23, + "learning_rate": 1.7961030608700395e-05, + "loss": 0.3083, + "step": 1560 + }, + { + "epoch": 0.23, + "learning_rate": 1.7958131129430417e-05, + "loss": 0.8662, + "step": 1561 + }, + { + "epoch": 0.23, + "learning_rate": 1.795522982443459e-05, + "loss": 0.3489, + "step": 1562 + }, + { + "epoch": 0.23, + "learning_rate": 1.795232669437851e-05, + "loss": 0.791, + "step": 1563 + }, + { + "epoch": 0.23, + "learning_rate": 1.7949421739928205e-05, + "loss": 0.8301, + "step": 1564 + }, + { + "epoch": 0.23, + "learning_rate": 1.7946514961750123e-05, + "loss": 0.8652, + "step": 1565 + }, + { + "epoch": 0.23, + "learning_rate": 1.7943606360511122e-05, + "loss": 0.915, + "step": 1566 + }, + { + "epoch": 0.23, + "learning_rate": 1.7940695936878486e-05, + "loss": 0.7979, + "step": 1567 + }, + { + "epoch": 0.23, + "learning_rate": 1.793778369151991e-05, + "loss": 0.8042, + "step": 1568 + }, + { + "epoch": 0.23, + "learning_rate": 1.7934869625103517e-05, + "loss": 0.9282, + "step": 1569 + }, + { + "epoch": 0.23, + "learning_rate": 1.793195373829784e-05, + "loss": 0.8145, + "step": 1570 + }, + { + "epoch": 0.23, + "learning_rate": 1.7929036031771825e-05, + "loss": 0.3401, + "step": 1571 + }, + { + "epoch": 0.23, + "learning_rate": 1.792611650619485e-05, + "loss": 0.813, + "step": 1572 + }, + { + "epoch": 0.23, + "learning_rate": 1.7923195162236694e-05, + "loss": 0.8848, + "step": 1573 + }, + { + "epoch": 0.23, + "learning_rate": 1.792027200056757e-05, + "loss": 0.8599, + "step": 1574 + }, + { + "epoch": 0.23, + "learning_rate": 1.7917347021858092e-05, + "loss": 0.9072, + "step": 1575 + }, + { + "epoch": 0.23, + "learning_rate": 1.7914420226779303e-05, + "loss": 0.8604, + "step": 1576 + }, + { + "epoch": 0.23, + "learning_rate": 1.7911491616002656e-05, + "loss": 0.8398, + "step": 1577 + }, + { + "epoch": 0.23, + "learning_rate": 1.7908561190200022e-05, + "loss": 0.9121, + "step": 1578 + }, + { + "epoch": 0.23, + "learning_rate": 1.790562895004369e-05, + "loss": 0.8711, + "step": 1579 + }, + { + "epoch": 0.23, + "learning_rate": 1.790269489620636e-05, + "loss": 0.8364, + "step": 1580 + }, + { + "epoch": 0.23, + "learning_rate": 1.7899759029361156e-05, + "loss": 0.897, + "step": 1581 + }, + { + "epoch": 0.23, + "learning_rate": 1.7896821350181613e-05, + "loss": 0.8872, + "step": 1582 + }, + { + "epoch": 0.23, + "learning_rate": 1.7893881859341684e-05, + "loss": 0.8633, + "step": 1583 + }, + { + "epoch": 0.23, + "learning_rate": 1.7890940557515735e-05, + "loss": 0.8687, + "step": 1584 + }, + { + "epoch": 0.23, + "learning_rate": 1.7887997445378547e-05, + "loss": 0.8623, + "step": 1585 + }, + { + "epoch": 0.23, + "learning_rate": 1.7885052523605324e-05, + "loss": 0.8633, + "step": 1586 + }, + { + "epoch": 0.23, + "learning_rate": 1.7882105792871675e-05, + "loss": 0.9087, + "step": 1587 + }, + { + "epoch": 0.23, + "learning_rate": 1.787915725385363e-05, + "loss": 0.855, + "step": 1588 + }, + { + "epoch": 0.23, + "learning_rate": 1.7876206907227628e-05, + "loss": 0.8384, + "step": 1589 + }, + { + "epoch": 0.24, + "learning_rate": 1.787325475367053e-05, + "loss": 0.8433, + "step": 1590 + }, + { + "epoch": 0.24, + "learning_rate": 1.787030079385961e-05, + "loss": 0.8672, + "step": 1591 + }, + { + "epoch": 0.24, + "learning_rate": 1.7867345028472556e-05, + "loss": 0.8003, + "step": 1592 + }, + { + "epoch": 0.24, + "learning_rate": 1.7864387458187466e-05, + "loss": 0.9243, + "step": 1593 + }, + { + "epoch": 0.24, + "learning_rate": 1.7861428083682855e-05, + "loss": 0.873, + "step": 1594 + }, + { + "epoch": 0.24, + "learning_rate": 1.785846690563765e-05, + "loss": 0.8418, + "step": 1595 + }, + { + "epoch": 0.24, + "learning_rate": 1.7855503924731205e-05, + "loss": 0.8789, + "step": 1596 + }, + { + "epoch": 0.24, + "learning_rate": 1.785253914164326e-05, + "loss": 0.3291, + "step": 1597 + }, + { + "epoch": 0.24, + "learning_rate": 1.7849572557054e-05, + "loss": 0.8296, + "step": 1598 + }, + { + "epoch": 0.24, + "learning_rate": 1.7846604171643997e-05, + "loss": 0.876, + "step": 1599 + }, + { + "epoch": 0.24, + "learning_rate": 1.784363398609425e-05, + "loss": 0.8623, + "step": 1600 + }, + { + "epoch": 0.24, + "learning_rate": 1.7840662001086174e-05, + "loss": 0.8418, + "step": 1601 + }, + { + "epoch": 0.24, + "learning_rate": 1.7837688217301584e-05, + "loss": 0.8403, + "step": 1602 + }, + { + "epoch": 0.24, + "learning_rate": 1.7834712635422718e-05, + "loss": 0.8623, + "step": 1603 + }, + { + "epoch": 0.24, + "learning_rate": 1.7831735256132226e-05, + "loss": 0.875, + "step": 1604 + }, + { + "epoch": 0.24, + "learning_rate": 1.782875608011316e-05, + "loss": 0.877, + "step": 1605 + }, + { + "epoch": 0.24, + "learning_rate": 1.7825775108049003e-05, + "loss": 0.8667, + "step": 1606 + }, + { + "epoch": 0.24, + "learning_rate": 1.7822792340623623e-05, + "loss": 0.8472, + "step": 1607 + }, + { + "epoch": 0.24, + "learning_rate": 1.7819807778521335e-05, + "loss": 0.8647, + "step": 1608 + }, + { + "epoch": 0.24, + "learning_rate": 1.781682142242683e-05, + "loss": 0.9053, + "step": 1609 + }, + { + "epoch": 0.24, + "learning_rate": 1.7813833273025237e-05, + "loss": 0.8613, + "step": 1610 + }, + { + "epoch": 0.24, + "learning_rate": 1.7810843331002085e-05, + "loss": 0.8516, + "step": 1611 + }, + { + "epoch": 0.24, + "learning_rate": 1.7807851597043315e-05, + "loss": 0.8584, + "step": 1612 + }, + { + "epoch": 0.24, + "learning_rate": 1.7804858071835273e-05, + "loss": 0.8667, + "step": 1613 + }, + { + "epoch": 0.24, + "learning_rate": 1.7801862756064735e-05, + "loss": 0.8179, + "step": 1614 + }, + { + "epoch": 0.24, + "learning_rate": 1.7798865650418868e-05, + "loss": 0.8896, + "step": 1615 + }, + { + "epoch": 0.24, + "learning_rate": 1.7795866755585258e-05, + "loss": 0.8633, + "step": 1616 + }, + { + "epoch": 0.24, + "learning_rate": 1.77928660722519e-05, + "loss": 0.896, + "step": 1617 + }, + { + "epoch": 0.24, + "learning_rate": 1.7789863601107203e-05, + "loss": 0.8818, + "step": 1618 + }, + { + "epoch": 0.24, + "learning_rate": 1.778685934283998e-05, + "loss": 0.9409, + "step": 1619 + }, + { + "epoch": 0.24, + "learning_rate": 1.778385329813946e-05, + "loss": 0.8696, + "step": 1620 + }, + { + "epoch": 0.24, + "learning_rate": 1.7780845467695277e-05, + "loss": 0.875, + "step": 1621 + }, + { + "epoch": 0.24, + "learning_rate": 1.7777835852197477e-05, + "loss": 0.8296, + "step": 1622 + }, + { + "epoch": 0.24, + "learning_rate": 1.7774824452336516e-05, + "loss": 0.8848, + "step": 1623 + }, + { + "epoch": 0.24, + "learning_rate": 1.7771811268803258e-05, + "loss": 0.8745, + "step": 1624 + }, + { + "epoch": 0.24, + "learning_rate": 1.7768796302288973e-05, + "loss": 0.9316, + "step": 1625 + }, + { + "epoch": 0.24, + "learning_rate": 1.7765779553485352e-05, + "loss": 0.9092, + "step": 1626 + }, + { + "epoch": 0.24, + "learning_rate": 1.776276102308448e-05, + "loss": 0.8511, + "step": 1627 + }, + { + "epoch": 0.24, + "learning_rate": 1.775974071177886e-05, + "loss": 0.875, + "step": 1628 + }, + { + "epoch": 0.24, + "learning_rate": 1.7756718620261402e-05, + "loss": 0.8184, + "step": 1629 + }, + { + "epoch": 0.24, + "learning_rate": 1.7753694749225422e-05, + "loss": 0.8535, + "step": 1630 + }, + { + "epoch": 0.24, + "learning_rate": 1.7750669099364643e-05, + "loss": 0.8569, + "step": 1631 + }, + { + "epoch": 0.24, + "learning_rate": 1.7747641671373203e-05, + "loss": 0.8506, + "step": 1632 + }, + { + "epoch": 0.24, + "learning_rate": 1.774461246594564e-05, + "loss": 0.8613, + "step": 1633 + }, + { + "epoch": 0.24, + "learning_rate": 1.7741581483776906e-05, + "loss": 0.8315, + "step": 1634 + }, + { + "epoch": 0.24, + "learning_rate": 1.773854872556236e-05, + "loss": 0.8965, + "step": 1635 + }, + { + "epoch": 0.24, + "learning_rate": 1.7735514191997763e-05, + "loss": 0.894, + "step": 1636 + }, + { + "epoch": 0.24, + "learning_rate": 1.7732477883779287e-05, + "loss": 0.8301, + "step": 1637 + }, + { + "epoch": 0.24, + "learning_rate": 1.772943980160351e-05, + "loss": 0.3223, + "step": 1638 + }, + { + "epoch": 0.24, + "learning_rate": 1.7726399946167424e-05, + "loss": 0.8354, + "step": 1639 + }, + { + "epoch": 0.24, + "learning_rate": 1.772335831816841e-05, + "loss": 0.877, + "step": 1640 + }, + { + "epoch": 0.24, + "learning_rate": 1.772031491830428e-05, + "loss": 0.8379, + "step": 1641 + }, + { + "epoch": 0.24, + "learning_rate": 1.7717269747273234e-05, + "loss": 0.3325, + "step": 1642 + }, + { + "epoch": 0.24, + "learning_rate": 1.7714222805773885e-05, + "loss": 0.8208, + "step": 1643 + }, + { + "epoch": 0.24, + "learning_rate": 1.7711174094505248e-05, + "loss": 0.8955, + "step": 1644 + }, + { + "epoch": 0.24, + "learning_rate": 1.770812361416675e-05, + "loss": 0.8745, + "step": 1645 + }, + { + "epoch": 0.24, + "learning_rate": 1.7705071365458225e-05, + "loss": 0.8462, + "step": 1646 + }, + { + "epoch": 0.24, + "learning_rate": 1.7702017349079907e-05, + "loss": 0.8721, + "step": 1647 + }, + { + "epoch": 0.24, + "learning_rate": 1.7698961565732432e-05, + "loss": 0.8691, + "step": 1648 + }, + { + "epoch": 0.24, + "learning_rate": 1.769590401611685e-05, + "loss": 0.8735, + "step": 1649 + }, + { + "epoch": 0.24, + "learning_rate": 1.7692844700934615e-05, + "loss": 0.9346, + "step": 1650 + }, + { + "epoch": 0.24, + "learning_rate": 1.768978362088759e-05, + "loss": 0.8555, + "step": 1651 + }, + { + "epoch": 0.24, + "learning_rate": 1.768672077667802e-05, + "loss": 0.8887, + "step": 1652 + }, + { + "epoch": 0.24, + "learning_rate": 1.7683656169008587e-05, + "loss": 0.8687, + "step": 1653 + }, + { + "epoch": 0.24, + "learning_rate": 1.7680589798582356e-05, + "loss": 0.8535, + "step": 1654 + }, + { + "epoch": 0.24, + "learning_rate": 1.7677521666102805e-05, + "loss": 0.8384, + "step": 1655 + }, + { + "epoch": 0.24, + "learning_rate": 1.767445177227381e-05, + "loss": 0.7847, + "step": 1656 + }, + { + "epoch": 0.25, + "learning_rate": 1.767138011779966e-05, + "loss": 0.3116, + "step": 1657 + }, + { + "epoch": 0.25, + "learning_rate": 1.7668306703385038e-05, + "loss": 0.8838, + "step": 1658 + }, + { + "epoch": 0.25, + "learning_rate": 1.7665231529735042e-05, + "loss": 0.814, + "step": 1659 + }, + { + "epoch": 0.25, + "learning_rate": 1.766215459755516e-05, + "loss": 0.7642, + "step": 1660 + }, + { + "epoch": 0.25, + "learning_rate": 1.7659075907551296e-05, + "loss": 0.3298, + "step": 1661 + }, + { + "epoch": 0.25, + "learning_rate": 1.7655995460429747e-05, + "loss": 0.7808, + "step": 1662 + }, + { + "epoch": 0.25, + "learning_rate": 1.765291325689723e-05, + "loss": 0.8608, + "step": 1663 + }, + { + "epoch": 0.25, + "learning_rate": 1.7649829297660836e-05, + "loss": 0.7949, + "step": 1664 + }, + { + "epoch": 0.25, + "learning_rate": 1.7646743583428087e-05, + "loss": 0.8857, + "step": 1665 + }, + { + "epoch": 0.25, + "learning_rate": 1.7643656114906895e-05, + "loss": 0.8438, + "step": 1666 + }, + { + "epoch": 0.25, + "learning_rate": 1.7640566892805573e-05, + "loss": 0.8999, + "step": 1667 + }, + { + "epoch": 0.25, + "learning_rate": 1.7637475917832843e-05, + "loss": 0.854, + "step": 1668 + }, + { + "epoch": 0.25, + "learning_rate": 1.763438319069782e-05, + "loss": 0.8989, + "step": 1669 + }, + { + "epoch": 0.25, + "learning_rate": 1.763128871211003e-05, + "loss": 0.877, + "step": 1670 + }, + { + "epoch": 0.25, + "learning_rate": 1.76281924827794e-05, + "loss": 0.8516, + "step": 1671 + }, + { + "epoch": 0.25, + "learning_rate": 1.762509450341625e-05, + "loss": 0.8569, + "step": 1672 + }, + { + "epoch": 0.25, + "learning_rate": 1.762199477473131e-05, + "loss": 0.7871, + "step": 1673 + }, + { + "epoch": 0.25, + "learning_rate": 1.7618893297435713e-05, + "loss": 0.9155, + "step": 1674 + }, + { + "epoch": 0.25, + "learning_rate": 1.761579007224098e-05, + "loss": 0.8442, + "step": 1675 + }, + { + "epoch": 0.25, + "learning_rate": 1.761268509985905e-05, + "loss": 0.8701, + "step": 1676 + }, + { + "epoch": 0.25, + "learning_rate": 1.7609578381002248e-05, + "loss": 0.8716, + "step": 1677 + }, + { + "epoch": 0.25, + "learning_rate": 1.760646991638331e-05, + "loss": 0.8755, + "step": 1678 + }, + { + "epoch": 0.25, + "learning_rate": 1.7603359706715366e-05, + "loss": 0.3669, + "step": 1679 + }, + { + "epoch": 0.25, + "learning_rate": 1.7600247752711952e-05, + "loss": 0.8281, + "step": 1680 + }, + { + "epoch": 0.25, + "learning_rate": 1.7597134055087e-05, + "loss": 0.811, + "step": 1681 + }, + { + "epoch": 0.25, + "learning_rate": 1.7594018614554845e-05, + "loss": 0.9541, + "step": 1682 + }, + { + "epoch": 0.25, + "learning_rate": 1.7590901431830218e-05, + "loss": 0.856, + "step": 1683 + }, + { + "epoch": 0.25, + "learning_rate": 1.758778250762825e-05, + "loss": 0.8638, + "step": 1684 + }, + { + "epoch": 0.25, + "learning_rate": 1.7584661842664478e-05, + "loss": 0.8179, + "step": 1685 + }, + { + "epoch": 0.25, + "learning_rate": 1.7581539437654833e-05, + "loss": 0.8906, + "step": 1686 + }, + { + "epoch": 0.25, + "learning_rate": 1.7578415293315646e-05, + "loss": 0.8638, + "step": 1687 + }, + { + "epoch": 0.25, + "learning_rate": 1.7575289410363642e-05, + "loss": 0.8564, + "step": 1688 + }, + { + "epoch": 0.25, + "learning_rate": 1.7572161789515955e-05, + "loss": 0.9253, + "step": 1689 + }, + { + "epoch": 0.25, + "learning_rate": 1.7569032431490108e-05, + "loss": 0.8804, + "step": 1690 + }, + { + "epoch": 0.25, + "learning_rate": 1.7565901337004035e-05, + "loss": 0.8638, + "step": 1691 + }, + { + "epoch": 0.25, + "learning_rate": 1.7562768506776055e-05, + "loss": 0.811, + "step": 1692 + }, + { + "epoch": 0.25, + "learning_rate": 1.755963394152489e-05, + "loss": 0.8506, + "step": 1693 + }, + { + "epoch": 0.25, + "learning_rate": 1.7556497641969658e-05, + "loss": 0.8154, + "step": 1694 + }, + { + "epoch": 0.25, + "learning_rate": 1.7553359608829886e-05, + "loss": 0.8584, + "step": 1695 + }, + { + "epoch": 0.25, + "learning_rate": 1.7550219842825486e-05, + "loss": 0.8398, + "step": 1696 + }, + { + "epoch": 0.25, + "learning_rate": 1.754707834467677e-05, + "loss": 0.8735, + "step": 1697 + }, + { + "epoch": 0.25, + "learning_rate": 1.754393511510445e-05, + "loss": 0.7793, + "step": 1698 + }, + { + "epoch": 0.25, + "learning_rate": 1.754079015482964e-05, + "loss": 0.8413, + "step": 1699 + }, + { + "epoch": 0.25, + "learning_rate": 1.7537643464573838e-05, + "loss": 0.853, + "step": 1700 + }, + { + "epoch": 0.25, + "learning_rate": 1.7534495045058947e-05, + "loss": 0.895, + "step": 1701 + }, + { + "epoch": 0.25, + "learning_rate": 1.753134489700727e-05, + "loss": 0.8521, + "step": 1702 + }, + { + "epoch": 0.25, + "learning_rate": 1.7528193021141502e-05, + "loss": 0.8989, + "step": 1703 + }, + { + "epoch": 0.25, + "learning_rate": 1.7525039418184732e-05, + "loss": 0.8374, + "step": 1704 + }, + { + "epoch": 0.25, + "learning_rate": 1.7521884088860453e-05, + "loss": 0.8799, + "step": 1705 + }, + { + "epoch": 0.25, + "learning_rate": 1.7518727033892542e-05, + "loss": 0.7993, + "step": 1706 + }, + { + "epoch": 0.25, + "learning_rate": 1.7515568254005287e-05, + "loss": 0.9331, + "step": 1707 + }, + { + "epoch": 0.25, + "learning_rate": 1.751240774992336e-05, + "loss": 0.79, + "step": 1708 + }, + { + "epoch": 0.25, + "learning_rate": 1.7509245522371834e-05, + "loss": 0.8882, + "step": 1709 + }, + { + "epoch": 0.25, + "learning_rate": 1.7506081572076176e-05, + "loss": 0.8262, + "step": 1710 + }, + { + "epoch": 0.25, + "learning_rate": 1.750291589976224e-05, + "loss": 0.915, + "step": 1711 + }, + { + "epoch": 0.25, + "learning_rate": 1.74997485061563e-05, + "loss": 0.8452, + "step": 1712 + }, + { + "epoch": 0.25, + "learning_rate": 1.749657939198499e-05, + "loss": 0.8354, + "step": 1713 + }, + { + "epoch": 0.25, + "learning_rate": 1.749340855797537e-05, + "loss": 0.9102, + "step": 1714 + }, + { + "epoch": 0.25, + "learning_rate": 1.749023600485488e-05, + "loss": 0.3506, + "step": 1715 + }, + { + "epoch": 0.25, + "learning_rate": 1.7487061733351348e-05, + "loss": 0.8457, + "step": 1716 + }, + { + "epoch": 0.25, + "learning_rate": 1.7483885744193006e-05, + "loss": 0.8457, + "step": 1717 + }, + { + "epoch": 0.25, + "learning_rate": 1.7480708038108485e-05, + "loss": 0.813, + "step": 1718 + }, + { + "epoch": 0.25, + "learning_rate": 1.7477528615826793e-05, + "loss": 0.8467, + "step": 1719 + }, + { + "epoch": 0.25, + "learning_rate": 1.747434747807735e-05, + "loss": 0.936, + "step": 1720 + }, + { + "epoch": 0.25, + "learning_rate": 1.7471164625589957e-05, + "loss": 0.8843, + "step": 1721 + }, + { + "epoch": 0.25, + "learning_rate": 1.7467980059094817e-05, + "loss": 0.833, + "step": 1722 + }, + { + "epoch": 0.25, + "learning_rate": 1.7464793779322512e-05, + "loss": 0.8213, + "step": 1723 + }, + { + "epoch": 0.25, + "learning_rate": 1.746160578700404e-05, + "loss": 0.834, + "step": 1724 + }, + { + "epoch": 0.26, + "learning_rate": 1.745841608287077e-05, + "loss": 0.7974, + "step": 1725 + }, + { + "epoch": 0.26, + "learning_rate": 1.745522466765447e-05, + "loss": 0.8657, + "step": 1726 + }, + { + "epoch": 0.26, + "learning_rate": 1.7452031542087313e-05, + "loss": 0.8711, + "step": 1727 + }, + { + "epoch": 0.26, + "learning_rate": 1.7448836706901846e-05, + "loss": 0.856, + "step": 1728 + }, + { + "epoch": 0.26, + "learning_rate": 1.744564016283102e-05, + "loss": 0.9116, + "step": 1729 + }, + { + "epoch": 0.26, + "learning_rate": 1.7442441910608174e-05, + "loss": 0.8457, + "step": 1730 + }, + { + "epoch": 0.26, + "learning_rate": 1.743924195096704e-05, + "loss": 0.8535, + "step": 1731 + }, + { + "epoch": 0.26, + "learning_rate": 1.7436040284641742e-05, + "loss": 0.3364, + "step": 1732 + }, + { + "epoch": 0.26, + "learning_rate": 1.743283691236679e-05, + "loss": 0.8345, + "step": 1733 + }, + { + "epoch": 0.26, + "learning_rate": 1.7429631834877098e-05, + "loss": 0.8789, + "step": 1734 + }, + { + "epoch": 0.26, + "learning_rate": 1.7426425052907956e-05, + "loss": 0.3196, + "step": 1735 + }, + { + "epoch": 0.26, + "learning_rate": 1.742321656719506e-05, + "loss": 0.8638, + "step": 1736 + }, + { + "epoch": 0.26, + "learning_rate": 1.7420006378474483e-05, + "loss": 0.8774, + "step": 1737 + }, + { + "epoch": 0.26, + "learning_rate": 1.7416794487482693e-05, + "loss": 0.9121, + "step": 1738 + }, + { + "epoch": 0.26, + "learning_rate": 1.7413580894956558e-05, + "loss": 0.8149, + "step": 1739 + }, + { + "epoch": 0.26, + "learning_rate": 1.7410365601633326e-05, + "loss": 0.8472, + "step": 1740 + }, + { + "epoch": 0.26, + "learning_rate": 1.7407148608250635e-05, + "loss": 0.8594, + "step": 1741 + }, + { + "epoch": 0.26, + "learning_rate": 1.740392991554652e-05, + "loss": 0.8691, + "step": 1742 + }, + { + "epoch": 0.26, + "learning_rate": 1.74007095242594e-05, + "loss": 0.8276, + "step": 1743 + }, + { + "epoch": 0.26, + "learning_rate": 1.7397487435128084e-05, + "loss": 0.7842, + "step": 1744 + }, + { + "epoch": 0.26, + "learning_rate": 1.7394263648891777e-05, + "loss": 0.8945, + "step": 1745 + }, + { + "epoch": 0.26, + "learning_rate": 1.7391038166290065e-05, + "loss": 0.854, + "step": 1746 + }, + { + "epoch": 0.26, + "learning_rate": 1.7387810988062924e-05, + "loss": 0.8418, + "step": 1747 + }, + { + "epoch": 0.26, + "learning_rate": 1.7384582114950726e-05, + "loss": 0.7676, + "step": 1748 + }, + { + "epoch": 0.26, + "learning_rate": 1.7381351547694226e-05, + "loss": 0.8838, + "step": 1749 + }, + { + "epoch": 0.26, + "learning_rate": 1.737811928703457e-05, + "loss": 0.8506, + "step": 1750 + }, + { + "epoch": 0.26, + "learning_rate": 1.7374885333713293e-05, + "loss": 0.8398, + "step": 1751 + }, + { + "epoch": 0.26, + "learning_rate": 1.7371649688472315e-05, + "loss": 0.8823, + "step": 1752 + }, + { + "epoch": 0.26, + "learning_rate": 1.736841235205394e-05, + "loss": 0.8486, + "step": 1753 + }, + { + "epoch": 0.26, + "learning_rate": 1.7365173325200875e-05, + "loss": 0.8726, + "step": 1754 + }, + { + "epoch": 0.26, + "learning_rate": 1.7361932608656207e-05, + "loss": 0.9214, + "step": 1755 + }, + { + "epoch": 0.26, + "learning_rate": 1.7358690203163403e-05, + "loss": 0.8062, + "step": 1756 + }, + { + "epoch": 0.26, + "learning_rate": 1.7355446109466326e-05, + "loss": 0.8389, + "step": 1757 + }, + { + "epoch": 0.26, + "learning_rate": 1.7352200328309225e-05, + "loss": 0.8667, + "step": 1758 + }, + { + "epoch": 0.26, + "learning_rate": 1.7348952860436737e-05, + "loss": 0.8188, + "step": 1759 + }, + { + "epoch": 0.26, + "learning_rate": 1.734570370659388e-05, + "loss": 0.855, + "step": 1760 + }, + { + "epoch": 0.26, + "learning_rate": 1.7342452867526067e-05, + "loss": 0.873, + "step": 1761 + }, + { + "epoch": 0.26, + "learning_rate": 1.733920034397909e-05, + "loss": 0.9302, + "step": 1762 + }, + { + "epoch": 0.26, + "learning_rate": 1.7335946136699138e-05, + "loss": 0.8125, + "step": 1763 + }, + { + "epoch": 0.26, + "learning_rate": 1.7332690246432774e-05, + "loss": 0.8184, + "step": 1764 + }, + { + "epoch": 0.26, + "learning_rate": 1.7329432673926953e-05, + "loss": 0.9175, + "step": 1765 + }, + { + "epoch": 0.26, + "learning_rate": 1.732617341992902e-05, + "loss": 0.8325, + "step": 1766 + }, + { + "epoch": 0.26, + "learning_rate": 1.7322912485186695e-05, + "loss": 0.8838, + "step": 1767 + }, + { + "epoch": 0.26, + "learning_rate": 1.7319649870448096e-05, + "loss": 0.8511, + "step": 1768 + }, + { + "epoch": 0.26, + "learning_rate": 1.7316385576461714e-05, + "loss": 0.333, + "step": 1769 + }, + { + "epoch": 0.26, + "learning_rate": 1.731311960397644e-05, + "loss": 0.9067, + "step": 1770 + }, + { + "epoch": 0.26, + "learning_rate": 1.7309851953741532e-05, + "loss": 0.8535, + "step": 1771 + }, + { + "epoch": 0.26, + "learning_rate": 1.730658262650665e-05, + "loss": 0.855, + "step": 1772 + }, + { + "epoch": 0.26, + "learning_rate": 1.7303311623021824e-05, + "loss": 0.8623, + "step": 1773 + }, + { + "epoch": 0.26, + "learning_rate": 1.7300038944037486e-05, + "loss": 0.8901, + "step": 1774 + }, + { + "epoch": 0.26, + "learning_rate": 1.7296764590304435e-05, + "loss": 0.9175, + "step": 1775 + }, + { + "epoch": 0.26, + "learning_rate": 1.7293488562573863e-05, + "loss": 0.8306, + "step": 1776 + }, + { + "epoch": 0.26, + "learning_rate": 1.7290210861597347e-05, + "loss": 0.8379, + "step": 1777 + }, + { + "epoch": 0.26, + "learning_rate": 1.728693148812684e-05, + "loss": 0.7632, + "step": 1778 + }, + { + "epoch": 0.26, + "learning_rate": 1.728365044291469e-05, + "loss": 0.856, + "step": 1779 + }, + { + "epoch": 0.26, + "learning_rate": 1.7280367726713616e-05, + "loss": 0.8091, + "step": 1780 + }, + { + "epoch": 0.26, + "learning_rate": 1.727708334027673e-05, + "loss": 0.874, + "step": 1781 + }, + { + "epoch": 0.26, + "learning_rate": 1.727379728435753e-05, + "loss": 0.7964, + "step": 1782 + }, + { + "epoch": 0.26, + "learning_rate": 1.7270509559709886e-05, + "loss": 0.8667, + "step": 1783 + }, + { + "epoch": 0.26, + "learning_rate": 1.726722016708805e-05, + "loss": 0.8208, + "step": 1784 + }, + { + "epoch": 0.26, + "learning_rate": 1.7263929107246672e-05, + "loss": 0.8467, + "step": 1785 + }, + { + "epoch": 0.26, + "learning_rate": 1.7260636380940773e-05, + "loss": 0.8604, + "step": 1786 + }, + { + "epoch": 0.26, + "learning_rate": 1.7257341988925756e-05, + "loss": 0.8833, + "step": 1787 + }, + { + "epoch": 0.26, + "learning_rate": 1.7254045931957413e-05, + "loss": 0.9395, + "step": 1788 + }, + { + "epoch": 0.26, + "learning_rate": 1.7250748210791906e-05, + "loss": 0.8086, + "step": 1789 + }, + { + "epoch": 0.26, + "learning_rate": 1.724744882618579e-05, + "loss": 0.7603, + "step": 1790 + }, + { + "epoch": 0.26, + "learning_rate": 1.7244147778896003e-05, + "loss": 0.8599, + "step": 1791 + }, + { + "epoch": 0.27, + "learning_rate": 1.724084506967985e-05, + "loss": 0.8579, + "step": 1792 + }, + { + "epoch": 0.27, + "learning_rate": 1.7237540699295038e-05, + "loss": 0.8774, + "step": 1793 + }, + { + "epoch": 0.27, + "learning_rate": 1.7234234668499633e-05, + "loss": 0.9395, + "step": 1794 + }, + { + "epoch": 0.27, + "learning_rate": 1.7230926978052097e-05, + "loss": 0.8374, + "step": 1795 + }, + { + "epoch": 0.27, + "learning_rate": 1.722761762871127e-05, + "loss": 0.8491, + "step": 1796 + }, + { + "epoch": 0.27, + "learning_rate": 1.722430662123637e-05, + "loss": 0.8594, + "step": 1797 + }, + { + "epoch": 0.27, + "learning_rate": 1.7220993956387e-05, + "loss": 0.8652, + "step": 1798 + }, + { + "epoch": 0.27, + "learning_rate": 1.721767963492313e-05, + "loss": 0.876, + "step": 1799 + }, + { + "epoch": 0.27, + "learning_rate": 1.7214363657605126e-05, + "loss": 0.8462, + "step": 1800 + }, + { + "epoch": 0.27, + "learning_rate": 1.721104602519373e-05, + "loss": 0.8569, + "step": 1801 + }, + { + "epoch": 0.27, + "learning_rate": 1.7207726738450055e-05, + "loss": 0.8892, + "step": 1802 + }, + { + "epoch": 0.27, + "learning_rate": 1.7204405798135607e-05, + "loss": 0.8286, + "step": 1803 + }, + { + "epoch": 0.27, + "learning_rate": 1.720108320501226e-05, + "loss": 0.8604, + "step": 1804 + }, + { + "epoch": 0.27, + "learning_rate": 1.7197758959842267e-05, + "loss": 0.8979, + "step": 1805 + }, + { + "epoch": 0.27, + "learning_rate": 1.7194433063388273e-05, + "loss": 0.8486, + "step": 1806 + }, + { + "epoch": 0.27, + "learning_rate": 1.719110551641329e-05, + "loss": 0.8643, + "step": 1807 + }, + { + "epoch": 0.27, + "learning_rate": 1.718777631968071e-05, + "loss": 0.8516, + "step": 1808 + }, + { + "epoch": 0.27, + "learning_rate": 1.718444547395431e-05, + "loss": 0.814, + "step": 1809 + }, + { + "epoch": 0.27, + "learning_rate": 1.7181112979998235e-05, + "loss": 0.8335, + "step": 1810 + }, + { + "epoch": 0.27, + "learning_rate": 1.7177778838577017e-05, + "loss": 0.875, + "step": 1811 + }, + { + "epoch": 0.27, + "learning_rate": 1.717444305045556e-05, + "loss": 0.873, + "step": 1812 + }, + { + "epoch": 0.27, + "learning_rate": 1.7171105616399153e-05, + "loss": 0.3816, + "step": 1813 + }, + { + "epoch": 0.27, + "learning_rate": 1.7167766537173457e-05, + "loss": 0.9131, + "step": 1814 + }, + { + "epoch": 0.27, + "learning_rate": 1.7164425813544507e-05, + "loss": 0.9097, + "step": 1815 + }, + { + "epoch": 0.27, + "learning_rate": 1.716108344627872e-05, + "loss": 0.8716, + "step": 1816 + }, + { + "epoch": 0.27, + "learning_rate": 1.71577394361429e-05, + "loss": 0.8364, + "step": 1817 + }, + { + "epoch": 0.27, + "learning_rate": 1.7154393783904206e-05, + "loss": 0.9106, + "step": 1818 + }, + { + "epoch": 0.27, + "learning_rate": 1.7151046490330193e-05, + "loss": 0.9146, + "step": 1819 + }, + { + "epoch": 0.27, + "learning_rate": 1.714769755618878e-05, + "loss": 0.8467, + "step": 1820 + }, + { + "epoch": 0.27, + "learning_rate": 1.714434698224827e-05, + "loss": 0.8667, + "step": 1821 + }, + { + "epoch": 0.27, + "learning_rate": 1.714099476927734e-05, + "loss": 0.8813, + "step": 1822 + }, + { + "epoch": 0.27, + "learning_rate": 1.713764091804504e-05, + "loss": 0.8867, + "step": 1823 + }, + { + "epoch": 0.27, + "learning_rate": 1.7134285429320803e-05, + "loss": 0.8804, + "step": 1824 + }, + { + "epoch": 0.27, + "learning_rate": 1.7130928303874427e-05, + "loss": 0.8281, + "step": 1825 + }, + { + "epoch": 0.27, + "learning_rate": 1.71275695424761e-05, + "loss": 0.7705, + "step": 1826 + }, + { + "epoch": 0.27, + "learning_rate": 1.712420914589637e-05, + "loss": 0.8779, + "step": 1827 + }, + { + "epoch": 0.27, + "learning_rate": 1.7120847114906168e-05, + "loss": 0.855, + "step": 1828 + }, + { + "epoch": 0.27, + "learning_rate": 1.7117483450276803e-05, + "loss": 0.9287, + "step": 1829 + }, + { + "epoch": 0.27, + "learning_rate": 1.711411815277995e-05, + "loss": 0.8794, + "step": 1830 + }, + { + "epoch": 0.27, + "learning_rate": 1.711075122318767e-05, + "loss": 0.8452, + "step": 1831 + }, + { + "epoch": 0.27, + "learning_rate": 1.7107382662272384e-05, + "loss": 0.3099, + "step": 1832 + }, + { + "epoch": 0.27, + "learning_rate": 1.71040124708069e-05, + "loss": 0.8677, + "step": 1833 + }, + { + "epoch": 0.27, + "learning_rate": 1.7100640649564396e-05, + "loss": 0.3528, + "step": 1834 + }, + { + "epoch": 0.27, + "learning_rate": 1.7097267199318416e-05, + "loss": 0.939, + "step": 1835 + }, + { + "epoch": 0.27, + "learning_rate": 1.7093892120842894e-05, + "loss": 0.873, + "step": 1836 + }, + { + "epoch": 0.27, + "learning_rate": 1.7090515414912126e-05, + "loss": 0.9043, + "step": 1837 + }, + { + "epoch": 0.27, + "learning_rate": 1.7087137082300776e-05, + "loss": 0.8525, + "step": 1838 + }, + { + "epoch": 0.27, + "learning_rate": 1.70837571237839e-05, + "loss": 0.8506, + "step": 1839 + }, + { + "epoch": 0.27, + "learning_rate": 1.708037554013691e-05, + "loss": 0.9004, + "step": 1840 + }, + { + "epoch": 0.27, + "learning_rate": 1.7076992332135595e-05, + "loss": 0.8394, + "step": 1841 + }, + { + "epoch": 0.27, + "learning_rate": 1.7073607500556127e-05, + "loss": 0.8789, + "step": 1842 + }, + { + "epoch": 0.27, + "learning_rate": 1.707022104617503e-05, + "loss": 0.8687, + "step": 1843 + }, + { + "epoch": 0.27, + "learning_rate": 1.7066832969769222e-05, + "loss": 0.8271, + "step": 1844 + }, + { + "epoch": 0.27, + "learning_rate": 1.706344327211598e-05, + "loss": 0.8403, + "step": 1845 + }, + { + "epoch": 0.27, + "learning_rate": 1.7060051953992956e-05, + "loss": 0.8848, + "step": 1846 + }, + { + "epoch": 0.27, + "learning_rate": 1.7056659016178173e-05, + "loss": 0.873, + "step": 1847 + }, + { + "epoch": 0.27, + "learning_rate": 1.7053264459450023e-05, + "loss": 0.8804, + "step": 1848 + }, + { + "epoch": 0.27, + "learning_rate": 1.704986828458728e-05, + "loss": 0.8501, + "step": 1849 + }, + { + "epoch": 0.27, + "learning_rate": 1.7046470492369086e-05, + "loss": 0.8857, + "step": 1850 + }, + { + "epoch": 0.27, + "learning_rate": 1.704307108357494e-05, + "loss": 0.8613, + "step": 1851 + }, + { + "epoch": 0.27, + "learning_rate": 1.7039670058984725e-05, + "loss": 0.8745, + "step": 1852 + }, + { + "epoch": 0.27, + "learning_rate": 1.7036267419378695e-05, + "loss": 0.8496, + "step": 1853 + }, + { + "epoch": 0.27, + "learning_rate": 1.7032863165537465e-05, + "loss": 0.8511, + "step": 1854 + }, + { + "epoch": 0.27, + "learning_rate": 1.7029457298242035e-05, + "loss": 0.8315, + "step": 1855 + }, + { + "epoch": 0.27, + "learning_rate": 1.702604981827376e-05, + "loss": 0.8325, + "step": 1856 + }, + { + "epoch": 0.27, + "learning_rate": 1.702264072641438e-05, + "loss": 0.3618, + "step": 1857 + }, + { + "epoch": 0.27, + "learning_rate": 1.7019230023445987e-05, + "loss": 0.8921, + "step": 1858 + }, + { + "epoch": 0.27, + "learning_rate": 1.7015817710151058e-05, + "loss": 0.8687, + "step": 1859 + }, + { + "epoch": 0.28, + "learning_rate": 1.7012403787312433e-05, + "loss": 0.8057, + "step": 1860 + }, + { + "epoch": 0.28, + "learning_rate": 1.7008988255713317e-05, + "loss": 0.8604, + "step": 1861 + }, + { + "epoch": 0.28, + "learning_rate": 1.70055711161373e-05, + "loss": 0.9106, + "step": 1862 + }, + { + "epoch": 0.28, + "learning_rate": 1.7002152369368317e-05, + "loss": 0.8286, + "step": 1863 + }, + { + "epoch": 0.28, + "learning_rate": 1.6998732016190694e-05, + "loss": 0.8354, + "step": 1864 + }, + { + "epoch": 0.28, + "learning_rate": 1.699531005738911e-05, + "loss": 0.9365, + "step": 1865 + }, + { + "epoch": 0.28, + "learning_rate": 1.6991886493748625e-05, + "loss": 0.9043, + "step": 1866 + }, + { + "epoch": 0.28, + "learning_rate": 1.6988461326054652e-05, + "loss": 0.8389, + "step": 1867 + }, + { + "epoch": 0.28, + "learning_rate": 1.698503455509299e-05, + "loss": 0.8765, + "step": 1868 + }, + { + "epoch": 0.28, + "learning_rate": 1.698160618164979e-05, + "loss": 0.8491, + "step": 1869 + }, + { + "epoch": 0.28, + "learning_rate": 1.697817620651158e-05, + "loss": 0.8115, + "step": 1870 + }, + { + "epoch": 0.28, + "learning_rate": 1.697474463046525e-05, + "loss": 0.3389, + "step": 1871 + }, + { + "epoch": 0.28, + "learning_rate": 1.6971311454298062e-05, + "loss": 0.8462, + "step": 1872 + }, + { + "epoch": 0.28, + "learning_rate": 1.6967876678797647e-05, + "loss": 0.8564, + "step": 1873 + }, + { + "epoch": 0.28, + "learning_rate": 1.696444030475199e-05, + "loss": 0.9316, + "step": 1874 + }, + { + "epoch": 0.28, + "learning_rate": 1.6961002332949456e-05, + "loss": 0.8184, + "step": 1875 + }, + { + "epoch": 0.28, + "learning_rate": 1.6957562764178774e-05, + "loss": 0.9019, + "step": 1876 + }, + { + "epoch": 0.28, + "learning_rate": 1.6954121599229035e-05, + "loss": 0.8564, + "step": 1877 + }, + { + "epoch": 0.28, + "learning_rate": 1.69506788388897e-05, + "loss": 0.8652, + "step": 1878 + }, + { + "epoch": 0.28, + "learning_rate": 1.6947234483950593e-05, + "loss": 0.8809, + "step": 1879 + }, + { + "epoch": 0.28, + "learning_rate": 1.6943788535201907e-05, + "loss": 0.9229, + "step": 1880 + }, + { + "epoch": 0.28, + "learning_rate": 1.6940340993434197e-05, + "loss": 0.7974, + "step": 1881 + }, + { + "epoch": 0.28, + "learning_rate": 1.693689185943839e-05, + "loss": 0.894, + "step": 1882 + }, + { + "epoch": 0.28, + "learning_rate": 1.6933441134005774e-05, + "loss": 0.7449, + "step": 1883 + }, + { + "epoch": 0.28, + "learning_rate": 1.692998881792799e-05, + "loss": 0.8389, + "step": 1884 + }, + { + "epoch": 0.28, + "learning_rate": 1.6926534911997075e-05, + "loss": 0.918, + "step": 1885 + }, + { + "epoch": 0.28, + "learning_rate": 1.6923079417005396e-05, + "loss": 0.856, + "step": 1886 + }, + { + "epoch": 0.28, + "learning_rate": 1.691962233374571e-05, + "loss": 0.8135, + "step": 1887 + }, + { + "epoch": 0.28, + "learning_rate": 1.6916163663011124e-05, + "loss": 0.9634, + "step": 1888 + }, + { + "epoch": 0.28, + "learning_rate": 1.6912703405595116e-05, + "loss": 0.8721, + "step": 1889 + }, + { + "epoch": 0.28, + "learning_rate": 1.6909241562291522e-05, + "loss": 0.9316, + "step": 1890 + }, + { + "epoch": 0.28, + "learning_rate": 1.690577813389455e-05, + "loss": 0.8853, + "step": 1891 + }, + { + "epoch": 0.28, + "learning_rate": 1.6902313121198764e-05, + "loss": 0.8765, + "step": 1892 + }, + { + "epoch": 0.28, + "learning_rate": 1.68988465249991e-05, + "loss": 0.8892, + "step": 1893 + }, + { + "epoch": 0.28, + "learning_rate": 1.6895378346090843e-05, + "loss": 0.877, + "step": 1894 + }, + { + "epoch": 0.28, + "learning_rate": 1.6891908585269655e-05, + "loss": 0.8672, + "step": 1895 + }, + { + "epoch": 0.28, + "learning_rate": 1.688843724333156e-05, + "loss": 0.874, + "step": 1896 + }, + { + "epoch": 0.28, + "learning_rate": 1.6884964321072938e-05, + "loss": 0.8228, + "step": 1897 + }, + { + "epoch": 0.28, + "learning_rate": 1.6881489819290532e-05, + "loss": 0.8765, + "step": 1898 + }, + { + "epoch": 0.28, + "learning_rate": 1.6878013738781454e-05, + "loss": 0.8662, + "step": 1899 + }, + { + "epoch": 0.28, + "learning_rate": 1.687453608034317e-05, + "loss": 0.8462, + "step": 1900 + }, + { + "epoch": 0.28, + "learning_rate": 1.6871056844773512e-05, + "loss": 0.8569, + "step": 1901 + }, + { + "epoch": 0.28, + "learning_rate": 1.6867576032870677e-05, + "loss": 0.8652, + "step": 1902 + }, + { + "epoch": 0.28, + "learning_rate": 1.686409364543321e-05, + "loss": 0.8857, + "step": 1903 + }, + { + "epoch": 0.28, + "learning_rate": 1.686060968326005e-05, + "loss": 0.8687, + "step": 1904 + }, + { + "epoch": 0.28, + "learning_rate": 1.6857124147150454e-05, + "loss": 0.8711, + "step": 1905 + }, + { + "epoch": 0.28, + "learning_rate": 1.6853637037904066e-05, + "loss": 0.8789, + "step": 1906 + }, + { + "epoch": 0.28, + "learning_rate": 1.6850148356320894e-05, + "loss": 0.8408, + "step": 1907 + }, + { + "epoch": 0.28, + "learning_rate": 1.684665810320129e-05, + "loss": 0.8691, + "step": 1908 + }, + { + "epoch": 0.28, + "learning_rate": 1.6843166279345976e-05, + "loss": 0.8579, + "step": 1909 + }, + { + "epoch": 0.28, + "learning_rate": 1.683967288555604e-05, + "loss": 0.9092, + "step": 1910 + }, + { + "epoch": 0.28, + "learning_rate": 1.6836177922632918e-05, + "loss": 0.8418, + "step": 1911 + }, + { + "epoch": 0.28, + "learning_rate": 1.6832681391378414e-05, + "loss": 0.832, + "step": 1912 + }, + { + "epoch": 0.28, + "learning_rate": 1.6829183292594692e-05, + "loss": 0.8291, + "step": 1913 + }, + { + "epoch": 0.28, + "learning_rate": 1.6825683627084272e-05, + "loss": 0.8486, + "step": 1914 + }, + { + "epoch": 0.28, + "learning_rate": 1.682218239565003e-05, + "loss": 0.8755, + "step": 1915 + }, + { + "epoch": 0.28, + "learning_rate": 1.681867959909521e-05, + "loss": 0.3298, + "step": 1916 + }, + { + "epoch": 0.28, + "learning_rate": 1.681517523822341e-05, + "loss": 0.8843, + "step": 1917 + }, + { + "epoch": 0.28, + "learning_rate": 1.681166931383859e-05, + "loss": 0.8286, + "step": 1918 + }, + { + "epoch": 0.28, + "learning_rate": 1.6808161826745068e-05, + "loss": 0.8301, + "step": 1919 + }, + { + "epoch": 0.28, + "learning_rate": 1.6804652777747513e-05, + "loss": 0.834, + "step": 1920 + }, + { + "epoch": 0.28, + "learning_rate": 1.680114216765096e-05, + "loss": 0.8926, + "step": 1921 + }, + { + "epoch": 0.28, + "learning_rate": 1.6797629997260802e-05, + "loss": 0.8359, + "step": 1922 + }, + { + "epoch": 0.28, + "learning_rate": 1.679411626738279e-05, + "loss": 0.8945, + "step": 1923 + }, + { + "epoch": 0.28, + "learning_rate": 1.6790600978823032e-05, + "loss": 0.8057, + "step": 1924 + }, + { + "epoch": 0.28, + "learning_rate": 1.6787084132387987e-05, + "loss": 0.8677, + "step": 1925 + }, + { + "epoch": 0.28, + "learning_rate": 1.6783565728884483e-05, + "loss": 0.8545, + "step": 1926 + }, + { + "epoch": 0.28, + "learning_rate": 1.6780045769119694e-05, + "loss": 0.8623, + "step": 1927 + }, + { + "epoch": 0.29, + "learning_rate": 1.6776524253901162e-05, + "loss": 0.8281, + "step": 1928 + }, + { + "epoch": 0.29, + "learning_rate": 1.677300118403678e-05, + "loss": 0.877, + "step": 1929 + }, + { + "epoch": 0.29, + "learning_rate": 1.67694765603348e-05, + "loss": 0.9126, + "step": 1930 + }, + { + "epoch": 0.29, + "learning_rate": 1.6765950383603815e-05, + "loss": 0.9175, + "step": 1931 + }, + { + "epoch": 0.29, + "learning_rate": 1.6762422654652806e-05, + "loss": 0.8467, + "step": 1932 + }, + { + "epoch": 0.29, + "learning_rate": 1.675889337429108e-05, + "loss": 0.8687, + "step": 1933 + }, + { + "epoch": 0.29, + "learning_rate": 1.6755362543328317e-05, + "loss": 0.8745, + "step": 1934 + }, + { + "epoch": 0.29, + "learning_rate": 1.6751830162574544e-05, + "loss": 0.8213, + "step": 1935 + }, + { + "epoch": 0.29, + "learning_rate": 1.674829623284015e-05, + "loss": 0.835, + "step": 1936 + }, + { + "epoch": 0.29, + "learning_rate": 1.6744760754935878e-05, + "loss": 0.9497, + "step": 1937 + }, + { + "epoch": 0.29, + "learning_rate": 1.674122372967282e-05, + "loss": 0.7827, + "step": 1938 + }, + { + "epoch": 0.29, + "learning_rate": 1.6737685157862428e-05, + "loss": 0.8311, + "step": 1939 + }, + { + "epoch": 0.29, + "learning_rate": 1.6734145040316515e-05, + "loss": 0.8149, + "step": 1940 + }, + { + "epoch": 0.29, + "learning_rate": 1.6730603377847236e-05, + "loss": 0.9189, + "step": 1941 + }, + { + "epoch": 0.29, + "learning_rate": 1.6727060171267102e-05, + "loss": 0.8867, + "step": 1942 + }, + { + "epoch": 0.29, + "learning_rate": 1.6723515421388992e-05, + "loss": 0.8765, + "step": 1943 + }, + { + "epoch": 0.29, + "learning_rate": 1.6719969129026128e-05, + "loss": 0.8691, + "step": 1944 + }, + { + "epoch": 0.29, + "learning_rate": 1.6716421294992087e-05, + "loss": 0.8032, + "step": 1945 + }, + { + "epoch": 0.29, + "learning_rate": 1.6712871920100796e-05, + "loss": 0.8433, + "step": 1946 + }, + { + "epoch": 0.29, + "learning_rate": 1.6709321005166545e-05, + "loss": 0.8301, + "step": 1947 + }, + { + "epoch": 0.29, + "learning_rate": 1.670576855100397e-05, + "loss": 0.8247, + "step": 1948 + }, + { + "epoch": 0.29, + "learning_rate": 1.6702214558428062e-05, + "loss": 0.8115, + "step": 1949 + }, + { + "epoch": 0.29, + "learning_rate": 1.6698659028254164e-05, + "loss": 0.8398, + "step": 1950 + }, + { + "epoch": 0.29, + "learning_rate": 1.6695101961297978e-05, + "loss": 0.8501, + "step": 1951 + }, + { + "epoch": 0.29, + "learning_rate": 1.669154335837555e-05, + "loss": 0.9126, + "step": 1952 + }, + { + "epoch": 0.29, + "learning_rate": 1.668798322030328e-05, + "loss": 0.8804, + "step": 1953 + }, + { + "epoch": 0.29, + "learning_rate": 1.6684421547897925e-05, + "loss": 0.877, + "step": 1954 + }, + { + "epoch": 0.29, + "learning_rate": 1.6680858341976596e-05, + "loss": 0.8164, + "step": 1955 + }, + { + "epoch": 0.29, + "learning_rate": 1.6677293603356738e-05, + "loss": 0.8447, + "step": 1956 + }, + { + "epoch": 0.29, + "learning_rate": 1.6673727332856172e-05, + "loss": 0.8828, + "step": 1957 + }, + { + "epoch": 0.29, + "learning_rate": 1.667015953129305e-05, + "loss": 0.8447, + "step": 1958 + }, + { + "epoch": 0.29, + "learning_rate": 1.666659019948589e-05, + "loss": 0.7935, + "step": 1959 + }, + { + "epoch": 0.29, + "learning_rate": 1.6663019338253556e-05, + "loss": 0.7988, + "step": 1960 + }, + { + "epoch": 0.29, + "learning_rate": 1.665944694841526e-05, + "loss": 0.8511, + "step": 1961 + }, + { + "epoch": 0.29, + "learning_rate": 1.665587303079057e-05, + "loss": 0.8354, + "step": 1962 + }, + { + "epoch": 0.29, + "learning_rate": 1.6652297586199395e-05, + "loss": 0.896, + "step": 1963 + }, + { + "epoch": 0.29, + "learning_rate": 1.6648720615462007e-05, + "loss": 0.7881, + "step": 1964 + }, + { + "epoch": 0.29, + "learning_rate": 1.6645142119399014e-05, + "loss": 0.8926, + "step": 1965 + }, + { + "epoch": 0.29, + "learning_rate": 1.664156209883139e-05, + "loss": 0.8535, + "step": 1966 + }, + { + "epoch": 0.29, + "learning_rate": 1.6637980554580447e-05, + "loss": 0.8555, + "step": 1967 + }, + { + "epoch": 0.29, + "learning_rate": 1.663439748746785e-05, + "loss": 0.8105, + "step": 1968 + }, + { + "epoch": 0.29, + "learning_rate": 1.6630812898315615e-05, + "loss": 0.7466, + "step": 1969 + }, + { + "epoch": 0.29, + "learning_rate": 1.6627226787946104e-05, + "loss": 0.8677, + "step": 1970 + }, + { + "epoch": 0.29, + "learning_rate": 1.6623639157182028e-05, + "loss": 0.8271, + "step": 1971 + }, + { + "epoch": 0.29, + "learning_rate": 1.6620050006846452e-05, + "loss": 0.8359, + "step": 1972 + }, + { + "epoch": 0.29, + "learning_rate": 1.6616459337762784e-05, + "loss": 0.8521, + "step": 1973 + }, + { + "epoch": 0.29, + "learning_rate": 1.6612867150754776e-05, + "loss": 0.8506, + "step": 1974 + }, + { + "epoch": 0.29, + "learning_rate": 1.6609273446646548e-05, + "loss": 0.8657, + "step": 1975 + }, + { + "epoch": 0.29, + "learning_rate": 1.6605678226262547e-05, + "loss": 0.8867, + "step": 1976 + }, + { + "epoch": 0.29, + "learning_rate": 1.6602081490427577e-05, + "loss": 0.8555, + "step": 1977 + }, + { + "epoch": 0.29, + "learning_rate": 1.6598483239966783e-05, + "loss": 0.8643, + "step": 1978 + }, + { + "epoch": 0.29, + "learning_rate": 1.6594883475705673e-05, + "loss": 0.895, + "step": 1979 + }, + { + "epoch": 0.29, + "learning_rate": 1.659128219847008e-05, + "loss": 0.8184, + "step": 1980 + }, + { + "epoch": 0.29, + "learning_rate": 1.6587679409086207e-05, + "loss": 0.832, + "step": 1981 + }, + { + "epoch": 0.29, + "learning_rate": 1.6584075108380587e-05, + "loss": 0.8633, + "step": 1982 + }, + { + "epoch": 0.29, + "learning_rate": 1.6580469297180107e-05, + "loss": 0.8423, + "step": 1983 + }, + { + "epoch": 0.29, + "learning_rate": 1.6576861976312e-05, + "loss": 0.8872, + "step": 1984 + }, + { + "epoch": 0.29, + "learning_rate": 1.6573253146603843e-05, + "loss": 0.8599, + "step": 1985 + }, + { + "epoch": 0.29, + "learning_rate": 1.6569642808883562e-05, + "loss": 0.9106, + "step": 1986 + }, + { + "epoch": 0.29, + "learning_rate": 1.6566030963979428e-05, + "loss": 0.8242, + "step": 1987 + }, + { + "epoch": 0.29, + "learning_rate": 1.6562417612720055e-05, + "loss": 0.8223, + "step": 1988 + }, + { + "epoch": 0.29, + "learning_rate": 1.655880275593441e-05, + "loss": 0.7573, + "step": 1989 + }, + { + "epoch": 0.29, + "learning_rate": 1.6555186394451794e-05, + "loss": 0.793, + "step": 1990 + }, + { + "epoch": 0.29, + "learning_rate": 1.655156852910186e-05, + "loss": 0.8682, + "step": 1991 + }, + { + "epoch": 0.29, + "learning_rate": 1.6547949160714614e-05, + "loss": 0.7881, + "step": 1992 + }, + { + "epoch": 0.29, + "learning_rate": 1.6544328290120392e-05, + "loss": 0.8999, + "step": 1993 + }, + { + "epoch": 0.29, + "learning_rate": 1.654070591814988e-05, + "loss": 0.332, + "step": 1994 + }, + { + "epoch": 0.3, + "learning_rate": 1.6537082045634116e-05, + "loss": 0.8774, + "step": 1995 + }, + { + "epoch": 0.3, + "learning_rate": 1.653345667340447e-05, + "loss": 0.8066, + "step": 1996 + }, + { + "epoch": 0.3, + "learning_rate": 1.6529829802292665e-05, + "loss": 0.875, + "step": 1997 + }, + { + "epoch": 0.3, + "learning_rate": 1.652620143313076e-05, + "loss": 0.8057, + "step": 1998 + }, + { + "epoch": 0.3, + "learning_rate": 1.6522571566751165e-05, + "loss": 0.8936, + "step": 1999 + }, + { + "epoch": 0.3, + "learning_rate": 1.6518940203986636e-05, + "loss": 0.8813, + "step": 2000 + }, + { + "epoch": 0.3, + "learning_rate": 1.6515307345670263e-05, + "loss": 0.8232, + "step": 2001 + }, + { + "epoch": 0.3, + "learning_rate": 1.6511672992635478e-05, + "loss": 0.9043, + "step": 2002 + }, + { + "epoch": 0.3, + "learning_rate": 1.650803714571607e-05, + "loss": 0.8804, + "step": 2003 + }, + { + "epoch": 0.3, + "learning_rate": 1.6504399805746157e-05, + "loss": 0.8057, + "step": 2004 + }, + { + "epoch": 0.3, + "learning_rate": 1.6500760973560205e-05, + "loss": 0.8979, + "step": 2005 + }, + { + "epoch": 0.3, + "learning_rate": 1.6497120649993022e-05, + "loss": 0.8545, + "step": 2006 + }, + { + "epoch": 0.3, + "learning_rate": 1.6493478835879763e-05, + "loss": 0.8267, + "step": 2007 + }, + { + "epoch": 0.3, + "learning_rate": 1.648983553205591e-05, + "loss": 0.7324, + "step": 2008 + }, + { + "epoch": 0.3, + "learning_rate": 1.6486190739357307e-05, + "loss": 0.8223, + "step": 2009 + }, + { + "epoch": 0.3, + "learning_rate": 1.648254445862012e-05, + "loss": 0.2996, + "step": 2010 + }, + { + "epoch": 0.3, + "learning_rate": 1.6478896690680875e-05, + "loss": 0.8091, + "step": 2011 + }, + { + "epoch": 0.3, + "learning_rate": 1.647524743637642e-05, + "loss": 0.8384, + "step": 2012 + }, + { + "epoch": 0.3, + "learning_rate": 1.6471596696543964e-05, + "loss": 0.8438, + "step": 2013 + }, + { + "epoch": 0.3, + "learning_rate": 1.6467944472021035e-05, + "loss": 0.8145, + "step": 2014 + }, + { + "epoch": 0.3, + "learning_rate": 1.6464290763645522e-05, + "loss": 0.8735, + "step": 2015 + }, + { + "epoch": 0.3, + "learning_rate": 1.6460635572255644e-05, + "loss": 0.8833, + "step": 2016 + }, + { + "epoch": 0.3, + "learning_rate": 1.6456978898689958e-05, + "loss": 0.3386, + "step": 2017 + }, + { + "epoch": 0.3, + "learning_rate": 1.645332074378737e-05, + "loss": 0.8149, + "step": 2018 + }, + { + "epoch": 0.3, + "learning_rate": 1.6449661108387118e-05, + "loss": 0.8691, + "step": 2019 + }, + { + "epoch": 0.3, + "learning_rate": 1.6445999993328784e-05, + "loss": 0.833, + "step": 2020 + }, + { + "epoch": 0.3, + "learning_rate": 1.6442337399452286e-05, + "loss": 0.7534, + "step": 2021 + }, + { + "epoch": 0.3, + "learning_rate": 1.6438673327597885e-05, + "loss": 0.8608, + "step": 2022 + }, + { + "epoch": 0.3, + "learning_rate": 1.6435007778606177e-05, + "loss": 0.8667, + "step": 2023 + }, + { + "epoch": 0.3, + "learning_rate": 1.6431340753318102e-05, + "loss": 0.8325, + "step": 2024 + }, + { + "epoch": 0.3, + "learning_rate": 1.6427672252574934e-05, + "loss": 0.8564, + "step": 2025 + }, + { + "epoch": 0.3, + "learning_rate": 1.6424002277218287e-05, + "loss": 0.8867, + "step": 2026 + }, + { + "epoch": 0.3, + "learning_rate": 1.6420330828090114e-05, + "loss": 0.8101, + "step": 2027 + }, + { + "epoch": 0.3, + "learning_rate": 1.6416657906032706e-05, + "loss": 0.8271, + "step": 2028 + }, + { + "epoch": 0.3, + "learning_rate": 1.641298351188869e-05, + "loss": 0.7754, + "step": 2029 + }, + { + "epoch": 0.3, + "learning_rate": 1.6409307646501032e-05, + "loss": 0.8647, + "step": 2030 + }, + { + "epoch": 0.3, + "learning_rate": 1.640563031071304e-05, + "loss": 0.8442, + "step": 2031 + }, + { + "epoch": 0.3, + "learning_rate": 1.6401951505368353e-05, + "loss": 0.8535, + "step": 2032 + }, + { + "epoch": 0.3, + "learning_rate": 1.6398271231310948e-05, + "loss": 0.8237, + "step": 2033 + }, + { + "epoch": 0.3, + "learning_rate": 1.639458948938514e-05, + "loss": 0.8398, + "step": 2034 + }, + { + "epoch": 0.3, + "learning_rate": 1.6390906280435582e-05, + "loss": 0.897, + "step": 2035 + }, + { + "epoch": 0.3, + "learning_rate": 1.6387221605307263e-05, + "loss": 0.8921, + "step": 2036 + }, + { + "epoch": 0.3, + "learning_rate": 1.6383535464845507e-05, + "loss": 0.8477, + "step": 2037 + }, + { + "epoch": 0.3, + "learning_rate": 1.6379847859895977e-05, + "loss": 0.3503, + "step": 2038 + }, + { + "epoch": 0.3, + "learning_rate": 1.6376158791304667e-05, + "loss": 0.9106, + "step": 2039 + }, + { + "epoch": 0.3, + "learning_rate": 1.6372468259917913e-05, + "loss": 0.8223, + "step": 2040 + }, + { + "epoch": 0.3, + "learning_rate": 1.6368776266582383e-05, + "loss": 0.8691, + "step": 2041 + }, + { + "epoch": 0.3, + "learning_rate": 1.6365082812145077e-05, + "loss": 0.8286, + "step": 2042 + }, + { + "epoch": 0.3, + "learning_rate": 1.636138789745334e-05, + "loss": 0.3453, + "step": 2043 + }, + { + "epoch": 0.3, + "learning_rate": 1.635769152335484e-05, + "loss": 0.9048, + "step": 2044 + }, + { + "epoch": 0.3, + "learning_rate": 1.6353993690697595e-05, + "loss": 0.9111, + "step": 2045 + }, + { + "epoch": 0.3, + "learning_rate": 1.6350294400329935e-05, + "loss": 0.7764, + "step": 2046 + }, + { + "epoch": 0.3, + "learning_rate": 1.6346593653100547e-05, + "loss": 0.8809, + "step": 2047 + }, + { + "epoch": 0.3, + "learning_rate": 1.6342891449858444e-05, + "loss": 0.8452, + "step": 2048 + }, + { + "epoch": 0.3, + "learning_rate": 1.6339187791452968e-05, + "loss": 0.7554, + "step": 2049 + }, + { + "epoch": 0.3, + "learning_rate": 1.63354826787338e-05, + "loss": 0.8057, + "step": 2050 + }, + { + "epoch": 0.3, + "learning_rate": 1.6331776112550956e-05, + "loss": 0.812, + "step": 2051 + }, + { + "epoch": 0.3, + "learning_rate": 1.632806809375478e-05, + "loss": 0.8408, + "step": 2052 + }, + { + "epoch": 0.3, + "learning_rate": 1.6324358623195954e-05, + "loss": 0.8701, + "step": 2053 + }, + { + "epoch": 0.3, + "learning_rate": 1.632064770172549e-05, + "loss": 0.8652, + "step": 2054 + }, + { + "epoch": 0.3, + "learning_rate": 1.631693533019474e-05, + "loss": 0.8052, + "step": 2055 + }, + { + "epoch": 0.3, + "learning_rate": 1.631322150945537e-05, + "loss": 0.8833, + "step": 2056 + }, + { + "epoch": 0.3, + "learning_rate": 1.630950624035941e-05, + "loss": 0.8335, + "step": 2057 + }, + { + "epoch": 0.3, + "learning_rate": 1.6305789523759186e-05, + "loss": 0.3381, + "step": 2058 + }, + { + "epoch": 0.3, + "learning_rate": 1.630207136050738e-05, + "loss": 0.8086, + "step": 2059 + }, + { + "epoch": 0.3, + "learning_rate": 1.6298351751457008e-05, + "loss": 0.8447, + "step": 2060 + }, + { + "epoch": 0.3, + "learning_rate": 1.6294630697461396e-05, + "loss": 0.8687, + "step": 2061 + }, + { + "epoch": 0.3, + "learning_rate": 1.629090819937422e-05, + "loss": 0.8413, + "step": 2062 + }, + { + "epoch": 0.31, + "learning_rate": 1.628718425804949e-05, + "loss": 0.895, + "step": 2063 + }, + { + "epoch": 0.31, + "learning_rate": 1.628345887434153e-05, + "loss": 0.7568, + "step": 2064 + }, + { + "epoch": 0.31, + "learning_rate": 1.6279732049105e-05, + "loss": 0.8359, + "step": 2065 + }, + { + "epoch": 0.31, + "learning_rate": 1.6276003783194913e-05, + "loss": 0.8369, + "step": 2066 + }, + { + "epoch": 0.31, + "learning_rate": 1.6272274077466573e-05, + "loss": 0.8608, + "step": 2067 + }, + { + "epoch": 0.31, + "learning_rate": 1.626854293277565e-05, + "loss": 0.8223, + "step": 2068 + }, + { + "epoch": 0.31, + "learning_rate": 1.6264810349978125e-05, + "loss": 0.8535, + "step": 2069 + }, + { + "epoch": 0.31, + "learning_rate": 1.626107632993031e-05, + "loss": 0.8525, + "step": 2070 + }, + { + "epoch": 0.31, + "learning_rate": 1.625734087348886e-05, + "loss": 0.8623, + "step": 2071 + }, + { + "epoch": 0.31, + "learning_rate": 1.6253603981510742e-05, + "loss": 0.8403, + "step": 2072 + }, + { + "epoch": 0.31, + "learning_rate": 1.624986565485326e-05, + "loss": 0.8755, + "step": 2073 + }, + { + "epoch": 0.31, + "learning_rate": 1.6246125894374058e-05, + "loss": 0.7979, + "step": 2074 + }, + { + "epoch": 0.31, + "learning_rate": 1.6242384700931082e-05, + "loss": 0.8086, + "step": 2075 + }, + { + "epoch": 0.31, + "learning_rate": 1.6238642075382638e-05, + "loss": 0.8481, + "step": 2076 + }, + { + "epoch": 0.31, + "learning_rate": 1.6234898018587336e-05, + "loss": 0.8276, + "step": 2077 + }, + { + "epoch": 0.31, + "learning_rate": 1.623115253140413e-05, + "loss": 0.8364, + "step": 2078 + }, + { + "epoch": 0.31, + "learning_rate": 1.6227405614692295e-05, + "loss": 0.8057, + "step": 2079 + }, + { + "epoch": 0.31, + "learning_rate": 1.622365726931143e-05, + "loss": 0.8481, + "step": 2080 + }, + { + "epoch": 0.31, + "learning_rate": 1.6219907496121474e-05, + "loss": 0.7905, + "step": 2081 + }, + { + "epoch": 0.31, + "learning_rate": 1.6216156295982682e-05, + "loss": 0.3625, + "step": 2082 + }, + { + "epoch": 0.31, + "learning_rate": 1.6212403669755642e-05, + "loss": 0.7891, + "step": 2083 + }, + { + "epoch": 0.31, + "learning_rate": 1.6208649618301268e-05, + "loss": 0.877, + "step": 2084 + }, + { + "epoch": 0.31, + "learning_rate": 1.6204894142480803e-05, + "loss": 0.894, + "step": 2085 + }, + { + "epoch": 0.31, + "learning_rate": 1.6201137243155815e-05, + "loss": 0.8838, + "step": 2086 + }, + { + "epoch": 0.31, + "learning_rate": 1.6197378921188193e-05, + "loss": 0.8105, + "step": 2087 + }, + { + "epoch": 0.31, + "learning_rate": 1.619361917744016e-05, + "loss": 0.8652, + "step": 2088 + }, + { + "epoch": 0.31, + "learning_rate": 1.6189858012774267e-05, + "loss": 0.8564, + "step": 2089 + }, + { + "epoch": 0.31, + "learning_rate": 1.6186095428053382e-05, + "loss": 0.9126, + "step": 2090 + }, + { + "epoch": 0.31, + "learning_rate": 1.618233142414071e-05, + "loss": 0.8462, + "step": 2091 + }, + { + "epoch": 0.31, + "learning_rate": 1.6178566001899768e-05, + "loss": 0.8569, + "step": 2092 + }, + { + "epoch": 0.31, + "learning_rate": 1.617479916219441e-05, + "loss": 0.8979, + "step": 2093 + }, + { + "epoch": 0.31, + "learning_rate": 1.6171030905888808e-05, + "loss": 0.8467, + "step": 2094 + }, + { + "epoch": 0.31, + "learning_rate": 1.616726123384746e-05, + "loss": 0.812, + "step": 2095 + }, + { + "epoch": 0.31, + "learning_rate": 1.6163490146935196e-05, + "loss": 0.8252, + "step": 2096 + }, + { + "epoch": 0.31, + "learning_rate": 1.6159717646017162e-05, + "loss": 0.8887, + "step": 2097 + }, + { + "epoch": 0.31, + "learning_rate": 1.615594373195884e-05, + "loss": 0.8638, + "step": 2098 + }, + { + "epoch": 0.31, + "learning_rate": 1.6152168405626013e-05, + "loss": 0.8325, + "step": 2099 + }, + { + "epoch": 0.31, + "learning_rate": 1.614839166788481e-05, + "loss": 0.8218, + "step": 2100 + }, + { + "epoch": 0.31, + "learning_rate": 1.6144613519601682e-05, + "loss": 0.8335, + "step": 2101 + }, + { + "epoch": 0.31, + "learning_rate": 1.6140833961643386e-05, + "loss": 0.9028, + "step": 2102 + }, + { + "epoch": 0.31, + "learning_rate": 1.6137052994877026e-05, + "loss": 0.791, + "step": 2103 + }, + { + "epoch": 0.31, + "learning_rate": 1.6133270620170014e-05, + "loss": 0.8389, + "step": 2104 + }, + { + "epoch": 0.31, + "learning_rate": 1.6129486838390088e-05, + "loss": 0.8218, + "step": 2105 + }, + { + "epoch": 0.31, + "learning_rate": 1.612570165040531e-05, + "loss": 0.8789, + "step": 2106 + }, + { + "epoch": 0.31, + "learning_rate": 1.6121915057084064e-05, + "loss": 0.8208, + "step": 2107 + }, + { + "epoch": 0.31, + "learning_rate": 1.6118127059295055e-05, + "loss": 0.7383, + "step": 2108 + }, + { + "epoch": 0.31, + "learning_rate": 1.6114337657907316e-05, + "loss": 0.9082, + "step": 2109 + }, + { + "epoch": 0.31, + "learning_rate": 1.6110546853790197e-05, + "loss": 0.8569, + "step": 2110 + }, + { + "epoch": 0.31, + "learning_rate": 1.6106754647813367e-05, + "loss": 0.8765, + "step": 2111 + }, + { + "epoch": 0.31, + "learning_rate": 1.6102961040846824e-05, + "loss": 0.7988, + "step": 2112 + }, + { + "epoch": 0.31, + "learning_rate": 1.609916603376088e-05, + "loss": 0.8862, + "step": 2113 + }, + { + "epoch": 0.31, + "learning_rate": 1.609536962742617e-05, + "loss": 0.9263, + "step": 2114 + }, + { + "epoch": 0.31, + "learning_rate": 1.6091571822713667e-05, + "loss": 0.853, + "step": 2115 + }, + { + "epoch": 0.31, + "learning_rate": 1.6087772620494628e-05, + "loss": 0.8706, + "step": 2116 + }, + { + "epoch": 0.31, + "learning_rate": 1.6083972021640666e-05, + "loss": 0.8477, + "step": 2117 + }, + { + "epoch": 0.31, + "learning_rate": 1.6080170027023702e-05, + "loss": 0.8281, + "step": 2118 + }, + { + "epoch": 0.31, + "learning_rate": 1.6076366637515968e-05, + "loss": 0.8735, + "step": 2119 + }, + { + "epoch": 0.31, + "learning_rate": 1.6072561853990028e-05, + "loss": 0.3308, + "step": 2120 + }, + { + "epoch": 0.31, + "learning_rate": 1.606875567731876e-05, + "loss": 0.8511, + "step": 2121 + }, + { + "epoch": 0.31, + "learning_rate": 1.606494810837537e-05, + "loss": 0.8574, + "step": 2122 + }, + { + "epoch": 0.31, + "learning_rate": 1.6061139148033364e-05, + "loss": 0.8398, + "step": 2123 + }, + { + "epoch": 0.31, + "learning_rate": 1.6057328797166592e-05, + "loss": 0.8691, + "step": 2124 + }, + { + "epoch": 0.31, + "learning_rate": 1.6053517056649206e-05, + "loss": 0.8555, + "step": 2125 + }, + { + "epoch": 0.31, + "learning_rate": 1.6049703927355684e-05, + "loss": 0.7671, + "step": 2126 + }, + { + "epoch": 0.31, + "learning_rate": 1.6045889410160812e-05, + "loss": 0.8174, + "step": 2127 + }, + { + "epoch": 0.31, + "learning_rate": 1.6042073505939718e-05, + "loss": 0.7925, + "step": 2128 + }, + { + "epoch": 0.31, + "learning_rate": 1.6038256215567823e-05, + "loss": 0.8682, + "step": 2129 + }, + { + "epoch": 0.31, + "learning_rate": 1.6034437539920876e-05, + "loss": 0.8579, + "step": 2130 + }, + { + "epoch": 0.32, + "learning_rate": 1.6030617479874944e-05, + "loss": 0.8389, + "step": 2131 + }, + { + "epoch": 0.32, + "learning_rate": 1.6026796036306413e-05, + "loss": 0.8003, + "step": 2132 + }, + { + "epoch": 0.32, + "learning_rate": 1.602297321009199e-05, + "loss": 0.8857, + "step": 2133 + }, + { + "epoch": 0.32, + "learning_rate": 1.601914900210868e-05, + "loss": 0.8818, + "step": 2134 + }, + { + "epoch": 0.32, + "learning_rate": 1.6015323413233838e-05, + "loss": 0.7783, + "step": 2135 + }, + { + "epoch": 0.32, + "learning_rate": 1.6011496444345102e-05, + "loss": 0.8379, + "step": 2136 + }, + { + "epoch": 0.32, + "learning_rate": 1.6007668096320445e-05, + "loss": 0.8027, + "step": 2137 + }, + { + "epoch": 0.32, + "learning_rate": 1.6003838370038155e-05, + "loss": 0.8643, + "step": 2138 + }, + { + "epoch": 0.32, + "learning_rate": 1.6000007266376837e-05, + "loss": 0.2986, + "step": 2139 + }, + { + "epoch": 0.32, + "learning_rate": 1.59961747862154e-05, + "loss": 0.321, + "step": 2140 + }, + { + "epoch": 0.32, + "learning_rate": 1.5992340930433084e-05, + "loss": 0.3191, + "step": 2141 + }, + { + "epoch": 0.32, + "learning_rate": 1.598850569990944e-05, + "loss": 0.8306, + "step": 2142 + }, + { + "epoch": 0.32, + "learning_rate": 1.598466909552433e-05, + "loss": 0.8481, + "step": 2143 + }, + { + "epoch": 0.32, + "learning_rate": 1.598083111815793e-05, + "loss": 0.8823, + "step": 2144 + }, + { + "epoch": 0.32, + "learning_rate": 1.5976991768690743e-05, + "loss": 0.8823, + "step": 2145 + }, + { + "epoch": 0.32, + "learning_rate": 1.5973151048003574e-05, + "loss": 0.8696, + "step": 2146 + }, + { + "epoch": 0.32, + "learning_rate": 1.596930895697755e-05, + "loss": 0.8882, + "step": 2147 + }, + { + "epoch": 0.32, + "learning_rate": 1.5965465496494107e-05, + "loss": 0.8818, + "step": 2148 + }, + { + "epoch": 0.32, + "learning_rate": 1.5961620667434997e-05, + "loss": 0.9053, + "step": 2149 + }, + { + "epoch": 0.32, + "learning_rate": 1.595777447068229e-05, + "loss": 0.7856, + "step": 2150 + }, + { + "epoch": 0.32, + "learning_rate": 1.595392690711837e-05, + "loss": 0.8223, + "step": 2151 + }, + { + "epoch": 0.32, + "learning_rate": 1.5950077977625924e-05, + "loss": 0.9175, + "step": 2152 + }, + { + "epoch": 0.32, + "learning_rate": 1.594622768308796e-05, + "loss": 0.8755, + "step": 2153 + }, + { + "epoch": 0.32, + "learning_rate": 1.5942376024387806e-05, + "loss": 0.8994, + "step": 2154 + }, + { + "epoch": 0.32, + "learning_rate": 1.5938523002409083e-05, + "loss": 0.8247, + "step": 2155 + }, + { + "epoch": 0.32, + "learning_rate": 1.593466861803575e-05, + "loss": 0.8242, + "step": 2156 + }, + { + "epoch": 0.32, + "learning_rate": 1.593081287215206e-05, + "loss": 0.7808, + "step": 2157 + }, + { + "epoch": 0.32, + "learning_rate": 1.5926955765642587e-05, + "loss": 0.8242, + "step": 2158 + }, + { + "epoch": 0.32, + "learning_rate": 1.5923097299392213e-05, + "loss": 0.8599, + "step": 2159 + }, + { + "epoch": 0.32, + "learning_rate": 1.5919237474286134e-05, + "loss": 0.8335, + "step": 2160 + }, + { + "epoch": 0.32, + "learning_rate": 1.5915376291209854e-05, + "loss": 0.8525, + "step": 2161 + }, + { + "epoch": 0.32, + "learning_rate": 1.5911513751049197e-05, + "loss": 0.8076, + "step": 2162 + }, + { + "epoch": 0.32, + "learning_rate": 1.5907649854690292e-05, + "loss": 0.8506, + "step": 2163 + }, + { + "epoch": 0.32, + "learning_rate": 1.5903784603019575e-05, + "loss": 0.8926, + "step": 2164 + }, + { + "epoch": 0.32, + "learning_rate": 1.5899917996923803e-05, + "loss": 0.8892, + "step": 2165 + }, + { + "epoch": 0.32, + "learning_rate": 1.5896050037290038e-05, + "loss": 0.8247, + "step": 2166 + }, + { + "epoch": 0.32, + "learning_rate": 1.5892180725005656e-05, + "loss": 0.8501, + "step": 2167 + }, + { + "epoch": 0.32, + "learning_rate": 1.5888310060958338e-05, + "loss": 0.9023, + "step": 2168 + }, + { + "epoch": 0.32, + "learning_rate": 1.5884438046036072e-05, + "loss": 0.897, + "step": 2169 + }, + { + "epoch": 0.32, + "learning_rate": 1.5880564681127172e-05, + "loss": 0.7539, + "step": 2170 + }, + { + "epoch": 0.32, + "learning_rate": 1.587668996712025e-05, + "loss": 0.9092, + "step": 2171 + }, + { + "epoch": 0.32, + "learning_rate": 1.587281390490422e-05, + "loss": 0.8296, + "step": 2172 + }, + { + "epoch": 0.32, + "learning_rate": 1.5868936495368322e-05, + "loss": 0.8477, + "step": 2173 + }, + { + "epoch": 0.32, + "learning_rate": 1.5865057739402098e-05, + "loss": 0.8203, + "step": 2174 + }, + { + "epoch": 0.32, + "learning_rate": 1.5861177637895397e-05, + "loss": 0.76, + "step": 2175 + }, + { + "epoch": 0.32, + "learning_rate": 1.5857296191738373e-05, + "loss": 0.8989, + "step": 2176 + }, + { + "epoch": 0.32, + "learning_rate": 1.58534134018215e-05, + "loss": 0.8384, + "step": 2177 + }, + { + "epoch": 0.32, + "learning_rate": 1.5849529269035547e-05, + "loss": 0.8691, + "step": 2178 + }, + { + "epoch": 0.32, + "learning_rate": 1.5845643794271604e-05, + "loss": 0.832, + "step": 2179 + }, + { + "epoch": 0.32, + "learning_rate": 1.5841756978421064e-05, + "loss": 0.7969, + "step": 2180 + }, + { + "epoch": 0.32, + "learning_rate": 1.5837868822375617e-05, + "loss": 0.9062, + "step": 2181 + }, + { + "epoch": 0.32, + "learning_rate": 1.583397932702728e-05, + "loss": 0.8188, + "step": 2182 + }, + { + "epoch": 0.32, + "learning_rate": 1.583008849326836e-05, + "loss": 0.8247, + "step": 2183 + }, + { + "epoch": 0.32, + "learning_rate": 1.5826196321991484e-05, + "loss": 0.8374, + "step": 2184 + }, + { + "epoch": 0.32, + "learning_rate": 1.5822302814089577e-05, + "loss": 0.3231, + "step": 2185 + }, + { + "epoch": 0.32, + "learning_rate": 1.581840797045587e-05, + "loss": 0.8481, + "step": 2186 + }, + { + "epoch": 0.32, + "learning_rate": 1.581451179198391e-05, + "loss": 0.9204, + "step": 2187 + }, + { + "epoch": 0.32, + "learning_rate": 1.5810614279567536e-05, + "loss": 0.855, + "step": 2188 + }, + { + "epoch": 0.32, + "learning_rate": 1.5806715434100916e-05, + "loss": 0.8809, + "step": 2189 + }, + { + "epoch": 0.32, + "learning_rate": 1.580281525647849e-05, + "loss": 0.8599, + "step": 2190 + }, + { + "epoch": 0.32, + "learning_rate": 1.5798913747595038e-05, + "loss": 0.8594, + "step": 2191 + }, + { + "epoch": 0.32, + "learning_rate": 1.5795010908345628e-05, + "loss": 0.8188, + "step": 2192 + }, + { + "epoch": 0.32, + "learning_rate": 1.5791106739625627e-05, + "loss": 0.8208, + "step": 2193 + }, + { + "epoch": 0.32, + "learning_rate": 1.5787201242330725e-05, + "loss": 0.8896, + "step": 2194 + }, + { + "epoch": 0.32, + "learning_rate": 1.57832944173569e-05, + "loss": 0.8682, + "step": 2195 + }, + { + "epoch": 0.32, + "learning_rate": 1.5779386265600444e-05, + "loss": 0.8047, + "step": 2196 + }, + { + "epoch": 0.32, + "learning_rate": 1.577547678795795e-05, + "loss": 0.8501, + "step": 2197 + }, + { + "epoch": 0.33, + "learning_rate": 1.5771565985326323e-05, + "loss": 0.8447, + "step": 2198 + }, + { + "epoch": 0.33, + "learning_rate": 1.576765385860276e-05, + "loss": 0.8906, + "step": 2199 + }, + { + "epoch": 0.33, + "learning_rate": 1.5763740408684766e-05, + "loss": 0.854, + "step": 2200 + }, + { + "epoch": 0.33, + "learning_rate": 1.575982563647015e-05, + "loss": 0.3479, + "step": 2201 + }, + { + "epoch": 0.33, + "learning_rate": 1.575590954285703e-05, + "loss": 0.8022, + "step": 2202 + }, + { + "epoch": 0.33, + "learning_rate": 1.575199212874382e-05, + "loss": 0.9082, + "step": 2203 + }, + { + "epoch": 0.33, + "learning_rate": 1.5748073395029236e-05, + "loss": 0.7964, + "step": 2204 + }, + { + "epoch": 0.33, + "learning_rate": 1.57441533426123e-05, + "loss": 0.8442, + "step": 2205 + }, + { + "epoch": 0.33, + "learning_rate": 1.574023197239234e-05, + "loss": 0.8638, + "step": 2206 + }, + { + "epoch": 0.33, + "learning_rate": 1.5736309285268982e-05, + "loss": 0.834, + "step": 2207 + }, + { + "epoch": 0.33, + "learning_rate": 1.5732385282142153e-05, + "loss": 0.874, + "step": 2208 + }, + { + "epoch": 0.33, + "learning_rate": 1.5728459963912088e-05, + "loss": 0.8091, + "step": 2209 + }, + { + "epoch": 0.33, + "learning_rate": 1.572453333147931e-05, + "loss": 0.8232, + "step": 2210 + }, + { + "epoch": 0.33, + "learning_rate": 1.572060538574466e-05, + "loss": 0.8154, + "step": 2211 + }, + { + "epoch": 0.33, + "learning_rate": 1.5716676127609277e-05, + "loss": 0.8408, + "step": 2212 + }, + { + "epoch": 0.33, + "learning_rate": 1.5712745557974588e-05, + "loss": 0.8428, + "step": 2213 + }, + { + "epoch": 0.33, + "learning_rate": 1.5708813677742334e-05, + "loss": 0.8059, + "step": 2214 + }, + { + "epoch": 0.33, + "learning_rate": 1.5704880487814553e-05, + "loss": 0.8262, + "step": 2215 + }, + { + "epoch": 0.33, + "learning_rate": 1.5700945989093587e-05, + "loss": 0.8516, + "step": 2216 + }, + { + "epoch": 0.33, + "learning_rate": 1.5697010182482067e-05, + "loss": 0.8184, + "step": 2217 + }, + { + "epoch": 0.33, + "learning_rate": 1.5693073068882942e-05, + "loss": 0.8735, + "step": 2218 + }, + { + "epoch": 0.33, + "learning_rate": 1.568913464919944e-05, + "loss": 0.8354, + "step": 2219 + }, + { + "epoch": 0.33, + "learning_rate": 1.5685194924335102e-05, + "loss": 0.8838, + "step": 2220 + }, + { + "epoch": 0.33, + "learning_rate": 1.568125389519377e-05, + "loss": 0.8306, + "step": 2221 + }, + { + "epoch": 0.33, + "learning_rate": 1.5677311562679575e-05, + "loss": 0.8413, + "step": 2222 + }, + { + "epoch": 0.33, + "learning_rate": 1.567336792769696e-05, + "loss": 0.9092, + "step": 2223 + }, + { + "epoch": 0.33, + "learning_rate": 1.566942299115065e-05, + "loss": 0.8032, + "step": 2224 + }, + { + "epoch": 0.33, + "learning_rate": 1.5665476753945682e-05, + "loss": 0.7979, + "step": 2225 + }, + { + "epoch": 0.33, + "learning_rate": 1.5661529216987393e-05, + "loss": 0.7837, + "step": 2226 + }, + { + "epoch": 0.33, + "learning_rate": 1.5657580381181404e-05, + "loss": 0.8569, + "step": 2227 + }, + { + "epoch": 0.33, + "learning_rate": 1.5653630247433653e-05, + "loss": 0.8306, + "step": 2228 + }, + { + "epoch": 0.33, + "learning_rate": 1.5649678816650357e-05, + "loss": 0.7881, + "step": 2229 + }, + { + "epoch": 0.33, + "learning_rate": 1.564572608973804e-05, + "loss": 0.8594, + "step": 2230 + }, + { + "epoch": 0.33, + "learning_rate": 1.5641772067603526e-05, + "loss": 0.8535, + "step": 2231 + }, + { + "epoch": 0.33, + "learning_rate": 1.5637816751153932e-05, + "loss": 0.8936, + "step": 2232 + }, + { + "epoch": 0.33, + "learning_rate": 1.563386014129667e-05, + "loss": 0.874, + "step": 2233 + }, + { + "epoch": 0.33, + "learning_rate": 1.562990223893945e-05, + "loss": 0.8213, + "step": 2234 + }, + { + "epoch": 0.33, + "learning_rate": 1.5625943044990284e-05, + "loss": 0.8076, + "step": 2235 + }, + { + "epoch": 0.33, + "learning_rate": 1.5621982560357473e-05, + "loss": 0.8774, + "step": 2236 + }, + { + "epoch": 0.33, + "learning_rate": 1.561802078594962e-05, + "loss": 0.9302, + "step": 2237 + }, + { + "epoch": 0.33, + "learning_rate": 1.5614057722675618e-05, + "loss": 0.7983, + "step": 2238 + }, + { + "epoch": 0.33, + "learning_rate": 1.5610093371444665e-05, + "loss": 0.8174, + "step": 2239 + }, + { + "epoch": 0.33, + "learning_rate": 1.5606127733166237e-05, + "loss": 0.8745, + "step": 2240 + }, + { + "epoch": 0.33, + "learning_rate": 1.5602160808750126e-05, + "loss": 0.8223, + "step": 2241 + }, + { + "epoch": 0.33, + "learning_rate": 1.5598192599106404e-05, + "loss": 0.8335, + "step": 2242 + }, + { + "epoch": 0.33, + "learning_rate": 1.5594223105145446e-05, + "loss": 0.8457, + "step": 2243 + }, + { + "epoch": 0.33, + "learning_rate": 1.5590252327777923e-05, + "loss": 0.8535, + "step": 2244 + }, + { + "epoch": 0.33, + "learning_rate": 1.558628026791479e-05, + "loss": 0.813, + "step": 2245 + }, + { + "epoch": 0.33, + "learning_rate": 1.5582306926467302e-05, + "loss": 0.9058, + "step": 2246 + }, + { + "epoch": 0.33, + "learning_rate": 1.5578332304347016e-05, + "loss": 0.8848, + "step": 2247 + }, + { + "epoch": 0.33, + "learning_rate": 1.557435640246577e-05, + "loss": 0.9009, + "step": 2248 + }, + { + "epoch": 0.33, + "learning_rate": 1.55703792217357e-05, + "loss": 0.8179, + "step": 2249 + }, + { + "epoch": 0.33, + "learning_rate": 1.556640076306924e-05, + "loss": 0.8672, + "step": 2250 + }, + { + "epoch": 0.33, + "learning_rate": 1.5562421027379116e-05, + "loss": 0.8589, + "step": 2251 + }, + { + "epoch": 0.33, + "learning_rate": 1.555844001557834e-05, + "loss": 0.8643, + "step": 2252 + }, + { + "epoch": 0.33, + "learning_rate": 1.555445772858022e-05, + "loss": 0.8662, + "step": 2253 + }, + { + "epoch": 0.33, + "learning_rate": 1.5550474167298364e-05, + "loss": 0.853, + "step": 2254 + }, + { + "epoch": 0.33, + "learning_rate": 1.5546489332646658e-05, + "loss": 0.8784, + "step": 2255 + }, + { + "epoch": 0.33, + "learning_rate": 1.55425032255393e-05, + "loss": 0.8936, + "step": 2256 + }, + { + "epoch": 0.33, + "learning_rate": 1.553851584689076e-05, + "loss": 0.8281, + "step": 2257 + }, + { + "epoch": 0.33, + "learning_rate": 1.5534527197615804e-05, + "loss": 0.9126, + "step": 2258 + }, + { + "epoch": 0.33, + "learning_rate": 1.5530537278629507e-05, + "loss": 0.7949, + "step": 2259 + }, + { + "epoch": 0.33, + "learning_rate": 1.552654609084721e-05, + "loss": 0.8325, + "step": 2260 + }, + { + "epoch": 0.33, + "learning_rate": 1.5522553635184567e-05, + "loss": 0.832, + "step": 2261 + }, + { + "epoch": 0.33, + "learning_rate": 1.5518559912557497e-05, + "loss": 0.8877, + "step": 2262 + }, + { + "epoch": 0.33, + "learning_rate": 1.5514564923882245e-05, + "loss": 0.8232, + "step": 2263 + }, + { + "epoch": 0.33, + "learning_rate": 1.5510568670075313e-05, + "loss": 0.8418, + "step": 2264 + }, + { + "epoch": 0.33, + "learning_rate": 1.5506571152053512e-05, + "loss": 0.7817, + "step": 2265 + }, + { + "epoch": 0.34, + "learning_rate": 1.550257237073394e-05, + "loss": 0.7729, + "step": 2266 + }, + { + "epoch": 0.34, + "learning_rate": 1.5498572327033984e-05, + "loss": 0.8887, + "step": 2267 + }, + { + "epoch": 0.34, + "learning_rate": 1.549457102187131e-05, + "loss": 0.7915, + "step": 2268 + }, + { + "epoch": 0.34, + "learning_rate": 1.549056845616389e-05, + "loss": 0.7847, + "step": 2269 + }, + { + "epoch": 0.34, + "learning_rate": 1.5486564630829983e-05, + "loss": 0.9185, + "step": 2270 + }, + { + "epoch": 0.34, + "learning_rate": 1.5482559546788127e-05, + "loss": 0.3032, + "step": 2271 + }, + { + "epoch": 0.34, + "learning_rate": 1.547855320495715e-05, + "loss": 0.8511, + "step": 2272 + }, + { + "epoch": 0.34, + "learning_rate": 1.547454560625618e-05, + "loss": 0.835, + "step": 2273 + }, + { + "epoch": 0.34, + "learning_rate": 1.5470536751604622e-05, + "loss": 0.8496, + "step": 2274 + }, + { + "epoch": 0.34, + "learning_rate": 1.5466526641922174e-05, + "loss": 0.8398, + "step": 2275 + }, + { + "epoch": 0.34, + "learning_rate": 1.5462515278128822e-05, + "loss": 0.8076, + "step": 2276 + }, + { + "epoch": 0.34, + "learning_rate": 1.5458502661144835e-05, + "loss": 0.8877, + "step": 2277 + }, + { + "epoch": 0.34, + "learning_rate": 1.5454488791890777e-05, + "loss": 0.8203, + "step": 2278 + }, + { + "epoch": 0.34, + "learning_rate": 1.5450473671287492e-05, + "loss": 0.8423, + "step": 2279 + }, + { + "epoch": 0.34, + "learning_rate": 1.5446457300256118e-05, + "loss": 0.8354, + "step": 2280 + }, + { + "epoch": 0.34, + "learning_rate": 1.5442439679718075e-05, + "loss": 0.3486, + "step": 2281 + }, + { + "epoch": 0.34, + "learning_rate": 1.5438420810595073e-05, + "loss": 0.8232, + "step": 2282 + }, + { + "epoch": 0.34, + "learning_rate": 1.54344006938091e-05, + "loss": 0.8696, + "step": 2283 + }, + { + "epoch": 0.34, + "learning_rate": 1.543037933028245e-05, + "loss": 0.8413, + "step": 2284 + }, + { + "epoch": 0.34, + "learning_rate": 1.542635672093767e-05, + "loss": 0.832, + "step": 2285 + }, + { + "epoch": 0.34, + "learning_rate": 1.542233286669763e-05, + "loss": 0.853, + "step": 2286 + }, + { + "epoch": 0.34, + "learning_rate": 1.541830776848546e-05, + "loss": 0.8921, + "step": 2287 + }, + { + "epoch": 0.34, + "learning_rate": 1.5414281427224588e-05, + "loss": 0.9307, + "step": 2288 + }, + { + "epoch": 0.34, + "learning_rate": 1.5410253843838717e-05, + "loss": 0.9541, + "step": 2289 + }, + { + "epoch": 0.34, + "learning_rate": 1.5406225019251846e-05, + "loss": 0.813, + "step": 2290 + }, + { + "epoch": 0.34, + "learning_rate": 1.540219495438825e-05, + "loss": 0.8569, + "step": 2291 + }, + { + "epoch": 0.34, + "learning_rate": 1.5398163650172495e-05, + "loss": 0.8755, + "step": 2292 + }, + { + "epoch": 0.34, + "learning_rate": 1.5394131107529427e-05, + "loss": 0.8623, + "step": 2293 + }, + { + "epoch": 0.34, + "learning_rate": 1.5390097327384176e-05, + "loss": 0.8574, + "step": 2294 + }, + { + "epoch": 0.34, + "learning_rate": 1.538606231066216e-05, + "loss": 0.8789, + "step": 2295 + }, + { + "epoch": 0.34, + "learning_rate": 1.538202605828907e-05, + "loss": 0.894, + "step": 2296 + }, + { + "epoch": 0.34, + "learning_rate": 1.5377988571190903e-05, + "loss": 0.8193, + "step": 2297 + }, + { + "epoch": 0.34, + "learning_rate": 1.5373949850293915e-05, + "loss": 0.8584, + "step": 2298 + }, + { + "epoch": 0.34, + "learning_rate": 1.5369909896524657e-05, + "loss": 0.8706, + "step": 2299 + }, + { + "epoch": 0.34, + "learning_rate": 1.5365868710809958e-05, + "loss": 0.9121, + "step": 2300 + }, + { + "epoch": 0.34, + "learning_rate": 1.5361826294076938e-05, + "loss": 0.8931, + "step": 2301 + }, + { + "epoch": 0.34, + "learning_rate": 1.5357782647252984e-05, + "loss": 0.8267, + "step": 2302 + }, + { + "epoch": 0.34, + "learning_rate": 1.5353737771265785e-05, + "loss": 0.8008, + "step": 2303 + }, + { + "epoch": 0.34, + "learning_rate": 1.53496916670433e-05, + "loss": 0.9185, + "step": 2304 + }, + { + "epoch": 0.34, + "learning_rate": 1.5345644335513773e-05, + "loss": 0.9072, + "step": 2305 + }, + { + "epoch": 0.34, + "learning_rate": 1.5341595777605718e-05, + "loss": 0.8901, + "step": 2306 + }, + { + "epoch": 0.34, + "learning_rate": 1.5337545994247948e-05, + "loss": 0.856, + "step": 2307 + }, + { + "epoch": 0.34, + "learning_rate": 1.5333494986369554e-05, + "loss": 0.8086, + "step": 2308 + }, + { + "epoch": 0.34, + "learning_rate": 1.5329442754899897e-05, + "loss": 0.9014, + "step": 2309 + }, + { + "epoch": 0.34, + "learning_rate": 1.532538930076863e-05, + "loss": 0.8555, + "step": 2310 + }, + { + "epoch": 0.34, + "learning_rate": 1.5321334624905677e-05, + "loss": 0.876, + "step": 2311 + }, + { + "epoch": 0.34, + "learning_rate": 1.5317278728241252e-05, + "loss": 0.896, + "step": 2312 + }, + { + "epoch": 0.34, + "learning_rate": 1.531322161170584e-05, + "loss": 0.8721, + "step": 2313 + }, + { + "epoch": 0.34, + "learning_rate": 1.5309163276230215e-05, + "loss": 0.8696, + "step": 2314 + }, + { + "epoch": 0.34, + "learning_rate": 1.530510372274542e-05, + "loss": 0.8101, + "step": 2315 + }, + { + "epoch": 0.34, + "learning_rate": 1.5301042952182787e-05, + "loss": 0.8643, + "step": 2316 + }, + { + "epoch": 0.34, + "learning_rate": 1.5296980965473918e-05, + "loss": 0.8774, + "step": 2317 + }, + { + "epoch": 0.34, + "learning_rate": 1.529291776355071e-05, + "loss": 0.8442, + "step": 2318 + }, + { + "epoch": 0.34, + "learning_rate": 1.528885334734532e-05, + "loss": 0.8428, + "step": 2319 + }, + { + "epoch": 0.34, + "learning_rate": 1.528478771779019e-05, + "loss": 0.8135, + "step": 2320 + }, + { + "epoch": 0.34, + "learning_rate": 1.528072087581805e-05, + "loss": 0.8643, + "step": 2321 + }, + { + "epoch": 0.34, + "learning_rate": 1.5276652822361894e-05, + "loss": 0.9058, + "step": 2322 + }, + { + "epoch": 0.34, + "learning_rate": 1.5272583558355005e-05, + "loss": 0.8247, + "step": 2323 + }, + { + "epoch": 0.34, + "learning_rate": 1.5268513084730935e-05, + "loss": 0.9053, + "step": 2324 + }, + { + "epoch": 0.34, + "learning_rate": 1.5264441402423518e-05, + "loss": 0.916, + "step": 2325 + }, + { + "epoch": 0.34, + "learning_rate": 1.5260368512366865e-05, + "loss": 0.8037, + "step": 2326 + }, + { + "epoch": 0.34, + "learning_rate": 1.525629441549537e-05, + "loss": 0.8486, + "step": 2327 + }, + { + "epoch": 0.34, + "learning_rate": 1.5252219112743685e-05, + "loss": 0.8691, + "step": 2328 + }, + { + "epoch": 0.34, + "learning_rate": 1.5248142605046758e-05, + "loss": 0.8896, + "step": 2329 + }, + { + "epoch": 0.34, + "learning_rate": 1.524406489333981e-05, + "loss": 0.9043, + "step": 2330 + }, + { + "epoch": 0.34, + "learning_rate": 1.5239985978558333e-05, + "loss": 0.8042, + "step": 2331 + }, + { + "epoch": 0.34, + "learning_rate": 1.5235905861638094e-05, + "loss": 0.832, + "step": 2332 + }, + { + "epoch": 0.35, + "learning_rate": 1.5231824543515141e-05, + "loss": 0.8271, + "step": 2333 + }, + { + "epoch": 0.35, + "learning_rate": 1.5227742025125794e-05, + "loss": 0.8081, + "step": 2334 + }, + { + "epoch": 0.35, + "learning_rate": 1.5223658307406654e-05, + "loss": 0.855, + "step": 2335 + }, + { + "epoch": 0.35, + "learning_rate": 1.5219573391294587e-05, + "loss": 0.7393, + "step": 2336 + }, + { + "epoch": 0.35, + "learning_rate": 1.5215487277726741e-05, + "loss": 0.8706, + "step": 2337 + }, + { + "epoch": 0.35, + "learning_rate": 1.521139996764054e-05, + "loss": 0.8452, + "step": 2338 + }, + { + "epoch": 0.35, + "learning_rate": 1.520731146197368e-05, + "loss": 0.8569, + "step": 2339 + }, + { + "epoch": 0.35, + "learning_rate": 1.5203221761664131e-05, + "loss": 0.8623, + "step": 2340 + }, + { + "epoch": 0.35, + "learning_rate": 1.5199130867650134e-05, + "loss": 0.8418, + "step": 2341 + }, + { + "epoch": 0.35, + "learning_rate": 1.5195038780870211e-05, + "loss": 0.8481, + "step": 2342 + }, + { + "epoch": 0.35, + "learning_rate": 1.5190945502263152e-05, + "loss": 0.8042, + "step": 2343 + }, + { + "epoch": 0.35, + "learning_rate": 1.5186851032768025e-05, + "loss": 0.8306, + "step": 2344 + }, + { + "epoch": 0.35, + "learning_rate": 1.5182755373324162e-05, + "loss": 0.3555, + "step": 2345 + }, + { + "epoch": 0.35, + "learning_rate": 1.5178658524871183e-05, + "loss": 0.7915, + "step": 2346 + }, + { + "epoch": 0.35, + "learning_rate": 1.5174560488348964e-05, + "loss": 0.7549, + "step": 2347 + }, + { + "epoch": 0.35, + "learning_rate": 1.5170461264697669e-05, + "loss": 0.3352, + "step": 2348 + }, + { + "epoch": 0.35, + "learning_rate": 1.5166360854857724e-05, + "loss": 0.8184, + "step": 2349 + }, + { + "epoch": 0.35, + "learning_rate": 1.5162259259769831e-05, + "loss": 0.8848, + "step": 2350 + }, + { + "epoch": 0.35, + "learning_rate": 1.515815648037496e-05, + "loss": 0.8574, + "step": 2351 + }, + { + "epoch": 0.35, + "learning_rate": 1.5154052517614361e-05, + "loss": 0.8545, + "step": 2352 + }, + { + "epoch": 0.35, + "learning_rate": 1.5149947372429546e-05, + "loss": 0.8599, + "step": 2353 + }, + { + "epoch": 0.35, + "learning_rate": 1.5145841045762304e-05, + "loss": 0.8604, + "step": 2354 + }, + { + "epoch": 0.35, + "learning_rate": 1.5141733538554694e-05, + "loss": 0.8428, + "step": 2355 + }, + { + "epoch": 0.35, + "learning_rate": 1.5137624851749047e-05, + "loss": 0.8135, + "step": 2356 + }, + { + "epoch": 0.35, + "learning_rate": 1.5133514986287962e-05, + "loss": 0.8701, + "step": 2357 + }, + { + "epoch": 0.35, + "learning_rate": 1.5129403943114311e-05, + "loss": 0.8462, + "step": 2358 + }, + { + "epoch": 0.35, + "learning_rate": 1.512529172317123e-05, + "loss": 0.9102, + "step": 2359 + }, + { + "epoch": 0.35, + "learning_rate": 1.5121178327402137e-05, + "loss": 0.7793, + "step": 2360 + }, + { + "epoch": 0.35, + "learning_rate": 1.5117063756750706e-05, + "loss": 0.7681, + "step": 2361 + }, + { + "epoch": 0.35, + "learning_rate": 1.5112948012160888e-05, + "loss": 0.8667, + "step": 2362 + }, + { + "epoch": 0.35, + "learning_rate": 1.5108831094576909e-05, + "loss": 0.811, + "step": 2363 + }, + { + "epoch": 0.35, + "learning_rate": 1.5104713004943245e-05, + "loss": 0.8506, + "step": 2364 + }, + { + "epoch": 0.35, + "learning_rate": 1.5100593744204665e-05, + "loss": 0.8433, + "step": 2365 + }, + { + "epoch": 0.35, + "learning_rate": 1.509647331330619e-05, + "loss": 0.8652, + "step": 2366 + }, + { + "epoch": 0.35, + "learning_rate": 1.5092351713193116e-05, + "loss": 0.8452, + "step": 2367 + }, + { + "epoch": 0.35, + "learning_rate": 1.5088228944811002e-05, + "loss": 0.8594, + "step": 2368 + }, + { + "epoch": 0.35, + "learning_rate": 1.5084105009105684e-05, + "loss": 0.7832, + "step": 2369 + }, + { + "epoch": 0.35, + "learning_rate": 1.5079979907023257e-05, + "loss": 0.8779, + "step": 2370 + }, + { + "epoch": 0.35, + "learning_rate": 1.5075853639510087e-05, + "loss": 0.8472, + "step": 2371 + }, + { + "epoch": 0.35, + "learning_rate": 1.5071726207512814e-05, + "loss": 0.8521, + "step": 2372 + }, + { + "epoch": 0.35, + "learning_rate": 1.506759761197833e-05, + "loss": 0.8125, + "step": 2373 + }, + { + "epoch": 0.35, + "learning_rate": 1.50634678538538e-05, + "loss": 0.8779, + "step": 2374 + }, + { + "epoch": 0.35, + "learning_rate": 1.5059336934086672e-05, + "loss": 0.8477, + "step": 2375 + }, + { + "epoch": 0.35, + "learning_rate": 1.5055204853624634e-05, + "loss": 0.8662, + "step": 2376 + }, + { + "epoch": 0.35, + "learning_rate": 1.5051071613415663e-05, + "loss": 0.8755, + "step": 2377 + }, + { + "epoch": 0.35, + "learning_rate": 1.5046937214407979e-05, + "loss": 0.8267, + "step": 2378 + }, + { + "epoch": 0.35, + "learning_rate": 1.5042801657550095e-05, + "loss": 0.7886, + "step": 2379 + }, + { + "epoch": 0.35, + "learning_rate": 1.5038664943790768e-05, + "loss": 0.853, + "step": 2380 + }, + { + "epoch": 0.35, + "learning_rate": 1.503452707407903e-05, + "loss": 0.8613, + "step": 2381 + }, + { + "epoch": 0.35, + "learning_rate": 1.5030388049364177e-05, + "loss": 0.8477, + "step": 2382 + }, + { + "epoch": 0.35, + "learning_rate": 1.5026247870595763e-05, + "loss": 0.854, + "step": 2383 + }, + { + "epoch": 0.35, + "learning_rate": 1.5022106538723625e-05, + "loss": 0.834, + "step": 2384 + }, + { + "epoch": 0.35, + "learning_rate": 1.5017964054697838e-05, + "loss": 0.8423, + "step": 2385 + }, + { + "epoch": 0.35, + "learning_rate": 1.5013820419468767e-05, + "loss": 0.8477, + "step": 2386 + }, + { + "epoch": 0.35, + "learning_rate": 1.5009675633987027e-05, + "loss": 0.8081, + "step": 2387 + }, + { + "epoch": 0.35, + "learning_rate": 1.5005529699203494e-05, + "loss": 0.873, + "step": 2388 + }, + { + "epoch": 0.35, + "learning_rate": 1.5001382616069325e-05, + "loss": 0.8608, + "step": 2389 + }, + { + "epoch": 0.35, + "learning_rate": 1.4997234385535917e-05, + "loss": 0.8042, + "step": 2390 + }, + { + "epoch": 0.35, + "learning_rate": 1.4993085008554951e-05, + "loss": 0.8584, + "step": 2391 + }, + { + "epoch": 0.35, + "learning_rate": 1.4988934486078355e-05, + "loss": 0.7705, + "step": 2392 + }, + { + "epoch": 0.35, + "learning_rate": 1.4984782819058334e-05, + "loss": 0.356, + "step": 2393 + }, + { + "epoch": 0.35, + "learning_rate": 1.4980630008447343e-05, + "loss": 0.8364, + "step": 2394 + }, + { + "epoch": 0.35, + "learning_rate": 1.4976476055198109e-05, + "loss": 0.8398, + "step": 2395 + }, + { + "epoch": 0.35, + "learning_rate": 1.4972320960263614e-05, + "loss": 0.9092, + "step": 2396 + }, + { + "epoch": 0.35, + "learning_rate": 1.4968164724597103e-05, + "loss": 0.9053, + "step": 2397 + }, + { + "epoch": 0.35, + "learning_rate": 1.496400734915209e-05, + "loss": 0.875, + "step": 2398 + }, + { + "epoch": 0.35, + "learning_rate": 1.4959848834882344e-05, + "loss": 0.8584, + "step": 2399 + }, + { + "epoch": 0.35, + "learning_rate": 1.4955689182741893e-05, + "loss": 0.8735, + "step": 2400 + }, + { + "epoch": 0.36, + "learning_rate": 1.4951528393685033e-05, + "loss": 0.8853, + "step": 2401 + }, + { + "epoch": 0.36, + "learning_rate": 1.4947366468666314e-05, + "loss": 0.8721, + "step": 2402 + }, + { + "epoch": 0.36, + "learning_rate": 1.4943203408640552e-05, + "loss": 0.7871, + "step": 2403 + }, + { + "epoch": 0.36, + "learning_rate": 1.4939039214562823e-05, + "loss": 0.3411, + "step": 2404 + }, + { + "epoch": 0.36, + "learning_rate": 1.4934873887388458e-05, + "loss": 0.8037, + "step": 2405 + }, + { + "epoch": 0.36, + "learning_rate": 1.4930707428073052e-05, + "loss": 0.812, + "step": 2406 + }, + { + "epoch": 0.36, + "learning_rate": 1.4926539837572462e-05, + "loss": 0.812, + "step": 2407 + }, + { + "epoch": 0.36, + "learning_rate": 1.49223711168428e-05, + "loss": 0.3472, + "step": 2408 + }, + { + "epoch": 0.36, + "learning_rate": 1.4918201266840437e-05, + "loss": 0.7954, + "step": 2409 + }, + { + "epoch": 0.36, + "learning_rate": 1.4914030288522006e-05, + "loss": 0.895, + "step": 2410 + }, + { + "epoch": 0.36, + "learning_rate": 1.4909858182844399e-05, + "loss": 0.855, + "step": 2411 + }, + { + "epoch": 0.36, + "learning_rate": 1.4905684950764768e-05, + "loss": 0.8735, + "step": 2412 + }, + { + "epoch": 0.36, + "learning_rate": 1.4901510593240514e-05, + "loss": 0.8525, + "step": 2413 + }, + { + "epoch": 0.36, + "learning_rate": 1.4897335111229307e-05, + "loss": 0.8711, + "step": 2414 + }, + { + "epoch": 0.36, + "learning_rate": 1.4893158505689071e-05, + "loss": 0.8252, + "step": 2415 + }, + { + "epoch": 0.36, + "learning_rate": 1.4888980777577988e-05, + "loss": 0.8481, + "step": 2416 + }, + { + "epoch": 0.36, + "learning_rate": 1.4884801927854501e-05, + "loss": 0.9155, + "step": 2417 + }, + { + "epoch": 0.36, + "learning_rate": 1.4880621957477299e-05, + "loss": 0.9092, + "step": 2418 + }, + { + "epoch": 0.36, + "learning_rate": 1.4876440867405341e-05, + "loss": 0.8105, + "step": 2419 + }, + { + "epoch": 0.36, + "learning_rate": 1.4872258658597835e-05, + "loss": 0.8833, + "step": 2420 + }, + { + "epoch": 0.36, + "learning_rate": 1.4868075332014253e-05, + "loss": 0.8833, + "step": 2421 + }, + { + "epoch": 0.36, + "learning_rate": 1.4863890888614314e-05, + "loss": 0.7876, + "step": 2422 + }, + { + "epoch": 0.36, + "learning_rate": 1.4859705329357999e-05, + "loss": 0.7812, + "step": 2423 + }, + { + "epoch": 0.36, + "learning_rate": 1.4855518655205546e-05, + "loss": 0.8135, + "step": 2424 + }, + { + "epoch": 0.36, + "learning_rate": 1.4851330867117444e-05, + "loss": 0.8403, + "step": 2425 + }, + { + "epoch": 0.36, + "learning_rate": 1.4847141966054438e-05, + "loss": 0.9087, + "step": 2426 + }, + { + "epoch": 0.36, + "learning_rate": 1.484295195297754e-05, + "loss": 0.8359, + "step": 2427 + }, + { + "epoch": 0.36, + "learning_rate": 1.4838760828847995e-05, + "loss": 0.8413, + "step": 2428 + }, + { + "epoch": 0.36, + "learning_rate": 1.483456859462733e-05, + "loss": 0.8105, + "step": 2429 + }, + { + "epoch": 0.36, + "learning_rate": 1.48303752512773e-05, + "loss": 0.8501, + "step": 2430 + }, + { + "epoch": 0.36, + "learning_rate": 1.4826180799759934e-05, + "loss": 0.8862, + "step": 2431 + }, + { + "epoch": 0.36, + "learning_rate": 1.4821985241037506e-05, + "loss": 0.7983, + "step": 2432 + }, + { + "epoch": 0.36, + "learning_rate": 1.481778857607254e-05, + "loss": 0.8516, + "step": 2433 + }, + { + "epoch": 0.36, + "learning_rate": 1.4813590805827831e-05, + "loss": 0.8169, + "step": 2434 + }, + { + "epoch": 0.36, + "learning_rate": 1.4809391931266411e-05, + "loss": 0.8945, + "step": 2435 + }, + { + "epoch": 0.36, + "learning_rate": 1.480519195335157e-05, + "loss": 0.9263, + "step": 2436 + }, + { + "epoch": 0.36, + "learning_rate": 1.4800990873046847e-05, + "loss": 0.8252, + "step": 2437 + }, + { + "epoch": 0.36, + "learning_rate": 1.4796788691316051e-05, + "loss": 0.8687, + "step": 2438 + }, + { + "epoch": 0.36, + "learning_rate": 1.4792585409123219e-05, + "loss": 0.8315, + "step": 2439 + }, + { + "epoch": 0.36, + "learning_rate": 1.4788381027432663e-05, + "loss": 0.8452, + "step": 2440 + }, + { + "epoch": 0.36, + "learning_rate": 1.4784175547208932e-05, + "loss": 0.7612, + "step": 2441 + }, + { + "epoch": 0.36, + "learning_rate": 1.477996896941683e-05, + "loss": 0.8696, + "step": 2442 + }, + { + "epoch": 0.36, + "learning_rate": 1.4775761295021418e-05, + "loss": 0.8286, + "step": 2443 + }, + { + "epoch": 0.36, + "learning_rate": 1.4771552524988008e-05, + "loss": 0.897, + "step": 2444 + }, + { + "epoch": 0.36, + "learning_rate": 1.4767342660282155e-05, + "loss": 0.8672, + "step": 2445 + }, + { + "epoch": 0.36, + "learning_rate": 1.4763131701869675e-05, + "loss": 0.8418, + "step": 2446 + }, + { + "epoch": 0.36, + "learning_rate": 1.4758919650716632e-05, + "loss": 0.8208, + "step": 2447 + }, + { + "epoch": 0.36, + "learning_rate": 1.4754706507789336e-05, + "loss": 0.8403, + "step": 2448 + }, + { + "epoch": 0.36, + "learning_rate": 1.4750492274054353e-05, + "loss": 0.8521, + "step": 2449 + }, + { + "epoch": 0.36, + "learning_rate": 1.47462769504785e-05, + "loss": 0.7749, + "step": 2450 + }, + { + "epoch": 0.36, + "learning_rate": 1.4742060538028833e-05, + "loss": 0.8242, + "step": 2451 + }, + { + "epoch": 0.36, + "learning_rate": 1.4737843037672677e-05, + "loss": 0.8867, + "step": 2452 + }, + { + "epoch": 0.36, + "learning_rate": 1.4733624450377589e-05, + "loss": 0.8237, + "step": 2453 + }, + { + "epoch": 0.36, + "learning_rate": 1.4729404777111383e-05, + "loss": 0.8252, + "step": 2454 + }, + { + "epoch": 0.36, + "learning_rate": 1.4725184018842123e-05, + "loss": 0.8477, + "step": 2455 + }, + { + "epoch": 0.36, + "learning_rate": 1.4720962176538117e-05, + "loss": 0.7783, + "step": 2456 + }, + { + "epoch": 0.36, + "learning_rate": 1.4716739251167931e-05, + "loss": 0.8467, + "step": 2457 + }, + { + "epoch": 0.36, + "learning_rate": 1.4712515243700367e-05, + "loss": 0.8291, + "step": 2458 + }, + { + "epoch": 0.36, + "learning_rate": 1.4708290155104483e-05, + "loss": 0.8979, + "step": 2459 + }, + { + "epoch": 0.36, + "learning_rate": 1.4704063986349587e-05, + "loss": 0.7915, + "step": 2460 + }, + { + "epoch": 0.36, + "learning_rate": 1.469983673840523e-05, + "loss": 0.7944, + "step": 2461 + }, + { + "epoch": 0.36, + "learning_rate": 1.4695608412241209e-05, + "loss": 0.7983, + "step": 2462 + }, + { + "epoch": 0.36, + "learning_rate": 1.4691379008827578e-05, + "loss": 0.7632, + "step": 2463 + }, + { + "epoch": 0.36, + "learning_rate": 1.4687148529134621e-05, + "loss": 0.854, + "step": 2464 + }, + { + "epoch": 0.36, + "learning_rate": 1.4682916974132889e-05, + "loss": 0.3185, + "step": 2465 + }, + { + "epoch": 0.36, + "learning_rate": 1.4678684344793166e-05, + "loss": 0.8467, + "step": 2466 + }, + { + "epoch": 0.36, + "learning_rate": 1.4674450642086486e-05, + "loss": 0.8354, + "step": 2467 + }, + { + "epoch": 0.36, + "learning_rate": 1.4670215866984134e-05, + "loss": 0.8242, + "step": 2468 + }, + { + "epoch": 0.37, + "learning_rate": 1.4665980020457631e-05, + "loss": 0.8325, + "step": 2469 + }, + { + "epoch": 0.37, + "learning_rate": 1.4661743103478759e-05, + "loss": 0.8398, + "step": 2470 + }, + { + "epoch": 0.37, + "learning_rate": 1.4657505117019523e-05, + "loss": 0.9058, + "step": 2471 + }, + { + "epoch": 0.37, + "learning_rate": 1.46532660620522e-05, + "loss": 0.3374, + "step": 2472 + }, + { + "epoch": 0.37, + "learning_rate": 1.4649025939549289e-05, + "loss": 0.8105, + "step": 2473 + }, + { + "epoch": 0.37, + "learning_rate": 1.4644784750483546e-05, + "loss": 0.8628, + "step": 2474 + }, + { + "epoch": 0.37, + "learning_rate": 1.4640542495827974e-05, + "loss": 0.874, + "step": 2475 + }, + { + "epoch": 0.37, + "learning_rate": 1.4636299176555813e-05, + "loss": 0.8472, + "step": 2476 + }, + { + "epoch": 0.37, + "learning_rate": 1.4632054793640547e-05, + "loss": 0.7842, + "step": 2477 + }, + { + "epoch": 0.37, + "learning_rate": 1.4627809348055908e-05, + "loss": 0.8506, + "step": 2478 + }, + { + "epoch": 0.37, + "learning_rate": 1.4623562840775874e-05, + "loss": 0.8853, + "step": 2479 + }, + { + "epoch": 0.37, + "learning_rate": 1.4619315272774662e-05, + "loss": 0.7827, + "step": 2480 + }, + { + "epoch": 0.37, + "learning_rate": 1.4615066645026735e-05, + "loss": 0.8257, + "step": 2481 + }, + { + "epoch": 0.37, + "learning_rate": 1.4610816958506796e-05, + "loss": 0.8335, + "step": 2482 + }, + { + "epoch": 0.37, + "learning_rate": 1.4606566214189793e-05, + "loss": 0.853, + "step": 2483 + }, + { + "epoch": 0.37, + "learning_rate": 1.4602314413050915e-05, + "loss": 0.8965, + "step": 2484 + }, + { + "epoch": 0.37, + "learning_rate": 1.4598061556065598e-05, + "loss": 0.8252, + "step": 2485 + }, + { + "epoch": 0.37, + "learning_rate": 1.4593807644209519e-05, + "loss": 0.8369, + "step": 2486 + }, + { + "epoch": 0.37, + "learning_rate": 1.458955267845859e-05, + "loss": 0.8696, + "step": 2487 + }, + { + "epoch": 0.37, + "learning_rate": 1.458529665978897e-05, + "loss": 0.7935, + "step": 2488 + }, + { + "epoch": 0.37, + "learning_rate": 1.4581039589177065e-05, + "loss": 0.8535, + "step": 2489 + }, + { + "epoch": 0.37, + "learning_rate": 1.457678146759951e-05, + "loss": 0.832, + "step": 2490 + }, + { + "epoch": 0.37, + "learning_rate": 1.4572522296033194e-05, + "loss": 0.8857, + "step": 2491 + }, + { + "epoch": 0.37, + "learning_rate": 1.4568262075455237e-05, + "loss": 0.8779, + "step": 2492 + }, + { + "epoch": 0.37, + "learning_rate": 1.4564000806843009e-05, + "loss": 0.8535, + "step": 2493 + }, + { + "epoch": 0.37, + "learning_rate": 1.4559738491174104e-05, + "loss": 0.8701, + "step": 2494 + }, + { + "epoch": 0.37, + "learning_rate": 1.455547512942638e-05, + "loss": 0.8413, + "step": 2495 + }, + { + "epoch": 0.37, + "learning_rate": 1.4551210722577911e-05, + "loss": 0.873, + "step": 2496 + }, + { + "epoch": 0.37, + "learning_rate": 1.454694527160703e-05, + "loss": 0.8188, + "step": 2497 + }, + { + "epoch": 0.37, + "learning_rate": 1.4542678777492298e-05, + "loss": 0.8364, + "step": 2498 + }, + { + "epoch": 0.37, + "learning_rate": 1.4538411241212518e-05, + "loss": 0.8618, + "step": 2499 + }, + { + "epoch": 0.37, + "learning_rate": 1.4534142663746736e-05, + "loss": 0.876, + "step": 2500 + }, + { + "epoch": 0.37, + "learning_rate": 1.452987304607423e-05, + "loss": 0.8877, + "step": 2501 + }, + { + "epoch": 0.37, + "learning_rate": 1.4525602389174523e-05, + "loss": 0.8638, + "step": 2502 + }, + { + "epoch": 0.37, + "learning_rate": 1.4521330694027372e-05, + "loss": 0.8384, + "step": 2503 + }, + { + "epoch": 0.37, + "learning_rate": 1.4517057961612777e-05, + "loss": 0.834, + "step": 2504 + }, + { + "epoch": 0.37, + "learning_rate": 1.4512784192910965e-05, + "loss": 0.8486, + "step": 2505 + }, + { + "epoch": 0.37, + "learning_rate": 1.4508509388902421e-05, + "loss": 0.8877, + "step": 2506 + }, + { + "epoch": 0.37, + "learning_rate": 1.4504233550567844e-05, + "loss": 0.7734, + "step": 2507 + }, + { + "epoch": 0.37, + "learning_rate": 1.449995667888819e-05, + "loss": 0.7603, + "step": 2508 + }, + { + "epoch": 0.37, + "learning_rate": 1.4495678774844634e-05, + "loss": 0.8013, + "step": 2509 + }, + { + "epoch": 0.37, + "learning_rate": 1.4491399839418608e-05, + "loss": 0.7742, + "step": 2510 + }, + { + "epoch": 0.37, + "learning_rate": 1.4487119873591761e-05, + "loss": 0.8564, + "step": 2511 + }, + { + "epoch": 0.37, + "learning_rate": 1.4482838878345992e-05, + "loss": 0.8486, + "step": 2512 + }, + { + "epoch": 0.37, + "learning_rate": 1.4478556854663435e-05, + "loss": 0.8149, + "step": 2513 + }, + { + "epoch": 0.37, + "learning_rate": 1.4474273803526446e-05, + "loss": 0.7637, + "step": 2514 + }, + { + "epoch": 0.37, + "learning_rate": 1.446998972591764e-05, + "loss": 0.8398, + "step": 2515 + }, + { + "epoch": 0.37, + "learning_rate": 1.4465704622819843e-05, + "loss": 0.3525, + "step": 2516 + }, + { + "epoch": 0.37, + "learning_rate": 1.4461418495216137e-05, + "loss": 0.8462, + "step": 2517 + }, + { + "epoch": 0.37, + "learning_rate": 1.4457131344089824e-05, + "loss": 0.8164, + "step": 2518 + }, + { + "epoch": 0.37, + "learning_rate": 1.445284317042445e-05, + "loss": 0.8643, + "step": 2519 + }, + { + "epoch": 0.37, + "learning_rate": 1.444855397520379e-05, + "loss": 0.8203, + "step": 2520 + }, + { + "epoch": 0.37, + "learning_rate": 1.4444263759411858e-05, + "loss": 0.7915, + "step": 2521 + }, + { + "epoch": 0.37, + "learning_rate": 1.4439972524032898e-05, + "loss": 0.8442, + "step": 2522 + }, + { + "epoch": 0.37, + "learning_rate": 1.4435680270051392e-05, + "loss": 0.811, + "step": 2523 + }, + { + "epoch": 0.37, + "learning_rate": 1.4431386998452052e-05, + "loss": 0.8022, + "step": 2524 + }, + { + "epoch": 0.37, + "learning_rate": 1.4427092710219826e-05, + "loss": 0.8379, + "step": 2525 + }, + { + "epoch": 0.37, + "learning_rate": 1.4422797406339893e-05, + "loss": 0.8672, + "step": 2526 + }, + { + "epoch": 0.37, + "learning_rate": 1.4418501087797667e-05, + "loss": 0.8398, + "step": 2527 + }, + { + "epoch": 0.37, + "learning_rate": 1.4414203755578791e-05, + "loss": 0.8286, + "step": 2528 + }, + { + "epoch": 0.37, + "learning_rate": 1.4409905410669147e-05, + "loss": 0.8535, + "step": 2529 + }, + { + "epoch": 0.37, + "learning_rate": 1.440560605405485e-05, + "loss": 0.8281, + "step": 2530 + }, + { + "epoch": 0.37, + "learning_rate": 1.4401305686722234e-05, + "loss": 0.8589, + "step": 2531 + }, + { + "epoch": 0.37, + "learning_rate": 1.4397004309657877e-05, + "loss": 0.7939, + "step": 2532 + }, + { + "epoch": 0.37, + "learning_rate": 1.4392701923848586e-05, + "loss": 0.7803, + "step": 2533 + }, + { + "epoch": 0.37, + "learning_rate": 1.4388398530281403e-05, + "loss": 0.8447, + "step": 2534 + }, + { + "epoch": 0.37, + "learning_rate": 1.4384094129943589e-05, + "loss": 0.8457, + "step": 2535 + }, + { + "epoch": 0.38, + "learning_rate": 1.437978872382265e-05, + "loss": 0.8872, + "step": 2536 + }, + { + "epoch": 0.38, + "learning_rate": 1.4375482312906314e-05, + "loss": 0.8379, + "step": 2537 + }, + { + "epoch": 0.38, + "learning_rate": 1.4371174898182547e-05, + "loss": 0.8716, + "step": 2538 + }, + { + "epoch": 0.38, + "learning_rate": 1.4366866480639532e-05, + "loss": 0.8804, + "step": 2539 + }, + { + "epoch": 0.38, + "learning_rate": 1.4362557061265698e-05, + "loss": 0.8345, + "step": 2540 + }, + { + "epoch": 0.38, + "learning_rate": 1.4358246641049696e-05, + "loss": 0.8286, + "step": 2541 + }, + { + "epoch": 0.38, + "learning_rate": 1.4353935220980404e-05, + "loss": 0.7832, + "step": 2542 + }, + { + "epoch": 0.38, + "learning_rate": 1.4349622802046933e-05, + "loss": 0.8359, + "step": 2543 + }, + { + "epoch": 0.38, + "learning_rate": 1.4345309385238624e-05, + "loss": 0.7554, + "step": 2544 + }, + { + "epoch": 0.38, + "learning_rate": 1.4340994971545046e-05, + "loss": 0.8169, + "step": 2545 + }, + { + "epoch": 0.38, + "learning_rate": 1.4336679561955993e-05, + "loss": 0.8652, + "step": 2546 + }, + { + "epoch": 0.38, + "learning_rate": 1.4332363157461498e-05, + "loss": 0.7754, + "step": 2547 + }, + { + "epoch": 0.38, + "learning_rate": 1.4328045759051805e-05, + "loss": 0.8506, + "step": 2548 + }, + { + "epoch": 0.38, + "learning_rate": 1.4323727367717404e-05, + "loss": 0.8623, + "step": 2549 + }, + { + "epoch": 0.38, + "learning_rate": 1.4319407984449e-05, + "loss": 0.8354, + "step": 2550 + }, + { + "epoch": 0.38, + "learning_rate": 1.4315087610237535e-05, + "loss": 0.791, + "step": 2551 + }, + { + "epoch": 0.38, + "learning_rate": 1.4310766246074168e-05, + "loss": 0.8228, + "step": 2552 + }, + { + "epoch": 0.38, + "learning_rate": 1.4306443892950297e-05, + "loss": 0.8379, + "step": 2553 + }, + { + "epoch": 0.38, + "learning_rate": 1.4302120551857535e-05, + "loss": 0.856, + "step": 2554 + }, + { + "epoch": 0.38, + "learning_rate": 1.4297796223787734e-05, + "loss": 0.8789, + "step": 2555 + }, + { + "epoch": 0.38, + "learning_rate": 1.4293470909732958e-05, + "loss": 0.7954, + "step": 2556 + }, + { + "epoch": 0.38, + "learning_rate": 1.4289144610685512e-05, + "loss": 0.8525, + "step": 2557 + }, + { + "epoch": 0.38, + "learning_rate": 1.4284817327637916e-05, + "loss": 0.8452, + "step": 2558 + }, + { + "epoch": 0.38, + "learning_rate": 1.4280489061582917e-05, + "loss": 0.8638, + "step": 2559 + }, + { + "epoch": 0.38, + "learning_rate": 1.4276159813513499e-05, + "loss": 0.813, + "step": 2560 + }, + { + "epoch": 0.38, + "learning_rate": 1.4271829584422854e-05, + "loss": 0.8911, + "step": 2561 + }, + { + "epoch": 0.38, + "learning_rate": 1.4267498375304417e-05, + "loss": 0.894, + "step": 2562 + }, + { + "epoch": 0.38, + "learning_rate": 1.4263166187151826e-05, + "loss": 0.8267, + "step": 2563 + }, + { + "epoch": 0.38, + "learning_rate": 1.4258833020958965e-05, + "loss": 0.7842, + "step": 2564 + }, + { + "epoch": 0.38, + "learning_rate": 1.425449887771993e-05, + "loss": 0.8242, + "step": 2565 + }, + { + "epoch": 0.38, + "learning_rate": 1.4250163758429045e-05, + "loss": 0.7793, + "step": 2566 + }, + { + "epoch": 0.38, + "learning_rate": 1.424582766408086e-05, + "loss": 0.8604, + "step": 2567 + }, + { + "epoch": 0.38, + "learning_rate": 1.4241490595670142e-05, + "loss": 0.8408, + "step": 2568 + }, + { + "epoch": 0.38, + "learning_rate": 1.4237152554191889e-05, + "loss": 0.8765, + "step": 2569 + }, + { + "epoch": 0.38, + "learning_rate": 1.4232813540641319e-05, + "loss": 0.877, + "step": 2570 + }, + { + "epoch": 0.38, + "learning_rate": 1.422847355601387e-05, + "loss": 0.7983, + "step": 2571 + }, + { + "epoch": 0.38, + "learning_rate": 1.4224132601305209e-05, + "loss": 0.8633, + "step": 2572 + }, + { + "epoch": 0.38, + "learning_rate": 1.4219790677511219e-05, + "loss": 0.8877, + "step": 2573 + }, + { + "epoch": 0.38, + "learning_rate": 1.421544778562801e-05, + "loss": 0.7529, + "step": 2574 + }, + { + "epoch": 0.38, + "learning_rate": 1.4211103926651915e-05, + "loss": 0.8833, + "step": 2575 + }, + { + "epoch": 0.38, + "learning_rate": 1.4206759101579481e-05, + "loss": 0.8618, + "step": 2576 + }, + { + "epoch": 0.38, + "learning_rate": 1.4202413311407488e-05, + "loss": 0.8569, + "step": 2577 + }, + { + "epoch": 0.38, + "learning_rate": 1.4198066557132928e-05, + "loss": 0.7979, + "step": 2578 + }, + { + "epoch": 0.38, + "learning_rate": 1.419371883975302e-05, + "loss": 0.8247, + "step": 2579 + }, + { + "epoch": 0.38, + "learning_rate": 1.41893701602652e-05, + "loss": 0.7852, + "step": 2580 + }, + { + "epoch": 0.38, + "learning_rate": 1.4185020519667126e-05, + "loss": 0.8789, + "step": 2581 + }, + { + "epoch": 0.38, + "learning_rate": 1.4180669918956676e-05, + "loss": 0.7852, + "step": 2582 + }, + { + "epoch": 0.38, + "learning_rate": 1.4176318359131955e-05, + "loss": 0.8115, + "step": 2583 + }, + { + "epoch": 0.38, + "learning_rate": 1.4171965841191278e-05, + "loss": 0.8687, + "step": 2584 + }, + { + "epoch": 0.38, + "learning_rate": 1.4167612366133184e-05, + "loss": 0.7319, + "step": 2585 + }, + { + "epoch": 0.38, + "learning_rate": 1.4163257934956427e-05, + "loss": 0.8652, + "step": 2586 + }, + { + "epoch": 0.38, + "learning_rate": 1.4158902548659996e-05, + "loss": 0.876, + "step": 2587 + }, + { + "epoch": 0.38, + "learning_rate": 1.4154546208243078e-05, + "loss": 0.8413, + "step": 2588 + }, + { + "epoch": 0.38, + "learning_rate": 1.4150188914705099e-05, + "loss": 0.9585, + "step": 2589 + }, + { + "epoch": 0.38, + "learning_rate": 1.414583066904568e-05, + "loss": 0.814, + "step": 2590 + }, + { + "epoch": 0.38, + "learning_rate": 1.4141471472264682e-05, + "loss": 0.8438, + "step": 2591 + }, + { + "epoch": 0.38, + "learning_rate": 1.4137111325362181e-05, + "loss": 0.8486, + "step": 2592 + }, + { + "epoch": 0.38, + "learning_rate": 1.413275022933846e-05, + "loss": 0.8164, + "step": 2593 + }, + { + "epoch": 0.38, + "learning_rate": 1.4128388185194026e-05, + "loss": 0.834, + "step": 2594 + }, + { + "epoch": 0.38, + "learning_rate": 1.4124025193929602e-05, + "loss": 0.8223, + "step": 2595 + }, + { + "epoch": 0.38, + "learning_rate": 1.4119661256546135e-05, + "loss": 0.9126, + "step": 2596 + }, + { + "epoch": 0.38, + "learning_rate": 1.411529637404478e-05, + "loss": 0.8345, + "step": 2597 + }, + { + "epoch": 0.38, + "learning_rate": 1.4110930547426917e-05, + "loss": 0.855, + "step": 2598 + }, + { + "epoch": 0.38, + "learning_rate": 1.4106563777694134e-05, + "loss": 0.8379, + "step": 2599 + }, + { + "epoch": 0.38, + "learning_rate": 1.4102196065848239e-05, + "loss": 0.833, + "step": 2600 + }, + { + "epoch": 0.38, + "learning_rate": 1.4097827412891258e-05, + "loss": 0.7964, + "step": 2601 + }, + { + "epoch": 0.38, + "learning_rate": 1.4093457819825436e-05, + "loss": 0.8335, + "step": 2602 + }, + { + "epoch": 0.38, + "learning_rate": 1.4089087287653222e-05, + "loss": 0.856, + "step": 2603 + }, + { + "epoch": 0.39, + "learning_rate": 1.4084715817377292e-05, + "loss": 0.8223, + "step": 2604 + }, + { + "epoch": 0.39, + "learning_rate": 1.4080343410000532e-05, + "loss": 0.8384, + "step": 2605 + }, + { + "epoch": 0.39, + "learning_rate": 1.4075970066526044e-05, + "loss": 0.8979, + "step": 2606 + }, + { + "epoch": 0.39, + "learning_rate": 1.4071595787957148e-05, + "loss": 0.3518, + "step": 2607 + }, + { + "epoch": 0.39, + "learning_rate": 1.4067220575297369e-05, + "loss": 0.8423, + "step": 2608 + }, + { + "epoch": 0.39, + "learning_rate": 1.4062844429550457e-05, + "loss": 0.874, + "step": 2609 + }, + { + "epoch": 0.39, + "learning_rate": 1.4058467351720371e-05, + "loss": 0.792, + "step": 2610 + }, + { + "epoch": 0.39, + "learning_rate": 1.4054089342811286e-05, + "loss": 0.8613, + "step": 2611 + }, + { + "epoch": 0.39, + "learning_rate": 1.4049710403827583e-05, + "loss": 0.8599, + "step": 2612 + }, + { + "epoch": 0.39, + "learning_rate": 1.4045330535773868e-05, + "loss": 0.7954, + "step": 2613 + }, + { + "epoch": 0.39, + "learning_rate": 1.4040949739654954e-05, + "loss": 0.8311, + "step": 2614 + }, + { + "epoch": 0.39, + "learning_rate": 1.403656801647587e-05, + "loss": 0.8301, + "step": 2615 + }, + { + "epoch": 0.39, + "learning_rate": 1.4032185367241846e-05, + "loss": 0.8887, + "step": 2616 + }, + { + "epoch": 0.39, + "learning_rate": 1.4027801792958343e-05, + "loss": 0.8267, + "step": 2617 + }, + { + "epoch": 0.39, + "learning_rate": 1.4023417294631019e-05, + "loss": 0.8315, + "step": 2618 + }, + { + "epoch": 0.39, + "learning_rate": 1.4019031873265756e-05, + "loss": 0.8232, + "step": 2619 + }, + { + "epoch": 0.39, + "learning_rate": 1.4014645529868634e-05, + "loss": 0.7344, + "step": 2620 + }, + { + "epoch": 0.39, + "learning_rate": 1.4010258265445957e-05, + "loss": 0.8755, + "step": 2621 + }, + { + "epoch": 0.39, + "learning_rate": 1.4005870081004233e-05, + "loss": 0.8511, + "step": 2622 + }, + { + "epoch": 0.39, + "learning_rate": 1.4001480977550186e-05, + "loss": 0.8223, + "step": 2623 + }, + { + "epoch": 0.39, + "learning_rate": 1.3997090956090751e-05, + "loss": 0.8164, + "step": 2624 + }, + { + "epoch": 0.39, + "learning_rate": 1.3992700017633063e-05, + "loss": 0.8843, + "step": 2625 + }, + { + "epoch": 0.39, + "learning_rate": 1.398830816318448e-05, + "loss": 0.8105, + "step": 2626 + }, + { + "epoch": 0.39, + "learning_rate": 1.3983915393752565e-05, + "loss": 0.8687, + "step": 2627 + }, + { + "epoch": 0.39, + "learning_rate": 1.3979521710345094e-05, + "loss": 0.7861, + "step": 2628 + }, + { + "epoch": 0.39, + "learning_rate": 1.3975127113970047e-05, + "loss": 0.8286, + "step": 2629 + }, + { + "epoch": 0.39, + "learning_rate": 1.3970731605635614e-05, + "loss": 0.8018, + "step": 2630 + }, + { + "epoch": 0.39, + "learning_rate": 1.3966335186350199e-05, + "loss": 0.8667, + "step": 2631 + }, + { + "epoch": 0.39, + "learning_rate": 1.3961937857122418e-05, + "loss": 0.8184, + "step": 2632 + }, + { + "epoch": 0.39, + "learning_rate": 1.3957539618961084e-05, + "loss": 0.8457, + "step": 2633 + }, + { + "epoch": 0.39, + "learning_rate": 1.3953140472875228e-05, + "loss": 0.8364, + "step": 2634 + }, + { + "epoch": 0.39, + "learning_rate": 1.3948740419874085e-05, + "loss": 0.9224, + "step": 2635 + }, + { + "epoch": 0.39, + "learning_rate": 1.3944339460967098e-05, + "loss": 0.8267, + "step": 2636 + }, + { + "epoch": 0.39, + "learning_rate": 1.393993759716392e-05, + "loss": 0.8647, + "step": 2637 + }, + { + "epoch": 0.39, + "learning_rate": 1.3935534829474414e-05, + "loss": 0.8569, + "step": 2638 + }, + { + "epoch": 0.39, + "learning_rate": 1.3931131158908644e-05, + "loss": 0.3097, + "step": 2639 + }, + { + "epoch": 0.39, + "learning_rate": 1.3926726586476883e-05, + "loss": 0.875, + "step": 2640 + }, + { + "epoch": 0.39, + "learning_rate": 1.3922321113189617e-05, + "loss": 0.8901, + "step": 2641 + }, + { + "epoch": 0.39, + "learning_rate": 1.3917914740057527e-05, + "loss": 0.8511, + "step": 2642 + }, + { + "epoch": 0.39, + "learning_rate": 1.3913507468091515e-05, + "loss": 0.8242, + "step": 2643 + }, + { + "epoch": 0.39, + "learning_rate": 1.3909099298302677e-05, + "loss": 0.8281, + "step": 2644 + }, + { + "epoch": 0.39, + "learning_rate": 1.390469023170232e-05, + "loss": 0.8071, + "step": 2645 + }, + { + "epoch": 0.39, + "learning_rate": 1.3900280269301957e-05, + "loss": 0.7793, + "step": 2646 + }, + { + "epoch": 0.39, + "learning_rate": 1.3895869412113308e-05, + "loss": 0.8755, + "step": 2647 + }, + { + "epoch": 0.39, + "learning_rate": 1.3891457661148289e-05, + "loss": 0.3286, + "step": 2648 + }, + { + "epoch": 0.39, + "learning_rate": 1.3887045017419032e-05, + "loss": 0.8154, + "step": 2649 + }, + { + "epoch": 0.39, + "learning_rate": 1.3882631481937872e-05, + "loss": 0.835, + "step": 2650 + }, + { + "epoch": 0.39, + "learning_rate": 1.3878217055717345e-05, + "loss": 0.7852, + "step": 2651 + }, + { + "epoch": 0.39, + "learning_rate": 1.3873801739770192e-05, + "loss": 0.8691, + "step": 2652 + }, + { + "epoch": 0.39, + "learning_rate": 1.3869385535109358e-05, + "loss": 0.8892, + "step": 2653 + }, + { + "epoch": 0.39, + "learning_rate": 1.3864968442747995e-05, + "loss": 0.8145, + "step": 2654 + }, + { + "epoch": 0.39, + "learning_rate": 1.3860550463699456e-05, + "loss": 0.8569, + "step": 2655 + }, + { + "epoch": 0.39, + "learning_rate": 1.3856131598977298e-05, + "loss": 0.8164, + "step": 2656 + }, + { + "epoch": 0.39, + "learning_rate": 1.385171184959528e-05, + "loss": 0.791, + "step": 2657 + }, + { + "epoch": 0.39, + "learning_rate": 1.3847291216567364e-05, + "loss": 0.354, + "step": 2658 + }, + { + "epoch": 0.39, + "learning_rate": 1.384286970090772e-05, + "loss": 0.834, + "step": 2659 + }, + { + "epoch": 0.39, + "learning_rate": 1.3838447303630713e-05, + "loss": 0.8608, + "step": 2660 + }, + { + "epoch": 0.39, + "learning_rate": 1.3834024025750914e-05, + "loss": 0.7793, + "step": 2661 + }, + { + "epoch": 0.39, + "learning_rate": 1.3829599868283094e-05, + "loss": 0.832, + "step": 2662 + }, + { + "epoch": 0.39, + "learning_rate": 1.3825174832242232e-05, + "loss": 0.8286, + "step": 2663 + }, + { + "epoch": 0.39, + "learning_rate": 1.38207489186435e-05, + "loss": 0.8013, + "step": 2664 + }, + { + "epoch": 0.39, + "learning_rate": 1.3816322128502276e-05, + "loss": 0.8481, + "step": 2665 + }, + { + "epoch": 0.39, + "learning_rate": 1.381189446283414e-05, + "loss": 0.8286, + "step": 2666 + }, + { + "epoch": 0.39, + "learning_rate": 1.3807465922654863e-05, + "loss": 0.8154, + "step": 2667 + }, + { + "epoch": 0.39, + "learning_rate": 1.3803036508980436e-05, + "loss": 0.7695, + "step": 2668 + }, + { + "epoch": 0.39, + "learning_rate": 1.3798606222827033e-05, + "loss": 0.8809, + "step": 2669 + }, + { + "epoch": 0.39, + "learning_rate": 1.3794175065211035e-05, + "loss": 0.8936, + "step": 2670 + }, + { + "epoch": 0.4, + "learning_rate": 1.3789743037149026e-05, + "loss": 0.7544, + "step": 2671 + }, + { + "epoch": 0.4, + "learning_rate": 1.378531013965778e-05, + "loss": 0.7788, + "step": 2672 + }, + { + "epoch": 0.4, + "learning_rate": 1.3780876373754282e-05, + "loss": 0.8711, + "step": 2673 + }, + { + "epoch": 0.4, + "learning_rate": 1.3776441740455706e-05, + "loss": 0.8081, + "step": 2674 + }, + { + "epoch": 0.4, + "learning_rate": 1.3772006240779435e-05, + "loss": 0.8706, + "step": 2675 + }, + { + "epoch": 0.4, + "learning_rate": 1.3767569875743036e-05, + "loss": 0.8096, + "step": 2676 + }, + { + "epoch": 0.4, + "learning_rate": 1.3763132646364294e-05, + "loss": 0.8857, + "step": 2677 + }, + { + "epoch": 0.4, + "learning_rate": 1.375869455366118e-05, + "loss": 0.7861, + "step": 2678 + }, + { + "epoch": 0.4, + "learning_rate": 1.3754255598651862e-05, + "loss": 0.8579, + "step": 2679 + }, + { + "epoch": 0.4, + "learning_rate": 1.374981578235471e-05, + "loss": 0.7798, + "step": 2680 + }, + { + "epoch": 0.4, + "learning_rate": 1.374537510578829e-05, + "loss": 0.8496, + "step": 2681 + }, + { + "epoch": 0.4, + "learning_rate": 1.3740933569971368e-05, + "loss": 0.7729, + "step": 2682 + }, + { + "epoch": 0.4, + "learning_rate": 1.3736491175922909e-05, + "loss": 0.8599, + "step": 2683 + }, + { + "epoch": 0.4, + "learning_rate": 1.373204792466206e-05, + "loss": 0.8364, + "step": 2684 + }, + { + "epoch": 0.4, + "learning_rate": 1.3727603817208186e-05, + "loss": 0.8389, + "step": 2685 + }, + { + "epoch": 0.4, + "learning_rate": 1.3723158854580834e-05, + "loss": 0.8154, + "step": 2686 + }, + { + "epoch": 0.4, + "learning_rate": 1.3718713037799752e-05, + "loss": 0.811, + "step": 2687 + }, + { + "epoch": 0.4, + "learning_rate": 1.3714266367884883e-05, + "loss": 0.8623, + "step": 2688 + }, + { + "epoch": 0.4, + "learning_rate": 1.3709818845856366e-05, + "loss": 0.7759, + "step": 2689 + }, + { + "epoch": 0.4, + "learning_rate": 1.3705370472734537e-05, + "loss": 0.8965, + "step": 2690 + }, + { + "epoch": 0.4, + "learning_rate": 1.3700921249539923e-05, + "loss": 0.8638, + "step": 2691 + }, + { + "epoch": 0.4, + "learning_rate": 1.3696471177293253e-05, + "loss": 0.8594, + "step": 2692 + }, + { + "epoch": 0.4, + "learning_rate": 1.3692020257015441e-05, + "loss": 0.8892, + "step": 2693 + }, + { + "epoch": 0.4, + "learning_rate": 1.3687568489727606e-05, + "loss": 0.3252, + "step": 2694 + }, + { + "epoch": 0.4, + "learning_rate": 1.3683115876451054e-05, + "loss": 0.8311, + "step": 2695 + }, + { + "epoch": 0.4, + "learning_rate": 1.3678662418207289e-05, + "loss": 0.8604, + "step": 2696 + }, + { + "epoch": 0.4, + "learning_rate": 1.3674208116018007e-05, + "loss": 0.8481, + "step": 2697 + }, + { + "epoch": 0.4, + "learning_rate": 1.3669752970905095e-05, + "loss": 0.8174, + "step": 2698 + }, + { + "epoch": 0.4, + "learning_rate": 1.3665296983890639e-05, + "loss": 0.8423, + "step": 2699 + }, + { + "epoch": 0.4, + "learning_rate": 1.366084015599692e-05, + "loss": 0.7905, + "step": 2700 + }, + { + "epoch": 0.4, + "learning_rate": 1.3656382488246399e-05, + "loss": 0.8105, + "step": 2701 + }, + { + "epoch": 0.4, + "learning_rate": 1.3651923981661741e-05, + "loss": 0.7964, + "step": 2702 + }, + { + "epoch": 0.4, + "learning_rate": 1.3647464637265803e-05, + "loss": 0.8887, + "step": 2703 + }, + { + "epoch": 0.4, + "learning_rate": 1.364300445608163e-05, + "loss": 0.7964, + "step": 2704 + }, + { + "epoch": 0.4, + "learning_rate": 1.3638543439132464e-05, + "loss": 0.9229, + "step": 2705 + }, + { + "epoch": 0.4, + "learning_rate": 1.363408158744173e-05, + "loss": 0.8511, + "step": 2706 + }, + { + "epoch": 0.4, + "learning_rate": 1.3629618902033053e-05, + "loss": 0.8501, + "step": 2707 + }, + { + "epoch": 0.4, + "learning_rate": 1.3625155383930246e-05, + "loss": 0.8081, + "step": 2708 + }, + { + "epoch": 0.4, + "learning_rate": 1.3620691034157314e-05, + "loss": 0.832, + "step": 2709 + }, + { + "epoch": 0.4, + "learning_rate": 1.361622585373845e-05, + "loss": 0.875, + "step": 2710 + }, + { + "epoch": 0.4, + "learning_rate": 1.3611759843698043e-05, + "loss": 0.8647, + "step": 2711 + }, + { + "epoch": 0.4, + "learning_rate": 1.3607293005060663e-05, + "loss": 0.8188, + "step": 2712 + }, + { + "epoch": 0.4, + "learning_rate": 1.3602825338851082e-05, + "loss": 0.8638, + "step": 2713 + }, + { + "epoch": 0.4, + "learning_rate": 1.3598356846094253e-05, + "loss": 0.8994, + "step": 2714 + }, + { + "epoch": 0.4, + "learning_rate": 1.3593887527815327e-05, + "loss": 0.8213, + "step": 2715 + }, + { + "epoch": 0.4, + "learning_rate": 1.358941738503963e-05, + "loss": 0.877, + "step": 2716 + }, + { + "epoch": 0.4, + "learning_rate": 1.3584946418792688e-05, + "loss": 0.856, + "step": 2717 + }, + { + "epoch": 0.4, + "learning_rate": 1.358047463010022e-05, + "loss": 0.7471, + "step": 2718 + }, + { + "epoch": 0.4, + "learning_rate": 1.3576002019988123e-05, + "loss": 0.8496, + "step": 2719 + }, + { + "epoch": 0.4, + "learning_rate": 1.3571528589482492e-05, + "loss": 0.7363, + "step": 2720 + }, + { + "epoch": 0.4, + "learning_rate": 1.3567054339609595e-05, + "loss": 0.7954, + "step": 2721 + }, + { + "epoch": 0.4, + "learning_rate": 1.3562579271395913e-05, + "loss": 0.8604, + "step": 2722 + }, + { + "epoch": 0.4, + "learning_rate": 1.3558103385868087e-05, + "loss": 0.8472, + "step": 2723 + }, + { + "epoch": 0.4, + "learning_rate": 1.3553626684052966e-05, + "loss": 0.8042, + "step": 2724 + }, + { + "epoch": 0.4, + "learning_rate": 1.3549149166977573e-05, + "loss": 0.8311, + "step": 2725 + }, + { + "epoch": 0.4, + "learning_rate": 1.354467083566913e-05, + "loss": 0.8447, + "step": 2726 + }, + { + "epoch": 0.4, + "learning_rate": 1.3540191691155036e-05, + "loss": 0.7656, + "step": 2727 + }, + { + "epoch": 0.4, + "learning_rate": 1.3535711734462883e-05, + "loss": 0.7988, + "step": 2728 + }, + { + "epoch": 0.4, + "learning_rate": 1.3531230966620444e-05, + "loss": 0.855, + "step": 2729 + }, + { + "epoch": 0.4, + "learning_rate": 1.352674938865568e-05, + "loss": 0.8071, + "step": 2730 + }, + { + "epoch": 0.4, + "learning_rate": 1.3522267001596742e-05, + "loss": 0.8384, + "step": 2731 + }, + { + "epoch": 0.4, + "learning_rate": 1.351778380647196e-05, + "loss": 0.8018, + "step": 2732 + }, + { + "epoch": 0.4, + "learning_rate": 1.3513299804309856e-05, + "loss": 0.895, + "step": 2733 + }, + { + "epoch": 0.4, + "learning_rate": 1.3508814996139128e-05, + "loss": 0.7739, + "step": 2734 + }, + { + "epoch": 0.4, + "learning_rate": 1.3504329382988671e-05, + "loss": 0.877, + "step": 2735 + }, + { + "epoch": 0.4, + "learning_rate": 1.3499842965887552e-05, + "loss": 0.8477, + "step": 2736 + }, + { + "epoch": 0.4, + "learning_rate": 1.3495355745865038e-05, + "loss": 0.8423, + "step": 2737 + }, + { + "epoch": 0.4, + "learning_rate": 1.3490867723950559e-05, + "loss": 0.9014, + "step": 2738 + }, + { + "epoch": 0.41, + "learning_rate": 1.3486378901173747e-05, + "loss": 0.8315, + "step": 2739 + }, + { + "epoch": 0.41, + "learning_rate": 1.3481889278564414e-05, + "loss": 0.853, + "step": 2740 + }, + { + "epoch": 0.41, + "learning_rate": 1.347739885715255e-05, + "loss": 0.8184, + "step": 2741 + }, + { + "epoch": 0.41, + "learning_rate": 1.3472907637968331e-05, + "loss": 0.9121, + "step": 2742 + }, + { + "epoch": 0.41, + "learning_rate": 1.3468415622042117e-05, + "loss": 0.7944, + "step": 2743 + }, + { + "epoch": 0.41, + "learning_rate": 1.3463922810404448e-05, + "loss": 0.8018, + "step": 2744 + }, + { + "epoch": 0.41, + "learning_rate": 1.3459429204086056e-05, + "loss": 0.7764, + "step": 2745 + }, + { + "epoch": 0.41, + "learning_rate": 1.345493480411784e-05, + "loss": 0.9062, + "step": 2746 + }, + { + "epoch": 0.41, + "learning_rate": 1.3450439611530892e-05, + "loss": 0.7637, + "step": 2747 + }, + { + "epoch": 0.41, + "learning_rate": 1.3445943627356481e-05, + "loss": 0.8652, + "step": 2748 + }, + { + "epoch": 0.41, + "learning_rate": 1.3441446852626064e-05, + "loss": 0.8672, + "step": 2749 + }, + { + "epoch": 0.41, + "learning_rate": 1.3436949288371275e-05, + "loss": 0.8096, + "step": 2750 + }, + { + "epoch": 0.41, + "learning_rate": 1.3432450935623922e-05, + "loss": 0.8535, + "step": 2751 + }, + { + "epoch": 0.41, + "learning_rate": 1.342795179541601e-05, + "loss": 0.8101, + "step": 2752 + }, + { + "epoch": 0.41, + "learning_rate": 1.3423451868779707e-05, + "loss": 0.8149, + "step": 2753 + }, + { + "epoch": 0.41, + "learning_rate": 1.3418951156747374e-05, + "loss": 0.8447, + "step": 2754 + }, + { + "epoch": 0.41, + "learning_rate": 1.3414449660351552e-05, + "loss": 0.8242, + "step": 2755 + }, + { + "epoch": 0.41, + "learning_rate": 1.3409947380624953e-05, + "loss": 0.7524, + "step": 2756 + }, + { + "epoch": 0.41, + "learning_rate": 1.3405444318600471e-05, + "loss": 0.7881, + "step": 2757 + }, + { + "epoch": 0.41, + "learning_rate": 1.3400940475311193e-05, + "loss": 0.8228, + "step": 2758 + }, + { + "epoch": 0.41, + "learning_rate": 1.3396435851790364e-05, + "loss": 0.8076, + "step": 2759 + }, + { + "epoch": 0.41, + "learning_rate": 1.3391930449071424e-05, + "loss": 0.8589, + "step": 2760 + }, + { + "epoch": 0.41, + "learning_rate": 1.3387424268187982e-05, + "loss": 0.8345, + "step": 2761 + }, + { + "epoch": 0.41, + "learning_rate": 1.3382917310173835e-05, + "loss": 0.7798, + "step": 2762 + }, + { + "epoch": 0.41, + "learning_rate": 1.3378409576062952e-05, + "loss": 0.8076, + "step": 2763 + }, + { + "epoch": 0.41, + "learning_rate": 1.3373901066889477e-05, + "loss": 0.8574, + "step": 2764 + }, + { + "epoch": 0.41, + "learning_rate": 1.3369391783687742e-05, + "loss": 0.7749, + "step": 2765 + }, + { + "epoch": 0.41, + "learning_rate": 1.3364881727492247e-05, + "loss": 0.8467, + "step": 2766 + }, + { + "epoch": 0.41, + "learning_rate": 1.3360370899337674e-05, + "loss": 0.8057, + "step": 2767 + }, + { + "epoch": 0.41, + "learning_rate": 1.3355859300258878e-05, + "loss": 0.8398, + "step": 2768 + }, + { + "epoch": 0.41, + "learning_rate": 1.3351346931290899e-05, + "loss": 0.8032, + "step": 2769 + }, + { + "epoch": 0.41, + "learning_rate": 1.3346833793468943e-05, + "loss": 0.8252, + "step": 2770 + }, + { + "epoch": 0.41, + "learning_rate": 1.3342319887828402e-05, + "loss": 0.8525, + "step": 2771 + }, + { + "epoch": 0.41, + "learning_rate": 1.3337805215404837e-05, + "loss": 0.3821, + "step": 2772 + }, + { + "epoch": 0.41, + "learning_rate": 1.3333289777233993e-05, + "loss": 0.771, + "step": 2773 + }, + { + "epoch": 0.41, + "learning_rate": 1.3328773574351779e-05, + "loss": 0.9141, + "step": 2774 + }, + { + "epoch": 0.41, + "learning_rate": 1.3324256607794292e-05, + "loss": 0.8569, + "step": 2775 + }, + { + "epoch": 0.41, + "learning_rate": 1.3319738878597792e-05, + "loss": 0.8267, + "step": 2776 + }, + { + "epoch": 0.41, + "learning_rate": 1.3315220387798728e-05, + "loss": 0.7993, + "step": 2777 + }, + { + "epoch": 0.41, + "learning_rate": 1.3310701136433708e-05, + "loss": 0.9004, + "step": 2778 + }, + { + "epoch": 0.41, + "learning_rate": 1.3306181125539528e-05, + "loss": 0.8496, + "step": 2779 + }, + { + "epoch": 0.41, + "learning_rate": 1.330166035615315e-05, + "loss": 0.8423, + "step": 2780 + }, + { + "epoch": 0.41, + "learning_rate": 1.3297138829311713e-05, + "loss": 0.8364, + "step": 2781 + }, + { + "epoch": 0.41, + "learning_rate": 1.329261654605253e-05, + "loss": 0.8203, + "step": 2782 + }, + { + "epoch": 0.41, + "learning_rate": 1.3288093507413086e-05, + "loss": 0.8633, + "step": 2783 + }, + { + "epoch": 0.41, + "learning_rate": 1.3283569714431042e-05, + "loss": 0.9351, + "step": 2784 + }, + { + "epoch": 0.41, + "learning_rate": 1.3279045168144228e-05, + "loss": 0.8472, + "step": 2785 + }, + { + "epoch": 0.41, + "learning_rate": 1.3274519869590656e-05, + "loss": 0.71, + "step": 2786 + }, + { + "epoch": 0.41, + "learning_rate": 1.3269993819808493e-05, + "loss": 0.769, + "step": 2787 + }, + { + "epoch": 0.41, + "learning_rate": 1.3265467019836095e-05, + "loss": 0.8931, + "step": 2788 + }, + { + "epoch": 0.41, + "learning_rate": 1.3260939470711984e-05, + "loss": 0.8369, + "step": 2789 + }, + { + "epoch": 0.41, + "learning_rate": 1.3256411173474854e-05, + "loss": 0.8154, + "step": 2790 + }, + { + "epoch": 0.41, + "learning_rate": 1.325188212916357e-05, + "loss": 0.8408, + "step": 2791 + }, + { + "epoch": 0.41, + "learning_rate": 1.3247352338817172e-05, + "loss": 0.8354, + "step": 2792 + }, + { + "epoch": 0.41, + "learning_rate": 1.3242821803474861e-05, + "loss": 0.875, + "step": 2793 + }, + { + "epoch": 0.41, + "learning_rate": 1.3238290524176023e-05, + "loss": 0.7456, + "step": 2794 + }, + { + "epoch": 0.41, + "learning_rate": 1.3233758501960205e-05, + "loss": 0.7817, + "step": 2795 + }, + { + "epoch": 0.41, + "learning_rate": 1.3229225737867126e-05, + "loss": 0.8511, + "step": 2796 + }, + { + "epoch": 0.41, + "learning_rate": 1.3224692232936685e-05, + "loss": 0.8716, + "step": 2797 + }, + { + "epoch": 0.41, + "learning_rate": 1.3220157988208926e-05, + "loss": 0.8096, + "step": 2798 + }, + { + "epoch": 0.41, + "learning_rate": 1.3215623004724096e-05, + "loss": 0.8354, + "step": 2799 + }, + { + "epoch": 0.41, + "learning_rate": 1.3211087283522586e-05, + "loss": 0.8369, + "step": 2800 + }, + { + "epoch": 0.41, + "learning_rate": 1.3206550825644965e-05, + "loss": 0.8423, + "step": 2801 + }, + { + "epoch": 0.41, + "learning_rate": 1.3202013632131973e-05, + "loss": 0.894, + "step": 2802 + }, + { + "epoch": 0.41, + "learning_rate": 1.3197475704024516e-05, + "loss": 0.8052, + "step": 2803 + }, + { + "epoch": 0.41, + "learning_rate": 1.319293704236367e-05, + "loss": 0.7622, + "step": 2804 + }, + { + "epoch": 0.41, + "learning_rate": 1.3188397648190676e-05, + "loss": 0.8247, + "step": 2805 + }, + { + "epoch": 0.41, + "learning_rate": 1.3183857522546948e-05, + "loss": 0.9395, + "step": 2806 + }, + { + "epoch": 0.42, + "learning_rate": 1.3179316666474063e-05, + "loss": 0.8838, + "step": 2807 + }, + { + "epoch": 0.42, + "learning_rate": 1.3174775081013768e-05, + "loss": 0.8242, + "step": 2808 + }, + { + "epoch": 0.42, + "learning_rate": 1.3170232767207982e-05, + "loss": 0.8232, + "step": 2809 + }, + { + "epoch": 0.42, + "learning_rate": 1.3165689726098779e-05, + "loss": 0.8242, + "step": 2810 + }, + { + "epoch": 0.42, + "learning_rate": 1.3161145958728412e-05, + "loss": 0.813, + "step": 2811 + }, + { + "epoch": 0.42, + "learning_rate": 1.3156601466139293e-05, + "loss": 0.832, + "step": 2812 + }, + { + "epoch": 0.42, + "learning_rate": 1.3152056249374008e-05, + "loss": 0.8936, + "step": 2813 + }, + { + "epoch": 0.42, + "learning_rate": 1.3147510309475301e-05, + "loss": 0.8594, + "step": 2814 + }, + { + "epoch": 0.42, + "learning_rate": 1.3142963647486084e-05, + "loss": 0.8423, + "step": 2815 + }, + { + "epoch": 0.42, + "learning_rate": 1.3138416264449439e-05, + "loss": 0.8481, + "step": 2816 + }, + { + "epoch": 0.42, + "learning_rate": 1.3133868161408605e-05, + "loss": 0.8555, + "step": 2817 + }, + { + "epoch": 0.42, + "learning_rate": 1.3129319339406998e-05, + "loss": 0.7896, + "step": 2818 + }, + { + "epoch": 0.42, + "learning_rate": 1.3124769799488189e-05, + "loss": 0.8594, + "step": 2819 + }, + { + "epoch": 0.42, + "learning_rate": 1.3120219542695916e-05, + "loss": 0.3273, + "step": 2820 + }, + { + "epoch": 0.42, + "learning_rate": 1.3115668570074083e-05, + "loss": 0.79, + "step": 2821 + }, + { + "epoch": 0.42, + "learning_rate": 1.3111116882666764e-05, + "loss": 0.8125, + "step": 2822 + }, + { + "epoch": 0.42, + "learning_rate": 1.310656448151818e-05, + "loss": 0.833, + "step": 2823 + }, + { + "epoch": 0.42, + "learning_rate": 1.3102011367672734e-05, + "loss": 0.8345, + "step": 2824 + }, + { + "epoch": 0.42, + "learning_rate": 1.3097457542174983e-05, + "loss": 0.7666, + "step": 2825 + }, + { + "epoch": 0.42, + "learning_rate": 1.3092903006069651e-05, + "loss": 0.9297, + "step": 2826 + }, + { + "epoch": 0.42, + "learning_rate": 1.308834776040162e-05, + "loss": 0.8765, + "step": 2827 + }, + { + "epoch": 0.42, + "learning_rate": 1.308379180621594e-05, + "loss": 0.8784, + "step": 2828 + }, + { + "epoch": 0.42, + "learning_rate": 1.307923514455782e-05, + "loss": 0.8257, + "step": 2829 + }, + { + "epoch": 0.42, + "learning_rate": 1.3074677776472635e-05, + "loss": 0.8794, + "step": 2830 + }, + { + "epoch": 0.42, + "learning_rate": 1.3070119703005922e-05, + "loss": 0.8389, + "step": 2831 + }, + { + "epoch": 0.42, + "learning_rate": 1.3065560925203373e-05, + "loss": 0.3099, + "step": 2832 + }, + { + "epoch": 0.42, + "learning_rate": 1.3061001444110852e-05, + "loss": 0.8276, + "step": 2833 + }, + { + "epoch": 0.42, + "learning_rate": 1.3056441260774371e-05, + "loss": 0.8608, + "step": 2834 + }, + { + "epoch": 0.42, + "learning_rate": 1.3051880376240117e-05, + "loss": 0.8003, + "step": 2835 + }, + { + "epoch": 0.42, + "learning_rate": 1.3047318791554431e-05, + "loss": 0.752, + "step": 2836 + }, + { + "epoch": 0.42, + "learning_rate": 1.3042756507763813e-05, + "loss": 0.8062, + "step": 2837 + }, + { + "epoch": 0.42, + "learning_rate": 1.3038193525914928e-05, + "loss": 0.8149, + "step": 2838 + }, + { + "epoch": 0.42, + "learning_rate": 1.3033629847054598e-05, + "loss": 0.8013, + "step": 2839 + }, + { + "epoch": 0.42, + "learning_rate": 1.3029065472229806e-05, + "loss": 0.8359, + "step": 2840 + }, + { + "epoch": 0.42, + "learning_rate": 1.3024500402487697e-05, + "loss": 0.7896, + "step": 2841 + }, + { + "epoch": 0.42, + "learning_rate": 1.3019934638875565e-05, + "loss": 0.8813, + "step": 2842 + }, + { + "epoch": 0.42, + "learning_rate": 1.3015368182440878e-05, + "loss": 0.834, + "step": 2843 + }, + { + "epoch": 0.42, + "learning_rate": 1.3010801034231255e-05, + "loss": 0.8213, + "step": 2844 + }, + { + "epoch": 0.42, + "learning_rate": 1.3006233195294474e-05, + "loss": 0.8662, + "step": 2845 + }, + { + "epoch": 0.42, + "learning_rate": 1.3001664666678475e-05, + "loss": 0.8501, + "step": 2846 + }, + { + "epoch": 0.42, + "learning_rate": 1.2997095449431348e-05, + "loss": 0.8213, + "step": 2847 + }, + { + "epoch": 0.42, + "learning_rate": 1.299252554460135e-05, + "loss": 0.8589, + "step": 2848 + }, + { + "epoch": 0.42, + "learning_rate": 1.298795495323689e-05, + "loss": 0.8076, + "step": 2849 + }, + { + "epoch": 0.42, + "learning_rate": 1.298338367638654e-05, + "loss": 0.854, + "step": 2850 + }, + { + "epoch": 0.42, + "learning_rate": 1.2978811715099023e-05, + "loss": 0.8286, + "step": 2851 + }, + { + "epoch": 0.42, + "learning_rate": 1.2974239070423222e-05, + "loss": 0.834, + "step": 2852 + }, + { + "epoch": 0.42, + "learning_rate": 1.296966574340818e-05, + "loss": 0.6997, + "step": 2853 + }, + { + "epoch": 0.42, + "learning_rate": 1.296509173510309e-05, + "loss": 0.8608, + "step": 2854 + }, + { + "epoch": 0.42, + "learning_rate": 1.2960517046557304e-05, + "loss": 0.7734, + "step": 2855 + }, + { + "epoch": 0.42, + "learning_rate": 1.2955941678820332e-05, + "loss": 0.8975, + "step": 2856 + }, + { + "epoch": 0.42, + "learning_rate": 1.295136563294184e-05, + "loss": 0.8047, + "step": 2857 + }, + { + "epoch": 0.42, + "learning_rate": 1.2946788909971646e-05, + "loss": 0.7896, + "step": 2858 + }, + { + "epoch": 0.42, + "learning_rate": 1.2942211510959726e-05, + "loss": 0.8408, + "step": 2859 + }, + { + "epoch": 0.42, + "learning_rate": 1.2937633436956208e-05, + "loss": 0.8394, + "step": 2860 + }, + { + "epoch": 0.42, + "learning_rate": 1.293305468901138e-05, + "loss": 0.7637, + "step": 2861 + }, + { + "epoch": 0.42, + "learning_rate": 1.2928475268175683e-05, + "loss": 0.855, + "step": 2862 + }, + { + "epoch": 0.42, + "learning_rate": 1.292389517549971e-05, + "loss": 0.8682, + "step": 2863 + }, + { + "epoch": 0.42, + "learning_rate": 1.2919314412034207e-05, + "loss": 0.8438, + "step": 2864 + }, + { + "epoch": 0.42, + "learning_rate": 1.2914732978830077e-05, + "loss": 0.8267, + "step": 2865 + }, + { + "epoch": 0.42, + "learning_rate": 1.2910150876938381e-05, + "loss": 0.8755, + "step": 2866 + }, + { + "epoch": 0.42, + "learning_rate": 1.2905568107410325e-05, + "loss": 0.7661, + "step": 2867 + }, + { + "epoch": 0.42, + "learning_rate": 1.290098467129727e-05, + "loss": 0.895, + "step": 2868 + }, + { + "epoch": 0.42, + "learning_rate": 1.2896400569650734e-05, + "loss": 0.8682, + "step": 2869 + }, + { + "epoch": 0.42, + "learning_rate": 1.2891815803522378e-05, + "loss": 0.8442, + "step": 2870 + }, + { + "epoch": 0.42, + "learning_rate": 1.2887230373964034e-05, + "loss": 0.8506, + "step": 2871 + }, + { + "epoch": 0.42, + "learning_rate": 1.2882644282027668e-05, + "loss": 0.8335, + "step": 2872 + }, + { + "epoch": 0.42, + "learning_rate": 1.2878057528765407e-05, + "loss": 0.8145, + "step": 2873 + }, + { + "epoch": 0.43, + "learning_rate": 1.2873470115229523e-05, + "loss": 0.9297, + "step": 2874 + }, + { + "epoch": 0.43, + "learning_rate": 1.2868882042472448e-05, + "loss": 0.8867, + "step": 2875 + }, + { + "epoch": 0.43, + "learning_rate": 1.2864293311546762e-05, + "loss": 0.8374, + "step": 2876 + }, + { + "epoch": 0.43, + "learning_rate": 1.2859703923505194e-05, + "loss": 0.8477, + "step": 2877 + }, + { + "epoch": 0.43, + "learning_rate": 1.2855113879400625e-05, + "loss": 0.8926, + "step": 2878 + }, + { + "epoch": 0.43, + "learning_rate": 1.2850523180286084e-05, + "loss": 0.8867, + "step": 2879 + }, + { + "epoch": 0.43, + "learning_rate": 1.2845931827214755e-05, + "loss": 0.8486, + "step": 2880 + }, + { + "epoch": 0.43, + "learning_rate": 1.284133982123997e-05, + "loss": 0.8657, + "step": 2881 + }, + { + "epoch": 0.43, + "learning_rate": 1.283674716341521e-05, + "loss": 0.8677, + "step": 2882 + }, + { + "epoch": 0.43, + "learning_rate": 1.2832153854794105e-05, + "loss": 0.8613, + "step": 2883 + }, + { + "epoch": 0.43, + "learning_rate": 1.2827559896430437e-05, + "loss": 0.8853, + "step": 2884 + }, + { + "epoch": 0.43, + "learning_rate": 1.2822965289378134e-05, + "loss": 0.8662, + "step": 2885 + }, + { + "epoch": 0.43, + "learning_rate": 1.2818370034691277e-05, + "loss": 0.8345, + "step": 2886 + }, + { + "epoch": 0.43, + "learning_rate": 1.281377413342409e-05, + "loss": 0.9038, + "step": 2887 + }, + { + "epoch": 0.43, + "learning_rate": 1.2809177586630948e-05, + "loss": 0.8398, + "step": 2888 + }, + { + "epoch": 0.43, + "learning_rate": 1.2804580395366375e-05, + "loss": 0.8755, + "step": 2889 + }, + { + "epoch": 0.43, + "learning_rate": 1.2799982560685042e-05, + "loss": 0.8232, + "step": 2890 + }, + { + "epoch": 0.43, + "learning_rate": 1.279538408364177e-05, + "loss": 0.9043, + "step": 2891 + }, + { + "epoch": 0.43, + "learning_rate": 1.2790784965291522e-05, + "loss": 0.8779, + "step": 2892 + }, + { + "epoch": 0.43, + "learning_rate": 1.2786185206689412e-05, + "loss": 0.8325, + "step": 2893 + }, + { + "epoch": 0.43, + "learning_rate": 1.27815848088907e-05, + "loss": 0.853, + "step": 2894 + }, + { + "epoch": 0.43, + "learning_rate": 1.2776983772950797e-05, + "loss": 0.8359, + "step": 2895 + }, + { + "epoch": 0.43, + "learning_rate": 1.2772382099925248e-05, + "loss": 0.8711, + "step": 2896 + }, + { + "epoch": 0.43, + "learning_rate": 1.276777979086976e-05, + "loss": 0.8994, + "step": 2897 + }, + { + "epoch": 0.43, + "learning_rate": 1.276317684684017e-05, + "loss": 0.3552, + "step": 2898 + }, + { + "epoch": 0.43, + "learning_rate": 1.275857326889248e-05, + "loss": 0.7876, + "step": 2899 + }, + { + "epoch": 0.43, + "learning_rate": 1.2753969058082817e-05, + "loss": 0.8145, + "step": 2900 + }, + { + "epoch": 0.43, + "learning_rate": 1.2749364215467464e-05, + "loss": 0.8804, + "step": 2901 + }, + { + "epoch": 0.43, + "learning_rate": 1.274475874210285e-05, + "loss": 0.8833, + "step": 2902 + }, + { + "epoch": 0.43, + "learning_rate": 1.2740152639045546e-05, + "loss": 0.8223, + "step": 2903 + }, + { + "epoch": 0.43, + "learning_rate": 1.2735545907352264e-05, + "loss": 0.8818, + "step": 2904 + }, + { + "epoch": 0.43, + "learning_rate": 1.2730938548079873e-05, + "loss": 0.8301, + "step": 2905 + }, + { + "epoch": 0.43, + "learning_rate": 1.2726330562285362e-05, + "loss": 0.8672, + "step": 2906 + }, + { + "epoch": 0.43, + "learning_rate": 1.2721721951025892e-05, + "loss": 0.812, + "step": 2907 + }, + { + "epoch": 0.43, + "learning_rate": 1.2717112715358748e-05, + "loss": 0.8418, + "step": 2908 + }, + { + "epoch": 0.43, + "learning_rate": 1.2712502856341364e-05, + "loss": 0.7603, + "step": 2909 + }, + { + "epoch": 0.43, + "learning_rate": 1.270789237503132e-05, + "loss": 0.791, + "step": 2910 + }, + { + "epoch": 0.43, + "learning_rate": 1.2703281272486334e-05, + "loss": 0.8169, + "step": 2911 + }, + { + "epoch": 0.43, + "learning_rate": 1.2698669549764272e-05, + "loss": 0.897, + "step": 2912 + }, + { + "epoch": 0.43, + "learning_rate": 1.2694057207923134e-05, + "loss": 0.8086, + "step": 2913 + }, + { + "epoch": 0.43, + "learning_rate": 1.268944424802107e-05, + "loss": 0.8608, + "step": 2914 + }, + { + "epoch": 0.43, + "learning_rate": 1.2684830671116364e-05, + "loss": 0.7881, + "step": 2915 + }, + { + "epoch": 0.43, + "learning_rate": 1.2680216478267453e-05, + "loss": 0.8403, + "step": 2916 + }, + { + "epoch": 0.43, + "learning_rate": 1.2675601670532905e-05, + "loss": 0.8198, + "step": 2917 + }, + { + "epoch": 0.43, + "learning_rate": 1.2670986248971433e-05, + "loss": 0.8545, + "step": 2918 + }, + { + "epoch": 0.43, + "learning_rate": 1.266637021464189e-05, + "loss": 0.3522, + "step": 2919 + }, + { + "epoch": 0.43, + "learning_rate": 1.2661753568603273e-05, + "loss": 0.8311, + "step": 2920 + }, + { + "epoch": 0.43, + "learning_rate": 1.265713631191471e-05, + "loss": 0.875, + "step": 2921 + }, + { + "epoch": 0.43, + "learning_rate": 1.2652518445635479e-05, + "loss": 0.8579, + "step": 2922 + }, + { + "epoch": 0.43, + "learning_rate": 1.2647899970825e-05, + "loss": 0.8818, + "step": 2923 + }, + { + "epoch": 0.43, + "learning_rate": 1.2643280888542815e-05, + "loss": 0.8154, + "step": 2924 + }, + { + "epoch": 0.43, + "learning_rate": 1.263866119984863e-05, + "loss": 0.9102, + "step": 2925 + }, + { + "epoch": 0.43, + "learning_rate": 1.2634040905802267e-05, + "loss": 0.8335, + "step": 2926 + }, + { + "epoch": 0.43, + "learning_rate": 1.2629420007463705e-05, + "loss": 0.8506, + "step": 2927 + }, + { + "epoch": 0.43, + "learning_rate": 1.2624798505893048e-05, + "loss": 0.8267, + "step": 2928 + }, + { + "epoch": 0.43, + "learning_rate": 1.2620176402150546e-05, + "loss": 0.8115, + "step": 2929 + }, + { + "epoch": 0.43, + "learning_rate": 1.2615553697296585e-05, + "loss": 0.9009, + "step": 2930 + }, + { + "epoch": 0.43, + "learning_rate": 1.2610930392391694e-05, + "loss": 0.8574, + "step": 2931 + }, + { + "epoch": 0.43, + "learning_rate": 1.2606306488496526e-05, + "loss": 0.79, + "step": 2932 + }, + { + "epoch": 0.43, + "learning_rate": 1.260168198667189e-05, + "loss": 0.8228, + "step": 2933 + }, + { + "epoch": 0.43, + "learning_rate": 1.2597056887978718e-05, + "loss": 0.8315, + "step": 2934 + }, + { + "epoch": 0.43, + "learning_rate": 1.2592431193478085e-05, + "loss": 0.3303, + "step": 2935 + }, + { + "epoch": 0.43, + "learning_rate": 1.2587804904231198e-05, + "loss": 0.8042, + "step": 2936 + }, + { + "epoch": 0.43, + "learning_rate": 1.2583178021299407e-05, + "loss": 0.8594, + "step": 2937 + }, + { + "epoch": 0.43, + "learning_rate": 1.257855054574419e-05, + "loss": 0.8696, + "step": 2938 + }, + { + "epoch": 0.43, + "learning_rate": 1.2573922478627173e-05, + "loss": 0.8535, + "step": 2939 + }, + { + "epoch": 0.43, + "learning_rate": 1.2569293821010109e-05, + "loss": 0.8423, + "step": 2940 + }, + { + "epoch": 0.43, + "learning_rate": 1.2564664573954883e-05, + "loss": 0.8286, + "step": 2941 + }, + { + "epoch": 0.44, + "learning_rate": 1.2560034738523524e-05, + "loss": 0.832, + "step": 2942 + }, + { + "epoch": 0.44, + "learning_rate": 1.255540431577819e-05, + "loss": 0.7871, + "step": 2943 + }, + { + "epoch": 0.44, + "learning_rate": 1.2550773306781181e-05, + "loss": 0.8765, + "step": 2944 + }, + { + "epoch": 0.44, + "learning_rate": 1.254614171259492e-05, + "loss": 0.8232, + "step": 2945 + }, + { + "epoch": 0.44, + "learning_rate": 1.2541509534281974e-05, + "loss": 0.8062, + "step": 2946 + }, + { + "epoch": 0.44, + "learning_rate": 1.253687677290504e-05, + "loss": 0.8608, + "step": 2947 + }, + { + "epoch": 0.44, + "learning_rate": 1.2532243429526951e-05, + "loss": 0.8247, + "step": 2948 + }, + { + "epoch": 0.44, + "learning_rate": 1.252760950521067e-05, + "loss": 0.8877, + "step": 2949 + }, + { + "epoch": 0.44, + "learning_rate": 1.2522975001019298e-05, + "loss": 0.8828, + "step": 2950 + }, + { + "epoch": 0.44, + "learning_rate": 1.251833991801606e-05, + "loss": 0.8193, + "step": 2951 + }, + { + "epoch": 0.44, + "learning_rate": 1.2513704257264327e-05, + "loss": 0.8569, + "step": 2952 + }, + { + "epoch": 0.44, + "learning_rate": 1.2509068019827592e-05, + "loss": 0.9185, + "step": 2953 + }, + { + "epoch": 0.44, + "learning_rate": 1.2504431206769487e-05, + "loss": 0.8584, + "step": 2954 + }, + { + "epoch": 0.44, + "learning_rate": 1.2499793819153772e-05, + "loss": 0.8257, + "step": 2955 + }, + { + "epoch": 0.44, + "learning_rate": 1.2495155858044332e-05, + "loss": 0.8726, + "step": 2956 + }, + { + "epoch": 0.44, + "learning_rate": 1.2490517324505205e-05, + "loss": 0.8247, + "step": 2957 + }, + { + "epoch": 0.44, + "learning_rate": 1.2485878219600537e-05, + "loss": 0.8511, + "step": 2958 + }, + { + "epoch": 0.44, + "learning_rate": 1.248123854439462e-05, + "loss": 0.8721, + "step": 2959 + }, + { + "epoch": 0.44, + "learning_rate": 1.2476598299951866e-05, + "loss": 0.8433, + "step": 2960 + }, + { + "epoch": 0.44, + "learning_rate": 1.247195748733683e-05, + "loss": 0.8516, + "step": 2961 + }, + { + "epoch": 0.44, + "learning_rate": 1.2467316107614185e-05, + "loss": 0.8525, + "step": 2962 + }, + { + "epoch": 0.44, + "learning_rate": 1.2462674161848742e-05, + "loss": 0.811, + "step": 2963 + }, + { + "epoch": 0.44, + "learning_rate": 1.245803165110544e-05, + "loss": 0.8367, + "step": 2964 + }, + { + "epoch": 0.44, + "learning_rate": 1.2453388576449343e-05, + "loss": 0.7827, + "step": 2965 + }, + { + "epoch": 0.44, + "learning_rate": 1.2448744938945656e-05, + "loss": 0.7959, + "step": 2966 + }, + { + "epoch": 0.44, + "learning_rate": 1.2444100739659702e-05, + "loss": 0.8638, + "step": 2967 + }, + { + "epoch": 0.44, + "learning_rate": 1.2439455979656931e-05, + "loss": 0.3643, + "step": 2968 + }, + { + "epoch": 0.44, + "learning_rate": 1.2434810660002937e-05, + "loss": 0.8413, + "step": 2969 + }, + { + "epoch": 0.44, + "learning_rate": 1.2430164781763422e-05, + "loss": 0.8394, + "step": 2970 + }, + { + "epoch": 0.44, + "learning_rate": 1.2425518346004237e-05, + "loss": 0.8394, + "step": 2971 + }, + { + "epoch": 0.44, + "learning_rate": 1.2420871353791348e-05, + "loss": 0.318, + "step": 2972 + }, + { + "epoch": 0.44, + "learning_rate": 1.2416223806190846e-05, + "loss": 0.8413, + "step": 2973 + }, + { + "epoch": 0.44, + "learning_rate": 1.2411575704268957e-05, + "loss": 0.8457, + "step": 2974 + }, + { + "epoch": 0.44, + "learning_rate": 1.2406927049092034e-05, + "loss": 0.8535, + "step": 2975 + }, + { + "epoch": 0.44, + "learning_rate": 1.2402277841726555e-05, + "loss": 0.8735, + "step": 2976 + }, + { + "epoch": 0.44, + "learning_rate": 1.2397628083239122e-05, + "loss": 0.8193, + "step": 2977 + }, + { + "epoch": 0.44, + "learning_rate": 1.2392977774696466e-05, + "loss": 0.8457, + "step": 2978 + }, + { + "epoch": 0.44, + "learning_rate": 1.2388326917165445e-05, + "loss": 0.8311, + "step": 2979 + }, + { + "epoch": 0.44, + "learning_rate": 1.2383675511713045e-05, + "loss": 0.3267, + "step": 2980 + }, + { + "epoch": 0.44, + "learning_rate": 1.2379023559406368e-05, + "loss": 0.8662, + "step": 2981 + }, + { + "epoch": 0.44, + "learning_rate": 1.2374371061312655e-05, + "loss": 0.8477, + "step": 2982 + }, + { + "epoch": 0.44, + "learning_rate": 1.236971801849926e-05, + "loss": 0.8389, + "step": 2983 + }, + { + "epoch": 0.44, + "learning_rate": 1.2365064432033674e-05, + "loss": 0.8359, + "step": 2984 + }, + { + "epoch": 0.44, + "learning_rate": 1.2360410302983497e-05, + "loss": 0.8203, + "step": 2985 + }, + { + "epoch": 0.44, + "learning_rate": 1.2355755632416469e-05, + "loss": 0.8521, + "step": 2986 + }, + { + "epoch": 0.44, + "learning_rate": 1.2351100421400444e-05, + "loss": 0.8413, + "step": 2987 + }, + { + "epoch": 0.44, + "learning_rate": 1.2346444671003408e-05, + "loss": 0.874, + "step": 2988 + }, + { + "epoch": 0.44, + "learning_rate": 1.2341788382293467e-05, + "loss": 0.875, + "step": 2989 + }, + { + "epoch": 0.44, + "learning_rate": 1.2337131556338843e-05, + "loss": 0.8594, + "step": 2990 + }, + { + "epoch": 0.44, + "learning_rate": 1.2332474194207895e-05, + "loss": 0.814, + "step": 2991 + }, + { + "epoch": 0.44, + "learning_rate": 1.2327816296969095e-05, + "loss": 0.855, + "step": 2992 + }, + { + "epoch": 0.44, + "learning_rate": 1.2323157865691045e-05, + "loss": 0.8809, + "step": 2993 + }, + { + "epoch": 0.44, + "learning_rate": 1.2318498901442461e-05, + "loss": 0.8784, + "step": 2994 + }, + { + "epoch": 0.44, + "learning_rate": 1.231383940529219e-05, + "loss": 0.8645, + "step": 2995 + }, + { + "epoch": 0.44, + "learning_rate": 1.2309179378309188e-05, + "loss": 0.9399, + "step": 2996 + }, + { + "epoch": 0.44, + "learning_rate": 1.2304518821562554e-05, + "loss": 0.8994, + "step": 2997 + }, + { + "epoch": 0.44, + "learning_rate": 1.2299857736121487e-05, + "loss": 0.855, + "step": 2998 + }, + { + "epoch": 0.44, + "learning_rate": 1.2295196123055325e-05, + "loss": 0.9102, + "step": 2999 + }, + { + "epoch": 0.44, + "learning_rate": 1.2290533983433508e-05, + "loss": 0.9121, + "step": 3000 + }, + { + "epoch": 0.44, + "learning_rate": 1.2285871318325611e-05, + "loss": 0.8931, + "step": 3001 + }, + { + "epoch": 0.44, + "learning_rate": 1.2281208128801331e-05, + "loss": 0.8652, + "step": 3002 + }, + { + "epoch": 0.44, + "learning_rate": 1.2276544415930476e-05, + "loss": 0.9165, + "step": 3003 + }, + { + "epoch": 0.44, + "learning_rate": 1.227188018078298e-05, + "loss": 0.9033, + "step": 3004 + }, + { + "epoch": 0.44, + "learning_rate": 1.2267215424428894e-05, + "loss": 0.8486, + "step": 3005 + }, + { + "epoch": 0.44, + "learning_rate": 1.2262550147938389e-05, + "loss": 0.875, + "step": 3006 + }, + { + "epoch": 0.44, + "learning_rate": 1.2257884352381755e-05, + "loss": 0.8276, + "step": 3007 + }, + { + "epoch": 0.44, + "learning_rate": 1.2253218038829404e-05, + "loss": 0.8999, + "step": 3008 + }, + { + "epoch": 0.44, + "learning_rate": 1.2248551208351866e-05, + "loss": 0.855, + "step": 3009 + }, + { + "epoch": 0.45, + "learning_rate": 1.2243883862019787e-05, + "loss": 0.8901, + "step": 3010 + }, + { + "epoch": 0.45, + "learning_rate": 1.223921600090393e-05, + "loss": 0.897, + "step": 3011 + }, + { + "epoch": 0.45, + "learning_rate": 1.2234547626075185e-05, + "loss": 0.8979, + "step": 3012 + }, + { + "epoch": 0.45, + "learning_rate": 1.222987873860455e-05, + "loss": 0.897, + "step": 3013 + }, + { + "epoch": 0.45, + "learning_rate": 1.2225209339563144e-05, + "loss": 0.8325, + "step": 3014 + }, + { + "epoch": 0.45, + "learning_rate": 1.2220539430022206e-05, + "loss": 0.856, + "step": 3015 + }, + { + "epoch": 0.45, + "learning_rate": 1.221586901105309e-05, + "loss": 0.7322, + "step": 3016 + }, + { + "epoch": 0.45, + "learning_rate": 1.2211198083727262e-05, + "loss": 0.8779, + "step": 3017 + }, + { + "epoch": 0.45, + "learning_rate": 1.2206526649116315e-05, + "loss": 0.8008, + "step": 3018 + }, + { + "epoch": 0.45, + "learning_rate": 1.2201854708291949e-05, + "loss": 0.8081, + "step": 3019 + }, + { + "epoch": 0.45, + "learning_rate": 1.2197182262325987e-05, + "loss": 0.3074, + "step": 3020 + }, + { + "epoch": 0.45, + "learning_rate": 1.2192509312290362e-05, + "loss": 0.8896, + "step": 3021 + }, + { + "epoch": 0.45, + "learning_rate": 1.2187835859257126e-05, + "loss": 0.8193, + "step": 3022 + }, + { + "epoch": 0.45, + "learning_rate": 1.2183161904298447e-05, + "loss": 0.8013, + "step": 3023 + }, + { + "epoch": 0.45, + "learning_rate": 1.2178487448486607e-05, + "loss": 0.8457, + "step": 3024 + }, + { + "epoch": 0.45, + "learning_rate": 1.2173812492894001e-05, + "loss": 0.8945, + "step": 3025 + }, + { + "epoch": 0.45, + "learning_rate": 1.2169137038593142e-05, + "loss": 0.3496, + "step": 3026 + }, + { + "epoch": 0.45, + "learning_rate": 1.2164461086656656e-05, + "loss": 0.9067, + "step": 3027 + }, + { + "epoch": 0.45, + "learning_rate": 1.2159784638157282e-05, + "loss": 0.8784, + "step": 3028 + }, + { + "epoch": 0.45, + "learning_rate": 1.2155107694167875e-05, + "loss": 0.9009, + "step": 3029 + }, + { + "epoch": 0.45, + "learning_rate": 1.2150430255761402e-05, + "loss": 0.8735, + "step": 3030 + }, + { + "epoch": 0.45, + "learning_rate": 1.2145752324010948e-05, + "loss": 0.8936, + "step": 3031 + }, + { + "epoch": 0.45, + "learning_rate": 1.2141073899989699e-05, + "loss": 0.8672, + "step": 3032 + }, + { + "epoch": 0.45, + "learning_rate": 1.2136394984770967e-05, + "loss": 0.8657, + "step": 3033 + }, + { + "epoch": 0.45, + "learning_rate": 1.2131715579428175e-05, + "loss": 0.8682, + "step": 3034 + }, + { + "epoch": 0.45, + "learning_rate": 1.2127035685034852e-05, + "loss": 0.8857, + "step": 3035 + }, + { + "epoch": 0.45, + "learning_rate": 1.2122355302664643e-05, + "loss": 0.8198, + "step": 3036 + }, + { + "epoch": 0.45, + "learning_rate": 1.2117674433391302e-05, + "loss": 0.8315, + "step": 3037 + }, + { + "epoch": 0.45, + "learning_rate": 1.2112993078288702e-05, + "loss": 0.3191, + "step": 3038 + }, + { + "epoch": 0.45, + "learning_rate": 1.2108311238430819e-05, + "loss": 0.8848, + "step": 3039 + }, + { + "epoch": 0.45, + "learning_rate": 1.2103628914891747e-05, + "loss": 0.8188, + "step": 3040 + }, + { + "epoch": 0.45, + "learning_rate": 1.2098946108745682e-05, + "loss": 0.9067, + "step": 3041 + }, + { + "epoch": 0.45, + "learning_rate": 1.2094262821066944e-05, + "loss": 0.877, + "step": 3042 + }, + { + "epoch": 0.45, + "learning_rate": 1.2089579052929952e-05, + "loss": 0.3726, + "step": 3043 + }, + { + "epoch": 0.45, + "learning_rate": 1.2084894805409242e-05, + "loss": 0.9238, + "step": 3044 + }, + { + "epoch": 0.45, + "learning_rate": 1.2080210079579452e-05, + "loss": 0.8071, + "step": 3045 + }, + { + "epoch": 0.45, + "learning_rate": 1.2075524876515339e-05, + "loss": 0.8433, + "step": 3046 + }, + { + "epoch": 0.45, + "learning_rate": 1.2070839197291764e-05, + "loss": 0.8438, + "step": 3047 + }, + { + "epoch": 0.45, + "learning_rate": 1.20661530429837e-05, + "loss": 0.9468, + "step": 3048 + }, + { + "epoch": 0.45, + "learning_rate": 1.2061466414666228e-05, + "loss": 0.8018, + "step": 3049 + }, + { + "epoch": 0.45, + "learning_rate": 1.2056779313414536e-05, + "loss": 0.8364, + "step": 3050 + }, + { + "epoch": 0.45, + "learning_rate": 1.2052091740303919e-05, + "loss": 0.856, + "step": 3051 + }, + { + "epoch": 0.45, + "learning_rate": 1.2047403696409787e-05, + "loss": 0.8311, + "step": 3052 + }, + { + "epoch": 0.45, + "learning_rate": 1.2042715182807659e-05, + "loss": 0.8467, + "step": 3053 + }, + { + "epoch": 0.45, + "learning_rate": 1.2038026200573148e-05, + "loss": 0.3369, + "step": 3054 + }, + { + "epoch": 0.45, + "learning_rate": 1.2033336750781985e-05, + "loss": 0.8662, + "step": 3055 + }, + { + "epoch": 0.45, + "learning_rate": 1.2028646834510012e-05, + "loss": 0.8369, + "step": 3056 + }, + { + "epoch": 0.45, + "learning_rate": 1.202395645283317e-05, + "loss": 0.9014, + "step": 3057 + }, + { + "epoch": 0.45, + "learning_rate": 1.2019265606827507e-05, + "loss": 0.8315, + "step": 3058 + }, + { + "epoch": 0.45, + "learning_rate": 1.2014574297569182e-05, + "loss": 0.3065, + "step": 3059 + }, + { + "epoch": 0.45, + "learning_rate": 1.200988252613446e-05, + "loss": 0.8604, + "step": 3060 + }, + { + "epoch": 0.45, + "learning_rate": 1.200519029359971e-05, + "loss": 0.8462, + "step": 3061 + }, + { + "epoch": 0.45, + "learning_rate": 1.2000497601041401e-05, + "loss": 0.8076, + "step": 3062 + }, + { + "epoch": 0.45, + "learning_rate": 1.1995804449536122e-05, + "loss": 0.8662, + "step": 3063 + }, + { + "epoch": 0.45, + "learning_rate": 1.1991110840160554e-05, + "loss": 0.8735, + "step": 3064 + }, + { + "epoch": 0.45, + "learning_rate": 1.198641677399149e-05, + "loss": 0.8623, + "step": 3065 + }, + { + "epoch": 0.45, + "learning_rate": 1.1981722252105827e-05, + "loss": 0.7993, + "step": 3066 + }, + { + "epoch": 0.45, + "learning_rate": 1.1977027275580561e-05, + "loss": 0.8662, + "step": 3067 + }, + { + "epoch": 0.45, + "learning_rate": 1.1972331845492801e-05, + "loss": 0.8418, + "step": 3068 + }, + { + "epoch": 0.45, + "learning_rate": 1.1967635962919754e-05, + "loss": 0.7881, + "step": 3069 + }, + { + "epoch": 0.45, + "learning_rate": 1.1962939628938735e-05, + "loss": 0.8213, + "step": 3070 + }, + { + "epoch": 0.45, + "learning_rate": 1.1958242844627155e-05, + "loss": 0.8218, + "step": 3071 + }, + { + "epoch": 0.45, + "learning_rate": 1.1953545611062536e-05, + "loss": 0.8042, + "step": 3072 + }, + { + "epoch": 0.45, + "learning_rate": 1.1948847929322498e-05, + "loss": 0.9072, + "step": 3073 + }, + { + "epoch": 0.45, + "learning_rate": 1.1944149800484774e-05, + "loss": 0.8115, + "step": 3074 + }, + { + "epoch": 0.45, + "learning_rate": 1.1939451225627184e-05, + "loss": 0.8892, + "step": 3075 + }, + { + "epoch": 0.45, + "learning_rate": 1.1934752205827663e-05, + "loss": 0.8164, + "step": 3076 + }, + { + "epoch": 0.46, + "learning_rate": 1.1930052742164234e-05, + "loss": 0.8486, + "step": 3077 + }, + { + "epoch": 0.46, + "learning_rate": 1.1925352835715045e-05, + "loss": 0.8579, + "step": 3078 + }, + { + "epoch": 0.46, + "learning_rate": 1.1920652487558322e-05, + "loss": 0.8496, + "step": 3079 + }, + { + "epoch": 0.46, + "learning_rate": 1.1915951698772403e-05, + "loss": 0.8823, + "step": 3080 + }, + { + "epoch": 0.46, + "learning_rate": 1.1911250470435731e-05, + "loss": 0.8657, + "step": 3081 + }, + { + "epoch": 0.46, + "learning_rate": 1.1906548803626839e-05, + "loss": 0.9292, + "step": 3082 + }, + { + "epoch": 0.46, + "learning_rate": 1.1901846699424374e-05, + "loss": 0.3149, + "step": 3083 + }, + { + "epoch": 0.46, + "learning_rate": 1.1897144158907066e-05, + "loss": 0.8892, + "step": 3084 + }, + { + "epoch": 0.46, + "learning_rate": 1.1892441183153762e-05, + "loss": 0.8057, + "step": 3085 + }, + { + "epoch": 0.46, + "learning_rate": 1.18877377732434e-05, + "loss": 0.8223, + "step": 3086 + }, + { + "epoch": 0.46, + "learning_rate": 1.1883033930255018e-05, + "loss": 0.8428, + "step": 3087 + }, + { + "epoch": 0.46, + "learning_rate": 1.1878329655267758e-05, + "loss": 0.8574, + "step": 3088 + }, + { + "epoch": 0.46, + "learning_rate": 1.1873624949360853e-05, + "loss": 0.811, + "step": 3089 + }, + { + "epoch": 0.46, + "learning_rate": 1.1868919813613645e-05, + "loss": 0.8105, + "step": 3090 + }, + { + "epoch": 0.46, + "learning_rate": 1.1864214249105565e-05, + "loss": 0.2864, + "step": 3091 + }, + { + "epoch": 0.46, + "learning_rate": 1.185950825691615e-05, + "loss": 0.855, + "step": 3092 + }, + { + "epoch": 0.46, + "learning_rate": 1.1854801838125032e-05, + "loss": 0.855, + "step": 3093 + }, + { + "epoch": 0.46, + "learning_rate": 1.1850094993811936e-05, + "loss": 0.7344, + "step": 3094 + }, + { + "epoch": 0.46, + "learning_rate": 1.1845387725056694e-05, + "loss": 0.7959, + "step": 3095 + }, + { + "epoch": 0.46, + "learning_rate": 1.1840680032939226e-05, + "loss": 0.9033, + "step": 3096 + }, + { + "epoch": 0.46, + "learning_rate": 1.1835971918539562e-05, + "loss": 0.854, + "step": 3097 + }, + { + "epoch": 0.46, + "learning_rate": 1.1831263382937814e-05, + "loss": 0.7764, + "step": 3098 + }, + { + "epoch": 0.46, + "learning_rate": 1.1826554427214198e-05, + "loss": 0.7266, + "step": 3099 + }, + { + "epoch": 0.46, + "learning_rate": 1.1821845052449026e-05, + "loss": 0.8452, + "step": 3100 + }, + { + "epoch": 0.46, + "learning_rate": 1.1817135259722707e-05, + "loss": 0.8262, + "step": 3101 + }, + { + "epoch": 0.46, + "learning_rate": 1.1812425050115749e-05, + "loss": 0.8901, + "step": 3102 + }, + { + "epoch": 0.46, + "learning_rate": 1.180771442470874e-05, + "loss": 0.8926, + "step": 3103 + }, + { + "epoch": 0.46, + "learning_rate": 1.1803003384582382e-05, + "loss": 0.8726, + "step": 3104 + }, + { + "epoch": 0.46, + "learning_rate": 1.1798291930817468e-05, + "loss": 0.835, + "step": 3105 + }, + { + "epoch": 0.46, + "learning_rate": 1.1793580064494878e-05, + "loss": 0.8779, + "step": 3106 + }, + { + "epoch": 0.46, + "learning_rate": 1.178886778669559e-05, + "loss": 0.853, + "step": 3107 + }, + { + "epoch": 0.46, + "learning_rate": 1.1784155098500682e-05, + "loss": 0.8618, + "step": 3108 + }, + { + "epoch": 0.46, + "learning_rate": 1.1779442000991321e-05, + "loss": 0.8525, + "step": 3109 + }, + { + "epoch": 0.46, + "learning_rate": 1.177472849524877e-05, + "loss": 0.9023, + "step": 3110 + }, + { + "epoch": 0.46, + "learning_rate": 1.1770014582354378e-05, + "loss": 0.8701, + "step": 3111 + }, + { + "epoch": 0.46, + "learning_rate": 1.17653002633896e-05, + "loss": 0.8706, + "step": 3112 + }, + { + "epoch": 0.46, + "learning_rate": 1.176058553943598e-05, + "loss": 0.8462, + "step": 3113 + }, + { + "epoch": 0.46, + "learning_rate": 1.1755870411575147e-05, + "loss": 0.8857, + "step": 3114 + }, + { + "epoch": 0.46, + "learning_rate": 1.1751154880888835e-05, + "loss": 0.936, + "step": 3115 + }, + { + "epoch": 0.46, + "learning_rate": 1.1746438948458858e-05, + "loss": 0.8618, + "step": 3116 + }, + { + "epoch": 0.46, + "learning_rate": 1.1741722615367132e-05, + "loss": 0.3625, + "step": 3117 + }, + { + "epoch": 0.46, + "learning_rate": 1.1737005882695658e-05, + "loss": 0.8091, + "step": 3118 + }, + { + "epoch": 0.46, + "learning_rate": 1.1732288751526537e-05, + "loss": 0.9121, + "step": 3119 + }, + { + "epoch": 0.46, + "learning_rate": 1.1727571222941952e-05, + "loss": 0.7036, + "step": 3120 + }, + { + "epoch": 0.46, + "learning_rate": 1.1722853298024184e-05, + "loss": 0.7686, + "step": 3121 + }, + { + "epoch": 0.46, + "learning_rate": 1.17181349778556e-05, + "loss": 0.8608, + "step": 3122 + }, + { + "epoch": 0.46, + "learning_rate": 1.1713416263518663e-05, + "loss": 0.8613, + "step": 3123 + }, + { + "epoch": 0.46, + "learning_rate": 1.1708697156095918e-05, + "loss": 0.8237, + "step": 3124 + }, + { + "epoch": 0.46, + "learning_rate": 1.1703977656670014e-05, + "loss": 0.8545, + "step": 3125 + }, + { + "epoch": 0.46, + "learning_rate": 1.1699257766323673e-05, + "loss": 0.8691, + "step": 3126 + }, + { + "epoch": 0.46, + "learning_rate": 1.169453748613972e-05, + "loss": 0.8384, + "step": 3127 + }, + { + "epoch": 0.46, + "learning_rate": 1.1689816817201065e-05, + "loss": 0.8105, + "step": 3128 + }, + { + "epoch": 0.46, + "learning_rate": 1.1685095760590706e-05, + "loss": 0.3374, + "step": 3129 + }, + { + "epoch": 0.46, + "learning_rate": 1.1680374317391734e-05, + "loss": 0.8623, + "step": 3130 + }, + { + "epoch": 0.46, + "learning_rate": 1.1675652488687318e-05, + "loss": 0.8335, + "step": 3131 + }, + { + "epoch": 0.46, + "learning_rate": 1.1670930275560728e-05, + "loss": 0.8452, + "step": 3132 + }, + { + "epoch": 0.46, + "learning_rate": 1.1666207679095316e-05, + "loss": 0.9067, + "step": 3133 + }, + { + "epoch": 0.46, + "learning_rate": 1.1661484700374528e-05, + "loss": 0.306, + "step": 3134 + }, + { + "epoch": 0.46, + "learning_rate": 1.1656761340481883e-05, + "loss": 0.363, + "step": 3135 + }, + { + "epoch": 0.46, + "learning_rate": 1.1652037600501007e-05, + "loss": 0.8486, + "step": 3136 + }, + { + "epoch": 0.46, + "learning_rate": 1.1647313481515596e-05, + "loss": 0.325, + "step": 3137 + }, + { + "epoch": 0.46, + "learning_rate": 1.1642588984609446e-05, + "loss": 0.8252, + "step": 3138 + }, + { + "epoch": 0.46, + "learning_rate": 1.163786411086643e-05, + "loss": 0.856, + "step": 3139 + }, + { + "epoch": 0.46, + "learning_rate": 1.1633138861370512e-05, + "loss": 0.8823, + "step": 3140 + }, + { + "epoch": 0.46, + "learning_rate": 1.1628413237205745e-05, + "loss": 0.8257, + "step": 3141 + }, + { + "epoch": 0.46, + "learning_rate": 1.1623687239456265e-05, + "loss": 0.8394, + "step": 3142 + }, + { + "epoch": 0.46, + "learning_rate": 1.1618960869206287e-05, + "loss": 0.9087, + "step": 3143 + }, + { + "epoch": 0.46, + "learning_rate": 1.1614234127540122e-05, + "loss": 0.8472, + "step": 3144 + }, + { + "epoch": 0.47, + "learning_rate": 1.1609507015542166e-05, + "loss": 0.8306, + "step": 3145 + }, + { + "epoch": 0.47, + "learning_rate": 1.1604779534296888e-05, + "loss": 0.897, + "step": 3146 + }, + { + "epoch": 0.47, + "learning_rate": 1.1600051684888858e-05, + "loss": 0.8257, + "step": 3147 + }, + { + "epoch": 0.47, + "learning_rate": 1.1595323468402715e-05, + "loss": 0.8491, + "step": 3148 + }, + { + "epoch": 0.47, + "learning_rate": 1.1590594885923192e-05, + "loss": 0.8809, + "step": 3149 + }, + { + "epoch": 0.47, + "learning_rate": 1.1585865938535106e-05, + "loss": 0.8452, + "step": 3150 + }, + { + "epoch": 0.47, + "learning_rate": 1.1581136627323355e-05, + "loss": 0.8599, + "step": 3151 + }, + { + "epoch": 0.47, + "learning_rate": 1.1576406953372916e-05, + "loss": 0.8418, + "step": 3152 + }, + { + "epoch": 0.47, + "learning_rate": 1.1571676917768859e-05, + "loss": 0.7678, + "step": 3153 + }, + { + "epoch": 0.47, + "learning_rate": 1.1566946521596323e-05, + "loss": 0.8818, + "step": 3154 + }, + { + "epoch": 0.47, + "learning_rate": 1.1562215765940553e-05, + "loss": 0.8315, + "step": 3155 + }, + { + "epoch": 0.47, + "learning_rate": 1.1557484651886848e-05, + "loss": 0.8716, + "step": 3156 + }, + { + "epoch": 0.47, + "learning_rate": 1.1552753180520612e-05, + "loss": 0.8501, + "step": 3157 + }, + { + "epoch": 0.47, + "learning_rate": 1.1548021352927317e-05, + "loss": 0.7852, + "step": 3158 + }, + { + "epoch": 0.47, + "learning_rate": 1.1543289170192524e-05, + "loss": 0.7979, + "step": 3159 + }, + { + "epoch": 0.47, + "learning_rate": 1.1538556633401876e-05, + "loss": 0.8359, + "step": 3160 + }, + { + "epoch": 0.47, + "learning_rate": 1.1533823743641091e-05, + "loss": 0.8257, + "step": 3161 + }, + { + "epoch": 0.47, + "learning_rate": 1.1529090501995972e-05, + "loss": 0.7209, + "step": 3162 + }, + { + "epoch": 0.47, + "learning_rate": 1.15243569095524e-05, + "loss": 0.8911, + "step": 3163 + }, + { + "epoch": 0.47, + "learning_rate": 1.1519622967396347e-05, + "loss": 0.8911, + "step": 3164 + }, + { + "epoch": 0.47, + "learning_rate": 1.151488867661385e-05, + "loss": 0.3517, + "step": 3165 + }, + { + "epoch": 0.47, + "learning_rate": 1.1510154038291035e-05, + "loss": 0.854, + "step": 3166 + }, + { + "epoch": 0.47, + "learning_rate": 1.1505419053514102e-05, + "loss": 0.8311, + "step": 3167 + }, + { + "epoch": 0.47, + "learning_rate": 1.1500683723369339e-05, + "loss": 0.8359, + "step": 3168 + }, + { + "epoch": 0.47, + "learning_rate": 1.1495948048943108e-05, + "loss": 0.8667, + "step": 3169 + }, + { + "epoch": 0.47, + "learning_rate": 1.149121203132185e-05, + "loss": 0.856, + "step": 3170 + }, + { + "epoch": 0.47, + "learning_rate": 1.1486475671592084e-05, + "loss": 0.8198, + "step": 3171 + }, + { + "epoch": 0.47, + "learning_rate": 1.1481738970840409e-05, + "loss": 0.835, + "step": 3172 + }, + { + "epoch": 0.47, + "learning_rate": 1.14770019301535e-05, + "loss": 0.8286, + "step": 3173 + }, + { + "epoch": 0.47, + "learning_rate": 1.1472264550618118e-05, + "loss": 0.8267, + "step": 3174 + }, + { + "epoch": 0.47, + "learning_rate": 1.1467526833321087e-05, + "loss": 0.8286, + "step": 3175 + }, + { + "epoch": 0.47, + "learning_rate": 1.1462788779349323e-05, + "loss": 0.8066, + "step": 3176 + }, + { + "epoch": 0.47, + "learning_rate": 1.1458050389789812e-05, + "loss": 0.8232, + "step": 3177 + }, + { + "epoch": 0.47, + "learning_rate": 1.1453311665729618e-05, + "loss": 0.3433, + "step": 3178 + }, + { + "epoch": 0.47, + "learning_rate": 1.1448572608255885e-05, + "loss": 0.8848, + "step": 3179 + }, + { + "epoch": 0.47, + "learning_rate": 1.1443833218455824e-05, + "loss": 0.832, + "step": 3180 + }, + { + "epoch": 0.47, + "learning_rate": 1.1439093497416735e-05, + "loss": 0.8105, + "step": 3181 + }, + { + "epoch": 0.47, + "learning_rate": 1.1434353446225983e-05, + "loss": 0.7964, + "step": 3182 + }, + { + "epoch": 0.47, + "learning_rate": 1.142961306597102e-05, + "loss": 0.8218, + "step": 3183 + }, + { + "epoch": 0.47, + "learning_rate": 1.142487235773936e-05, + "loss": 0.8481, + "step": 3184 + }, + { + "epoch": 0.47, + "learning_rate": 1.1420131322618601e-05, + "loss": 0.8633, + "step": 3185 + }, + { + "epoch": 0.47, + "learning_rate": 1.1415389961696416e-05, + "loss": 0.3457, + "step": 3186 + }, + { + "epoch": 0.47, + "learning_rate": 1.1410648276060553e-05, + "loss": 0.8306, + "step": 3187 + }, + { + "epoch": 0.47, + "learning_rate": 1.1405906266798827e-05, + "loss": 0.8418, + "step": 3188 + }, + { + "epoch": 0.47, + "learning_rate": 1.140116393499914e-05, + "loss": 0.874, + "step": 3189 + }, + { + "epoch": 0.47, + "learning_rate": 1.139642128174945e-05, + "loss": 0.8164, + "step": 3190 + }, + { + "epoch": 0.47, + "learning_rate": 1.1391678308137807e-05, + "loss": 0.8149, + "step": 3191 + }, + { + "epoch": 0.47, + "learning_rate": 1.138693501525233e-05, + "loss": 0.8545, + "step": 3192 + }, + { + "epoch": 0.47, + "learning_rate": 1.1382191404181201e-05, + "loss": 0.8315, + "step": 3193 + }, + { + "epoch": 0.47, + "learning_rate": 1.1377447476012686e-05, + "loss": 0.8438, + "step": 3194 + }, + { + "epoch": 0.47, + "learning_rate": 1.1372703231835119e-05, + "loss": 0.853, + "step": 3195 + }, + { + "epoch": 0.47, + "learning_rate": 1.1367958672736908e-05, + "loss": 0.9263, + "step": 3196 + }, + { + "epoch": 0.47, + "learning_rate": 1.1363213799806531e-05, + "loss": 0.7393, + "step": 3197 + }, + { + "epoch": 0.47, + "learning_rate": 1.1358468614132543e-05, + "loss": 0.7637, + "step": 3198 + }, + { + "epoch": 0.47, + "learning_rate": 1.135372311680356e-05, + "loss": 0.8169, + "step": 3199 + }, + { + "epoch": 0.47, + "learning_rate": 1.1348977308908289e-05, + "loss": 0.7822, + "step": 3200 + }, + { + "epoch": 0.47, + "learning_rate": 1.1344231191535488e-05, + "loss": 0.8047, + "step": 3201 + }, + { + "epoch": 0.47, + "learning_rate": 1.1339484765773995e-05, + "loss": 0.9111, + "step": 3202 + }, + { + "epoch": 0.47, + "learning_rate": 1.133473803271272e-05, + "loss": 0.8687, + "step": 3203 + }, + { + "epoch": 0.47, + "learning_rate": 1.1329990993440638e-05, + "loss": 0.8398, + "step": 3204 + }, + { + "epoch": 0.47, + "learning_rate": 1.13252436490468e-05, + "loss": 0.8579, + "step": 3205 + }, + { + "epoch": 0.47, + "learning_rate": 1.1320496000620325e-05, + "loss": 0.8184, + "step": 3206 + }, + { + "epoch": 0.47, + "learning_rate": 1.1315748049250404e-05, + "loss": 0.8384, + "step": 3207 + }, + { + "epoch": 0.47, + "learning_rate": 1.1310999796026285e-05, + "loss": 0.7983, + "step": 3208 + }, + { + "epoch": 0.47, + "learning_rate": 1.130625124203731e-05, + "loss": 0.8384, + "step": 3209 + }, + { + "epoch": 0.47, + "learning_rate": 1.1301502388372862e-05, + "loss": 0.8545, + "step": 3210 + }, + { + "epoch": 0.47, + "learning_rate": 1.1296753236122416e-05, + "loss": 0.8633, + "step": 3211 + }, + { + "epoch": 0.48, + "learning_rate": 1.1292003786375496e-05, + "loss": 0.8311, + "step": 3212 + }, + { + "epoch": 0.48, + "learning_rate": 1.128725404022171e-05, + "loss": 0.8433, + "step": 3213 + }, + { + "epoch": 0.48, + "learning_rate": 1.1282503998750726e-05, + "loss": 0.79, + "step": 3214 + }, + { + "epoch": 0.48, + "learning_rate": 1.1277753663052284e-05, + "loss": 0.8555, + "step": 3215 + }, + { + "epoch": 0.48, + "learning_rate": 1.1273003034216186e-05, + "loss": 0.8491, + "step": 3216 + }, + { + "epoch": 0.48, + "learning_rate": 1.1268252113332302e-05, + "loss": 0.9131, + "step": 3217 + }, + { + "epoch": 0.48, + "learning_rate": 1.1263500901490576e-05, + "loss": 0.3213, + "step": 3218 + }, + { + "epoch": 0.48, + "learning_rate": 1.1258749399781013e-05, + "loss": 0.8711, + "step": 3219 + }, + { + "epoch": 0.48, + "learning_rate": 1.1253997609293684e-05, + "loss": 0.833, + "step": 3220 + }, + { + "epoch": 0.48, + "learning_rate": 1.1249245531118725e-05, + "loss": 0.8647, + "step": 3221 + }, + { + "epoch": 0.48, + "learning_rate": 1.1244493166346344e-05, + "loss": 0.8496, + "step": 3222 + }, + { + "epoch": 0.48, + "learning_rate": 1.1239740516066813e-05, + "loss": 0.8359, + "step": 3223 + }, + { + "epoch": 0.48, + "learning_rate": 1.1234987581370468e-05, + "loss": 0.8706, + "step": 3224 + }, + { + "epoch": 0.48, + "learning_rate": 1.1230234363347708e-05, + "loss": 0.8188, + "step": 3225 + }, + { + "epoch": 0.48, + "learning_rate": 1.1225480863088997e-05, + "loss": 0.8794, + "step": 3226 + }, + { + "epoch": 0.48, + "learning_rate": 1.122072708168487e-05, + "loss": 0.7739, + "step": 3227 + }, + { + "epoch": 0.48, + "learning_rate": 1.1215973020225924e-05, + "loss": 0.7935, + "step": 3228 + }, + { + "epoch": 0.48, + "learning_rate": 1.1211218679802812e-05, + "loss": 0.3257, + "step": 3229 + }, + { + "epoch": 0.48, + "learning_rate": 1.1206464061506259e-05, + "loss": 0.813, + "step": 3230 + }, + { + "epoch": 0.48, + "learning_rate": 1.1201709166427059e-05, + "loss": 0.8091, + "step": 3231 + }, + { + "epoch": 0.48, + "learning_rate": 1.119695399565606e-05, + "loss": 0.792, + "step": 3232 + }, + { + "epoch": 0.48, + "learning_rate": 1.1192198550284172e-05, + "loss": 0.7969, + "step": 3233 + }, + { + "epoch": 0.48, + "learning_rate": 1.1187442831402378e-05, + "loss": 0.7842, + "step": 3234 + }, + { + "epoch": 0.48, + "learning_rate": 1.1182686840101711e-05, + "loss": 0.7866, + "step": 3235 + }, + { + "epoch": 0.48, + "learning_rate": 1.1177930577473281e-05, + "loss": 0.8218, + "step": 3236 + }, + { + "epoch": 0.48, + "learning_rate": 1.1173174044608249e-05, + "loss": 0.8115, + "step": 3237 + }, + { + "epoch": 0.48, + "learning_rate": 1.116841724259784e-05, + "loss": 0.897, + "step": 3238 + }, + { + "epoch": 0.48, + "learning_rate": 1.1163660172533346e-05, + "loss": 0.8403, + "step": 3239 + }, + { + "epoch": 0.48, + "learning_rate": 1.1158902835506114e-05, + "loss": 0.7734, + "step": 3240 + }, + { + "epoch": 0.48, + "learning_rate": 1.1154145232607558e-05, + "loss": 0.8325, + "step": 3241 + }, + { + "epoch": 0.48, + "learning_rate": 1.1149387364929148e-05, + "loss": 0.7842, + "step": 3242 + }, + { + "epoch": 0.48, + "learning_rate": 1.1144629233562418e-05, + "loss": 0.8276, + "step": 3243 + }, + { + "epoch": 0.48, + "learning_rate": 1.1139870839598959e-05, + "loss": 0.8599, + "step": 3244 + }, + { + "epoch": 0.48, + "learning_rate": 1.113511218413043e-05, + "loss": 0.9126, + "step": 3245 + }, + { + "epoch": 0.48, + "learning_rate": 1.1130353268248539e-05, + "loss": 0.8301, + "step": 3246 + }, + { + "epoch": 0.48, + "learning_rate": 1.1125594093045062e-05, + "loss": 0.8774, + "step": 3247 + }, + { + "epoch": 0.48, + "learning_rate": 1.1120834659611832e-05, + "loss": 0.8037, + "step": 3248 + }, + { + "epoch": 0.48, + "learning_rate": 1.111607496904074e-05, + "loss": 0.8003, + "step": 3249 + }, + { + "epoch": 0.48, + "learning_rate": 1.111131502242374e-05, + "loss": 0.79, + "step": 3250 + }, + { + "epoch": 0.48, + "learning_rate": 1.1106554820852837e-05, + "loss": 0.8105, + "step": 3251 + }, + { + "epoch": 0.48, + "learning_rate": 1.1101794365420103e-05, + "loss": 0.8882, + "step": 3252 + }, + { + "epoch": 0.48, + "learning_rate": 1.1097033657217662e-05, + "loss": 0.77, + "step": 3253 + }, + { + "epoch": 0.48, + "learning_rate": 1.1092272697337703e-05, + "loss": 0.7637, + "step": 3254 + }, + { + "epoch": 0.48, + "learning_rate": 1.1087511486872461e-05, + "loss": 0.8716, + "step": 3255 + }, + { + "epoch": 0.48, + "learning_rate": 1.1082750026914246e-05, + "loss": 0.856, + "step": 3256 + }, + { + "epoch": 0.48, + "learning_rate": 1.1077988318555405e-05, + "loss": 0.8643, + "step": 3257 + }, + { + "epoch": 0.48, + "learning_rate": 1.1073226362888358e-05, + "loss": 0.7903, + "step": 3258 + }, + { + "epoch": 0.48, + "learning_rate": 1.1068464161005575e-05, + "loss": 0.9312, + "step": 3259 + }, + { + "epoch": 0.48, + "learning_rate": 1.1063701713999582e-05, + "loss": 0.9556, + "step": 3260 + }, + { + "epoch": 0.48, + "learning_rate": 1.1058939022962962e-05, + "loss": 0.9697, + "step": 3261 + }, + { + "epoch": 0.48, + "learning_rate": 1.1054176088988352e-05, + "loss": 1.0029, + "step": 3262 + }, + { + "epoch": 0.48, + "learning_rate": 1.1049412913168456e-05, + "loss": 0.9741, + "step": 3263 + }, + { + "epoch": 0.48, + "learning_rate": 1.1044649496596018e-05, + "loss": 0.9268, + "step": 3264 + }, + { + "epoch": 0.48, + "learning_rate": 1.1039885840363845e-05, + "loss": 0.9146, + "step": 3265 + }, + { + "epoch": 0.48, + "learning_rate": 1.1035121945564795e-05, + "loss": 0.9858, + "step": 3266 + }, + { + "epoch": 0.48, + "learning_rate": 1.1030357813291787e-05, + "loss": 0.9961, + "step": 3267 + }, + { + "epoch": 0.48, + "learning_rate": 1.1025593444637797e-05, + "loss": 0.9585, + "step": 3268 + }, + { + "epoch": 0.48, + "learning_rate": 1.1020828840695836e-05, + "loss": 0.9585, + "step": 3269 + }, + { + "epoch": 0.48, + "learning_rate": 1.1016064002558993e-05, + "loss": 0.3335, + "step": 3270 + }, + { + "epoch": 0.48, + "learning_rate": 1.1011298931320393e-05, + "loss": 0.9517, + "step": 3271 + }, + { + "epoch": 0.48, + "learning_rate": 1.1006533628073229e-05, + "loss": 0.9385, + "step": 3272 + }, + { + "epoch": 0.48, + "learning_rate": 1.1001768093910734e-05, + "loss": 0.9453, + "step": 3273 + }, + { + "epoch": 0.48, + "learning_rate": 1.0997002329926201e-05, + "loss": 0.8896, + "step": 3274 + }, + { + "epoch": 0.48, + "learning_rate": 1.0992236337212976e-05, + "loss": 0.8965, + "step": 3275 + }, + { + "epoch": 0.48, + "learning_rate": 1.0987470116864454e-05, + "loss": 0.8418, + "step": 3276 + }, + { + "epoch": 0.48, + "learning_rate": 1.0982703669974087e-05, + "loss": 0.7949, + "step": 3277 + }, + { + "epoch": 0.48, + "learning_rate": 1.097793699763537e-05, + "loss": 0.8701, + "step": 3278 + }, + { + "epoch": 0.48, + "learning_rate": 1.0973170100941865e-05, + "loss": 0.7666, + "step": 3279 + }, + { + "epoch": 0.49, + "learning_rate": 1.0968402980987164e-05, + "loss": 0.8389, + "step": 3280 + }, + { + "epoch": 0.49, + "learning_rate": 1.0963635638864938e-05, + "loss": 0.8398, + "step": 3281 + }, + { + "epoch": 0.49, + "learning_rate": 1.095886807566888e-05, + "loss": 0.7983, + "step": 3282 + }, + { + "epoch": 0.49, + "learning_rate": 1.0954100292492758e-05, + "loss": 0.8301, + "step": 3283 + }, + { + "epoch": 0.49, + "learning_rate": 1.094933229043037e-05, + "loss": 0.874, + "step": 3284 + }, + { + "epoch": 0.49, + "learning_rate": 1.0944564070575576e-05, + "loss": 0.8901, + "step": 3285 + }, + { + "epoch": 0.49, + "learning_rate": 1.093979563402229e-05, + "loss": 0.7676, + "step": 3286 + }, + { + "epoch": 0.49, + "learning_rate": 1.0935026981864462e-05, + "loss": 0.8691, + "step": 3287 + }, + { + "epoch": 0.49, + "learning_rate": 1.093025811519611e-05, + "loss": 0.918, + "step": 3288 + }, + { + "epoch": 0.49, + "learning_rate": 1.0925489035111275e-05, + "loss": 0.8403, + "step": 3289 + }, + { + "epoch": 0.49, + "learning_rate": 1.0920719742704071e-05, + "loss": 0.8096, + "step": 3290 + }, + { + "epoch": 0.49, + "learning_rate": 1.0915950239068653e-05, + "loss": 0.8467, + "step": 3291 + }, + { + "epoch": 0.49, + "learning_rate": 1.0911180525299224e-05, + "loss": 0.7642, + "step": 3292 + }, + { + "epoch": 0.49, + "learning_rate": 1.090641060249003e-05, + "loss": 0.8198, + "step": 3293 + }, + { + "epoch": 0.49, + "learning_rate": 1.090164047173537e-05, + "loss": 0.8501, + "step": 3294 + }, + { + "epoch": 0.49, + "learning_rate": 1.0896870134129592e-05, + "loss": 0.8882, + "step": 3295 + }, + { + "epoch": 0.49, + "learning_rate": 1.089209959076709e-05, + "loss": 0.8403, + "step": 3296 + }, + { + "epoch": 0.49, + "learning_rate": 1.0887328842742307e-05, + "loss": 0.7856, + "step": 3297 + }, + { + "epoch": 0.49, + "learning_rate": 1.0882557891149725e-05, + "loss": 0.8237, + "step": 3298 + }, + { + "epoch": 0.49, + "learning_rate": 1.0877786737083882e-05, + "loss": 0.8711, + "step": 3299 + }, + { + "epoch": 0.49, + "learning_rate": 1.0873015381639363e-05, + "loss": 0.8718, + "step": 3300 + }, + { + "epoch": 0.49, + "learning_rate": 1.0868243825910786e-05, + "loss": 0.8965, + "step": 3301 + }, + { + "epoch": 0.49, + "learning_rate": 1.0863472070992834e-05, + "loss": 0.338, + "step": 3302 + }, + { + "epoch": 0.49, + "learning_rate": 1.0858700117980217e-05, + "loss": 0.8159, + "step": 3303 + }, + { + "epoch": 0.49, + "learning_rate": 1.0853927967967705e-05, + "loss": 0.8496, + "step": 3304 + }, + { + "epoch": 0.49, + "learning_rate": 1.0849155622050107e-05, + "loss": 0.8237, + "step": 3305 + }, + { + "epoch": 0.49, + "learning_rate": 1.0844383081322276e-05, + "loss": 0.8799, + "step": 3306 + }, + { + "epoch": 0.49, + "learning_rate": 1.0839610346879112e-05, + "loss": 0.8691, + "step": 3307 + }, + { + "epoch": 0.49, + "learning_rate": 1.0834837419815558e-05, + "loss": 0.8975, + "step": 3308 + }, + { + "epoch": 0.49, + "learning_rate": 1.0830064301226603e-05, + "loss": 0.8647, + "step": 3309 + }, + { + "epoch": 0.49, + "learning_rate": 1.0825290992207278e-05, + "loss": 0.8452, + "step": 3310 + }, + { + "epoch": 0.49, + "learning_rate": 1.0820517493852655e-05, + "loss": 0.8296, + "step": 3311 + }, + { + "epoch": 0.49, + "learning_rate": 1.0815743807257859e-05, + "loss": 0.8843, + "step": 3312 + }, + { + "epoch": 0.49, + "learning_rate": 1.081096993351805e-05, + "loss": 0.8438, + "step": 3313 + }, + { + "epoch": 0.49, + "learning_rate": 1.0806195873728429e-05, + "loss": 0.8286, + "step": 3314 + }, + { + "epoch": 0.49, + "learning_rate": 1.080142162898425e-05, + "loss": 0.8027, + "step": 3315 + }, + { + "epoch": 0.49, + "learning_rate": 1.0796647200380795e-05, + "loss": 0.856, + "step": 3316 + }, + { + "epoch": 0.49, + "learning_rate": 1.0791872589013404e-05, + "loss": 0.8237, + "step": 3317 + }, + { + "epoch": 0.49, + "learning_rate": 1.0787097795977447e-05, + "loss": 0.855, + "step": 3318 + }, + { + "epoch": 0.49, + "learning_rate": 1.0782322822368343e-05, + "loss": 0.8696, + "step": 3319 + }, + { + "epoch": 0.49, + "learning_rate": 1.077754766928155e-05, + "loss": 0.9126, + "step": 3320 + }, + { + "epoch": 0.49, + "learning_rate": 1.0772772337812557e-05, + "loss": 0.8159, + "step": 3321 + }, + { + "epoch": 0.49, + "learning_rate": 1.0767996829056914e-05, + "loss": 0.8574, + "step": 3322 + }, + { + "epoch": 0.49, + "learning_rate": 1.0763221144110196e-05, + "loss": 0.8237, + "step": 3323 + }, + { + "epoch": 0.49, + "learning_rate": 1.0758445284068027e-05, + "loss": 0.8755, + "step": 3324 + }, + { + "epoch": 0.49, + "learning_rate": 1.0753669250026062e-05, + "loss": 0.8823, + "step": 3325 + }, + { + "epoch": 0.49, + "learning_rate": 1.0748893043080008e-05, + "loss": 0.8203, + "step": 3326 + }, + { + "epoch": 0.49, + "learning_rate": 1.07441166643256e-05, + "loss": 0.8452, + "step": 3327 + }, + { + "epoch": 0.49, + "learning_rate": 1.0739340114858623e-05, + "loss": 0.8438, + "step": 3328 + }, + { + "epoch": 0.49, + "learning_rate": 1.073456339577489e-05, + "loss": 0.7544, + "step": 3329 + }, + { + "epoch": 0.49, + "learning_rate": 1.0729786508170261e-05, + "loss": 0.8394, + "step": 3330 + }, + { + "epoch": 0.49, + "learning_rate": 1.0725009453140636e-05, + "loss": 0.8252, + "step": 3331 + }, + { + "epoch": 0.49, + "learning_rate": 1.0720232231781944e-05, + "loss": 0.8525, + "step": 3332 + }, + { + "epoch": 0.49, + "learning_rate": 1.0715454845190161e-05, + "loss": 0.8164, + "step": 3333 + }, + { + "epoch": 0.49, + "learning_rate": 1.0710677294461298e-05, + "loss": 0.8076, + "step": 3334 + }, + { + "epoch": 0.49, + "learning_rate": 1.0705899580691402e-05, + "loss": 0.8228, + "step": 3335 + }, + { + "epoch": 0.49, + "learning_rate": 1.0701121704976561e-05, + "loss": 0.3367, + "step": 3336 + }, + { + "epoch": 0.49, + "learning_rate": 1.0696343668412898e-05, + "loss": 0.811, + "step": 3337 + }, + { + "epoch": 0.49, + "learning_rate": 1.069156547209657e-05, + "loss": 0.8193, + "step": 3338 + }, + { + "epoch": 0.49, + "learning_rate": 1.0686787117123776e-05, + "loss": 0.8257, + "step": 3339 + }, + { + "epoch": 0.49, + "learning_rate": 1.0682008604590748e-05, + "loss": 0.332, + "step": 3340 + }, + { + "epoch": 0.49, + "learning_rate": 1.067722993559376e-05, + "loss": 0.79, + "step": 3341 + }, + { + "epoch": 0.49, + "learning_rate": 1.0672451111229108e-05, + "loss": 0.8052, + "step": 3342 + }, + { + "epoch": 0.49, + "learning_rate": 1.066767213259314e-05, + "loss": 0.772, + "step": 3343 + }, + { + "epoch": 0.49, + "learning_rate": 1.0662893000782228e-05, + "loss": 0.8223, + "step": 3344 + }, + { + "epoch": 0.49, + "learning_rate": 1.0658113716892789e-05, + "loss": 0.8457, + "step": 3345 + }, + { + "epoch": 0.49, + "learning_rate": 1.0653334282021261e-05, + "loss": 0.8335, + "step": 3346 + }, + { + "epoch": 0.49, + "learning_rate": 1.0648554697264131e-05, + "loss": 0.6777, + "step": 3347 + }, + { + "epoch": 0.5, + "learning_rate": 1.064377496371791e-05, + "loss": 0.8198, + "step": 3348 + }, + { + "epoch": 0.5, + "learning_rate": 1.0638995082479151e-05, + "loss": 0.8608, + "step": 3349 + }, + { + "epoch": 0.5, + "learning_rate": 1.0634215054644437e-05, + "loss": 0.8379, + "step": 3350 + }, + { + "epoch": 0.5, + "learning_rate": 1.0629434881310382e-05, + "loss": 0.3228, + "step": 3351 + }, + { + "epoch": 0.5, + "learning_rate": 1.0624654563573635e-05, + "loss": 0.7681, + "step": 3352 + }, + { + "epoch": 0.5, + "learning_rate": 1.0619874102530886e-05, + "loss": 0.8242, + "step": 3353 + }, + { + "epoch": 0.5, + "learning_rate": 1.0615093499278846e-05, + "loss": 0.8579, + "step": 3354 + }, + { + "epoch": 0.5, + "learning_rate": 1.0610312754914262e-05, + "loss": 0.7998, + "step": 3355 + }, + { + "epoch": 0.5, + "learning_rate": 1.0605531870533922e-05, + "loss": 0.8667, + "step": 3356 + }, + { + "epoch": 0.5, + "learning_rate": 1.0600750847234633e-05, + "loss": 0.7939, + "step": 3357 + }, + { + "epoch": 0.5, + "learning_rate": 1.0595969686113245e-05, + "loss": 0.8193, + "step": 3358 + }, + { + "epoch": 0.5, + "learning_rate": 1.0591188388266632e-05, + "loss": 0.8618, + "step": 3359 + }, + { + "epoch": 0.5, + "learning_rate": 1.0586406954791702e-05, + "loss": 0.8057, + "step": 3360 + }, + { + "epoch": 0.5, + "learning_rate": 1.0581625386785392e-05, + "loss": 0.3293, + "step": 3361 + }, + { + "epoch": 0.5, + "learning_rate": 1.057684368534468e-05, + "loss": 0.8667, + "step": 3362 + }, + { + "epoch": 0.5, + "learning_rate": 1.0572061851566557e-05, + "loss": 0.7871, + "step": 3363 + }, + { + "epoch": 0.5, + "learning_rate": 1.0567279886548063e-05, + "loss": 0.7954, + "step": 3364 + }, + { + "epoch": 0.5, + "learning_rate": 1.0562497791386255e-05, + "loss": 0.7861, + "step": 3365 + }, + { + "epoch": 0.5, + "learning_rate": 1.055771556717822e-05, + "loss": 0.7939, + "step": 3366 + }, + { + "epoch": 0.5, + "learning_rate": 1.0552933215021088e-05, + "loss": 0.7964, + "step": 3367 + }, + { + "epoch": 0.5, + "learning_rate": 1.0548150736012002e-05, + "loss": 0.8315, + "step": 3368 + }, + { + "epoch": 0.5, + "learning_rate": 1.0543368131248144e-05, + "loss": 0.8535, + "step": 3369 + }, + { + "epoch": 0.5, + "learning_rate": 1.053858540182672e-05, + "loss": 0.3442, + "step": 3370 + }, + { + "epoch": 0.5, + "learning_rate": 1.0533802548844969e-05, + "loss": 0.853, + "step": 3371 + }, + { + "epoch": 0.5, + "learning_rate": 1.0529019573400154e-05, + "loss": 0.8384, + "step": 3372 + }, + { + "epoch": 0.5, + "learning_rate": 1.0524236476589571e-05, + "loss": 0.7617, + "step": 3373 + }, + { + "epoch": 0.5, + "learning_rate": 1.0519453259510535e-05, + "loss": 0.8271, + "step": 3374 + }, + { + "epoch": 0.5, + "learning_rate": 1.0514669923260398e-05, + "loss": 0.7798, + "step": 3375 + }, + { + "epoch": 0.5, + "learning_rate": 1.050988646893654e-05, + "loss": 0.3088, + "step": 3376 + }, + { + "epoch": 0.5, + "learning_rate": 1.0505102897636358e-05, + "loss": 0.8247, + "step": 3377 + }, + { + "epoch": 0.5, + "learning_rate": 1.0500319210457284e-05, + "loss": 0.8823, + "step": 3378 + }, + { + "epoch": 0.5, + "learning_rate": 1.0495535408496772e-05, + "loss": 0.7959, + "step": 3379 + }, + { + "epoch": 0.5, + "learning_rate": 1.049075149285231e-05, + "loss": 0.769, + "step": 3380 + }, + { + "epoch": 0.5, + "learning_rate": 1.0485967464621401e-05, + "loss": 0.8091, + "step": 3381 + }, + { + "epoch": 0.5, + "learning_rate": 1.0481183324901583e-05, + "loss": 0.2905, + "step": 3382 + }, + { + "epoch": 0.5, + "learning_rate": 1.0476399074790413e-05, + "loss": 0.8657, + "step": 3383 + }, + { + "epoch": 0.5, + "learning_rate": 1.047161471538548e-05, + "loss": 0.8896, + "step": 3384 + }, + { + "epoch": 0.5, + "learning_rate": 1.0466830247784394e-05, + "loss": 0.8145, + "step": 3385 + }, + { + "epoch": 0.5, + "learning_rate": 1.0462045673084788e-05, + "loss": 0.8281, + "step": 3386 + }, + { + "epoch": 0.5, + "learning_rate": 1.0457260992384326e-05, + "loss": 0.7734, + "step": 3387 + }, + { + "epoch": 0.5, + "learning_rate": 1.0452476206780686e-05, + "loss": 0.854, + "step": 3388 + }, + { + "epoch": 0.5, + "learning_rate": 1.0447691317371582e-05, + "loss": 0.793, + "step": 3389 + }, + { + "epoch": 0.5, + "learning_rate": 1.0442906325254747e-05, + "loss": 0.8491, + "step": 3390 + }, + { + "epoch": 0.5, + "learning_rate": 1.0438121231527928e-05, + "loss": 0.7988, + "step": 3391 + }, + { + "epoch": 0.5, + "learning_rate": 1.0433336037288915e-05, + "loss": 0.8301, + "step": 3392 + }, + { + "epoch": 0.5, + "learning_rate": 1.0428550743635502e-05, + "loss": 0.7725, + "step": 3393 + }, + { + "epoch": 0.5, + "learning_rate": 1.042376535166552e-05, + "loss": 0.8853, + "step": 3394 + }, + { + "epoch": 0.5, + "learning_rate": 1.041897986247681e-05, + "loss": 0.8447, + "step": 3395 + }, + { + "epoch": 0.5, + "learning_rate": 1.0414194277167244e-05, + "loss": 0.8262, + "step": 3396 + }, + { + "epoch": 0.5, + "learning_rate": 1.0409408596834718e-05, + "loss": 0.873, + "step": 3397 + }, + { + "epoch": 0.5, + "learning_rate": 1.040462282257714e-05, + "loss": 0.8696, + "step": 3398 + }, + { + "epoch": 0.5, + "learning_rate": 1.039983695549245e-05, + "loss": 0.3159, + "step": 3399 + }, + { + "epoch": 0.5, + "learning_rate": 1.0395050996678602e-05, + "loss": 0.8242, + "step": 3400 + }, + { + "epoch": 0.5, + "learning_rate": 1.0390264947233573e-05, + "loss": 0.811, + "step": 3401 + }, + { + "epoch": 0.5, + "learning_rate": 1.0385478808255358e-05, + "loss": 0.7896, + "step": 3402 + }, + { + "epoch": 0.5, + "learning_rate": 1.0380692580841985e-05, + "loss": 0.8594, + "step": 3403 + }, + { + "epoch": 0.5, + "learning_rate": 1.0375906266091485e-05, + "loss": 0.7524, + "step": 3404 + }, + { + "epoch": 0.5, + "learning_rate": 1.037111986510192e-05, + "loss": 0.8467, + "step": 3405 + }, + { + "epoch": 0.5, + "learning_rate": 1.036633337897137e-05, + "loss": 0.813, + "step": 3406 + }, + { + "epoch": 0.5, + "learning_rate": 1.0361546808797929e-05, + "loss": 0.8271, + "step": 3407 + }, + { + "epoch": 0.5, + "learning_rate": 1.0356760155679719e-05, + "loss": 0.8081, + "step": 3408 + }, + { + "epoch": 0.5, + "learning_rate": 1.0351973420714878e-05, + "loss": 0.8149, + "step": 3409 + }, + { + "epoch": 0.5, + "learning_rate": 1.0347186605001556e-05, + "loss": 0.8359, + "step": 3410 + }, + { + "epoch": 0.5, + "learning_rate": 1.0342399709637932e-05, + "loss": 0.8354, + "step": 3411 + }, + { + "epoch": 0.5, + "learning_rate": 1.0337612735722195e-05, + "loss": 0.8179, + "step": 3412 + }, + { + "epoch": 0.5, + "learning_rate": 1.0332825684352559e-05, + "loss": 0.8472, + "step": 3413 + }, + { + "epoch": 0.5, + "learning_rate": 1.032803855662725e-05, + "loss": 0.3101, + "step": 3414 + }, + { + "epoch": 0.51, + "learning_rate": 1.0323251353644512e-05, + "loss": 0.8862, + "step": 3415 + }, + { + "epoch": 0.51, + "learning_rate": 1.031846407650261e-05, + "loss": 0.8213, + "step": 3416 + }, + { + "epoch": 0.51, + "learning_rate": 1.0313676726299824e-05, + "loss": 0.7944, + "step": 3417 + }, + { + "epoch": 0.51, + "learning_rate": 1.0308889304134453e-05, + "loss": 0.8545, + "step": 3418 + }, + { + "epoch": 0.51, + "learning_rate": 1.0304101811104803e-05, + "loss": 0.8198, + "step": 3419 + }, + { + "epoch": 0.51, + "learning_rate": 1.029931424830921e-05, + "loss": 0.8438, + "step": 3420 + }, + { + "epoch": 0.51, + "learning_rate": 1.0294526616846017e-05, + "loss": 0.3047, + "step": 3421 + }, + { + "epoch": 0.51, + "learning_rate": 1.0289738917813585e-05, + "loss": 0.8115, + "step": 3422 + }, + { + "epoch": 0.51, + "learning_rate": 1.0284951152310292e-05, + "loss": 0.8267, + "step": 3423 + }, + { + "epoch": 0.51, + "learning_rate": 1.0280163321434528e-05, + "loss": 0.7974, + "step": 3424 + }, + { + "epoch": 0.51, + "learning_rate": 1.0275375426284704e-05, + "loss": 0.293, + "step": 3425 + }, + { + "epoch": 0.51, + "learning_rate": 1.027058746795924e-05, + "loss": 0.7896, + "step": 3426 + }, + { + "epoch": 0.51, + "learning_rate": 1.0265799447556566e-05, + "loss": 0.7905, + "step": 3427 + }, + { + "epoch": 0.51, + "learning_rate": 1.026101136617514e-05, + "loss": 0.855, + "step": 3428 + }, + { + "epoch": 0.51, + "learning_rate": 1.0256223224913422e-05, + "loss": 0.8281, + "step": 3429 + }, + { + "epoch": 0.51, + "learning_rate": 1.0251435024869894e-05, + "loss": 0.8276, + "step": 3430 + }, + { + "epoch": 0.51, + "learning_rate": 1.0246646767143046e-05, + "loss": 0.8018, + "step": 3431 + }, + { + "epoch": 0.51, + "learning_rate": 1.0241858452831384e-05, + "loss": 0.7559, + "step": 3432 + }, + { + "epoch": 0.51, + "learning_rate": 1.0237070083033422e-05, + "loss": 0.7969, + "step": 3433 + }, + { + "epoch": 0.51, + "learning_rate": 1.0232281658847693e-05, + "loss": 0.752, + "step": 3434 + }, + { + "epoch": 0.51, + "learning_rate": 1.0227493181372745e-05, + "loss": 0.8462, + "step": 3435 + }, + { + "epoch": 0.51, + "learning_rate": 1.0222704651707125e-05, + "loss": 0.8486, + "step": 3436 + }, + { + "epoch": 0.51, + "learning_rate": 1.0217916070949405e-05, + "loss": 0.8101, + "step": 3437 + }, + { + "epoch": 0.51, + "learning_rate": 1.0213127440198158e-05, + "loss": 0.7769, + "step": 3438 + }, + { + "epoch": 0.51, + "learning_rate": 1.0208338760551985e-05, + "loss": 0.835, + "step": 3439 + }, + { + "epoch": 0.51, + "learning_rate": 1.020355003310948e-05, + "loss": 0.8076, + "step": 3440 + }, + { + "epoch": 0.51, + "learning_rate": 1.019876125896926e-05, + "loss": 0.8237, + "step": 3441 + }, + { + "epoch": 0.51, + "learning_rate": 1.019397243922994e-05, + "loss": 0.8159, + "step": 3442 + }, + { + "epoch": 0.51, + "learning_rate": 1.0189183574990162e-05, + "loss": 0.7339, + "step": 3443 + }, + { + "epoch": 0.51, + "learning_rate": 1.0184394667348572e-05, + "loss": 0.8301, + "step": 3444 + }, + { + "epoch": 0.51, + "learning_rate": 1.0179605717403815e-05, + "loss": 0.7583, + "step": 3445 + }, + { + "epoch": 0.51, + "learning_rate": 1.0174816726254563e-05, + "loss": 0.7266, + "step": 3446 + }, + { + "epoch": 0.51, + "learning_rate": 1.0170027694999481e-05, + "loss": 0.8477, + "step": 3447 + }, + { + "epoch": 0.51, + "learning_rate": 1.0165238624737261e-05, + "loss": 0.8013, + "step": 3448 + }, + { + "epoch": 0.51, + "learning_rate": 1.0160449516566586e-05, + "loss": 0.8423, + "step": 3449 + }, + { + "epoch": 0.51, + "learning_rate": 1.0155660371586162e-05, + "loss": 0.7988, + "step": 3450 + }, + { + "epoch": 0.51, + "learning_rate": 1.0150871190894693e-05, + "loss": 0.8574, + "step": 3451 + }, + { + "epoch": 0.51, + "learning_rate": 1.0146081975590897e-05, + "loss": 0.8149, + "step": 3452 + }, + { + "epoch": 0.51, + "learning_rate": 1.01412927267735e-05, + "loss": 0.8433, + "step": 3453 + }, + { + "epoch": 0.51, + "learning_rate": 1.0136503445541235e-05, + "loss": 0.7751, + "step": 3454 + }, + { + "epoch": 0.51, + "learning_rate": 1.0131714132992836e-05, + "loss": 0.812, + "step": 3455 + }, + { + "epoch": 0.51, + "learning_rate": 1.0126924790227056e-05, + "loss": 0.8066, + "step": 3456 + }, + { + "epoch": 0.51, + "learning_rate": 1.0122135418342644e-05, + "loss": 0.7905, + "step": 3457 + }, + { + "epoch": 0.51, + "learning_rate": 1.0117346018438367e-05, + "loss": 0.7905, + "step": 3458 + }, + { + "epoch": 0.51, + "learning_rate": 1.0112556591612986e-05, + "loss": 0.8325, + "step": 3459 + }, + { + "epoch": 0.51, + "learning_rate": 1.0107767138965274e-05, + "loss": 0.8179, + "step": 3460 + }, + { + "epoch": 0.51, + "learning_rate": 1.0102977661594013e-05, + "loss": 0.8291, + "step": 3461 + }, + { + "epoch": 0.51, + "learning_rate": 1.0098188160597989e-05, + "loss": 0.8618, + "step": 3462 + }, + { + "epoch": 0.51, + "learning_rate": 1.009339863707599e-05, + "loss": 0.8174, + "step": 3463 + }, + { + "epoch": 0.51, + "learning_rate": 1.0088609092126808e-05, + "loss": 0.7656, + "step": 3464 + }, + { + "epoch": 0.51, + "learning_rate": 1.008381952684925e-05, + "loss": 0.8457, + "step": 3465 + }, + { + "epoch": 0.51, + "learning_rate": 1.0079029942342113e-05, + "loss": 0.7661, + "step": 3466 + }, + { + "epoch": 0.51, + "learning_rate": 1.0074240339704216e-05, + "loss": 0.7739, + "step": 3467 + }, + { + "epoch": 0.51, + "learning_rate": 1.0069450720034364e-05, + "loss": 0.8623, + "step": 3468 + }, + { + "epoch": 0.51, + "learning_rate": 1.0064661084431377e-05, + "loss": 0.3267, + "step": 3469 + }, + { + "epoch": 0.51, + "learning_rate": 1.0059871433994075e-05, + "loss": 0.748, + "step": 3470 + }, + { + "epoch": 0.51, + "learning_rate": 1.0055081769821287e-05, + "loss": 0.8359, + "step": 3471 + }, + { + "epoch": 0.51, + "learning_rate": 1.0050292093011835e-05, + "loss": 0.8149, + "step": 3472 + }, + { + "epoch": 0.51, + "learning_rate": 1.0045502404664555e-05, + "loss": 0.8364, + "step": 3473 + }, + { + "epoch": 0.51, + "learning_rate": 1.004071270587827e-05, + "loss": 0.7925, + "step": 3474 + }, + { + "epoch": 0.51, + "learning_rate": 1.0035922997751825e-05, + "loss": 0.793, + "step": 3475 + }, + { + "epoch": 0.51, + "learning_rate": 1.003113328138406e-05, + "loss": 0.7683, + "step": 3476 + }, + { + "epoch": 0.51, + "learning_rate": 1.0026343557873806e-05, + "loss": 0.8486, + "step": 3477 + }, + { + "epoch": 0.51, + "learning_rate": 1.0021553828319906e-05, + "loss": 0.8359, + "step": 3478 + }, + { + "epoch": 0.51, + "learning_rate": 1.0016764093821203e-05, + "loss": 0.7866, + "step": 3479 + }, + { + "epoch": 0.51, + "learning_rate": 1.0011974355476545e-05, + "loss": 0.835, + "step": 3480 + }, + { + "epoch": 0.51, + "learning_rate": 1.0007184614384773e-05, + "loss": 0.8169, + "step": 3481 + }, + { + "epoch": 0.51, + "learning_rate": 1.0002394871644734e-05, + "loss": 0.8818, + "step": 3482 + }, + { + "epoch": 0.52, + "learning_rate": 9.997605128355267e-06, + "loss": 0.8213, + "step": 3483 + }, + { + "epoch": 0.52, + "learning_rate": 9.992815385615227e-06, + "loss": 0.8188, + "step": 3484 + }, + { + "epoch": 0.52, + "learning_rate": 9.988025644523458e-06, + "loss": 0.7959, + "step": 3485 + }, + { + "epoch": 0.52, + "learning_rate": 9.983235906178798e-06, + "loss": 0.8296, + "step": 3486 + }, + { + "epoch": 0.52, + "learning_rate": 9.978446171680097e-06, + "loss": 0.7812, + "step": 3487 + }, + { + "epoch": 0.52, + "learning_rate": 9.973656442126196e-06, + "loss": 0.8018, + "step": 3488 + }, + { + "epoch": 0.52, + "learning_rate": 9.968866718615946e-06, + "loss": 0.854, + "step": 3489 + }, + { + "epoch": 0.52, + "learning_rate": 9.964077002248177e-06, + "loss": 0.8726, + "step": 3490 + }, + { + "epoch": 0.52, + "learning_rate": 9.959287294121733e-06, + "loss": 0.7798, + "step": 3491 + }, + { + "epoch": 0.52, + "learning_rate": 9.954497595335448e-06, + "loss": 0.8203, + "step": 3492 + }, + { + "epoch": 0.52, + "learning_rate": 9.949707906988165e-06, + "loss": 0.7549, + "step": 3493 + }, + { + "epoch": 0.52, + "learning_rate": 9.944918230178718e-06, + "loss": 0.8306, + "step": 3494 + }, + { + "epoch": 0.52, + "learning_rate": 9.940128566005928e-06, + "loss": 0.8364, + "step": 3495 + }, + { + "epoch": 0.52, + "learning_rate": 9.935338915568626e-06, + "loss": 0.8325, + "step": 3496 + }, + { + "epoch": 0.52, + "learning_rate": 9.930549279965636e-06, + "loss": 0.8452, + "step": 3497 + }, + { + "epoch": 0.52, + "learning_rate": 9.92575966029579e-06, + "loss": 0.8188, + "step": 3498 + }, + { + "epoch": 0.52, + "learning_rate": 9.920970057657888e-06, + "loss": 0.7876, + "step": 3499 + }, + { + "epoch": 0.52, + "learning_rate": 9.916180473150753e-06, + "loss": 0.7891, + "step": 3500 + }, + { + "epoch": 0.52, + "learning_rate": 9.911390907873193e-06, + "loss": 0.8315, + "step": 3501 + }, + { + "epoch": 0.52, + "learning_rate": 9.906601362924016e-06, + "loss": 0.8169, + "step": 3502 + }, + { + "epoch": 0.52, + "learning_rate": 9.901811839402015e-06, + "loss": 0.7559, + "step": 3503 + }, + { + "epoch": 0.52, + "learning_rate": 9.897022338405989e-06, + "loss": 0.8223, + "step": 3504 + }, + { + "epoch": 0.52, + "learning_rate": 9.892232861034728e-06, + "loss": 0.8687, + "step": 3505 + }, + { + "epoch": 0.52, + "learning_rate": 9.887443408387019e-06, + "loss": 0.8389, + "step": 3506 + }, + { + "epoch": 0.52, + "learning_rate": 9.882653981561638e-06, + "loss": 0.3694, + "step": 3507 + }, + { + "epoch": 0.52, + "learning_rate": 9.87786458165736e-06, + "loss": 0.8833, + "step": 3508 + }, + { + "epoch": 0.52, + "learning_rate": 9.87307520977295e-06, + "loss": 0.8569, + "step": 3509 + }, + { + "epoch": 0.52, + "learning_rate": 9.868285867007167e-06, + "loss": 0.8291, + "step": 3510 + }, + { + "epoch": 0.52, + "learning_rate": 9.86349655445877e-06, + "loss": 0.8496, + "step": 3511 + }, + { + "epoch": 0.52, + "learning_rate": 9.858707273226503e-06, + "loss": 0.7998, + "step": 3512 + }, + { + "epoch": 0.52, + "learning_rate": 9.853918024409104e-06, + "loss": 0.8335, + "step": 3513 + }, + { + "epoch": 0.52, + "learning_rate": 9.849128809105309e-06, + "loss": 0.8237, + "step": 3514 + }, + { + "epoch": 0.52, + "learning_rate": 9.844339628413842e-06, + "loss": 0.791, + "step": 3515 + }, + { + "epoch": 0.52, + "learning_rate": 9.839550483433417e-06, + "loss": 0.8003, + "step": 3516 + }, + { + "epoch": 0.52, + "learning_rate": 9.834761375262742e-06, + "loss": 0.8604, + "step": 3517 + }, + { + "epoch": 0.52, + "learning_rate": 9.829972305000519e-06, + "loss": 0.792, + "step": 3518 + }, + { + "epoch": 0.52, + "learning_rate": 9.825183273745442e-06, + "loss": 0.8276, + "step": 3519 + }, + { + "epoch": 0.52, + "learning_rate": 9.820394282596187e-06, + "loss": 0.7185, + "step": 3520 + }, + { + "epoch": 0.52, + "learning_rate": 9.815605332651433e-06, + "loss": 0.8159, + "step": 3521 + }, + { + "epoch": 0.52, + "learning_rate": 9.810816425009838e-06, + "loss": 0.814, + "step": 3522 + }, + { + "epoch": 0.52, + "learning_rate": 9.806027560770061e-06, + "loss": 0.8218, + "step": 3523 + }, + { + "epoch": 0.52, + "learning_rate": 9.801238741030746e-06, + "loss": 0.3193, + "step": 3524 + }, + { + "epoch": 0.52, + "learning_rate": 9.796449966890524e-06, + "loss": 0.8267, + "step": 3525 + }, + { + "epoch": 0.52, + "learning_rate": 9.791661239448018e-06, + "loss": 0.7998, + "step": 3526 + }, + { + "epoch": 0.52, + "learning_rate": 9.786872559801842e-06, + "loss": 0.8071, + "step": 3527 + }, + { + "epoch": 0.52, + "learning_rate": 9.782083929050601e-06, + "loss": 0.8755, + "step": 3528 + }, + { + "epoch": 0.52, + "learning_rate": 9.777295348292879e-06, + "loss": 0.8647, + "step": 3529 + }, + { + "epoch": 0.52, + "learning_rate": 9.772506818627258e-06, + "loss": 0.8242, + "step": 3530 + }, + { + "epoch": 0.52, + "learning_rate": 9.767718341152305e-06, + "loss": 0.7915, + "step": 3531 + }, + { + "epoch": 0.52, + "learning_rate": 9.76292991696658e-06, + "loss": 0.8379, + "step": 3532 + }, + { + "epoch": 0.52, + "learning_rate": 9.75814154716862e-06, + "loss": 0.8354, + "step": 3533 + }, + { + "epoch": 0.52, + "learning_rate": 9.753353232856955e-06, + "loss": 0.7393, + "step": 3534 + }, + { + "epoch": 0.52, + "learning_rate": 9.748564975130106e-06, + "loss": 0.8335, + "step": 3535 + }, + { + "epoch": 0.52, + "learning_rate": 9.74377677508658e-06, + "loss": 0.7588, + "step": 3536 + }, + { + "epoch": 0.52, + "learning_rate": 9.738988633824863e-06, + "loss": 0.8657, + "step": 3537 + }, + { + "epoch": 0.52, + "learning_rate": 9.734200552443437e-06, + "loss": 0.771, + "step": 3538 + }, + { + "epoch": 0.52, + "learning_rate": 9.729412532040766e-06, + "loss": 0.8862, + "step": 3539 + }, + { + "epoch": 0.52, + "learning_rate": 9.724624573715297e-06, + "loss": 0.7842, + "step": 3540 + }, + { + "epoch": 0.52, + "learning_rate": 9.719836678565473e-06, + "loss": 0.8398, + "step": 3541 + }, + { + "epoch": 0.52, + "learning_rate": 9.71504884768971e-06, + "loss": 0.8262, + "step": 3542 + }, + { + "epoch": 0.52, + "learning_rate": 9.710261082186417e-06, + "loss": 0.7822, + "step": 3543 + }, + { + "epoch": 0.52, + "learning_rate": 9.705473383153985e-06, + "loss": 0.8413, + "step": 3544 + }, + { + "epoch": 0.52, + "learning_rate": 9.700685751690794e-06, + "loss": 0.8662, + "step": 3545 + }, + { + "epoch": 0.52, + "learning_rate": 9.695898188895199e-06, + "loss": 0.8667, + "step": 3546 + }, + { + "epoch": 0.52, + "learning_rate": 9.69111069586555e-06, + "loss": 0.7832, + "step": 3547 + }, + { + "epoch": 0.52, + "learning_rate": 9.686323273700176e-06, + "loss": 0.8271, + "step": 3548 + }, + { + "epoch": 0.52, + "learning_rate": 9.681535923497394e-06, + "loss": 0.8042, + "step": 3549 + }, + { + "epoch": 0.52, + "learning_rate": 9.67674864635549e-06, + "loss": 0.8135, + "step": 3550 + }, + { + "epoch": 0.53, + "learning_rate": 9.671961443372752e-06, + "loss": 0.8413, + "step": 3551 + }, + { + "epoch": 0.53, + "learning_rate": 9.667174315647443e-06, + "loss": 0.8599, + "step": 3552 + }, + { + "epoch": 0.53, + "learning_rate": 9.66238726427781e-06, + "loss": 0.8315, + "step": 3553 + }, + { + "epoch": 0.53, + "learning_rate": 9.657600290362073e-06, + "loss": 0.8052, + "step": 3554 + }, + { + "epoch": 0.53, + "learning_rate": 9.652813394998447e-06, + "loss": 0.8286, + "step": 3555 + }, + { + "epoch": 0.53, + "learning_rate": 9.648026579285125e-06, + "loss": 0.8081, + "step": 3556 + }, + { + "epoch": 0.53, + "learning_rate": 9.643239844320283e-06, + "loss": 0.813, + "step": 3557 + }, + { + "epoch": 0.53, + "learning_rate": 9.638453191202076e-06, + "loss": 0.8013, + "step": 3558 + }, + { + "epoch": 0.53, + "learning_rate": 9.633666621028634e-06, + "loss": 0.8032, + "step": 3559 + }, + { + "epoch": 0.53, + "learning_rate": 9.628880134898081e-06, + "loss": 0.853, + "step": 3560 + }, + { + "epoch": 0.53, + "learning_rate": 9.624093733908516e-06, + "loss": 0.7432, + "step": 3561 + }, + { + "epoch": 0.53, + "learning_rate": 9.61930741915802e-06, + "loss": 0.8359, + "step": 3562 + }, + { + "epoch": 0.53, + "learning_rate": 9.614521191744644e-06, + "loss": 0.8389, + "step": 3563 + }, + { + "epoch": 0.53, + "learning_rate": 9.60973505276643e-06, + "loss": 0.7998, + "step": 3564 + }, + { + "epoch": 0.53, + "learning_rate": 9.6049490033214e-06, + "loss": 0.3169, + "step": 3565 + }, + { + "epoch": 0.53, + "learning_rate": 9.600163044507555e-06, + "loss": 0.8325, + "step": 3566 + }, + { + "epoch": 0.53, + "learning_rate": 9.595377177422862e-06, + "loss": 0.7852, + "step": 3567 + }, + { + "epoch": 0.53, + "learning_rate": 9.590591403165285e-06, + "loss": 0.7817, + "step": 3568 + }, + { + "epoch": 0.53, + "learning_rate": 9.585805722832754e-06, + "loss": 0.8413, + "step": 3569 + }, + { + "epoch": 0.53, + "learning_rate": 9.581020137523192e-06, + "loss": 0.8657, + "step": 3570 + }, + { + "epoch": 0.53, + "learning_rate": 9.576234648334486e-06, + "loss": 0.8267, + "step": 3571 + }, + { + "epoch": 0.53, + "learning_rate": 9.571449256364501e-06, + "loss": 0.7642, + "step": 3572 + }, + { + "epoch": 0.53, + "learning_rate": 9.56666396271109e-06, + "loss": 0.718, + "step": 3573 + }, + { + "epoch": 0.53, + "learning_rate": 9.56187876847207e-06, + "loss": 0.8076, + "step": 3574 + }, + { + "epoch": 0.53, + "learning_rate": 9.55709367474526e-06, + "loss": 0.8389, + "step": 3575 + }, + { + "epoch": 0.53, + "learning_rate": 9.552308682628421e-06, + "loss": 0.8276, + "step": 3576 + }, + { + "epoch": 0.53, + "learning_rate": 9.547523793219315e-06, + "loss": 0.3435, + "step": 3577 + }, + { + "epoch": 0.53, + "learning_rate": 9.542739007615676e-06, + "loss": 0.8037, + "step": 3578 + }, + { + "epoch": 0.53, + "learning_rate": 9.537954326915215e-06, + "loss": 0.8438, + "step": 3579 + }, + { + "epoch": 0.53, + "learning_rate": 9.533169752215609e-06, + "loss": 0.8408, + "step": 3580 + }, + { + "epoch": 0.53, + "learning_rate": 9.528385284614523e-06, + "loss": 0.3149, + "step": 3581 + }, + { + "epoch": 0.53, + "learning_rate": 9.52360092520959e-06, + "loss": 0.8374, + "step": 3582 + }, + { + "epoch": 0.53, + "learning_rate": 9.518816675098422e-06, + "loss": 0.8354, + "step": 3583 + }, + { + "epoch": 0.53, + "learning_rate": 9.514032535378604e-06, + "loss": 0.8423, + "step": 3584 + }, + { + "epoch": 0.53, + "learning_rate": 9.509248507147694e-06, + "loss": 0.8384, + "step": 3585 + }, + { + "epoch": 0.53, + "learning_rate": 9.504464591503231e-06, + "loss": 0.8525, + "step": 3586 + }, + { + "epoch": 0.53, + "learning_rate": 9.499680789542719e-06, + "loss": 0.8799, + "step": 3587 + }, + { + "epoch": 0.53, + "learning_rate": 9.494897102363647e-06, + "loss": 0.3184, + "step": 3588 + }, + { + "epoch": 0.53, + "learning_rate": 9.490113531063464e-06, + "loss": 0.7788, + "step": 3589 + }, + { + "epoch": 0.53, + "learning_rate": 9.485330076739604e-06, + "loss": 0.6995, + "step": 3590 + }, + { + "epoch": 0.53, + "learning_rate": 9.480546740489468e-06, + "loss": 0.8188, + "step": 3591 + }, + { + "epoch": 0.53, + "learning_rate": 9.475763523410436e-06, + "loss": 0.7559, + "step": 3592 + }, + { + "epoch": 0.53, + "learning_rate": 9.47098042659985e-06, + "loss": 0.8291, + "step": 3593 + }, + { + "epoch": 0.53, + "learning_rate": 9.466197451155034e-06, + "loss": 0.8369, + "step": 3594 + }, + { + "epoch": 0.53, + "learning_rate": 9.461414598173282e-06, + "loss": 0.8462, + "step": 3595 + }, + { + "epoch": 0.53, + "learning_rate": 9.45663186875186e-06, + "loss": 0.8242, + "step": 3596 + }, + { + "epoch": 0.53, + "learning_rate": 9.451849263988002e-06, + "loss": 0.8188, + "step": 3597 + }, + { + "epoch": 0.53, + "learning_rate": 9.447066784978914e-06, + "loss": 0.3431, + "step": 3598 + }, + { + "epoch": 0.53, + "learning_rate": 9.44228443282178e-06, + "loss": 0.835, + "step": 3599 + }, + { + "epoch": 0.53, + "learning_rate": 9.43750220861375e-06, + "loss": 0.8721, + "step": 3600 + }, + { + "epoch": 0.53, + "learning_rate": 9.43272011345194e-06, + "loss": 0.8682, + "step": 3601 + }, + { + "epoch": 0.53, + "learning_rate": 9.427938148433444e-06, + "loss": 0.2971, + "step": 3602 + }, + { + "epoch": 0.53, + "learning_rate": 9.423156314655324e-06, + "loss": 0.8237, + "step": 3603 + }, + { + "epoch": 0.53, + "learning_rate": 9.41837461321461e-06, + "loss": 0.71, + "step": 3604 + }, + { + "epoch": 0.53, + "learning_rate": 9.413593045208303e-06, + "loss": 0.7861, + "step": 3605 + }, + { + "epoch": 0.53, + "learning_rate": 9.408811611733373e-06, + "loss": 0.7671, + "step": 3606 + }, + { + "epoch": 0.53, + "learning_rate": 9.404030313886758e-06, + "loss": 0.749, + "step": 3607 + }, + { + "epoch": 0.53, + "learning_rate": 9.399249152765369e-06, + "loss": 0.856, + "step": 3608 + }, + { + "epoch": 0.53, + "learning_rate": 9.394468129466083e-06, + "loss": 0.8345, + "step": 3609 + }, + { + "epoch": 0.53, + "learning_rate": 9.38968724508574e-06, + "loss": 0.8208, + "step": 3610 + }, + { + "epoch": 0.53, + "learning_rate": 9.384906500721159e-06, + "loss": 0.7944, + "step": 3611 + }, + { + "epoch": 0.53, + "learning_rate": 9.380125897469116e-06, + "loss": 0.8696, + "step": 3612 + }, + { + "epoch": 0.53, + "learning_rate": 9.375345436426367e-06, + "loss": 0.8115, + "step": 3613 + }, + { + "epoch": 0.53, + "learning_rate": 9.370565118689623e-06, + "loss": 0.801, + "step": 3614 + }, + { + "epoch": 0.53, + "learning_rate": 9.365784945355567e-06, + "loss": 0.8037, + "step": 3615 + }, + { + "epoch": 0.53, + "learning_rate": 9.36100491752085e-06, + "loss": 0.8091, + "step": 3616 + }, + { + "epoch": 0.53, + "learning_rate": 9.35622503628209e-06, + "loss": 0.814, + "step": 3617 + }, + { + "epoch": 0.54, + "learning_rate": 9.351445302735874e-06, + "loss": 0.752, + "step": 3618 + }, + { + "epoch": 0.54, + "learning_rate": 9.346665717978742e-06, + "loss": 0.7861, + "step": 3619 + }, + { + "epoch": 0.54, + "learning_rate": 9.341886283107215e-06, + "loss": 0.7886, + "step": 3620 + }, + { + "epoch": 0.54, + "learning_rate": 9.337106999217772e-06, + "loss": 0.8774, + "step": 3621 + }, + { + "epoch": 0.54, + "learning_rate": 9.332327867406862e-06, + "loss": 0.8394, + "step": 3622 + }, + { + "epoch": 0.54, + "learning_rate": 9.327548888770894e-06, + "loss": 0.8213, + "step": 3623 + }, + { + "epoch": 0.54, + "learning_rate": 9.322770064406244e-06, + "loss": 0.8579, + "step": 3624 + }, + { + "epoch": 0.54, + "learning_rate": 9.317991395409252e-06, + "loss": 0.8477, + "step": 3625 + }, + { + "epoch": 0.54, + "learning_rate": 9.313212882876228e-06, + "loss": 0.7646, + "step": 3626 + }, + { + "epoch": 0.54, + "learning_rate": 9.308434527903432e-06, + "loss": 0.7686, + "step": 3627 + }, + { + "epoch": 0.54, + "learning_rate": 9.303656331587105e-06, + "loss": 0.8188, + "step": 3628 + }, + { + "epoch": 0.54, + "learning_rate": 9.29887829502344e-06, + "loss": 0.7886, + "step": 3629 + }, + { + "epoch": 0.54, + "learning_rate": 9.294100419308603e-06, + "loss": 0.7783, + "step": 3630 + }, + { + "epoch": 0.54, + "learning_rate": 9.289322705538705e-06, + "loss": 0.8271, + "step": 3631 + }, + { + "epoch": 0.54, + "learning_rate": 9.28454515480984e-06, + "loss": 0.7854, + "step": 3632 + }, + { + "epoch": 0.54, + "learning_rate": 9.279767768218058e-06, + "loss": 0.3171, + "step": 3633 + }, + { + "epoch": 0.54, + "learning_rate": 9.274990546859366e-06, + "loss": 0.8032, + "step": 3634 + }, + { + "epoch": 0.54, + "learning_rate": 9.270213491829742e-06, + "loss": 0.7432, + "step": 3635 + }, + { + "epoch": 0.54, + "learning_rate": 9.265436604225111e-06, + "loss": 0.791, + "step": 3636 + }, + { + "epoch": 0.54, + "learning_rate": 9.26065988514138e-06, + "loss": 0.8198, + "step": 3637 + }, + { + "epoch": 0.54, + "learning_rate": 9.2558833356744e-06, + "loss": 0.8164, + "step": 3638 + }, + { + "epoch": 0.54, + "learning_rate": 9.251106956919997e-06, + "loss": 0.8711, + "step": 3639 + }, + { + "epoch": 0.54, + "learning_rate": 9.246330749973943e-06, + "loss": 0.8003, + "step": 3640 + }, + { + "epoch": 0.54, + "learning_rate": 9.241554715931975e-06, + "loss": 0.8228, + "step": 3641 + }, + { + "epoch": 0.54, + "learning_rate": 9.236778855889804e-06, + "loss": 0.8115, + "step": 3642 + }, + { + "epoch": 0.54, + "learning_rate": 9.232003170943091e-06, + "loss": 0.7944, + "step": 3643 + }, + { + "epoch": 0.54, + "learning_rate": 9.227227662187447e-06, + "loss": 0.8066, + "step": 3644 + }, + { + "epoch": 0.54, + "learning_rate": 9.222452330718455e-06, + "loss": 0.8403, + "step": 3645 + }, + { + "epoch": 0.54, + "learning_rate": 9.217677177631657e-06, + "loss": 0.8291, + "step": 3646 + }, + { + "epoch": 0.54, + "learning_rate": 9.212902204022556e-06, + "loss": 0.7822, + "step": 3647 + }, + { + "epoch": 0.54, + "learning_rate": 9.2081274109866e-06, + "loss": 0.7705, + "step": 3648 + }, + { + "epoch": 0.54, + "learning_rate": 9.203352799619207e-06, + "loss": 0.8203, + "step": 3649 + }, + { + "epoch": 0.54, + "learning_rate": 9.198578371015752e-06, + "loss": 0.8208, + "step": 3650 + }, + { + "epoch": 0.54, + "learning_rate": 9.193804126271571e-06, + "loss": 0.8066, + "step": 3651 + }, + { + "epoch": 0.54, + "learning_rate": 9.189030066481956e-06, + "loss": 0.7969, + "step": 3652 + }, + { + "epoch": 0.54, + "learning_rate": 9.184256192742145e-06, + "loss": 0.8203, + "step": 3653 + }, + { + "epoch": 0.54, + "learning_rate": 9.179482506147346e-06, + "loss": 0.8149, + "step": 3654 + }, + { + "epoch": 0.54, + "learning_rate": 9.174709007792723e-06, + "loss": 0.8105, + "step": 3655 + }, + { + "epoch": 0.54, + "learning_rate": 9.169935698773402e-06, + "loss": 0.8286, + "step": 3656 + }, + { + "epoch": 0.54, + "learning_rate": 9.165162580184444e-06, + "loss": 0.7896, + "step": 3657 + }, + { + "epoch": 0.54, + "learning_rate": 9.16038965312089e-06, + "loss": 0.8389, + "step": 3658 + }, + { + "epoch": 0.54, + "learning_rate": 9.155616918677723e-06, + "loss": 0.7817, + "step": 3659 + }, + { + "epoch": 0.54, + "learning_rate": 9.150844377949897e-06, + "loss": 0.8296, + "step": 3660 + }, + { + "epoch": 0.54, + "learning_rate": 9.146072032032298e-06, + "loss": 0.8057, + "step": 3661 + }, + { + "epoch": 0.54, + "learning_rate": 9.141299882019786e-06, + "loss": 0.7998, + "step": 3662 + }, + { + "epoch": 0.54, + "learning_rate": 9.136527929007171e-06, + "loss": 0.8242, + "step": 3663 + }, + { + "epoch": 0.54, + "learning_rate": 9.131756174089217e-06, + "loss": 0.7512, + "step": 3664 + }, + { + "epoch": 0.54, + "learning_rate": 9.126984618360642e-06, + "loss": 0.8081, + "step": 3665 + }, + { + "epoch": 0.54, + "learning_rate": 9.122213262916121e-06, + "loss": 0.8267, + "step": 3666 + }, + { + "epoch": 0.54, + "learning_rate": 9.117442108850278e-06, + "loss": 0.8481, + "step": 3667 + }, + { + "epoch": 0.54, + "learning_rate": 9.112671157257698e-06, + "loss": 0.7661, + "step": 3668 + }, + { + "epoch": 0.54, + "learning_rate": 9.107900409232914e-06, + "loss": 0.7891, + "step": 3669 + }, + { + "epoch": 0.54, + "learning_rate": 9.103129865870411e-06, + "loss": 0.8667, + "step": 3670 + }, + { + "epoch": 0.54, + "learning_rate": 9.098359528264634e-06, + "loss": 0.895, + "step": 3671 + }, + { + "epoch": 0.54, + "learning_rate": 9.093589397509974e-06, + "loss": 0.7676, + "step": 3672 + }, + { + "epoch": 0.54, + "learning_rate": 9.088819474700781e-06, + "loss": 0.8062, + "step": 3673 + }, + { + "epoch": 0.54, + "learning_rate": 9.084049760931349e-06, + "loss": 0.8384, + "step": 3674 + }, + { + "epoch": 0.54, + "learning_rate": 9.07928025729593e-06, + "loss": 0.8188, + "step": 3675 + }, + { + "epoch": 0.54, + "learning_rate": 9.074510964888727e-06, + "loss": 0.7944, + "step": 3676 + }, + { + "epoch": 0.54, + "learning_rate": 9.069741884803897e-06, + "loss": 0.3394, + "step": 3677 + }, + { + "epoch": 0.54, + "learning_rate": 9.06497301813554e-06, + "loss": 0.8086, + "step": 3678 + }, + { + "epoch": 0.54, + "learning_rate": 9.060204365977715e-06, + "loss": 0.8789, + "step": 3679 + }, + { + "epoch": 0.54, + "learning_rate": 9.055435929424425e-06, + "loss": 0.7842, + "step": 3680 + }, + { + "epoch": 0.54, + "learning_rate": 9.050667709569633e-06, + "loss": 0.7964, + "step": 3681 + }, + { + "epoch": 0.54, + "learning_rate": 9.045899707507247e-06, + "loss": 0.814, + "step": 3682 + }, + { + "epoch": 0.54, + "learning_rate": 9.041131924331121e-06, + "loss": 0.8105, + "step": 3683 + }, + { + "epoch": 0.54, + "learning_rate": 9.036364361135066e-06, + "loss": 0.8086, + "step": 3684 + }, + { + "epoch": 0.54, + "learning_rate": 9.031597019012834e-06, + "loss": 0.8706, + "step": 3685 + }, + { + "epoch": 0.55, + "learning_rate": 9.02682989905814e-06, + "loss": 0.7715, + "step": 3686 + }, + { + "epoch": 0.55, + "learning_rate": 9.022063002364631e-06, + "loss": 0.7217, + "step": 3687 + }, + { + "epoch": 0.55, + "learning_rate": 9.017296330025917e-06, + "loss": 0.3081, + "step": 3688 + }, + { + "epoch": 0.55, + "learning_rate": 9.012529883135548e-06, + "loss": 0.8506, + "step": 3689 + }, + { + "epoch": 0.55, + "learning_rate": 9.007763662787028e-06, + "loss": 0.7705, + "step": 3690 + }, + { + "epoch": 0.55, + "learning_rate": 9.0029976700738e-06, + "loss": 0.8188, + "step": 3691 + }, + { + "epoch": 0.55, + "learning_rate": 8.998231906089268e-06, + "loss": 0.3029, + "step": 3692 + }, + { + "epoch": 0.55, + "learning_rate": 8.993466371926773e-06, + "loss": 0.8413, + "step": 3693 + }, + { + "epoch": 0.55, + "learning_rate": 8.98870106867961e-06, + "loss": 0.8413, + "step": 3694 + }, + { + "epoch": 0.55, + "learning_rate": 8.98393599744101e-06, + "loss": 0.8652, + "step": 3695 + }, + { + "epoch": 0.55, + "learning_rate": 8.979171159304166e-06, + "loss": 0.7505, + "step": 3696 + }, + { + "epoch": 0.55, + "learning_rate": 8.974406555362207e-06, + "loss": 0.8179, + "step": 3697 + }, + { + "epoch": 0.55, + "learning_rate": 8.969642186708211e-06, + "loss": 0.853, + "step": 3698 + }, + { + "epoch": 0.55, + "learning_rate": 8.964878054435207e-06, + "loss": 0.811, + "step": 3699 + }, + { + "epoch": 0.55, + "learning_rate": 8.96011415963616e-06, + "loss": 0.8174, + "step": 3700 + }, + { + "epoch": 0.55, + "learning_rate": 8.955350503403985e-06, + "loss": 0.8066, + "step": 3701 + }, + { + "epoch": 0.55, + "learning_rate": 8.950587086831544e-06, + "loss": 0.7883, + "step": 3702 + }, + { + "epoch": 0.55, + "learning_rate": 8.94582391101165e-06, + "loss": 0.8364, + "step": 3703 + }, + { + "epoch": 0.55, + "learning_rate": 8.941060977037042e-06, + "loss": 0.793, + "step": 3704 + }, + { + "epoch": 0.55, + "learning_rate": 8.936298286000421e-06, + "loss": 0.8218, + "step": 3705 + }, + { + "epoch": 0.55, + "learning_rate": 8.931535838994427e-06, + "loss": 0.8652, + "step": 3706 + }, + { + "epoch": 0.55, + "learning_rate": 8.926773637111647e-06, + "loss": 0.813, + "step": 3707 + }, + { + "epoch": 0.55, + "learning_rate": 8.922011681444596e-06, + "loss": 0.8271, + "step": 3708 + }, + { + "epoch": 0.55, + "learning_rate": 8.917249973085757e-06, + "loss": 0.8428, + "step": 3709 + }, + { + "epoch": 0.55, + "learning_rate": 8.912488513127539e-06, + "loss": 0.8267, + "step": 3710 + }, + { + "epoch": 0.55, + "learning_rate": 8.907727302662304e-06, + "loss": 0.8247, + "step": 3711 + }, + { + "epoch": 0.55, + "learning_rate": 8.90296634278234e-06, + "loss": 0.8457, + "step": 3712 + }, + { + "epoch": 0.55, + "learning_rate": 8.898205634579899e-06, + "loss": 0.8276, + "step": 3713 + }, + { + "epoch": 0.55, + "learning_rate": 8.893445179147165e-06, + "loss": 0.8091, + "step": 3714 + }, + { + "epoch": 0.55, + "learning_rate": 8.888684977576262e-06, + "loss": 0.8501, + "step": 3715 + }, + { + "epoch": 0.55, + "learning_rate": 8.883925030959266e-06, + "loss": 0.7715, + "step": 3716 + }, + { + "epoch": 0.55, + "learning_rate": 8.879165340388171e-06, + "loss": 0.771, + "step": 3717 + }, + { + "epoch": 0.55, + "learning_rate": 8.87440590695494e-06, + "loss": 0.3491, + "step": 3718 + }, + { + "epoch": 0.55, + "learning_rate": 8.869646731751463e-06, + "loss": 0.853, + "step": 3719 + }, + { + "epoch": 0.55, + "learning_rate": 8.864887815869577e-06, + "loss": 0.3014, + "step": 3720 + }, + { + "epoch": 0.55, + "learning_rate": 8.860129160401045e-06, + "loss": 0.7656, + "step": 3721 + }, + { + "epoch": 0.55, + "learning_rate": 8.855370766437584e-06, + "loss": 0.7744, + "step": 3722 + }, + { + "epoch": 0.55, + "learning_rate": 8.850612635070852e-06, + "loss": 0.7793, + "step": 3723 + }, + { + "epoch": 0.55, + "learning_rate": 8.845854767392448e-06, + "loss": 0.7866, + "step": 3724 + }, + { + "epoch": 0.55, + "learning_rate": 8.84109716449389e-06, + "loss": 0.7856, + "step": 3725 + }, + { + "epoch": 0.55, + "learning_rate": 8.836339827466656e-06, + "loss": 0.7666, + "step": 3726 + }, + { + "epoch": 0.55, + "learning_rate": 8.831582757402161e-06, + "loss": 0.8345, + "step": 3727 + }, + { + "epoch": 0.55, + "learning_rate": 8.826825955391753e-06, + "loss": 0.7412, + "step": 3728 + }, + { + "epoch": 0.55, + "learning_rate": 8.822069422526724e-06, + "loss": 0.8232, + "step": 3729 + }, + { + "epoch": 0.55, + "learning_rate": 8.817313159898292e-06, + "loss": 0.7939, + "step": 3730 + }, + { + "epoch": 0.55, + "learning_rate": 8.812557168597626e-06, + "loss": 0.8462, + "step": 3731 + }, + { + "epoch": 0.55, + "learning_rate": 8.80780144971583e-06, + "loss": 0.7539, + "step": 3732 + }, + { + "epoch": 0.55, + "learning_rate": 8.803046004343945e-06, + "loss": 0.8281, + "step": 3733 + }, + { + "epoch": 0.55, + "learning_rate": 8.798290833572944e-06, + "loss": 0.9131, + "step": 3734 + }, + { + "epoch": 0.55, + "learning_rate": 8.793535938493743e-06, + "loss": 0.8057, + "step": 3735 + }, + { + "epoch": 0.55, + "learning_rate": 8.78878132019719e-06, + "loss": 0.8008, + "step": 3736 + }, + { + "epoch": 0.55, + "learning_rate": 8.784026979774083e-06, + "loss": 0.8101, + "step": 3737 + }, + { + "epoch": 0.55, + "learning_rate": 8.779272918315135e-06, + "loss": 0.7686, + "step": 3738 + }, + { + "epoch": 0.55, + "learning_rate": 8.774519136911006e-06, + "loss": 0.7734, + "step": 3739 + }, + { + "epoch": 0.55, + "learning_rate": 8.769765636652296e-06, + "loss": 0.8872, + "step": 3740 + }, + { + "epoch": 0.55, + "learning_rate": 8.765012418629537e-06, + "loss": 0.791, + "step": 3741 + }, + { + "epoch": 0.55, + "learning_rate": 8.760259483933188e-06, + "loss": 0.8398, + "step": 3742 + }, + { + "epoch": 0.55, + "learning_rate": 8.755506833653658e-06, + "loss": 0.8481, + "step": 3743 + }, + { + "epoch": 0.55, + "learning_rate": 8.750754468881278e-06, + "loss": 0.7866, + "step": 3744 + }, + { + "epoch": 0.55, + "learning_rate": 8.746002390706318e-06, + "loss": 0.77, + "step": 3745 + }, + { + "epoch": 0.55, + "learning_rate": 8.741250600218992e-06, + "loss": 0.7515, + "step": 3746 + }, + { + "epoch": 0.55, + "learning_rate": 8.736499098509428e-06, + "loss": 0.7505, + "step": 3747 + }, + { + "epoch": 0.55, + "learning_rate": 8.731747886667701e-06, + "loss": 0.3123, + "step": 3748 + }, + { + "epoch": 0.55, + "learning_rate": 8.726996965783818e-06, + "loss": 0.7861, + "step": 3749 + }, + { + "epoch": 0.55, + "learning_rate": 8.722246336947721e-06, + "loss": 0.7891, + "step": 3750 + }, + { + "epoch": 0.55, + "learning_rate": 8.717496001249275e-06, + "loss": 0.752, + "step": 3751 + }, + { + "epoch": 0.55, + "learning_rate": 8.712745959778293e-06, + "loss": 0.7751, + "step": 3752 + }, + { + "epoch": 0.56, + "learning_rate": 8.707996213624507e-06, + "loss": 0.8237, + "step": 3753 + }, + { + "epoch": 0.56, + "learning_rate": 8.70324676387759e-06, + "loss": 0.8096, + "step": 3754 + }, + { + "epoch": 0.56, + "learning_rate": 8.698497611627141e-06, + "loss": 0.3394, + "step": 3755 + }, + { + "epoch": 0.56, + "learning_rate": 8.693748757962694e-06, + "loss": 0.8232, + "step": 3756 + }, + { + "epoch": 0.56, + "learning_rate": 8.689000203973715e-06, + "loss": 0.7861, + "step": 3757 + }, + { + "epoch": 0.56, + "learning_rate": 8.684251950749603e-06, + "loss": 0.8096, + "step": 3758 + }, + { + "epoch": 0.56, + "learning_rate": 8.679503999379679e-06, + "loss": 0.7773, + "step": 3759 + }, + { + "epoch": 0.56, + "learning_rate": 8.674756350953202e-06, + "loss": 0.7788, + "step": 3760 + }, + { + "epoch": 0.56, + "learning_rate": 8.670009006559365e-06, + "loss": 0.7788, + "step": 3761 + }, + { + "epoch": 0.56, + "learning_rate": 8.665261967287284e-06, + "loss": 0.7578, + "step": 3762 + }, + { + "epoch": 0.56, + "learning_rate": 8.660515234226008e-06, + "loss": 0.8516, + "step": 3763 + }, + { + "epoch": 0.56, + "learning_rate": 8.655768808464515e-06, + "loss": 0.8071, + "step": 3764 + }, + { + "epoch": 0.56, + "learning_rate": 8.651022691091713e-06, + "loss": 0.7715, + "step": 3765 + }, + { + "epoch": 0.56, + "learning_rate": 8.646276883196438e-06, + "loss": 0.7891, + "step": 3766 + }, + { + "epoch": 0.56, + "learning_rate": 8.641531385867462e-06, + "loss": 0.7769, + "step": 3767 + }, + { + "epoch": 0.56, + "learning_rate": 8.636786200193472e-06, + "loss": 0.8433, + "step": 3768 + }, + { + "epoch": 0.56, + "learning_rate": 8.632041327263094e-06, + "loss": 0.8389, + "step": 3769 + }, + { + "epoch": 0.56, + "learning_rate": 8.627296768164883e-06, + "loss": 0.812, + "step": 3770 + }, + { + "epoch": 0.56, + "learning_rate": 8.622552523987317e-06, + "loss": 0.7725, + "step": 3771 + }, + { + "epoch": 0.56, + "learning_rate": 8.617808595818802e-06, + "loss": 0.7739, + "step": 3772 + }, + { + "epoch": 0.56, + "learning_rate": 8.613064984747672e-06, + "loss": 0.8184, + "step": 3773 + }, + { + "epoch": 0.56, + "learning_rate": 8.608321691862193e-06, + "loss": 0.8057, + "step": 3774 + }, + { + "epoch": 0.56, + "learning_rate": 8.60357871825055e-06, + "loss": 0.8555, + "step": 3775 + }, + { + "epoch": 0.56, + "learning_rate": 8.598836065000864e-06, + "loss": 0.7915, + "step": 3776 + }, + { + "epoch": 0.56, + "learning_rate": 8.594093733201174e-06, + "loss": 0.7661, + "step": 3777 + }, + { + "epoch": 0.56, + "learning_rate": 8.589351723939448e-06, + "loss": 0.855, + "step": 3778 + }, + { + "epoch": 0.56, + "learning_rate": 8.584610038303584e-06, + "loss": 0.7915, + "step": 3779 + }, + { + "epoch": 0.56, + "learning_rate": 8.5798686773814e-06, + "loss": 0.8076, + "step": 3780 + }, + { + "epoch": 0.56, + "learning_rate": 8.575127642260643e-06, + "loss": 0.6558, + "step": 3781 + }, + { + "epoch": 0.56, + "learning_rate": 8.570386934028982e-06, + "loss": 0.8389, + "step": 3782 + }, + { + "epoch": 0.56, + "learning_rate": 8.565646553774016e-06, + "loss": 0.8179, + "step": 3783 + }, + { + "epoch": 0.56, + "learning_rate": 8.560906502583268e-06, + "loss": 0.8677, + "step": 3784 + }, + { + "epoch": 0.56, + "learning_rate": 8.556166781544178e-06, + "loss": 0.8296, + "step": 3785 + }, + { + "epoch": 0.56, + "learning_rate": 8.551427391744118e-06, + "loss": 0.8042, + "step": 3786 + }, + { + "epoch": 0.56, + "learning_rate": 8.546688334270381e-06, + "loss": 0.8091, + "step": 3787 + }, + { + "epoch": 0.56, + "learning_rate": 8.541949610210193e-06, + "loss": 0.8159, + "step": 3788 + }, + { + "epoch": 0.56, + "learning_rate": 8.53721122065068e-06, + "loss": 0.8174, + "step": 3789 + }, + { + "epoch": 0.56, + "learning_rate": 8.532473166678916e-06, + "loss": 0.8237, + "step": 3790 + }, + { + "epoch": 0.56, + "learning_rate": 8.527735449381887e-06, + "loss": 0.3496, + "step": 3791 + }, + { + "epoch": 0.56, + "learning_rate": 8.5229980698465e-06, + "loss": 0.8262, + "step": 3792 + }, + { + "epoch": 0.56, + "learning_rate": 8.518261029159596e-06, + "loss": 0.854, + "step": 3793 + }, + { + "epoch": 0.56, + "learning_rate": 8.51352432840792e-06, + "loss": 0.3113, + "step": 3794 + }, + { + "epoch": 0.56, + "learning_rate": 8.508787968678153e-06, + "loss": 0.7935, + "step": 3795 + }, + { + "epoch": 0.56, + "learning_rate": 8.504051951056894e-06, + "loss": 0.8599, + "step": 3796 + }, + { + "epoch": 0.56, + "learning_rate": 8.499316276630666e-06, + "loss": 0.8647, + "step": 3797 + }, + { + "epoch": 0.56, + "learning_rate": 8.4945809464859e-06, + "loss": 0.8643, + "step": 3798 + }, + { + "epoch": 0.56, + "learning_rate": 8.48984596170897e-06, + "loss": 0.8071, + "step": 3799 + }, + { + "epoch": 0.56, + "learning_rate": 8.485111323386152e-06, + "loss": 0.8755, + "step": 3800 + }, + { + "epoch": 0.56, + "learning_rate": 8.480377032603658e-06, + "loss": 0.8101, + "step": 3801 + }, + { + "epoch": 0.56, + "learning_rate": 8.475643090447603e-06, + "loss": 0.8247, + "step": 3802 + }, + { + "epoch": 0.56, + "learning_rate": 8.470909498004031e-06, + "loss": 0.8398, + "step": 3803 + }, + { + "epoch": 0.56, + "learning_rate": 8.46617625635891e-06, + "loss": 0.854, + "step": 3804 + }, + { + "epoch": 0.56, + "learning_rate": 8.46144336659813e-06, + "loss": 0.769, + "step": 3805 + }, + { + "epoch": 0.56, + "learning_rate": 8.456710829807479e-06, + "loss": 0.7974, + "step": 3806 + }, + { + "epoch": 0.56, + "learning_rate": 8.451978647072688e-06, + "loss": 0.8115, + "step": 3807 + }, + { + "epoch": 0.56, + "learning_rate": 8.44724681947939e-06, + "loss": 0.8408, + "step": 3808 + }, + { + "epoch": 0.56, + "learning_rate": 8.442515348113152e-06, + "loss": 0.7407, + "step": 3809 + }, + { + "epoch": 0.56, + "learning_rate": 8.437784234059454e-06, + "loss": 0.7832, + "step": 3810 + }, + { + "epoch": 0.56, + "learning_rate": 8.433053478403679e-06, + "loss": 0.8516, + "step": 3811 + }, + { + "epoch": 0.56, + "learning_rate": 8.428323082231144e-06, + "loss": 0.8574, + "step": 3812 + }, + { + "epoch": 0.56, + "learning_rate": 8.423593046627086e-06, + "loss": 0.8472, + "step": 3813 + }, + { + "epoch": 0.56, + "learning_rate": 8.418863372676652e-06, + "loss": 0.8193, + "step": 3814 + }, + { + "epoch": 0.56, + "learning_rate": 8.414134061464898e-06, + "loss": 0.8555, + "step": 3815 + }, + { + "epoch": 0.56, + "learning_rate": 8.40940511407681e-06, + "loss": 0.8042, + "step": 3816 + }, + { + "epoch": 0.56, + "learning_rate": 8.404676531597285e-06, + "loss": 0.7905, + "step": 3817 + }, + { + "epoch": 0.56, + "learning_rate": 8.399948315111148e-06, + "loss": 0.811, + "step": 3818 + }, + { + "epoch": 0.56, + "learning_rate": 8.395220465703116e-06, + "loss": 0.8081, + "step": 3819 + }, + { + "epoch": 0.56, + "learning_rate": 8.39049298445784e-06, + "loss": 0.7944, + "step": 3820 + }, + { + "epoch": 0.57, + "learning_rate": 8.38576587245988e-06, + "loss": 0.8027, + "step": 3821 + }, + { + "epoch": 0.57, + "learning_rate": 8.381039130793718e-06, + "loss": 0.8159, + "step": 3822 + }, + { + "epoch": 0.57, + "learning_rate": 8.37631276054374e-06, + "loss": 0.8096, + "step": 3823 + }, + { + "epoch": 0.57, + "learning_rate": 8.371586762794257e-06, + "loss": 0.3267, + "step": 3824 + }, + { + "epoch": 0.57, + "learning_rate": 8.36686113862949e-06, + "loss": 0.8765, + "step": 3825 + }, + { + "epoch": 0.57, + "learning_rate": 8.362135889133571e-06, + "loss": 0.7817, + "step": 3826 + }, + { + "epoch": 0.57, + "learning_rate": 8.357411015390559e-06, + "loss": 0.812, + "step": 3827 + }, + { + "epoch": 0.57, + "learning_rate": 8.352686518484407e-06, + "loss": 0.8013, + "step": 3828 + }, + { + "epoch": 0.57, + "learning_rate": 8.347962399498996e-06, + "loss": 0.7783, + "step": 3829 + }, + { + "epoch": 0.57, + "learning_rate": 8.343238659518119e-06, + "loss": 0.7922, + "step": 3830 + }, + { + "epoch": 0.57, + "learning_rate": 8.338515299625477e-06, + "loss": 0.8281, + "step": 3831 + }, + { + "epoch": 0.57, + "learning_rate": 8.333792320904686e-06, + "loss": 0.7485, + "step": 3832 + }, + { + "epoch": 0.57, + "learning_rate": 8.329069724439273e-06, + "loss": 0.8389, + "step": 3833 + }, + { + "epoch": 0.57, + "learning_rate": 8.324347511312685e-06, + "loss": 0.8237, + "step": 3834 + }, + { + "epoch": 0.57, + "learning_rate": 8.319625682608273e-06, + "loss": 0.8652, + "step": 3835 + }, + { + "epoch": 0.57, + "learning_rate": 8.314904239409295e-06, + "loss": 0.7939, + "step": 3836 + }, + { + "epoch": 0.57, + "learning_rate": 8.310183182798937e-06, + "loss": 0.8359, + "step": 3837 + }, + { + "epoch": 0.57, + "learning_rate": 8.305462513860282e-06, + "loss": 0.3152, + "step": 3838 + }, + { + "epoch": 0.57, + "learning_rate": 8.300742233676329e-06, + "loss": 0.8218, + "step": 3839 + }, + { + "epoch": 0.57, + "learning_rate": 8.29602234332999e-06, + "loss": 0.7896, + "step": 3840 + }, + { + "epoch": 0.57, + "learning_rate": 8.291302843904086e-06, + "loss": 0.7861, + "step": 3841 + }, + { + "epoch": 0.57, + "learning_rate": 8.286583736481342e-06, + "loss": 0.7651, + "step": 3842 + }, + { + "epoch": 0.57, + "learning_rate": 8.281865022144403e-06, + "loss": 0.8096, + "step": 3843 + }, + { + "epoch": 0.57, + "learning_rate": 8.27714670197582e-06, + "loss": 0.8501, + "step": 3844 + }, + { + "epoch": 0.57, + "learning_rate": 8.272428777058052e-06, + "loss": 0.8511, + "step": 3845 + }, + { + "epoch": 0.57, + "learning_rate": 8.267711248473465e-06, + "loss": 0.8076, + "step": 3846 + }, + { + "epoch": 0.57, + "learning_rate": 8.262994117304343e-06, + "loss": 0.7808, + "step": 3847 + }, + { + "epoch": 0.57, + "learning_rate": 8.258277384632873e-06, + "loss": 0.8447, + "step": 3848 + }, + { + "epoch": 0.57, + "learning_rate": 8.253561051541145e-06, + "loss": 0.8193, + "step": 3849 + }, + { + "epoch": 0.57, + "learning_rate": 8.248845119111168e-06, + "loss": 0.8623, + "step": 3850 + }, + { + "epoch": 0.57, + "learning_rate": 8.244129588424855e-06, + "loss": 0.8223, + "step": 3851 + }, + { + "epoch": 0.57, + "learning_rate": 8.239414460564024e-06, + "loss": 0.8125, + "step": 3852 + }, + { + "epoch": 0.57, + "learning_rate": 8.234699736610402e-06, + "loss": 0.7852, + "step": 3853 + }, + { + "epoch": 0.57, + "learning_rate": 8.229985417645624e-06, + "loss": 0.8779, + "step": 3854 + }, + { + "epoch": 0.57, + "learning_rate": 8.225271504751232e-06, + "loss": 0.814, + "step": 3855 + }, + { + "epoch": 0.57, + "learning_rate": 8.22055799900868e-06, + "loss": 0.7993, + "step": 3856 + }, + { + "epoch": 0.57, + "learning_rate": 8.21584490149932e-06, + "loss": 0.8188, + "step": 3857 + }, + { + "epoch": 0.57, + "learning_rate": 8.211132213304412e-06, + "loss": 0.8198, + "step": 3858 + }, + { + "epoch": 0.57, + "learning_rate": 8.206419935505125e-06, + "loss": 0.8013, + "step": 3859 + }, + { + "epoch": 0.57, + "learning_rate": 8.201708069182534e-06, + "loss": 0.8047, + "step": 3860 + }, + { + "epoch": 0.57, + "learning_rate": 8.19699661541762e-06, + "loss": 0.7544, + "step": 3861 + }, + { + "epoch": 0.57, + "learning_rate": 8.192285575291263e-06, + "loss": 0.7983, + "step": 3862 + }, + { + "epoch": 0.57, + "learning_rate": 8.187574949884256e-06, + "loss": 0.8247, + "step": 3863 + }, + { + "epoch": 0.57, + "learning_rate": 8.182864740277293e-06, + "loss": 0.9131, + "step": 3864 + }, + { + "epoch": 0.57, + "learning_rate": 8.178154947550976e-06, + "loss": 0.8354, + "step": 3865 + }, + { + "epoch": 0.57, + "learning_rate": 8.173445572785805e-06, + "loss": 0.7305, + "step": 3866 + }, + { + "epoch": 0.57, + "learning_rate": 8.168736617062188e-06, + "loss": 0.8066, + "step": 3867 + }, + { + "epoch": 0.57, + "learning_rate": 8.16402808146044e-06, + "loss": 0.7456, + "step": 3868 + }, + { + "epoch": 0.57, + "learning_rate": 8.159319967060777e-06, + "loss": 0.8174, + "step": 3869 + }, + { + "epoch": 0.57, + "learning_rate": 8.15461227494331e-06, + "loss": 0.8467, + "step": 3870 + }, + { + "epoch": 0.57, + "learning_rate": 8.149905006188067e-06, + "loss": 0.8335, + "step": 3871 + }, + { + "epoch": 0.57, + "learning_rate": 8.145198161874971e-06, + "loss": 0.8267, + "step": 3872 + }, + { + "epoch": 0.57, + "learning_rate": 8.14049174308385e-06, + "loss": 0.8345, + "step": 3873 + }, + { + "epoch": 0.57, + "learning_rate": 8.13578575089444e-06, + "loss": 0.8193, + "step": 3874 + }, + { + "epoch": 0.57, + "learning_rate": 8.131080186386357e-06, + "loss": 0.8936, + "step": 3875 + }, + { + "epoch": 0.57, + "learning_rate": 8.126375050639149e-06, + "loss": 0.7944, + "step": 3876 + }, + { + "epoch": 0.57, + "learning_rate": 8.121670344732244e-06, + "loss": 0.7881, + "step": 3877 + }, + { + "epoch": 0.57, + "learning_rate": 8.116966069744987e-06, + "loss": 0.8516, + "step": 3878 + }, + { + "epoch": 0.57, + "learning_rate": 8.112262226756603e-06, + "loss": 0.8047, + "step": 3879 + }, + { + "epoch": 0.57, + "learning_rate": 8.10755881684624e-06, + "loss": 0.8242, + "step": 3880 + }, + { + "epoch": 0.57, + "learning_rate": 8.102855841092934e-06, + "loss": 0.7651, + "step": 3881 + }, + { + "epoch": 0.57, + "learning_rate": 8.098153300575633e-06, + "loss": 0.8281, + "step": 3882 + }, + { + "epoch": 0.57, + "learning_rate": 8.093451196373163e-06, + "loss": 0.7451, + "step": 3883 + }, + { + "epoch": 0.57, + "learning_rate": 8.08874952956427e-06, + "loss": 0.7578, + "step": 3884 + }, + { + "epoch": 0.57, + "learning_rate": 8.084048301227597e-06, + "loss": 0.7578, + "step": 3885 + }, + { + "epoch": 0.57, + "learning_rate": 8.07934751244168e-06, + "loss": 0.7798, + "step": 3886 + }, + { + "epoch": 0.57, + "learning_rate": 8.074647164284962e-06, + "loss": 0.79, + "step": 3887 + }, + { + "epoch": 0.57, + "learning_rate": 8.069947257835768e-06, + "loss": 0.9038, + "step": 3888 + }, + { + "epoch": 0.58, + "learning_rate": 8.065247794172342e-06, + "loss": 0.7568, + "step": 3889 + }, + { + "epoch": 0.58, + "learning_rate": 8.060548774372818e-06, + "loss": 0.7397, + "step": 3890 + }, + { + "epoch": 0.58, + "learning_rate": 8.055850199515231e-06, + "loss": 0.834, + "step": 3891 + }, + { + "epoch": 0.58, + "learning_rate": 8.051152070677504e-06, + "loss": 0.7773, + "step": 3892 + }, + { + "epoch": 0.58, + "learning_rate": 8.046454388937466e-06, + "loss": 0.7866, + "step": 3893 + }, + { + "epoch": 0.58, + "learning_rate": 8.041757155372846e-06, + "loss": 0.8027, + "step": 3894 + }, + { + "epoch": 0.58, + "learning_rate": 8.03706037106127e-06, + "loss": 0.897, + "step": 3895 + }, + { + "epoch": 0.58, + "learning_rate": 8.032364037080247e-06, + "loss": 0.771, + "step": 3896 + }, + { + "epoch": 0.58, + "learning_rate": 8.0276681545072e-06, + "loss": 0.7876, + "step": 3897 + }, + { + "epoch": 0.58, + "learning_rate": 8.022972724419437e-06, + "loss": 0.8555, + "step": 3898 + }, + { + "epoch": 0.58, + "learning_rate": 8.018277747894178e-06, + "loss": 0.3267, + "step": 3899 + }, + { + "epoch": 0.58, + "learning_rate": 8.013583226008512e-06, + "loss": 0.7886, + "step": 3900 + }, + { + "epoch": 0.58, + "learning_rate": 8.00888915983945e-06, + "loss": 0.8086, + "step": 3901 + }, + { + "epoch": 0.58, + "learning_rate": 8.004195550463882e-06, + "loss": 0.8496, + "step": 3902 + }, + { + "epoch": 0.58, + "learning_rate": 7.999502398958599e-06, + "loss": 0.8638, + "step": 3903 + }, + { + "epoch": 0.58, + "learning_rate": 7.994809706400297e-06, + "loss": 0.8472, + "step": 3904 + }, + { + "epoch": 0.58, + "learning_rate": 7.990117473865543e-06, + "loss": 0.8276, + "step": 3905 + }, + { + "epoch": 0.58, + "learning_rate": 7.985425702430821e-06, + "loss": 0.793, + "step": 3906 + }, + { + "epoch": 0.58, + "learning_rate": 7.980734393172495e-06, + "loss": 0.8086, + "step": 3907 + }, + { + "epoch": 0.58, + "learning_rate": 7.976043547166835e-06, + "loss": 0.8057, + "step": 3908 + }, + { + "epoch": 0.58, + "learning_rate": 7.971353165489991e-06, + "loss": 0.3077, + "step": 3909 + }, + { + "epoch": 0.58, + "learning_rate": 7.966663249218018e-06, + "loss": 0.8101, + "step": 3910 + }, + { + "epoch": 0.58, + "learning_rate": 7.961973799426857e-06, + "loss": 0.7837, + "step": 3911 + }, + { + "epoch": 0.58, + "learning_rate": 7.957284817192346e-06, + "loss": 0.8291, + "step": 3912 + }, + { + "epoch": 0.58, + "learning_rate": 7.952596303590215e-06, + "loss": 0.7769, + "step": 3913 + }, + { + "epoch": 0.58, + "learning_rate": 7.947908259696084e-06, + "loss": 0.7949, + "step": 3914 + }, + { + "epoch": 0.58, + "learning_rate": 7.94322068658547e-06, + "loss": 0.8511, + "step": 3915 + }, + { + "epoch": 0.58, + "learning_rate": 7.938533585333777e-06, + "loss": 0.7798, + "step": 3916 + }, + { + "epoch": 0.58, + "learning_rate": 7.933846957016304e-06, + "loss": 0.7334, + "step": 3917 + }, + { + "epoch": 0.58, + "learning_rate": 7.929160802708239e-06, + "loss": 0.8242, + "step": 3918 + }, + { + "epoch": 0.58, + "learning_rate": 7.924475123484664e-06, + "loss": 0.8442, + "step": 3919 + }, + { + "epoch": 0.58, + "learning_rate": 7.91978992042055e-06, + "loss": 0.7734, + "step": 3920 + }, + { + "epoch": 0.58, + "learning_rate": 7.915105194590763e-06, + "loss": 0.8535, + "step": 3921 + }, + { + "epoch": 0.58, + "learning_rate": 7.91042094707005e-06, + "loss": 0.8638, + "step": 3922 + }, + { + "epoch": 0.58, + "learning_rate": 7.905737178933058e-06, + "loss": 0.812, + "step": 3923 + }, + { + "epoch": 0.58, + "learning_rate": 7.901053891254318e-06, + "loss": 0.8335, + "step": 3924 + }, + { + "epoch": 0.58, + "learning_rate": 7.896371085108258e-06, + "loss": 0.7759, + "step": 3925 + }, + { + "epoch": 0.58, + "learning_rate": 7.891688761569185e-06, + "loss": 0.8584, + "step": 3926 + }, + { + "epoch": 0.58, + "learning_rate": 7.887006921711301e-06, + "loss": 0.8584, + "step": 3927 + }, + { + "epoch": 0.58, + "learning_rate": 7.8823255666087e-06, + "loss": 0.8237, + "step": 3928 + }, + { + "epoch": 0.58, + "learning_rate": 7.877644697335362e-06, + "loss": 0.7598, + "step": 3929 + }, + { + "epoch": 0.58, + "learning_rate": 7.872964314965151e-06, + "loss": 0.8389, + "step": 3930 + }, + { + "epoch": 0.58, + "learning_rate": 7.868284420571826e-06, + "loss": 0.8262, + "step": 3931 + }, + { + "epoch": 0.58, + "learning_rate": 7.863605015229033e-06, + "loss": 0.7905, + "step": 3932 + }, + { + "epoch": 0.58, + "learning_rate": 7.858926100010303e-06, + "loss": 0.8691, + "step": 3933 + }, + { + "epoch": 0.58, + "learning_rate": 7.854247675989057e-06, + "loss": 0.7808, + "step": 3934 + }, + { + "epoch": 0.58, + "learning_rate": 7.8495697442386e-06, + "loss": 0.7856, + "step": 3935 + }, + { + "epoch": 0.58, + "learning_rate": 7.844892305832126e-06, + "loss": 0.8079, + "step": 3936 + }, + { + "epoch": 0.58, + "learning_rate": 7.840215361842718e-06, + "loss": 0.9312, + "step": 3937 + }, + { + "epoch": 0.58, + "learning_rate": 7.835538913343347e-06, + "loss": 0.8457, + "step": 3938 + }, + { + "epoch": 0.58, + "learning_rate": 7.83086296140686e-06, + "loss": 0.8057, + "step": 3939 + }, + { + "epoch": 0.58, + "learning_rate": 7.826187507106e-06, + "loss": 0.7964, + "step": 3940 + }, + { + "epoch": 0.58, + "learning_rate": 7.821512551513395e-06, + "loss": 0.8242, + "step": 3941 + }, + { + "epoch": 0.58, + "learning_rate": 7.816838095701555e-06, + "loss": 0.8066, + "step": 3942 + }, + { + "epoch": 0.58, + "learning_rate": 7.812164140742875e-06, + "loss": 0.769, + "step": 3943 + }, + { + "epoch": 0.58, + "learning_rate": 7.80749068770964e-06, + "loss": 0.8057, + "step": 3944 + }, + { + "epoch": 0.58, + "learning_rate": 7.802817737674015e-06, + "loss": 0.7188, + "step": 3945 + }, + { + "epoch": 0.58, + "learning_rate": 7.798145291708054e-06, + "loss": 0.8223, + "step": 3946 + }, + { + "epoch": 0.58, + "learning_rate": 7.793473350883688e-06, + "loss": 0.8271, + "step": 3947 + }, + { + "epoch": 0.58, + "learning_rate": 7.788801916272739e-06, + "loss": 0.7788, + "step": 3948 + }, + { + "epoch": 0.58, + "learning_rate": 7.784130988946912e-06, + "loss": 0.8096, + "step": 3949 + }, + { + "epoch": 0.58, + "learning_rate": 7.779460569977796e-06, + "loss": 0.8198, + "step": 3950 + }, + { + "epoch": 0.58, + "learning_rate": 7.774790660436857e-06, + "loss": 0.8115, + "step": 3951 + }, + { + "epoch": 0.58, + "learning_rate": 7.770121261395451e-06, + "loss": 0.7693, + "step": 3952 + }, + { + "epoch": 0.58, + "learning_rate": 7.765452373924816e-06, + "loss": 0.8291, + "step": 3953 + }, + { + "epoch": 0.58, + "learning_rate": 7.76078399909607e-06, + "loss": 0.7827, + "step": 3954 + }, + { + "epoch": 0.58, + "learning_rate": 7.75611613798022e-06, + "loss": 0.7676, + "step": 3955 + }, + { + "epoch": 0.59, + "learning_rate": 7.751448791648136e-06, + "loss": 0.8208, + "step": 3956 + }, + { + "epoch": 0.59, + "learning_rate": 7.746781961170598e-06, + "loss": 0.7583, + "step": 3957 + }, + { + "epoch": 0.59, + "learning_rate": 7.742115647618246e-06, + "loss": 0.853, + "step": 3958 + }, + { + "epoch": 0.59, + "learning_rate": 7.737449852061618e-06, + "loss": 0.877, + "step": 3959 + }, + { + "epoch": 0.59, + "learning_rate": 7.73278457557111e-06, + "loss": 0.7881, + "step": 3960 + }, + { + "epoch": 0.59, + "learning_rate": 7.728119819217022e-06, + "loss": 0.7998, + "step": 3961 + }, + { + "epoch": 0.59, + "learning_rate": 7.723455584069524e-06, + "loss": 0.8457, + "step": 3962 + }, + { + "epoch": 0.59, + "learning_rate": 7.718791871198674e-06, + "loss": 0.8242, + "step": 3963 + }, + { + "epoch": 0.59, + "learning_rate": 7.714128681674392e-06, + "loss": 0.8101, + "step": 3964 + }, + { + "epoch": 0.59, + "learning_rate": 7.709466016566496e-06, + "loss": 0.8228, + "step": 3965 + }, + { + "epoch": 0.59, + "learning_rate": 7.70480387694468e-06, + "loss": 0.8369, + "step": 3966 + }, + { + "epoch": 0.59, + "learning_rate": 7.700142263878513e-06, + "loss": 0.8457, + "step": 3967 + }, + { + "epoch": 0.59, + "learning_rate": 7.69548117843745e-06, + "loss": 0.3354, + "step": 3968 + }, + { + "epoch": 0.59, + "learning_rate": 7.690820621690815e-06, + "loss": 0.7959, + "step": 3969 + }, + { + "epoch": 0.59, + "learning_rate": 7.686160594707814e-06, + "loss": 0.8037, + "step": 3970 + }, + { + "epoch": 0.59, + "learning_rate": 7.68150109855754e-06, + "loss": 0.8037, + "step": 3971 + }, + { + "epoch": 0.59, + "learning_rate": 7.67684213430896e-06, + "loss": 0.8203, + "step": 3972 + }, + { + "epoch": 0.59, + "learning_rate": 7.672183703030906e-06, + "loss": 0.7852, + "step": 3973 + }, + { + "epoch": 0.59, + "learning_rate": 7.667525805792108e-06, + "loss": 0.7656, + "step": 3974 + }, + { + "epoch": 0.59, + "learning_rate": 7.662868443661157e-06, + "loss": 0.7268, + "step": 3975 + }, + { + "epoch": 0.59, + "learning_rate": 7.65821161770654e-06, + "loss": 0.8223, + "step": 3976 + }, + { + "epoch": 0.59, + "learning_rate": 7.653555328996595e-06, + "loss": 0.8467, + "step": 3977 + }, + { + "epoch": 0.59, + "learning_rate": 7.648899578599558e-06, + "loss": 0.8159, + "step": 3978 + }, + { + "epoch": 0.59, + "learning_rate": 7.644244367583533e-06, + "loss": 0.7886, + "step": 3979 + }, + { + "epoch": 0.59, + "learning_rate": 7.639589697016508e-06, + "loss": 0.7583, + "step": 3980 + }, + { + "epoch": 0.59, + "learning_rate": 7.634935567966333e-06, + "loss": 0.8208, + "step": 3981 + }, + { + "epoch": 0.59, + "learning_rate": 7.630281981500742e-06, + "loss": 0.7612, + "step": 3982 + }, + { + "epoch": 0.59, + "learning_rate": 7.625628938687349e-06, + "loss": 0.8052, + "step": 3983 + }, + { + "epoch": 0.59, + "learning_rate": 7.620976440593632e-06, + "loss": 0.834, + "step": 3984 + }, + { + "epoch": 0.59, + "learning_rate": 7.616324488286959e-06, + "loss": 0.8081, + "step": 3985 + }, + { + "epoch": 0.59, + "learning_rate": 7.611673082834556e-06, + "loss": 0.7622, + "step": 3986 + }, + { + "epoch": 0.59, + "learning_rate": 7.607022225303537e-06, + "loss": 0.8237, + "step": 3987 + }, + { + "epoch": 0.59, + "learning_rate": 7.602371916760881e-06, + "loss": 0.834, + "step": 3988 + }, + { + "epoch": 0.59, + "learning_rate": 7.59772215827345e-06, + "loss": 0.7148, + "step": 3989 + }, + { + "epoch": 0.59, + "learning_rate": 7.593072950907969e-06, + "loss": 0.8174, + "step": 3990 + }, + { + "epoch": 0.59, + "learning_rate": 7.588424295731045e-06, + "loss": 0.771, + "step": 3991 + }, + { + "epoch": 0.59, + "learning_rate": 7.583776193809158e-06, + "loss": 0.7412, + "step": 3992 + }, + { + "epoch": 0.59, + "learning_rate": 7.579128646208658e-06, + "loss": 0.7583, + "step": 3993 + }, + { + "epoch": 0.59, + "learning_rate": 7.5744816539957655e-06, + "loss": 0.7954, + "step": 3994 + }, + { + "epoch": 0.59, + "learning_rate": 7.569835218236578e-06, + "loss": 0.8247, + "step": 3995 + }, + { + "epoch": 0.59, + "learning_rate": 7.5651893399970674e-06, + "loss": 0.8691, + "step": 3996 + }, + { + "epoch": 0.59, + "learning_rate": 7.560544020343071e-06, + "loss": 0.7529, + "step": 3997 + }, + { + "epoch": 0.59, + "learning_rate": 7.555899260340305e-06, + "loss": 0.8057, + "step": 3998 + }, + { + "epoch": 0.59, + "learning_rate": 7.551255061054348e-06, + "loss": 0.7549, + "step": 3999 + }, + { + "epoch": 0.59, + "learning_rate": 7.546611423550658e-06, + "loss": 0.8354, + "step": 4000 + }, + { + "epoch": 0.59, + "learning_rate": 7.541968348894564e-06, + "loss": 0.7759, + "step": 4001 + }, + { + "epoch": 0.59, + "learning_rate": 7.537325838151263e-06, + "loss": 0.8228, + "step": 4002 + }, + { + "epoch": 0.59, + "learning_rate": 7.53268389238582e-06, + "loss": 0.8057, + "step": 4003 + }, + { + "epoch": 0.59, + "learning_rate": 7.528042512663174e-06, + "loss": 0.7495, + "step": 4004 + }, + { + "epoch": 0.59, + "learning_rate": 7.523401700048135e-06, + "loss": 0.8525, + "step": 4005 + }, + { + "epoch": 0.59, + "learning_rate": 7.518761455605384e-06, + "loss": 0.8071, + "step": 4006 + }, + { + "epoch": 0.59, + "learning_rate": 7.5141217803994645e-06, + "loss": 0.8013, + "step": 4007 + }, + { + "epoch": 0.59, + "learning_rate": 7.509482675494797e-06, + "loss": 0.8203, + "step": 4008 + }, + { + "epoch": 0.59, + "learning_rate": 7.504844141955667e-06, + "loss": 0.8188, + "step": 4009 + }, + { + "epoch": 0.59, + "learning_rate": 7.500206180846235e-06, + "loss": 0.8091, + "step": 4010 + }, + { + "epoch": 0.59, + "learning_rate": 7.495568793230516e-06, + "loss": 0.7524, + "step": 4011 + }, + { + "epoch": 0.59, + "learning_rate": 7.490931980172409e-06, + "loss": 0.793, + "step": 4012 + }, + { + "epoch": 0.59, + "learning_rate": 7.486295742735674e-06, + "loss": 0.7866, + "step": 4013 + }, + { + "epoch": 0.59, + "learning_rate": 7.481660081983942e-06, + "loss": 0.8306, + "step": 4014 + }, + { + "epoch": 0.59, + "learning_rate": 7.477024998980707e-06, + "loss": 0.2963, + "step": 4015 + }, + { + "epoch": 0.59, + "learning_rate": 7.4723904947893325e-06, + "loss": 0.7969, + "step": 4016 + }, + { + "epoch": 0.59, + "learning_rate": 7.4677565704730515e-06, + "loss": 0.8594, + "step": 4017 + }, + { + "epoch": 0.59, + "learning_rate": 7.463123227094962e-06, + "loss": 0.8198, + "step": 4018 + }, + { + "epoch": 0.59, + "learning_rate": 7.458490465718029e-06, + "loss": 0.8296, + "step": 4019 + }, + { + "epoch": 0.59, + "learning_rate": 7.453858287405082e-06, + "loss": 0.8223, + "step": 4020 + }, + { + "epoch": 0.59, + "learning_rate": 7.449226693218823e-06, + "loss": 0.7983, + "step": 4021 + }, + { + "epoch": 0.59, + "learning_rate": 7.444595684221811e-06, + "loss": 0.7217, + "step": 4022 + }, + { + "epoch": 0.59, + "learning_rate": 7.43996526147648e-06, + "loss": 0.8237, + "step": 4023 + }, + { + "epoch": 0.6, + "learning_rate": 7.435335426045121e-06, + "loss": 0.8047, + "step": 4024 + }, + { + "epoch": 0.6, + "learning_rate": 7.430706178989895e-06, + "loss": 0.8438, + "step": 4025 + }, + { + "epoch": 0.6, + "learning_rate": 7.4260775213728275e-06, + "loss": 0.8047, + "step": 4026 + }, + { + "epoch": 0.6, + "learning_rate": 7.421449454255814e-06, + "loss": 0.8247, + "step": 4027 + }, + { + "epoch": 0.6, + "learning_rate": 7.416821978700597e-06, + "loss": 0.8237, + "step": 4028 + }, + { + "epoch": 0.6, + "learning_rate": 7.412195095768805e-06, + "loss": 0.3191, + "step": 4029 + }, + { + "epoch": 0.6, + "learning_rate": 7.4075688065219186e-06, + "loss": 0.7954, + "step": 4030 + }, + { + "epoch": 0.6, + "learning_rate": 7.402943112021284e-06, + "loss": 0.3041, + "step": 4031 + }, + { + "epoch": 0.6, + "learning_rate": 7.398318013328112e-06, + "loss": 0.7368, + "step": 4032 + }, + { + "epoch": 0.6, + "learning_rate": 7.393693511503475e-06, + "loss": 0.8652, + "step": 4033 + }, + { + "epoch": 0.6, + "learning_rate": 7.38906960760831e-06, + "loss": 0.7954, + "step": 4034 + }, + { + "epoch": 0.6, + "learning_rate": 7.384446302703416e-06, + "loss": 0.7974, + "step": 4035 + }, + { + "epoch": 0.6, + "learning_rate": 7.3798235978494605e-06, + "loss": 0.8389, + "step": 4036 + }, + { + "epoch": 0.6, + "learning_rate": 7.375201494106956e-06, + "loss": 0.7856, + "step": 4037 + }, + { + "epoch": 0.6, + "learning_rate": 7.3705799925362985e-06, + "loss": 0.7759, + "step": 4038 + }, + { + "epoch": 0.6, + "learning_rate": 7.365959094197734e-06, + "loss": 0.791, + "step": 4039 + }, + { + "epoch": 0.6, + "learning_rate": 7.361338800151376e-06, + "loss": 0.7783, + "step": 4040 + }, + { + "epoch": 0.6, + "learning_rate": 7.356719111457187e-06, + "loss": 0.8618, + "step": 4041 + }, + { + "epoch": 0.6, + "learning_rate": 7.352100029175002e-06, + "loss": 0.8281, + "step": 4042 + }, + { + "epoch": 0.6, + "learning_rate": 7.347481554364519e-06, + "loss": 0.8252, + "step": 4043 + }, + { + "epoch": 0.6, + "learning_rate": 7.342863688085291e-06, + "loss": 0.8184, + "step": 4044 + }, + { + "epoch": 0.6, + "learning_rate": 7.338246431396734e-06, + "loss": 0.7925, + "step": 4045 + }, + { + "epoch": 0.6, + "learning_rate": 7.3336297853581115e-06, + "loss": 0.2821, + "step": 4046 + }, + { + "epoch": 0.6, + "learning_rate": 7.329013751028569e-06, + "loss": 0.8423, + "step": 4047 + }, + { + "epoch": 0.6, + "learning_rate": 7.324398329467096e-06, + "loss": 0.7944, + "step": 4048 + }, + { + "epoch": 0.6, + "learning_rate": 7.319783521732551e-06, + "loss": 0.8037, + "step": 4049 + }, + { + "epoch": 0.6, + "learning_rate": 7.31516932888364e-06, + "loss": 0.8364, + "step": 4050 + }, + { + "epoch": 0.6, + "learning_rate": 7.310555751978934e-06, + "loss": 0.79, + "step": 4051 + }, + { + "epoch": 0.6, + "learning_rate": 7.305942792076867e-06, + "loss": 0.7832, + "step": 4052 + }, + { + "epoch": 0.6, + "learning_rate": 7.301330450235733e-06, + "loss": 0.8027, + "step": 4053 + }, + { + "epoch": 0.6, + "learning_rate": 7.2967187275136685e-06, + "loss": 0.8193, + "step": 4054 + }, + { + "epoch": 0.6, + "learning_rate": 7.292107624968683e-06, + "loss": 0.7744, + "step": 4055 + }, + { + "epoch": 0.6, + "learning_rate": 7.287497143658635e-06, + "loss": 0.8389, + "step": 4056 + }, + { + "epoch": 0.6, + "learning_rate": 7.282887284641257e-06, + "loss": 0.8184, + "step": 4057 + }, + { + "epoch": 0.6, + "learning_rate": 7.278278048974112e-06, + "loss": 0.7676, + "step": 4058 + }, + { + "epoch": 0.6, + "learning_rate": 7.273669437714641e-06, + "loss": 0.8013, + "step": 4059 + }, + { + "epoch": 0.6, + "learning_rate": 7.2690614519201315e-06, + "loss": 0.7695, + "step": 4060 + }, + { + "epoch": 0.6, + "learning_rate": 7.264454092647735e-06, + "loss": 0.79, + "step": 4061 + }, + { + "epoch": 0.6, + "learning_rate": 7.25984736095446e-06, + "loss": 0.8027, + "step": 4062 + }, + { + "epoch": 0.6, + "learning_rate": 7.255241257897154e-06, + "loss": 0.3135, + "step": 4063 + }, + { + "epoch": 0.6, + "learning_rate": 7.2506357845325405e-06, + "loss": 0.7925, + "step": 4064 + }, + { + "epoch": 0.6, + "learning_rate": 7.246030941917186e-06, + "loss": 0.8081, + "step": 4065 + }, + { + "epoch": 0.6, + "learning_rate": 7.241426731107526e-06, + "loss": 0.8071, + "step": 4066 + }, + { + "epoch": 0.6, + "learning_rate": 7.236823153159832e-06, + "loss": 0.7803, + "step": 4067 + }, + { + "epoch": 0.6, + "learning_rate": 7.232220209130245e-06, + "loss": 0.7236, + "step": 4068 + }, + { + "epoch": 0.6, + "learning_rate": 7.227617900074755e-06, + "loss": 0.8428, + "step": 4069 + }, + { + "epoch": 0.6, + "learning_rate": 7.223016227049209e-06, + "loss": 0.7998, + "step": 4070 + }, + { + "epoch": 0.6, + "learning_rate": 7.218415191109303e-06, + "loss": 0.7686, + "step": 4071 + }, + { + "epoch": 0.6, + "learning_rate": 7.213814793310589e-06, + "loss": 0.7769, + "step": 4072 + }, + { + "epoch": 0.6, + "learning_rate": 7.209215034708481e-06, + "loss": 0.856, + "step": 4073 + }, + { + "epoch": 0.6, + "learning_rate": 7.204615916358234e-06, + "loss": 0.8018, + "step": 4074 + }, + { + "epoch": 0.6, + "learning_rate": 7.2000174393149615e-06, + "loss": 0.8354, + "step": 4075 + }, + { + "epoch": 0.6, + "learning_rate": 7.195419604633629e-06, + "loss": 0.833, + "step": 4076 + }, + { + "epoch": 0.6, + "learning_rate": 7.190822413369055e-06, + "loss": 0.7788, + "step": 4077 + }, + { + "epoch": 0.6, + "learning_rate": 7.186225866575913e-06, + "loss": 0.8657, + "step": 4078 + }, + { + "epoch": 0.6, + "learning_rate": 7.1816299653087276e-06, + "loss": 0.2877, + "step": 4079 + }, + { + "epoch": 0.6, + "learning_rate": 7.177034710621869e-06, + "loss": 0.7998, + "step": 4080 + }, + { + "epoch": 0.6, + "learning_rate": 7.172440103569566e-06, + "loss": 0.7668, + "step": 4081 + }, + { + "epoch": 0.6, + "learning_rate": 7.167846145205897e-06, + "loss": 0.7871, + "step": 4082 + }, + { + "epoch": 0.6, + "learning_rate": 7.163252836584794e-06, + "loss": 0.7686, + "step": 4083 + }, + { + "epoch": 0.6, + "learning_rate": 7.158660178760033e-06, + "loss": 0.834, + "step": 4084 + }, + { + "epoch": 0.6, + "learning_rate": 7.154068172785248e-06, + "loss": 0.8193, + "step": 4085 + }, + { + "epoch": 0.6, + "learning_rate": 7.149476819713919e-06, + "loss": 0.7695, + "step": 4086 + }, + { + "epoch": 0.6, + "learning_rate": 7.144886120599379e-06, + "loss": 0.8208, + "step": 4087 + }, + { + "epoch": 0.6, + "learning_rate": 7.140296076494809e-06, + "loss": 0.7979, + "step": 4088 + }, + { + "epoch": 0.6, + "learning_rate": 7.135706688453239e-06, + "loss": 0.7676, + "step": 4089 + }, + { + "epoch": 0.6, + "learning_rate": 7.131117957527553e-06, + "loss": 0.8413, + "step": 4090 + }, + { + "epoch": 0.6, + "learning_rate": 7.126529884770479e-06, + "loss": 0.8164, + "step": 4091 + }, + { + "epoch": 0.61, + "learning_rate": 7.121942471234598e-06, + "loss": 0.3176, + "step": 4092 + }, + { + "epoch": 0.61, + "learning_rate": 7.117355717972334e-06, + "loss": 0.8008, + "step": 4093 + }, + { + "epoch": 0.61, + "learning_rate": 7.112769626035968e-06, + "loss": 0.8413, + "step": 4094 + }, + { + "epoch": 0.61, + "learning_rate": 7.108184196477622e-06, + "loss": 0.7092, + "step": 4095 + }, + { + "epoch": 0.61, + "learning_rate": 7.103599430349271e-06, + "loss": 0.7915, + "step": 4096 + }, + { + "epoch": 0.61, + "learning_rate": 7.099015328702734e-06, + "loss": 0.7749, + "step": 4097 + }, + { + "epoch": 0.61, + "learning_rate": 7.0944318925896775e-06, + "loss": 0.7847, + "step": 4098 + }, + { + "epoch": 0.61, + "learning_rate": 7.08984912306162e-06, + "loss": 0.9067, + "step": 4099 + }, + { + "epoch": 0.61, + "learning_rate": 7.0852670211699236e-06, + "loss": 0.8457, + "step": 4100 + }, + { + "epoch": 0.61, + "learning_rate": 7.0806855879657964e-06, + "loss": 0.7925, + "step": 4101 + }, + { + "epoch": 0.61, + "learning_rate": 7.076104824500294e-06, + "loss": 0.7856, + "step": 4102 + }, + { + "epoch": 0.61, + "learning_rate": 7.07152473182432e-06, + "loss": 0.78, + "step": 4103 + }, + { + "epoch": 0.61, + "learning_rate": 7.066945310988623e-06, + "loss": 0.7583, + "step": 4104 + }, + { + "epoch": 0.61, + "learning_rate": 7.062366563043795e-06, + "loss": 0.8271, + "step": 4105 + }, + { + "epoch": 0.61, + "learning_rate": 7.057788489040278e-06, + "loss": 0.7729, + "step": 4106 + }, + { + "epoch": 0.61, + "learning_rate": 7.053211090028357e-06, + "loss": 0.8232, + "step": 4107 + }, + { + "epoch": 0.61, + "learning_rate": 7.048634367058163e-06, + "loss": 0.8047, + "step": 4108 + }, + { + "epoch": 0.61, + "learning_rate": 7.044058321179671e-06, + "loss": 0.7217, + "step": 4109 + }, + { + "epoch": 0.61, + "learning_rate": 7.039482953442699e-06, + "loss": 0.8296, + "step": 4110 + }, + { + "epoch": 0.61, + "learning_rate": 7.0349082648969135e-06, + "loss": 0.7705, + "step": 4111 + }, + { + "epoch": 0.61, + "learning_rate": 7.030334256591822e-06, + "loss": 0.8608, + "step": 4112 + }, + { + "epoch": 0.61, + "learning_rate": 7.025760929576779e-06, + "loss": 0.8369, + "step": 4113 + }, + { + "epoch": 0.61, + "learning_rate": 7.02118828490098e-06, + "loss": 0.7749, + "step": 4114 + }, + { + "epoch": 0.61, + "learning_rate": 7.016616323613462e-06, + "loss": 0.7451, + "step": 4115 + }, + { + "epoch": 0.61, + "learning_rate": 7.012045046763111e-06, + "loss": 0.8042, + "step": 4116 + }, + { + "epoch": 0.61, + "learning_rate": 7.007474455398655e-06, + "loss": 0.791, + "step": 4117 + }, + { + "epoch": 0.61, + "learning_rate": 7.002904550568654e-06, + "loss": 0.7515, + "step": 4118 + }, + { + "epoch": 0.61, + "learning_rate": 6.9983353333215275e-06, + "loss": 0.7964, + "step": 4119 + }, + { + "epoch": 0.61, + "learning_rate": 6.993766804705526e-06, + "loss": 0.8184, + "step": 4120 + }, + { + "epoch": 0.61, + "learning_rate": 6.989198965768749e-06, + "loss": 0.8877, + "step": 4121 + }, + { + "epoch": 0.61, + "learning_rate": 6.984631817559126e-06, + "loss": 0.8638, + "step": 4122 + }, + { + "epoch": 0.61, + "learning_rate": 6.980065361124437e-06, + "loss": 0.7866, + "step": 4123 + }, + { + "epoch": 0.61, + "learning_rate": 6.975499597512307e-06, + "loss": 0.8262, + "step": 4124 + }, + { + "epoch": 0.61, + "learning_rate": 6.970934527770195e-06, + "loss": 0.8115, + "step": 4125 + }, + { + "epoch": 0.61, + "learning_rate": 6.966370152945407e-06, + "loss": 0.8149, + "step": 4126 + }, + { + "epoch": 0.61, + "learning_rate": 6.961806474085075e-06, + "loss": 0.8257, + "step": 4127 + }, + { + "epoch": 0.61, + "learning_rate": 6.957243492236189e-06, + "loss": 0.7871, + "step": 4128 + }, + { + "epoch": 0.61, + "learning_rate": 6.952681208445571e-06, + "loss": 0.7852, + "step": 4129 + }, + { + "epoch": 0.61, + "learning_rate": 6.948119623759888e-06, + "loss": 0.8433, + "step": 4130 + }, + { + "epoch": 0.61, + "learning_rate": 6.943558739225633e-06, + "loss": 0.7241, + "step": 4131 + }, + { + "epoch": 0.61, + "learning_rate": 6.938998555889152e-06, + "loss": 0.8218, + "step": 4132 + }, + { + "epoch": 0.61, + "learning_rate": 6.934439074796627e-06, + "loss": 0.7708, + "step": 4133 + }, + { + "epoch": 0.61, + "learning_rate": 6.929880296994083e-06, + "loss": 0.7705, + "step": 4134 + }, + { + "epoch": 0.61, + "learning_rate": 6.925322223527366e-06, + "loss": 0.8013, + "step": 4135 + }, + { + "epoch": 0.61, + "learning_rate": 6.9207648554421825e-06, + "loss": 0.7842, + "step": 4136 + }, + { + "epoch": 0.61, + "learning_rate": 6.916208193784062e-06, + "loss": 0.833, + "step": 4137 + }, + { + "epoch": 0.61, + "learning_rate": 6.911652239598385e-06, + "loss": 0.8062, + "step": 4138 + }, + { + "epoch": 0.61, + "learning_rate": 6.907096993930355e-06, + "loss": 0.8477, + "step": 4139 + }, + { + "epoch": 0.61, + "learning_rate": 6.902542457825021e-06, + "loss": 0.7729, + "step": 4140 + }, + { + "epoch": 0.61, + "learning_rate": 6.897988632327268e-06, + "loss": 0.7881, + "step": 4141 + }, + { + "epoch": 0.61, + "learning_rate": 6.8934355184818205e-06, + "loss": 0.7773, + "step": 4142 + }, + { + "epoch": 0.61, + "learning_rate": 6.888883117333243e-06, + "loss": 0.854, + "step": 4143 + }, + { + "epoch": 0.61, + "learning_rate": 6.884331429925919e-06, + "loss": 0.8369, + "step": 4144 + }, + { + "epoch": 0.61, + "learning_rate": 6.879780457304087e-06, + "loss": 0.7788, + "step": 4145 + }, + { + "epoch": 0.61, + "learning_rate": 6.875230200511812e-06, + "loss": 0.8125, + "step": 4146 + }, + { + "epoch": 0.61, + "learning_rate": 6.870680660593007e-06, + "loss": 0.8018, + "step": 4147 + }, + { + "epoch": 0.61, + "learning_rate": 6.8661318385913974e-06, + "loss": 0.8145, + "step": 4148 + }, + { + "epoch": 0.61, + "learning_rate": 6.861583735550565e-06, + "loss": 0.7808, + "step": 4149 + }, + { + "epoch": 0.61, + "learning_rate": 6.857036352513918e-06, + "loss": 0.7905, + "step": 4150 + }, + { + "epoch": 0.61, + "learning_rate": 6.852489690524703e-06, + "loss": 0.8149, + "step": 4151 + }, + { + "epoch": 0.61, + "learning_rate": 6.8479437506259936e-06, + "loss": 0.7842, + "step": 4152 + }, + { + "epoch": 0.61, + "learning_rate": 6.8433985338607075e-06, + "loss": 0.7251, + "step": 4153 + }, + { + "epoch": 0.61, + "learning_rate": 6.83885404127159e-06, + "loss": 0.7549, + "step": 4154 + }, + { + "epoch": 0.61, + "learning_rate": 6.834310273901224e-06, + "loss": 0.7471, + "step": 4155 + }, + { + "epoch": 0.61, + "learning_rate": 6.829767232792024e-06, + "loss": 0.7192, + "step": 4156 + }, + { + "epoch": 0.61, + "learning_rate": 6.8252249189862355e-06, + "loss": 0.7769, + "step": 4157 + }, + { + "epoch": 0.61, + "learning_rate": 6.820683333525942e-06, + "loss": 0.8291, + "step": 4158 + }, + { + "epoch": 0.62, + "learning_rate": 6.816142477453056e-06, + "loss": 0.7446, + "step": 4159 + }, + { + "epoch": 0.62, + "learning_rate": 6.811602351809328e-06, + "loss": 0.8135, + "step": 4160 + }, + { + "epoch": 0.62, + "learning_rate": 6.807062957636334e-06, + "loss": 0.7573, + "step": 4161 + }, + { + "epoch": 0.62, + "learning_rate": 6.802524295975486e-06, + "loss": 0.812, + "step": 4162 + }, + { + "epoch": 0.62, + "learning_rate": 6.797986367868028e-06, + "loss": 0.7529, + "step": 4163 + }, + { + "epoch": 0.62, + "learning_rate": 6.793449174355038e-06, + "loss": 0.7334, + "step": 4164 + }, + { + "epoch": 0.62, + "learning_rate": 6.788912716477417e-06, + "loss": 0.7598, + "step": 4165 + }, + { + "epoch": 0.62, + "learning_rate": 6.784376995275905e-06, + "loss": 0.7349, + "step": 4166 + }, + { + "epoch": 0.62, + "learning_rate": 6.779842011791074e-06, + "loss": 0.7778, + "step": 4167 + }, + { + "epoch": 0.62, + "learning_rate": 6.775307767063322e-06, + "loss": 0.749, + "step": 4168 + }, + { + "epoch": 0.62, + "learning_rate": 6.770774262132875e-06, + "loss": 0.7266, + "step": 4169 + }, + { + "epoch": 0.62, + "learning_rate": 6.766241498039798e-06, + "loss": 0.7798, + "step": 4170 + }, + { + "epoch": 0.62, + "learning_rate": 6.76170947582398e-06, + "loss": 0.813, + "step": 4171 + }, + { + "epoch": 0.62, + "learning_rate": 6.7571781965251405e-06, + "loss": 0.8599, + "step": 4172 + }, + { + "epoch": 0.62, + "learning_rate": 6.752647661182834e-06, + "loss": 0.8115, + "step": 4173 + }, + { + "epoch": 0.62, + "learning_rate": 6.748117870836433e-06, + "loss": 0.8008, + "step": 4174 + }, + { + "epoch": 0.62, + "learning_rate": 6.743588826525148e-06, + "loss": 0.7871, + "step": 4175 + }, + { + "epoch": 0.62, + "learning_rate": 6.739060529288017e-06, + "loss": 0.813, + "step": 4176 + }, + { + "epoch": 0.62, + "learning_rate": 6.734532980163908e-06, + "loss": 0.8281, + "step": 4177 + }, + { + "epoch": 0.62, + "learning_rate": 6.730006180191511e-06, + "loss": 0.7939, + "step": 4178 + }, + { + "epoch": 0.62, + "learning_rate": 6.725480130409347e-06, + "loss": 0.8135, + "step": 4179 + }, + { + "epoch": 0.62, + "learning_rate": 6.720954831855771e-06, + "loss": 0.3186, + "step": 4180 + }, + { + "epoch": 0.62, + "learning_rate": 6.71643028556896e-06, + "loss": 0.8052, + "step": 4181 + }, + { + "epoch": 0.62, + "learning_rate": 6.7119064925869145e-06, + "loss": 0.772, + "step": 4182 + }, + { + "epoch": 0.62, + "learning_rate": 6.707383453947471e-06, + "loss": 0.7627, + "step": 4183 + }, + { + "epoch": 0.62, + "learning_rate": 6.702861170688288e-06, + "loss": 0.8105, + "step": 4184 + }, + { + "epoch": 0.62, + "learning_rate": 6.698339643846854e-06, + "loss": 0.8389, + "step": 4185 + }, + { + "epoch": 0.62, + "learning_rate": 6.693818874460475e-06, + "loss": 0.7583, + "step": 4186 + }, + { + "epoch": 0.62, + "learning_rate": 6.6892988635662935e-06, + "loss": 0.792, + "step": 4187 + }, + { + "epoch": 0.62, + "learning_rate": 6.6847796122012754e-06, + "loss": 0.835, + "step": 4188 + }, + { + "epoch": 0.62, + "learning_rate": 6.680261121402207e-06, + "loss": 0.7212, + "step": 4189 + }, + { + "epoch": 0.62, + "learning_rate": 6.6757433922057114e-06, + "loss": 0.8994, + "step": 4190 + }, + { + "epoch": 0.62, + "learning_rate": 6.671226425648223e-06, + "loss": 0.8218, + "step": 4191 + }, + { + "epoch": 0.62, + "learning_rate": 6.666710222766009e-06, + "loss": 0.8965, + "step": 4192 + }, + { + "epoch": 0.62, + "learning_rate": 6.662194784595164e-06, + "loss": 0.7524, + "step": 4193 + }, + { + "epoch": 0.62, + "learning_rate": 6.657680112171603e-06, + "loss": 0.7729, + "step": 4194 + }, + { + "epoch": 0.62, + "learning_rate": 6.65316620653106e-06, + "loss": 0.8315, + "step": 4195 + }, + { + "epoch": 0.62, + "learning_rate": 6.648653068709105e-06, + "loss": 0.8706, + "step": 4196 + }, + { + "epoch": 0.62, + "learning_rate": 6.644140699741124e-06, + "loss": 0.8413, + "step": 4197 + }, + { + "epoch": 0.62, + "learning_rate": 6.639629100662332e-06, + "loss": 0.7622, + "step": 4198 + }, + { + "epoch": 0.62, + "learning_rate": 6.635118272507756e-06, + "loss": 0.7925, + "step": 4199 + }, + { + "epoch": 0.62, + "learning_rate": 6.63060821631226e-06, + "loss": 0.8281, + "step": 4200 + }, + { + "epoch": 0.62, + "learning_rate": 6.626098933110523e-06, + "loss": 0.8306, + "step": 4201 + }, + { + "epoch": 0.62, + "learning_rate": 6.62159042393705e-06, + "loss": 0.8809, + "step": 4202 + }, + { + "epoch": 0.62, + "learning_rate": 6.617082689826169e-06, + "loss": 0.7139, + "step": 4203 + }, + { + "epoch": 0.62, + "learning_rate": 6.6125757318120185e-06, + "loss": 0.8232, + "step": 4204 + }, + { + "epoch": 0.62, + "learning_rate": 6.6080695509285795e-06, + "loss": 0.8037, + "step": 4205 + }, + { + "epoch": 0.62, + "learning_rate": 6.603564148209638e-06, + "loss": 0.8105, + "step": 4206 + }, + { + "epoch": 0.62, + "learning_rate": 6.599059524688813e-06, + "loss": 0.8105, + "step": 4207 + }, + { + "epoch": 0.62, + "learning_rate": 6.594555681399532e-06, + "loss": 0.7778, + "step": 4208 + }, + { + "epoch": 0.62, + "learning_rate": 6.59005261937505e-06, + "loss": 0.7954, + "step": 4209 + }, + { + "epoch": 0.62, + "learning_rate": 6.58555033964845e-06, + "loss": 0.8032, + "step": 4210 + }, + { + "epoch": 0.62, + "learning_rate": 6.581048843252629e-06, + "loss": 0.79, + "step": 4211 + }, + { + "epoch": 0.62, + "learning_rate": 6.576548131220297e-06, + "loss": 0.8057, + "step": 4212 + }, + { + "epoch": 0.62, + "learning_rate": 6.572048204583993e-06, + "loss": 0.833, + "step": 4213 + }, + { + "epoch": 0.62, + "learning_rate": 6.567549064376078e-06, + "loss": 0.8252, + "step": 4214 + }, + { + "epoch": 0.62, + "learning_rate": 6.56305071162873e-06, + "loss": 0.855, + "step": 4215 + }, + { + "epoch": 0.62, + "learning_rate": 6.5585531473739385e-06, + "loss": 0.7939, + "step": 4216 + }, + { + "epoch": 0.62, + "learning_rate": 6.55405637264352e-06, + "loss": 0.7876, + "step": 4217 + }, + { + "epoch": 0.62, + "learning_rate": 6.5495603884691095e-06, + "loss": 0.79, + "step": 4218 + }, + { + "epoch": 0.62, + "learning_rate": 6.5450651958821605e-06, + "loss": 0.8276, + "step": 4219 + }, + { + "epoch": 0.62, + "learning_rate": 6.54057079591395e-06, + "loss": 0.7939, + "step": 4220 + }, + { + "epoch": 0.62, + "learning_rate": 6.536077189595554e-06, + "loss": 0.8384, + "step": 4221 + }, + { + "epoch": 0.62, + "learning_rate": 6.5315843779578865e-06, + "loss": 0.7334, + "step": 4222 + }, + { + "epoch": 0.62, + "learning_rate": 6.52709236203167e-06, + "loss": 0.7449, + "step": 4223 + }, + { + "epoch": 0.62, + "learning_rate": 6.522601142847456e-06, + "loss": 0.7656, + "step": 4224 + }, + { + "epoch": 0.62, + "learning_rate": 6.51811072143559e-06, + "loss": 0.77, + "step": 4225 + }, + { + "epoch": 0.62, + "learning_rate": 6.513621098826255e-06, + "loss": 0.7861, + "step": 4226 + }, + { + "epoch": 0.63, + "learning_rate": 6.509132276049441e-06, + "loss": 0.7083, + "step": 4227 + }, + { + "epoch": 0.63, + "learning_rate": 6.504644254134969e-06, + "loss": 0.8198, + "step": 4228 + }, + { + "epoch": 0.63, + "learning_rate": 6.50015703411245e-06, + "loss": 0.8325, + "step": 4229 + }, + { + "epoch": 0.63, + "learning_rate": 6.495670617011332e-06, + "loss": 0.8428, + "step": 4230 + }, + { + "epoch": 0.63, + "learning_rate": 6.491185003860874e-06, + "loss": 0.8911, + "step": 4231 + }, + { + "epoch": 0.63, + "learning_rate": 6.48670019569015e-06, + "loss": 0.8301, + "step": 4232 + }, + { + "epoch": 0.63, + "learning_rate": 6.482216193528044e-06, + "loss": 0.811, + "step": 4233 + }, + { + "epoch": 0.63, + "learning_rate": 6.477732998403261e-06, + "loss": 0.8149, + "step": 4234 + }, + { + "epoch": 0.63, + "learning_rate": 6.4732506113443215e-06, + "loss": 0.8481, + "step": 4235 + }, + { + "epoch": 0.63, + "learning_rate": 6.468769033379559e-06, + "loss": 0.7993, + "step": 4236 + }, + { + "epoch": 0.63, + "learning_rate": 6.4642882655371216e-06, + "loss": 0.7666, + "step": 4237 + }, + { + "epoch": 0.63, + "learning_rate": 6.459808308844967e-06, + "loss": 0.7832, + "step": 4238 + }, + { + "epoch": 0.63, + "learning_rate": 6.455329164330872e-06, + "loss": 0.9014, + "step": 4239 + }, + { + "epoch": 0.63, + "learning_rate": 6.450850833022429e-06, + "loss": 0.855, + "step": 4240 + }, + { + "epoch": 0.63, + "learning_rate": 6.44637331594704e-06, + "loss": 0.3093, + "step": 4241 + }, + { + "epoch": 0.63, + "learning_rate": 6.441896614131918e-06, + "loss": 0.8115, + "step": 4242 + }, + { + "epoch": 0.63, + "learning_rate": 6.437420728604091e-06, + "loss": 0.8066, + "step": 4243 + }, + { + "epoch": 0.63, + "learning_rate": 6.432945660390404e-06, + "loss": 0.3104, + "step": 4244 + }, + { + "epoch": 0.63, + "learning_rate": 6.428471410517513e-06, + "loss": 0.7754, + "step": 4245 + }, + { + "epoch": 0.63, + "learning_rate": 6.423997980011878e-06, + "loss": 0.7437, + "step": 4246 + }, + { + "epoch": 0.63, + "learning_rate": 6.419525369899782e-06, + "loss": 0.7563, + "step": 4247 + }, + { + "epoch": 0.63, + "learning_rate": 6.415053581207314e-06, + "loss": 0.8521, + "step": 4248 + }, + { + "epoch": 0.63, + "learning_rate": 6.410582614960375e-06, + "loss": 0.8315, + "step": 4249 + }, + { + "epoch": 0.63, + "learning_rate": 6.406112472184678e-06, + "loss": 0.792, + "step": 4250 + }, + { + "epoch": 0.63, + "learning_rate": 6.4016431539057476e-06, + "loss": 0.709, + "step": 4251 + }, + { + "epoch": 0.63, + "learning_rate": 6.397174661148919e-06, + "loss": 0.7949, + "step": 4252 + }, + { + "epoch": 0.63, + "learning_rate": 6.392706994939338e-06, + "loss": 0.811, + "step": 4253 + }, + { + "epoch": 0.63, + "learning_rate": 6.388240156301961e-06, + "loss": 0.7993, + "step": 4254 + }, + { + "epoch": 0.63, + "learning_rate": 6.383774146261552e-06, + "loss": 0.8101, + "step": 4255 + }, + { + "epoch": 0.63, + "learning_rate": 6.379308965842689e-06, + "loss": 0.7847, + "step": 4256 + }, + { + "epoch": 0.63, + "learning_rate": 6.374844616069755e-06, + "loss": 0.7983, + "step": 4257 + }, + { + "epoch": 0.63, + "learning_rate": 6.370381097966949e-06, + "loss": 0.8198, + "step": 4258 + }, + { + "epoch": 0.63, + "learning_rate": 6.3659184125582716e-06, + "loss": 0.769, + "step": 4259 + }, + { + "epoch": 0.63, + "learning_rate": 6.361456560867538e-06, + "loss": 0.7871, + "step": 4260 + }, + { + "epoch": 0.63, + "learning_rate": 6.356995543918369e-06, + "loss": 0.7505, + "step": 4261 + }, + { + "epoch": 0.63, + "learning_rate": 6.352535362734199e-06, + "loss": 0.8574, + "step": 4262 + }, + { + "epoch": 0.63, + "learning_rate": 6.34807601833826e-06, + "loss": 0.7773, + "step": 4263 + }, + { + "epoch": 0.63, + "learning_rate": 6.343617511753604e-06, + "loss": 0.7759, + "step": 4264 + }, + { + "epoch": 0.63, + "learning_rate": 6.339159844003085e-06, + "loss": 0.8237, + "step": 4265 + }, + { + "epoch": 0.63, + "learning_rate": 6.33470301610936e-06, + "loss": 0.8433, + "step": 4266 + }, + { + "epoch": 0.63, + "learning_rate": 6.330247029094908e-06, + "loss": 0.7681, + "step": 4267 + }, + { + "epoch": 0.63, + "learning_rate": 6.325791883981997e-06, + "loss": 0.7979, + "step": 4268 + }, + { + "epoch": 0.63, + "learning_rate": 6.3213375817927125e-06, + "loss": 0.7734, + "step": 4269 + }, + { + "epoch": 0.63, + "learning_rate": 6.316884123548947e-06, + "loss": 0.8125, + "step": 4270 + }, + { + "epoch": 0.63, + "learning_rate": 6.3124315102723965e-06, + "loss": 0.7661, + "step": 4271 + }, + { + "epoch": 0.63, + "learning_rate": 6.30797974298456e-06, + "loss": 0.7891, + "step": 4272 + }, + { + "epoch": 0.63, + "learning_rate": 6.30352882270675e-06, + "loss": 0.7646, + "step": 4273 + }, + { + "epoch": 0.63, + "learning_rate": 6.299078750460077e-06, + "loss": 0.769, + "step": 4274 + }, + { + "epoch": 0.63, + "learning_rate": 6.294629527265468e-06, + "loss": 0.7935, + "step": 4275 + }, + { + "epoch": 0.63, + "learning_rate": 6.290181154143635e-06, + "loss": 0.8311, + "step": 4276 + }, + { + "epoch": 0.63, + "learning_rate": 6.285733632115118e-06, + "loss": 0.8618, + "step": 4277 + }, + { + "epoch": 0.63, + "learning_rate": 6.281286962200251e-06, + "loss": 0.8188, + "step": 4278 + }, + { + "epoch": 0.63, + "learning_rate": 6.2768411454191715e-06, + "loss": 0.7715, + "step": 4279 + }, + { + "epoch": 0.63, + "learning_rate": 6.272396182791817e-06, + "loss": 0.8076, + "step": 4280 + }, + { + "epoch": 0.63, + "learning_rate": 6.267952075337942e-06, + "loss": 0.8301, + "step": 4281 + }, + { + "epoch": 0.63, + "learning_rate": 6.263508824077096e-06, + "loss": 0.8076, + "step": 4282 + }, + { + "epoch": 0.63, + "learning_rate": 6.259066430028632e-06, + "loss": 0.8584, + "step": 4283 + }, + { + "epoch": 0.63, + "learning_rate": 6.2546248942117134e-06, + "loss": 0.7891, + "step": 4284 + }, + { + "epoch": 0.63, + "learning_rate": 6.250184217645293e-06, + "loss": 0.8193, + "step": 4285 + }, + { + "epoch": 0.63, + "learning_rate": 6.2457444013481405e-06, + "loss": 0.8525, + "step": 4286 + }, + { + "epoch": 0.63, + "learning_rate": 6.241305446338821e-06, + "loss": 0.8081, + "step": 4287 + }, + { + "epoch": 0.63, + "learning_rate": 6.236867353635709e-06, + "loss": 0.7852, + "step": 4288 + }, + { + "epoch": 0.63, + "learning_rate": 6.232430124256966e-06, + "loss": 0.7754, + "step": 4289 + }, + { + "epoch": 0.63, + "learning_rate": 6.227993759220568e-06, + "loss": 0.8267, + "step": 4290 + }, + { + "epoch": 0.63, + "learning_rate": 6.2235582595442935e-06, + "loss": 0.8394, + "step": 4291 + }, + { + "epoch": 0.63, + "learning_rate": 6.219123626245722e-06, + "loss": 0.7588, + "step": 4292 + }, + { + "epoch": 0.63, + "learning_rate": 6.2146898603422225e-06, + "loss": 0.812, + "step": 4293 + }, + { + "epoch": 0.64, + "learning_rate": 6.210256962850976e-06, + "loss": 0.7983, + "step": 4294 + }, + { + "epoch": 0.64, + "learning_rate": 6.2058249347889646e-06, + "loss": 0.7668, + "step": 4295 + }, + { + "epoch": 0.64, + "learning_rate": 6.201393777172971e-06, + "loss": 0.7979, + "step": 4296 + }, + { + "epoch": 0.64, + "learning_rate": 6.196963491019569e-06, + "loss": 0.8223, + "step": 4297 + }, + { + "epoch": 0.64, + "learning_rate": 6.19253407734514e-06, + "loss": 0.8115, + "step": 4298 + }, + { + "epoch": 0.64, + "learning_rate": 6.188105537165865e-06, + "loss": 0.3074, + "step": 4299 + }, + { + "epoch": 0.64, + "learning_rate": 6.183677871497726e-06, + "loss": 0.8599, + "step": 4300 + }, + { + "epoch": 0.64, + "learning_rate": 6.179251081356505e-06, + "loss": 0.8076, + "step": 4301 + }, + { + "epoch": 0.64, + "learning_rate": 6.174825167757772e-06, + "loss": 0.8232, + "step": 4302 + }, + { + "epoch": 0.64, + "learning_rate": 6.1704001317169075e-06, + "loss": 0.8232, + "step": 4303 + }, + { + "epoch": 0.64, + "learning_rate": 6.165975974249086e-06, + "loss": 0.7817, + "step": 4304 + }, + { + "epoch": 0.64, + "learning_rate": 6.161552696369291e-06, + "loss": 0.8394, + "step": 4305 + }, + { + "epoch": 0.64, + "learning_rate": 6.157130299092282e-06, + "loss": 0.7329, + "step": 4306 + }, + { + "epoch": 0.64, + "learning_rate": 6.152708783432637e-06, + "loss": 0.79, + "step": 4307 + }, + { + "epoch": 0.64, + "learning_rate": 6.148288150404722e-06, + "loss": 0.302, + "step": 4308 + }, + { + "epoch": 0.64, + "learning_rate": 6.143868401022705e-06, + "loss": 0.8057, + "step": 4309 + }, + { + "epoch": 0.64, + "learning_rate": 6.139449536300548e-06, + "loss": 0.7734, + "step": 4310 + }, + { + "epoch": 0.64, + "learning_rate": 6.135031557252008e-06, + "loss": 0.7983, + "step": 4311 + }, + { + "epoch": 0.64, + "learning_rate": 6.130614464890645e-06, + "loss": 0.7896, + "step": 4312 + }, + { + "epoch": 0.64, + "learning_rate": 6.12619826022981e-06, + "loss": 0.7832, + "step": 4313 + }, + { + "epoch": 0.64, + "learning_rate": 6.1217829442826595e-06, + "loss": 0.8101, + "step": 4314 + }, + { + "epoch": 0.64, + "learning_rate": 6.117368518062131e-06, + "loss": 0.8638, + "step": 4315 + }, + { + "epoch": 0.64, + "learning_rate": 6.11295498258097e-06, + "loss": 0.8516, + "step": 4316 + }, + { + "epoch": 0.64, + "learning_rate": 6.1085423388517154e-06, + "loss": 0.7861, + "step": 4317 + }, + { + "epoch": 0.64, + "learning_rate": 6.1041305878866985e-06, + "loss": 0.8257, + "step": 4318 + }, + { + "epoch": 0.64, + "learning_rate": 6.099719730698046e-06, + "loss": 0.7495, + "step": 4319 + }, + { + "epoch": 0.64, + "learning_rate": 6.095309768297681e-06, + "loss": 0.3164, + "step": 4320 + }, + { + "epoch": 0.64, + "learning_rate": 6.090900701697324e-06, + "loss": 0.7549, + "step": 4321 + }, + { + "epoch": 0.64, + "learning_rate": 6.086492531908488e-06, + "loss": 0.8799, + "step": 4322 + }, + { + "epoch": 0.64, + "learning_rate": 6.082085259942474e-06, + "loss": 0.8188, + "step": 4323 + }, + { + "epoch": 0.64, + "learning_rate": 6.077678886810386e-06, + "loss": 0.7612, + "step": 4324 + }, + { + "epoch": 0.64, + "learning_rate": 6.073273413523119e-06, + "loss": 0.8027, + "step": 4325 + }, + { + "epoch": 0.64, + "learning_rate": 6.068868841091361e-06, + "loss": 0.811, + "step": 4326 + }, + { + "epoch": 0.64, + "learning_rate": 6.0644651705255905e-06, + "loss": 0.8218, + "step": 4327 + }, + { + "epoch": 0.64, + "learning_rate": 6.060062402836082e-06, + "loss": 0.811, + "step": 4328 + }, + { + "epoch": 0.64, + "learning_rate": 6.055660539032906e-06, + "loss": 0.793, + "step": 4329 + }, + { + "epoch": 0.64, + "learning_rate": 6.0512595801259185e-06, + "loss": 0.7778, + "step": 4330 + }, + { + "epoch": 0.64, + "learning_rate": 6.0468595271247755e-06, + "loss": 0.8613, + "step": 4331 + }, + { + "epoch": 0.64, + "learning_rate": 6.042460381038918e-06, + "loss": 0.7661, + "step": 4332 + }, + { + "epoch": 0.64, + "learning_rate": 6.038062142877583e-06, + "loss": 0.8462, + "step": 4333 + }, + { + "epoch": 0.64, + "learning_rate": 6.0336648136498e-06, + "loss": 0.8057, + "step": 4334 + }, + { + "epoch": 0.64, + "learning_rate": 6.029268394364389e-06, + "loss": 0.7456, + "step": 4335 + }, + { + "epoch": 0.64, + "learning_rate": 6.024872886029958e-06, + "loss": 0.7954, + "step": 4336 + }, + { + "epoch": 0.64, + "learning_rate": 6.020478289654909e-06, + "loss": 0.7705, + "step": 4337 + }, + { + "epoch": 0.64, + "learning_rate": 6.016084606247435e-06, + "loss": 0.7334, + "step": 4338 + }, + { + "epoch": 0.64, + "learning_rate": 6.011691836815523e-06, + "loss": 0.8066, + "step": 4339 + }, + { + "epoch": 0.64, + "learning_rate": 6.00729998236694e-06, + "loss": 0.8638, + "step": 4340 + }, + { + "epoch": 0.64, + "learning_rate": 6.002909043909253e-06, + "loss": 0.772, + "step": 4341 + }, + { + "epoch": 0.64, + "learning_rate": 5.9985190224498135e-06, + "loss": 0.7856, + "step": 4342 + }, + { + "epoch": 0.64, + "learning_rate": 5.994129918995769e-06, + "loss": 0.8184, + "step": 4343 + }, + { + "epoch": 0.64, + "learning_rate": 5.989741734554046e-06, + "loss": 0.7744, + "step": 4344 + }, + { + "epoch": 0.64, + "learning_rate": 5.9853544701313694e-06, + "loss": 0.8022, + "step": 4345 + }, + { + "epoch": 0.64, + "learning_rate": 5.980968126734248e-06, + "loss": 0.7837, + "step": 4346 + }, + { + "epoch": 0.64, + "learning_rate": 5.976582705368982e-06, + "loss": 0.8511, + "step": 4347 + }, + { + "epoch": 0.64, + "learning_rate": 5.972198207041661e-06, + "loss": 0.7004, + "step": 4348 + }, + { + "epoch": 0.64, + "learning_rate": 5.967814632758155e-06, + "loss": 0.8286, + "step": 4349 + }, + { + "epoch": 0.64, + "learning_rate": 5.963431983524134e-06, + "loss": 0.8325, + "step": 4350 + }, + { + "epoch": 0.64, + "learning_rate": 5.9590502603450455e-06, + "loss": 0.7681, + "step": 4351 + }, + { + "epoch": 0.64, + "learning_rate": 5.9546694642261325e-06, + "loss": 0.7361, + "step": 4352 + }, + { + "epoch": 0.64, + "learning_rate": 5.950289596172418e-06, + "loss": 0.8735, + "step": 4353 + }, + { + "epoch": 0.64, + "learning_rate": 5.945910657188717e-06, + "loss": 0.8223, + "step": 4354 + }, + { + "epoch": 0.64, + "learning_rate": 5.941532648279629e-06, + "loss": 0.8418, + "step": 4355 + }, + { + "epoch": 0.64, + "learning_rate": 5.937155570449547e-06, + "loss": 0.8047, + "step": 4356 + }, + { + "epoch": 0.64, + "learning_rate": 5.9327794247026325e-06, + "loss": 0.769, + "step": 4357 + }, + { + "epoch": 0.64, + "learning_rate": 5.928404212042855e-06, + "loss": 0.7778, + "step": 4358 + }, + { + "epoch": 0.64, + "learning_rate": 5.924029933473956e-06, + "loss": 0.8218, + "step": 4359 + }, + { + "epoch": 0.64, + "learning_rate": 5.9196565899994695e-06, + "loss": 0.7939, + "step": 4360 + }, + { + "epoch": 0.64, + "learning_rate": 5.9152841826227136e-06, + "loss": 0.7485, + "step": 4361 + }, + { + "epoch": 0.65, + "learning_rate": 5.910912712346781e-06, + "loss": 0.8364, + "step": 4362 + }, + { + "epoch": 0.65, + "learning_rate": 5.906542180174568e-06, + "loss": 0.8711, + "step": 4363 + }, + { + "epoch": 0.65, + "learning_rate": 5.902172587108742e-06, + "loss": 0.7637, + "step": 4364 + }, + { + "epoch": 0.65, + "learning_rate": 5.897803934151767e-06, + "loss": 0.8057, + "step": 4365 + }, + { + "epoch": 0.65, + "learning_rate": 5.893436222305869e-06, + "loss": 0.7239, + "step": 4366 + }, + { + "epoch": 0.65, + "learning_rate": 5.889069452573085e-06, + "loss": 0.8623, + "step": 4367 + }, + { + "epoch": 0.65, + "learning_rate": 5.884703625955219e-06, + "loss": 0.8442, + "step": 4368 + }, + { + "epoch": 0.65, + "learning_rate": 5.880338743453868e-06, + "loss": 0.7256, + "step": 4369 + }, + { + "epoch": 0.65, + "learning_rate": 5.875974806070402e-06, + "loss": 0.873, + "step": 4370 + }, + { + "epoch": 0.65, + "learning_rate": 5.871611814805978e-06, + "loss": 0.813, + "step": 4371 + }, + { + "epoch": 0.65, + "learning_rate": 5.867249770661543e-06, + "loss": 0.8477, + "step": 4372 + }, + { + "epoch": 0.65, + "learning_rate": 5.862888674637823e-06, + "loss": 0.7964, + "step": 4373 + }, + { + "epoch": 0.65, + "learning_rate": 5.85852852773532e-06, + "loss": 0.792, + "step": 4374 + }, + { + "epoch": 0.65, + "learning_rate": 5.854169330954324e-06, + "loss": 0.8511, + "step": 4375 + }, + { + "epoch": 0.65, + "learning_rate": 5.849811085294905e-06, + "loss": 0.3257, + "step": 4376 + }, + { + "epoch": 0.65, + "learning_rate": 5.845453791756921e-06, + "loss": 0.7974, + "step": 4377 + }, + { + "epoch": 0.65, + "learning_rate": 5.841097451340008e-06, + "loss": 0.7896, + "step": 4378 + }, + { + "epoch": 0.65, + "learning_rate": 5.836742065043575e-06, + "loss": 0.8252, + "step": 4379 + }, + { + "epoch": 0.65, + "learning_rate": 5.832387633866819e-06, + "loss": 0.814, + "step": 4380 + }, + { + "epoch": 0.65, + "learning_rate": 5.828034158808726e-06, + "loss": 0.8662, + "step": 4381 + }, + { + "epoch": 0.65, + "learning_rate": 5.823681640868049e-06, + "loss": 0.7603, + "step": 4382 + }, + { + "epoch": 0.65, + "learning_rate": 5.819330081043324e-06, + "loss": 0.7817, + "step": 4383 + }, + { + "epoch": 0.65, + "learning_rate": 5.814979480332878e-06, + "loss": 0.751, + "step": 4384 + }, + { + "epoch": 0.65, + "learning_rate": 5.810629839734803e-06, + "loss": 0.7993, + "step": 4385 + }, + { + "epoch": 0.65, + "learning_rate": 5.806281160246983e-06, + "loss": 0.731, + "step": 4386 + }, + { + "epoch": 0.65, + "learning_rate": 5.801933442867076e-06, + "loss": 0.8452, + "step": 4387 + }, + { + "epoch": 0.65, + "learning_rate": 5.797586688592513e-06, + "loss": 0.7319, + "step": 4388 + }, + { + "epoch": 0.65, + "learning_rate": 5.793240898420521e-06, + "loss": 0.8506, + "step": 4389 + }, + { + "epoch": 0.65, + "learning_rate": 5.78889607334809e-06, + "loss": 0.2996, + "step": 4390 + }, + { + "epoch": 0.65, + "learning_rate": 5.784552214371992e-06, + "loss": 0.8022, + "step": 4391 + }, + { + "epoch": 0.65, + "learning_rate": 5.7802093224887845e-06, + "loss": 0.8315, + "step": 4392 + }, + { + "epoch": 0.65, + "learning_rate": 5.775867398694792e-06, + "loss": 0.792, + "step": 4393 + }, + { + "epoch": 0.65, + "learning_rate": 5.771526443986132e-06, + "loss": 0.7612, + "step": 4394 + }, + { + "epoch": 0.65, + "learning_rate": 5.767186459358684e-06, + "loss": 0.8286, + "step": 4395 + }, + { + "epoch": 0.65, + "learning_rate": 5.762847445808111e-06, + "loss": 0.8589, + "step": 4396 + }, + { + "epoch": 0.65, + "learning_rate": 5.75850940432986e-06, + "loss": 0.7598, + "step": 4397 + }, + { + "epoch": 0.65, + "learning_rate": 5.754172335919142e-06, + "loss": 0.8511, + "step": 4398 + }, + { + "epoch": 0.65, + "learning_rate": 5.74983624157096e-06, + "loss": 0.2893, + "step": 4399 + }, + { + "epoch": 0.65, + "learning_rate": 5.745501122280075e-06, + "loss": 0.8135, + "step": 4400 + }, + { + "epoch": 0.65, + "learning_rate": 5.741166979041037e-06, + "loss": 0.79, + "step": 4401 + }, + { + "epoch": 0.65, + "learning_rate": 5.736833812848177e-06, + "loss": 0.8628, + "step": 4402 + }, + { + "epoch": 0.65, + "learning_rate": 5.73250162469559e-06, + "loss": 0.7896, + "step": 4403 + }, + { + "epoch": 0.65, + "learning_rate": 5.728170415577146e-06, + "loss": 0.8159, + "step": 4404 + }, + { + "epoch": 0.65, + "learning_rate": 5.723840186486504e-06, + "loss": 0.312, + "step": 4405 + }, + { + "epoch": 0.65, + "learning_rate": 5.719510938417081e-06, + "loss": 0.7119, + "step": 4406 + }, + { + "epoch": 0.65, + "learning_rate": 5.715182672362092e-06, + "loss": 0.7944, + "step": 4407 + }, + { + "epoch": 0.65, + "learning_rate": 5.710855389314491e-06, + "loss": 0.8359, + "step": 4408 + }, + { + "epoch": 0.65, + "learning_rate": 5.706529090267042e-06, + "loss": 0.791, + "step": 4409 + }, + { + "epoch": 0.65, + "learning_rate": 5.702203776212269e-06, + "loss": 0.7852, + "step": 4410 + }, + { + "epoch": 0.65, + "learning_rate": 5.6978794481424626e-06, + "loss": 0.7896, + "step": 4411 + }, + { + "epoch": 0.65, + "learning_rate": 5.693556107049709e-06, + "loss": 0.7842, + "step": 4412 + }, + { + "epoch": 0.65, + "learning_rate": 5.689233753925834e-06, + "loss": 0.7842, + "step": 4413 + }, + { + "epoch": 0.65, + "learning_rate": 5.684912389762467e-06, + "loss": 0.7988, + "step": 4414 + }, + { + "epoch": 0.65, + "learning_rate": 5.680592015551001e-06, + "loss": 0.7896, + "step": 4415 + }, + { + "epoch": 0.65, + "learning_rate": 5.6762726322826e-06, + "loss": 0.8726, + "step": 4416 + }, + { + "epoch": 0.65, + "learning_rate": 5.6719542409482e-06, + "loss": 0.8804, + "step": 4417 + }, + { + "epoch": 0.65, + "learning_rate": 5.6676368425385065e-06, + "loss": 0.8311, + "step": 4418 + }, + { + "epoch": 0.65, + "learning_rate": 5.663320438044005e-06, + "loss": 0.8105, + "step": 4419 + }, + { + "epoch": 0.65, + "learning_rate": 5.6590050284549604e-06, + "loss": 0.7754, + "step": 4420 + }, + { + "epoch": 0.65, + "learning_rate": 5.654690614761378e-06, + "loss": 0.8101, + "step": 4421 + }, + { + "epoch": 0.65, + "learning_rate": 5.650377197953072e-06, + "loss": 0.7886, + "step": 4422 + }, + { + "epoch": 0.65, + "learning_rate": 5.646064779019598e-06, + "loss": 0.7993, + "step": 4423 + }, + { + "epoch": 0.65, + "learning_rate": 5.6417533589503036e-06, + "loss": 0.7876, + "step": 4424 + }, + { + "epoch": 0.65, + "learning_rate": 5.637442938734306e-06, + "loss": 0.8271, + "step": 4425 + }, + { + "epoch": 0.65, + "learning_rate": 5.63313351936047e-06, + "loss": 0.8003, + "step": 4426 + }, + { + "epoch": 0.65, + "learning_rate": 5.6288251018174585e-06, + "loss": 0.7529, + "step": 4427 + }, + { + "epoch": 0.65, + "learning_rate": 5.6245176870936865e-06, + "loss": 0.8149, + "step": 4428 + }, + { + "epoch": 0.65, + "learning_rate": 5.620211276177353e-06, + "loss": 0.7578, + "step": 4429 + }, + { + "epoch": 0.66, + "learning_rate": 5.615905870056415e-06, + "loss": 0.793, + "step": 4430 + }, + { + "epoch": 0.66, + "learning_rate": 5.611601469718601e-06, + "loss": 0.8613, + "step": 4431 + }, + { + "epoch": 0.66, + "learning_rate": 5.607298076151416e-06, + "loss": 0.8027, + "step": 4432 + }, + { + "epoch": 0.66, + "learning_rate": 5.602995690342128e-06, + "loss": 0.856, + "step": 4433 + }, + { + "epoch": 0.66, + "learning_rate": 5.59869431327777e-06, + "loss": 0.8071, + "step": 4434 + }, + { + "epoch": 0.66, + "learning_rate": 5.5943939459451555e-06, + "loss": 0.7554, + "step": 4435 + }, + { + "epoch": 0.66, + "learning_rate": 5.5900945893308526e-06, + "loss": 0.7988, + "step": 4436 + }, + { + "epoch": 0.66, + "learning_rate": 5.585796244421211e-06, + "loss": 0.8169, + "step": 4437 + }, + { + "epoch": 0.66, + "learning_rate": 5.5814989122023385e-06, + "loss": 0.7891, + "step": 4438 + }, + { + "epoch": 0.66, + "learning_rate": 5.577202593660109e-06, + "loss": 0.7485, + "step": 4439 + }, + { + "epoch": 0.66, + "learning_rate": 5.5729072897801784e-06, + "loss": 0.7661, + "step": 4440 + }, + { + "epoch": 0.66, + "learning_rate": 5.5686130015479486e-06, + "loss": 0.7803, + "step": 4441 + }, + { + "epoch": 0.66, + "learning_rate": 5.564319729948611e-06, + "loss": 0.772, + "step": 4442 + }, + { + "epoch": 0.66, + "learning_rate": 5.560027475967107e-06, + "loss": 0.8438, + "step": 4443 + }, + { + "epoch": 0.66, + "learning_rate": 5.555736240588145e-06, + "loss": 0.7886, + "step": 4444 + }, + { + "epoch": 0.66, + "learning_rate": 5.551446024796214e-06, + "loss": 0.8047, + "step": 4445 + }, + { + "epoch": 0.66, + "learning_rate": 5.547156829575556e-06, + "loss": 0.8682, + "step": 4446 + }, + { + "epoch": 0.66, + "learning_rate": 5.542868655910179e-06, + "loss": 0.7959, + "step": 4447 + }, + { + "epoch": 0.66, + "learning_rate": 5.5385815047838685e-06, + "loss": 0.811, + "step": 4448 + }, + { + "epoch": 0.66, + "learning_rate": 5.534295377180158e-06, + "loss": 0.8008, + "step": 4449 + }, + { + "epoch": 0.66, + "learning_rate": 5.530010274082365e-06, + "loss": 0.7632, + "step": 4450 + }, + { + "epoch": 0.66, + "learning_rate": 5.525726196473557e-06, + "loss": 0.7944, + "step": 4451 + }, + { + "epoch": 0.66, + "learning_rate": 5.521443145336568e-06, + "loss": 0.7759, + "step": 4452 + }, + { + "epoch": 0.66, + "learning_rate": 5.51716112165401e-06, + "loss": 0.7681, + "step": 4453 + }, + { + "epoch": 0.66, + "learning_rate": 5.512880126408243e-06, + "loss": 0.8242, + "step": 4454 + }, + { + "epoch": 0.66, + "learning_rate": 5.508600160581396e-06, + "loss": 0.7798, + "step": 4455 + }, + { + "epoch": 0.66, + "learning_rate": 5.504321225155369e-06, + "loss": 0.8149, + "step": 4456 + }, + { + "epoch": 0.66, + "learning_rate": 5.500043321111814e-06, + "loss": 0.8203, + "step": 4457 + }, + { + "epoch": 0.66, + "learning_rate": 5.4957664494321585e-06, + "loss": 0.8496, + "step": 4458 + }, + { + "epoch": 0.66, + "learning_rate": 5.491490611097586e-06, + "loss": 0.7778, + "step": 4459 + }, + { + "epoch": 0.66, + "learning_rate": 5.487215807089035e-06, + "loss": 0.7573, + "step": 4460 + }, + { + "epoch": 0.66, + "learning_rate": 5.482942038387229e-06, + "loss": 0.7612, + "step": 4461 + }, + { + "epoch": 0.66, + "learning_rate": 5.478669305972628e-06, + "loss": 0.7832, + "step": 4462 + }, + { + "epoch": 0.66, + "learning_rate": 5.474397610825479e-06, + "loss": 0.7852, + "step": 4463 + }, + { + "epoch": 0.66, + "learning_rate": 5.470126953925774e-06, + "loss": 0.894, + "step": 4464 + }, + { + "epoch": 0.66, + "learning_rate": 5.465857336253266e-06, + "loss": 0.9023, + "step": 4465 + }, + { + "epoch": 0.66, + "learning_rate": 5.461588758787484e-06, + "loss": 0.3379, + "step": 4466 + }, + { + "epoch": 0.66, + "learning_rate": 5.457321222507705e-06, + "loss": 0.8003, + "step": 4467 + }, + { + "epoch": 0.66, + "learning_rate": 5.453054728392971e-06, + "loss": 0.7471, + "step": 4468 + }, + { + "epoch": 0.66, + "learning_rate": 5.448789277422091e-06, + "loss": 0.8496, + "step": 4469 + }, + { + "epoch": 0.66, + "learning_rate": 5.444524870573622e-06, + "loss": 0.7915, + "step": 4470 + }, + { + "epoch": 0.66, + "learning_rate": 5.440261508825897e-06, + "loss": 0.8545, + "step": 4471 + }, + { + "epoch": 0.66, + "learning_rate": 5.435999193156997e-06, + "loss": 0.8896, + "step": 4472 + }, + { + "epoch": 0.66, + "learning_rate": 5.431737924544763e-06, + "loss": 0.855, + "step": 4473 + }, + { + "epoch": 0.66, + "learning_rate": 5.427477703966808e-06, + "loss": 0.7554, + "step": 4474 + }, + { + "epoch": 0.66, + "learning_rate": 5.423218532400489e-06, + "loss": 0.7515, + "step": 4475 + }, + { + "epoch": 0.66, + "learning_rate": 5.418960410822938e-06, + "loss": 0.8037, + "step": 4476 + }, + { + "epoch": 0.66, + "learning_rate": 5.414703340211034e-06, + "loss": 0.7197, + "step": 4477 + }, + { + "epoch": 0.66, + "learning_rate": 5.410447321541412e-06, + "loss": 0.77, + "step": 4478 + }, + { + "epoch": 0.66, + "learning_rate": 5.406192355790485e-06, + "loss": 0.7769, + "step": 4479 + }, + { + "epoch": 0.66, + "learning_rate": 5.401938443934405e-06, + "loss": 0.7476, + "step": 4480 + }, + { + "epoch": 0.66, + "learning_rate": 5.397685586949086e-06, + "loss": 0.8149, + "step": 4481 + }, + { + "epoch": 0.66, + "learning_rate": 5.3934337858102115e-06, + "loss": 0.7969, + "step": 4482 + }, + { + "epoch": 0.66, + "learning_rate": 5.389183041493206e-06, + "loss": 0.7622, + "step": 4483 + }, + { + "epoch": 0.66, + "learning_rate": 5.384933354973272e-06, + "loss": 0.8003, + "step": 4484 + }, + { + "epoch": 0.66, + "learning_rate": 5.380684727225341e-06, + "loss": 0.8184, + "step": 4485 + }, + { + "epoch": 0.66, + "learning_rate": 5.376437159224126e-06, + "loss": 0.7822, + "step": 4486 + }, + { + "epoch": 0.66, + "learning_rate": 5.3721906519440945e-06, + "loss": 0.8188, + "step": 4487 + }, + { + "epoch": 0.66, + "learning_rate": 5.367945206359455e-06, + "loss": 0.8281, + "step": 4488 + }, + { + "epoch": 0.66, + "learning_rate": 5.363700823444195e-06, + "loss": 0.7598, + "step": 4489 + }, + { + "epoch": 0.66, + "learning_rate": 5.35945750417203e-06, + "loss": 0.8062, + "step": 4490 + }, + { + "epoch": 0.66, + "learning_rate": 5.3552152495164535e-06, + "loss": 0.8242, + "step": 4491 + }, + { + "epoch": 0.66, + "learning_rate": 5.3509740604507135e-06, + "loss": 0.7295, + "step": 4492 + }, + { + "epoch": 0.66, + "learning_rate": 5.346733937947804e-06, + "loss": 0.8237, + "step": 4493 + }, + { + "epoch": 0.66, + "learning_rate": 5.34249488298048e-06, + "loss": 0.7778, + "step": 4494 + }, + { + "epoch": 0.66, + "learning_rate": 5.338256896521246e-06, + "loss": 0.8301, + "step": 4495 + }, + { + "epoch": 0.66, + "learning_rate": 5.334019979542366e-06, + "loss": 0.7866, + "step": 4496 + }, + { + "epoch": 0.67, + "learning_rate": 5.329784133015872e-06, + "loss": 0.7852, + "step": 4497 + }, + { + "epoch": 0.67, + "learning_rate": 5.325549357913515e-06, + "loss": 0.8135, + "step": 4498 + }, + { + "epoch": 0.67, + "learning_rate": 5.3213156552068394e-06, + "loss": 0.8555, + "step": 4499 + }, + { + "epoch": 0.67, + "learning_rate": 5.317083025867114e-06, + "loss": 0.7744, + "step": 4500 + }, + { + "epoch": 0.67, + "learning_rate": 5.312851470865383e-06, + "loss": 0.77, + "step": 4501 + }, + { + "epoch": 0.67, + "learning_rate": 5.30862099117243e-06, + "loss": 0.8228, + "step": 4502 + }, + { + "epoch": 0.67, + "learning_rate": 5.304391587758793e-06, + "loss": 0.77, + "step": 4503 + }, + { + "epoch": 0.67, + "learning_rate": 5.3001632615947706e-06, + "loss": 0.752, + "step": 4504 + }, + { + "epoch": 0.67, + "learning_rate": 5.295936013650413e-06, + "loss": 0.7842, + "step": 4505 + }, + { + "epoch": 0.67, + "learning_rate": 5.291709844895518e-06, + "loss": 0.7427, + "step": 4506 + }, + { + "epoch": 0.67, + "learning_rate": 5.287484756299638e-06, + "loss": 0.7944, + "step": 4507 + }, + { + "epoch": 0.67, + "learning_rate": 5.283260748832072e-06, + "loss": 0.7832, + "step": 4508 + }, + { + "epoch": 0.67, + "learning_rate": 5.279037823461881e-06, + "loss": 0.3065, + "step": 4509 + }, + { + "epoch": 0.67, + "learning_rate": 5.274815981157883e-06, + "loss": 0.791, + "step": 4510 + }, + { + "epoch": 0.67, + "learning_rate": 5.2705952228886195e-06, + "loss": 0.8345, + "step": 4511 + }, + { + "epoch": 0.67, + "learning_rate": 5.266375549622415e-06, + "loss": 0.8398, + "step": 4512 + }, + { + "epoch": 0.67, + "learning_rate": 5.262156962327325e-06, + "loss": 0.7744, + "step": 4513 + }, + { + "epoch": 0.67, + "learning_rate": 5.257939461971169e-06, + "loss": 0.7014, + "step": 4514 + }, + { + "epoch": 0.67, + "learning_rate": 5.253723049521507e-06, + "loss": 0.835, + "step": 4515 + }, + { + "epoch": 0.67, + "learning_rate": 5.249507725945648e-06, + "loss": 0.7891, + "step": 4516 + }, + { + "epoch": 0.67, + "learning_rate": 5.245293492210668e-06, + "loss": 0.8164, + "step": 4517 + }, + { + "epoch": 0.67, + "learning_rate": 5.241080349283369e-06, + "loss": 0.7515, + "step": 4518 + }, + { + "epoch": 0.67, + "learning_rate": 5.236868298130327e-06, + "loss": 0.7651, + "step": 4519 + }, + { + "epoch": 0.67, + "learning_rate": 5.232657339717848e-06, + "loss": 0.8481, + "step": 4520 + }, + { + "epoch": 0.67, + "learning_rate": 5.228447475011995e-06, + "loss": 0.7803, + "step": 4521 + }, + { + "epoch": 0.67, + "learning_rate": 5.224238704978584e-06, + "loss": 0.7676, + "step": 4522 + }, + { + "epoch": 0.67, + "learning_rate": 5.220031030583175e-06, + "loss": 0.8228, + "step": 4523 + }, + { + "epoch": 0.67, + "learning_rate": 5.2158244527910715e-06, + "loss": 0.8188, + "step": 4524 + }, + { + "epoch": 0.67, + "learning_rate": 5.21161897256734e-06, + "loss": 0.7485, + "step": 4525 + }, + { + "epoch": 0.67, + "learning_rate": 5.20741459087678e-06, + "loss": 0.7119, + "step": 4526 + }, + { + "epoch": 0.67, + "learning_rate": 5.203211308683953e-06, + "loss": 0.291, + "step": 4527 + }, + { + "epoch": 0.67, + "learning_rate": 5.199009126953155e-06, + "loss": 0.8188, + "step": 4528 + }, + { + "epoch": 0.67, + "learning_rate": 5.194808046648434e-06, + "loss": 0.7515, + "step": 4529 + }, + { + "epoch": 0.67, + "learning_rate": 5.190608068733593e-06, + "loss": 0.7539, + "step": 4530 + }, + { + "epoch": 0.67, + "learning_rate": 5.186409194172173e-06, + "loss": 0.7588, + "step": 4531 + }, + { + "epoch": 0.67, + "learning_rate": 5.182211423927459e-06, + "loss": 0.8667, + "step": 4532 + }, + { + "epoch": 0.67, + "learning_rate": 5.178014758962499e-06, + "loss": 0.8218, + "step": 4533 + }, + { + "epoch": 0.67, + "learning_rate": 5.173819200240067e-06, + "loss": 0.7671, + "step": 4534 + }, + { + "epoch": 0.67, + "learning_rate": 5.169624748722701e-06, + "loss": 0.7593, + "step": 4535 + }, + { + "epoch": 0.67, + "learning_rate": 5.165431405372674e-06, + "loss": 0.7756, + "step": 4536 + }, + { + "epoch": 0.67, + "learning_rate": 5.161239171152004e-06, + "loss": 0.8091, + "step": 4537 + }, + { + "epoch": 0.67, + "learning_rate": 5.1570480470224636e-06, + "loss": 0.2808, + "step": 4538 + }, + { + "epoch": 0.67, + "learning_rate": 5.1528580339455615e-06, + "loss": 0.7705, + "step": 4539 + }, + { + "epoch": 0.67, + "learning_rate": 5.14866913288256e-06, + "loss": 0.8047, + "step": 4540 + }, + { + "epoch": 0.67, + "learning_rate": 5.144481344794459e-06, + "loss": 0.7661, + "step": 4541 + }, + { + "epoch": 0.67, + "learning_rate": 5.140294670642002e-06, + "loss": 0.8057, + "step": 4542 + }, + { + "epoch": 0.67, + "learning_rate": 5.1361091113856875e-06, + "loss": 0.7612, + "step": 4543 + }, + { + "epoch": 0.67, + "learning_rate": 5.1319246679857504e-06, + "loss": 0.8354, + "step": 4544 + }, + { + "epoch": 0.67, + "learning_rate": 5.127741341402164e-06, + "loss": 0.8052, + "step": 4545 + }, + { + "epoch": 0.67, + "learning_rate": 5.123559132594661e-06, + "loss": 0.8203, + "step": 4546 + }, + { + "epoch": 0.67, + "learning_rate": 5.1193780425227e-06, + "loss": 0.7764, + "step": 4547 + }, + { + "epoch": 0.67, + "learning_rate": 5.115198072145502e-06, + "loss": 0.7749, + "step": 4548 + }, + { + "epoch": 0.67, + "learning_rate": 5.111019222422013e-06, + "loss": 0.7773, + "step": 4549 + }, + { + "epoch": 0.67, + "learning_rate": 5.106841494310929e-06, + "loss": 0.3215, + "step": 4550 + }, + { + "epoch": 0.67, + "learning_rate": 5.102664888770695e-06, + "loss": 0.8423, + "step": 4551 + }, + { + "epoch": 0.67, + "learning_rate": 5.098489406759487e-06, + "loss": 0.7588, + "step": 4552 + }, + { + "epoch": 0.67, + "learning_rate": 5.094315049235236e-06, + "loss": 0.7905, + "step": 4553 + }, + { + "epoch": 0.67, + "learning_rate": 5.0901418171556035e-06, + "loss": 0.8472, + "step": 4554 + }, + { + "epoch": 0.67, + "learning_rate": 5.085969711477993e-06, + "loss": 0.8062, + "step": 4555 + }, + { + "epoch": 0.67, + "learning_rate": 5.081798733159566e-06, + "loss": 0.8301, + "step": 4556 + }, + { + "epoch": 0.67, + "learning_rate": 5.077628883157205e-06, + "loss": 0.2653, + "step": 4557 + }, + { + "epoch": 0.67, + "learning_rate": 5.073460162427539e-06, + "loss": 0.7915, + "step": 4558 + }, + { + "epoch": 0.67, + "learning_rate": 5.069292571926949e-06, + "loss": 0.7754, + "step": 4559 + }, + { + "epoch": 0.67, + "learning_rate": 5.065126112611542e-06, + "loss": 0.8135, + "step": 4560 + }, + { + "epoch": 0.67, + "learning_rate": 5.060960785437183e-06, + "loss": 0.7988, + "step": 4561 + }, + { + "epoch": 0.67, + "learning_rate": 5.056796591359451e-06, + "loss": 0.7925, + "step": 4562 + }, + { + "epoch": 0.67, + "learning_rate": 5.052633531333687e-06, + "loss": 0.8086, + "step": 4563 + }, + { + "epoch": 0.67, + "learning_rate": 5.048471606314971e-06, + "loss": 0.7568, + "step": 4564 + }, + { + "epoch": 0.68, + "learning_rate": 5.044310817258111e-06, + "loss": 0.3557, + "step": 4565 + }, + { + "epoch": 0.68, + "learning_rate": 5.0401511651176624e-06, + "loss": 0.8555, + "step": 4566 + }, + { + "epoch": 0.68, + "learning_rate": 5.035992650847913e-06, + "loss": 0.8052, + "step": 4567 + }, + { + "epoch": 0.68, + "learning_rate": 5.031835275402898e-06, + "loss": 0.8018, + "step": 4568 + }, + { + "epoch": 0.68, + "learning_rate": 5.027679039736391e-06, + "loss": 0.8101, + "step": 4569 + }, + { + "epoch": 0.68, + "learning_rate": 5.023523944801896e-06, + "loss": 0.7466, + "step": 4570 + }, + { + "epoch": 0.68, + "learning_rate": 5.019369991552658e-06, + "loss": 0.8184, + "step": 4571 + }, + { + "epoch": 0.68, + "learning_rate": 5.015217180941669e-06, + "loss": 0.79, + "step": 4572 + }, + { + "epoch": 0.68, + "learning_rate": 5.011065513921645e-06, + "loss": 0.8003, + "step": 4573 + }, + { + "epoch": 0.68, + "learning_rate": 5.006914991445056e-06, + "loss": 0.7988, + "step": 4574 + }, + { + "epoch": 0.68, + "learning_rate": 5.002765614464085e-06, + "loss": 0.7949, + "step": 4575 + }, + { + "epoch": 0.68, + "learning_rate": 4.9986173839306765e-06, + "loss": 0.3162, + "step": 4576 + }, + { + "epoch": 0.68, + "learning_rate": 4.994470300796505e-06, + "loss": 0.7808, + "step": 4577 + }, + { + "epoch": 0.68, + "learning_rate": 4.990324366012977e-06, + "loss": 0.8242, + "step": 4578 + }, + { + "epoch": 0.68, + "learning_rate": 4.986179580531238e-06, + "loss": 0.7563, + "step": 4579 + }, + { + "epoch": 0.68, + "learning_rate": 4.982035945302164e-06, + "loss": 0.8145, + "step": 4580 + }, + { + "epoch": 0.68, + "learning_rate": 4.977893461276378e-06, + "loss": 0.7539, + "step": 4581 + }, + { + "epoch": 0.68, + "learning_rate": 4.973752129404237e-06, + "loss": 0.8481, + "step": 4582 + }, + { + "epoch": 0.68, + "learning_rate": 4.969611950635827e-06, + "loss": 0.8057, + "step": 4583 + }, + { + "epoch": 0.68, + "learning_rate": 4.965472925920975e-06, + "loss": 0.7837, + "step": 4584 + }, + { + "epoch": 0.68, + "learning_rate": 4.961335056209234e-06, + "loss": 0.7988, + "step": 4585 + }, + { + "epoch": 0.68, + "learning_rate": 4.957198342449904e-06, + "loss": 0.2999, + "step": 4586 + }, + { + "epoch": 0.68, + "learning_rate": 4.9530627855920236e-06, + "loss": 0.8105, + "step": 4587 + }, + { + "epoch": 0.68, + "learning_rate": 4.948928386584342e-06, + "loss": 0.8218, + "step": 4588 + }, + { + "epoch": 0.68, + "learning_rate": 4.944795146375368e-06, + "loss": 0.8062, + "step": 4589 + }, + { + "epoch": 0.68, + "learning_rate": 4.94066306591333e-06, + "loss": 0.7842, + "step": 4590 + }, + { + "epoch": 0.68, + "learning_rate": 4.9365321461462e-06, + "loss": 0.8066, + "step": 4591 + }, + { + "epoch": 0.68, + "learning_rate": 4.932402388021677e-06, + "loss": 0.8203, + "step": 4592 + }, + { + "epoch": 0.68, + "learning_rate": 4.928273792487189e-06, + "loss": 0.8013, + "step": 4593 + }, + { + "epoch": 0.68, + "learning_rate": 4.924146360489914e-06, + "loss": 0.7959, + "step": 4594 + }, + { + "epoch": 0.68, + "learning_rate": 4.920020092976746e-06, + "loss": 0.7759, + "step": 4595 + }, + { + "epoch": 0.68, + "learning_rate": 4.915894990894317e-06, + "loss": 0.8223, + "step": 4596 + }, + { + "epoch": 0.68, + "learning_rate": 4.911771055189001e-06, + "loss": 0.7998, + "step": 4597 + }, + { + "epoch": 0.68, + "learning_rate": 4.9076482868068856e-06, + "loss": 0.749, + "step": 4598 + }, + { + "epoch": 0.68, + "learning_rate": 4.9035266866938125e-06, + "loss": 0.7891, + "step": 4599 + }, + { + "epoch": 0.68, + "learning_rate": 4.899406255795338e-06, + "loss": 0.7769, + "step": 4600 + }, + { + "epoch": 0.68, + "learning_rate": 4.895286995056756e-06, + "loss": 0.7822, + "step": 4601 + }, + { + "epoch": 0.68, + "learning_rate": 4.891168905423097e-06, + "loss": 0.7612, + "step": 4602 + }, + { + "epoch": 0.68, + "learning_rate": 4.887051987839112e-06, + "loss": 0.7607, + "step": 4603 + }, + { + "epoch": 0.68, + "learning_rate": 4.882936243249298e-06, + "loss": 0.7681, + "step": 4604 + }, + { + "epoch": 0.68, + "learning_rate": 4.878821672597868e-06, + "loss": 0.7998, + "step": 4605 + }, + { + "epoch": 0.68, + "learning_rate": 4.87470827682877e-06, + "loss": 0.8169, + "step": 4606 + }, + { + "epoch": 0.68, + "learning_rate": 4.870596056885693e-06, + "loss": 0.8066, + "step": 4607 + }, + { + "epoch": 0.68, + "learning_rate": 4.866485013712041e-06, + "loss": 0.8027, + "step": 4608 + }, + { + "epoch": 0.68, + "learning_rate": 4.862375148250954e-06, + "loss": 0.8516, + "step": 4609 + }, + { + "epoch": 0.68, + "learning_rate": 4.858266461445308e-06, + "loss": 0.7192, + "step": 4610 + }, + { + "epoch": 0.68, + "learning_rate": 4.854158954237697e-06, + "loss": 0.8213, + "step": 4611 + }, + { + "epoch": 0.68, + "learning_rate": 4.850052627570457e-06, + "loss": 0.7593, + "step": 4612 + }, + { + "epoch": 0.68, + "learning_rate": 4.8459474823856445e-06, + "loss": 0.8354, + "step": 4613 + }, + { + "epoch": 0.68, + "learning_rate": 4.841843519625042e-06, + "loss": 0.7156, + "step": 4614 + }, + { + "epoch": 0.68, + "learning_rate": 4.837740740230174e-06, + "loss": 0.7793, + "step": 4615 + }, + { + "epoch": 0.68, + "learning_rate": 4.833639145142277e-06, + "loss": 0.8115, + "step": 4616 + }, + { + "epoch": 0.68, + "learning_rate": 4.829538735302333e-06, + "loss": 0.811, + "step": 4617 + }, + { + "epoch": 0.68, + "learning_rate": 4.8254395116510374e-06, + "loss": 0.7622, + "step": 4618 + }, + { + "epoch": 0.68, + "learning_rate": 4.821341475128819e-06, + "loss": 0.77, + "step": 4619 + }, + { + "epoch": 0.68, + "learning_rate": 4.81724462667584e-06, + "loss": 0.8511, + "step": 4620 + }, + { + "epoch": 0.68, + "learning_rate": 4.813148967231981e-06, + "loss": 0.812, + "step": 4621 + }, + { + "epoch": 0.68, + "learning_rate": 4.809054497736849e-06, + "loss": 0.7402, + "step": 4622 + }, + { + "epoch": 0.68, + "learning_rate": 4.804961219129792e-06, + "loss": 0.7896, + "step": 4623 + }, + { + "epoch": 0.68, + "learning_rate": 4.8008691323498664e-06, + "loss": 0.7598, + "step": 4624 + }, + { + "epoch": 0.68, + "learning_rate": 4.796778238335872e-06, + "loss": 0.8154, + "step": 4625 + }, + { + "epoch": 0.68, + "learning_rate": 4.792688538026324e-06, + "loss": 0.7905, + "step": 4626 + }, + { + "epoch": 0.68, + "learning_rate": 4.788600032359461e-06, + "loss": 0.8101, + "step": 4627 + }, + { + "epoch": 0.68, + "learning_rate": 4.784512722273261e-06, + "loss": 0.79, + "step": 4628 + }, + { + "epoch": 0.68, + "learning_rate": 4.780426608705415e-06, + "loss": 0.7822, + "step": 4629 + }, + { + "epoch": 0.68, + "learning_rate": 4.7763416925933495e-06, + "loss": 0.8154, + "step": 4630 + }, + { + "epoch": 0.68, + "learning_rate": 4.7722579748742086e-06, + "loss": 0.7881, + "step": 4631 + }, + { + "epoch": 0.69, + "learning_rate": 4.7681754564848606e-06, + "loss": 0.8159, + "step": 4632 + }, + { + "epoch": 0.69, + "learning_rate": 4.764094138361909e-06, + "loss": 0.8105, + "step": 4633 + }, + { + "epoch": 0.69, + "learning_rate": 4.760014021441671e-06, + "loss": 0.8643, + "step": 4634 + }, + { + "epoch": 0.69, + "learning_rate": 4.75593510666019e-06, + "loss": 0.7803, + "step": 4635 + }, + { + "epoch": 0.69, + "learning_rate": 4.751857394953243e-06, + "loss": 0.8174, + "step": 4636 + }, + { + "epoch": 0.69, + "learning_rate": 4.747780887256316e-06, + "loss": 0.7349, + "step": 4637 + }, + { + "epoch": 0.69, + "learning_rate": 4.743705584504634e-06, + "loss": 0.7725, + "step": 4638 + }, + { + "epoch": 0.69, + "learning_rate": 4.739631487633136e-06, + "loss": 0.7856, + "step": 4639 + }, + { + "epoch": 0.69, + "learning_rate": 4.735558597576482e-06, + "loss": 0.8188, + "step": 4640 + }, + { + "epoch": 0.69, + "learning_rate": 4.731486915269066e-06, + "loss": 0.3047, + "step": 4641 + }, + { + "epoch": 0.69, + "learning_rate": 4.727416441644998e-06, + "loss": 0.8257, + "step": 4642 + }, + { + "epoch": 0.69, + "learning_rate": 4.723347177638106e-06, + "loss": 0.7739, + "step": 4643 + }, + { + "epoch": 0.69, + "learning_rate": 4.719279124181953e-06, + "loss": 0.8232, + "step": 4644 + }, + { + "epoch": 0.69, + "learning_rate": 4.715212282209809e-06, + "loss": 0.2848, + "step": 4645 + }, + { + "epoch": 0.69, + "learning_rate": 4.7111466526546835e-06, + "loss": 0.8232, + "step": 4646 + }, + { + "epoch": 0.69, + "learning_rate": 4.707082236449294e-06, + "loss": 0.7974, + "step": 4647 + }, + { + "epoch": 0.69, + "learning_rate": 4.703019034526082e-06, + "loss": 0.8447, + "step": 4648 + }, + { + "epoch": 0.69, + "learning_rate": 4.698957047817217e-06, + "loss": 0.7642, + "step": 4649 + }, + { + "epoch": 0.69, + "learning_rate": 4.694896277254581e-06, + "loss": 0.7554, + "step": 4650 + }, + { + "epoch": 0.69, + "learning_rate": 4.690836723769792e-06, + "loss": 0.7422, + "step": 4651 + }, + { + "epoch": 0.69, + "learning_rate": 4.686778388294163e-06, + "loss": 0.8589, + "step": 4652 + }, + { + "epoch": 0.69, + "learning_rate": 4.682721271758749e-06, + "loss": 0.8101, + "step": 4653 + }, + { + "epoch": 0.69, + "learning_rate": 4.678665375094325e-06, + "loss": 0.8076, + "step": 4654 + }, + { + "epoch": 0.69, + "learning_rate": 4.674610699231373e-06, + "loss": 0.7583, + "step": 4655 + }, + { + "epoch": 0.69, + "learning_rate": 4.670557245100105e-06, + "loss": 0.7979, + "step": 4656 + }, + { + "epoch": 0.69, + "learning_rate": 4.666505013630448e-06, + "loss": 0.2937, + "step": 4657 + }, + { + "epoch": 0.69, + "learning_rate": 4.66245400575205e-06, + "loss": 0.7632, + "step": 4658 + }, + { + "epoch": 0.69, + "learning_rate": 4.658404222394288e-06, + "loss": 0.8052, + "step": 4659 + }, + { + "epoch": 0.69, + "learning_rate": 4.654355664486233e-06, + "loss": 0.7646, + "step": 4660 + }, + { + "epoch": 0.69, + "learning_rate": 4.650308332956704e-06, + "loss": 0.7793, + "step": 4661 + }, + { + "epoch": 0.69, + "learning_rate": 4.6462622287342154e-06, + "loss": 0.8091, + "step": 4662 + }, + { + "epoch": 0.69, + "learning_rate": 4.642217352747015e-06, + "loss": 0.7852, + "step": 4663 + }, + { + "epoch": 0.69, + "learning_rate": 4.63817370592307e-06, + "loss": 0.8042, + "step": 4664 + }, + { + "epoch": 0.69, + "learning_rate": 4.634131289190046e-06, + "loss": 0.8081, + "step": 4665 + }, + { + "epoch": 0.69, + "learning_rate": 4.630090103475349e-06, + "loss": 0.8003, + "step": 4666 + }, + { + "epoch": 0.69, + "learning_rate": 4.6260501497060875e-06, + "loss": 0.7793, + "step": 4667 + }, + { + "epoch": 0.69, + "learning_rate": 4.6220114288091e-06, + "loss": 0.7734, + "step": 4668 + }, + { + "epoch": 0.69, + "learning_rate": 4.617973941710932e-06, + "loss": 0.7925, + "step": 4669 + }, + { + "epoch": 0.69, + "learning_rate": 4.613937689337844e-06, + "loss": 0.7524, + "step": 4670 + }, + { + "epoch": 0.69, + "learning_rate": 4.609902672615824e-06, + "loss": 0.8052, + "step": 4671 + }, + { + "epoch": 0.69, + "learning_rate": 4.605868892470579e-06, + "loss": 0.7905, + "step": 4672 + }, + { + "epoch": 0.69, + "learning_rate": 4.601836349827507e-06, + "loss": 0.769, + "step": 4673 + }, + { + "epoch": 0.69, + "learning_rate": 4.597805045611753e-06, + "loss": 0.7666, + "step": 4674 + }, + { + "epoch": 0.69, + "learning_rate": 4.593774980748155e-06, + "loss": 0.8315, + "step": 4675 + }, + { + "epoch": 0.69, + "learning_rate": 4.5897461561612814e-06, + "loss": 0.7778, + "step": 4676 + }, + { + "epoch": 0.69, + "learning_rate": 4.585718572775417e-06, + "loss": 0.2926, + "step": 4677 + }, + { + "epoch": 0.69, + "learning_rate": 4.58169223151454e-06, + "loss": 0.8003, + "step": 4678 + }, + { + "epoch": 0.69, + "learning_rate": 4.5776671333023725e-06, + "loss": 0.7993, + "step": 4679 + }, + { + "epoch": 0.69, + "learning_rate": 4.5736432790623295e-06, + "loss": 0.8237, + "step": 4680 + }, + { + "epoch": 0.69, + "learning_rate": 4.569620669717556e-06, + "loss": 0.7739, + "step": 4681 + }, + { + "epoch": 0.69, + "learning_rate": 4.565599306190902e-06, + "loss": 0.8066, + "step": 4682 + }, + { + "epoch": 0.69, + "learning_rate": 4.561579189404929e-06, + "loss": 0.8418, + "step": 4683 + }, + { + "epoch": 0.69, + "learning_rate": 4.557560320281927e-06, + "loss": 0.7847, + "step": 4684 + }, + { + "epoch": 0.69, + "learning_rate": 4.553542699743884e-06, + "loss": 0.8047, + "step": 4685 + }, + { + "epoch": 0.69, + "learning_rate": 4.549526328712509e-06, + "loss": 0.748, + "step": 4686 + }, + { + "epoch": 0.69, + "learning_rate": 4.5455112081092265e-06, + "loss": 0.7061, + "step": 4687 + }, + { + "epoch": 0.69, + "learning_rate": 4.541497338855166e-06, + "loss": 0.7583, + "step": 4688 + }, + { + "epoch": 0.69, + "learning_rate": 4.537484721871181e-06, + "loss": 0.7681, + "step": 4689 + }, + { + "epoch": 0.69, + "learning_rate": 4.5334733580778305e-06, + "loss": 0.7954, + "step": 4690 + }, + { + "epoch": 0.69, + "learning_rate": 4.529463248395379e-06, + "loss": 0.7944, + "step": 4691 + }, + { + "epoch": 0.69, + "learning_rate": 4.525454393743822e-06, + "loss": 0.7939, + "step": 4692 + }, + { + "epoch": 0.69, + "learning_rate": 4.52144679504285e-06, + "loss": 0.72, + "step": 4693 + }, + { + "epoch": 0.69, + "learning_rate": 4.517440453211876e-06, + "loss": 0.7778, + "step": 4694 + }, + { + "epoch": 0.69, + "learning_rate": 4.5134353691700196e-06, + "loss": 0.7856, + "step": 4695 + }, + { + "epoch": 0.69, + "learning_rate": 4.509431543836109e-06, + "loss": 0.8062, + "step": 4696 + }, + { + "epoch": 0.69, + "learning_rate": 4.505428978128693e-06, + "loss": 0.7349, + "step": 4697 + }, + { + "epoch": 0.69, + "learning_rate": 4.5014276729660235e-06, + "loss": 0.772, + "step": 4698 + }, + { + "epoch": 0.69, + "learning_rate": 4.497427629266061e-06, + "loss": 0.772, + "step": 4699 + }, + { + "epoch": 0.7, + "learning_rate": 4.493428847946489e-06, + "loss": 0.7251, + "step": 4700 + }, + { + "epoch": 0.7, + "learning_rate": 4.4894313299246874e-06, + "loss": 0.7207, + "step": 4701 + }, + { + "epoch": 0.7, + "learning_rate": 4.485435076117758e-06, + "loss": 0.7549, + "step": 4702 + }, + { + "epoch": 0.7, + "learning_rate": 4.481440087442505e-06, + "loss": 0.7944, + "step": 4703 + }, + { + "epoch": 0.7, + "learning_rate": 4.4774463648154375e-06, + "loss": 0.8008, + "step": 4704 + }, + { + "epoch": 0.7, + "learning_rate": 4.473453909152792e-06, + "loss": 0.7949, + "step": 4705 + }, + { + "epoch": 0.7, + "learning_rate": 4.469462721370498e-06, + "loss": 0.748, + "step": 4706 + }, + { + "epoch": 0.7, + "learning_rate": 4.465472802384196e-06, + "loss": 0.7578, + "step": 4707 + }, + { + "epoch": 0.7, + "learning_rate": 4.461484153109246e-06, + "loss": 0.7852, + "step": 4708 + }, + { + "epoch": 0.7, + "learning_rate": 4.4574967744607025e-06, + "loss": 0.7129, + "step": 4709 + }, + { + "epoch": 0.7, + "learning_rate": 4.453510667353342e-06, + "loss": 0.8174, + "step": 4710 + }, + { + "epoch": 0.7, + "learning_rate": 4.4495258327016415e-06, + "loss": 0.77, + "step": 4711 + }, + { + "epoch": 0.7, + "learning_rate": 4.445542271419781e-06, + "loss": 0.8232, + "step": 4712 + }, + { + "epoch": 0.7, + "learning_rate": 4.441559984421664e-06, + "loss": 0.8208, + "step": 4713 + }, + { + "epoch": 0.7, + "learning_rate": 4.437578972620884e-06, + "loss": 0.7217, + "step": 4714 + }, + { + "epoch": 0.7, + "learning_rate": 4.433599236930759e-06, + "loss": 0.8506, + "step": 4715 + }, + { + "epoch": 0.7, + "learning_rate": 4.429620778264302e-06, + "loss": 0.8389, + "step": 4716 + }, + { + "epoch": 0.7, + "learning_rate": 4.425643597534231e-06, + "loss": 0.8096, + "step": 4717 + }, + { + "epoch": 0.7, + "learning_rate": 4.421667695652987e-06, + "loss": 0.8413, + "step": 4718 + }, + { + "epoch": 0.7, + "learning_rate": 4.4176930735327005e-06, + "loss": 0.3108, + "step": 4719 + }, + { + "epoch": 0.7, + "learning_rate": 4.413719732085212e-06, + "loss": 0.835, + "step": 4720 + }, + { + "epoch": 0.7, + "learning_rate": 4.40974767222208e-06, + "loss": 0.7915, + "step": 4721 + }, + { + "epoch": 0.7, + "learning_rate": 4.405776894854552e-06, + "loss": 0.7983, + "step": 4722 + }, + { + "epoch": 0.7, + "learning_rate": 4.401807400893601e-06, + "loss": 0.7231, + "step": 4723 + }, + { + "epoch": 0.7, + "learning_rate": 4.397839191249879e-06, + "loss": 0.771, + "step": 4724 + }, + { + "epoch": 0.7, + "learning_rate": 4.393872266833764e-06, + "loss": 0.8501, + "step": 4725 + }, + { + "epoch": 0.7, + "learning_rate": 4.38990662855534e-06, + "loss": 0.8096, + "step": 4726 + }, + { + "epoch": 0.7, + "learning_rate": 4.385942277324381e-06, + "loss": 0.8389, + "step": 4727 + }, + { + "epoch": 0.7, + "learning_rate": 4.381979214050385e-06, + "loss": 0.7852, + "step": 4728 + }, + { + "epoch": 0.7, + "learning_rate": 4.378017439642529e-06, + "loss": 0.791, + "step": 4729 + }, + { + "epoch": 0.7, + "learning_rate": 4.374056955009716e-06, + "loss": 0.8027, + "step": 4730 + }, + { + "epoch": 0.7, + "learning_rate": 4.370097761060552e-06, + "loss": 0.8687, + "step": 4731 + }, + { + "epoch": 0.7, + "learning_rate": 4.3661398587033355e-06, + "loss": 0.7705, + "step": 4732 + }, + { + "epoch": 0.7, + "learning_rate": 4.362183248846074e-06, + "loss": 0.8271, + "step": 4733 + }, + { + "epoch": 0.7, + "learning_rate": 4.358227932396476e-06, + "loss": 0.7944, + "step": 4734 + }, + { + "epoch": 0.7, + "learning_rate": 4.354273910261959e-06, + "loss": 0.7783, + "step": 4735 + }, + { + "epoch": 0.7, + "learning_rate": 4.350321183349648e-06, + "loss": 0.7715, + "step": 4736 + }, + { + "epoch": 0.7, + "learning_rate": 4.34636975256635e-06, + "loss": 0.7373, + "step": 4737 + }, + { + "epoch": 0.7, + "learning_rate": 4.342419618818594e-06, + "loss": 0.8774, + "step": 4738 + }, + { + "epoch": 0.7, + "learning_rate": 4.338470783012609e-06, + "loss": 0.2917, + "step": 4739 + }, + { + "epoch": 0.7, + "learning_rate": 4.334523246054316e-06, + "loss": 0.3115, + "step": 4740 + }, + { + "epoch": 0.7, + "learning_rate": 4.330577008849356e-06, + "loss": 0.8574, + "step": 4741 + }, + { + "epoch": 0.7, + "learning_rate": 4.326632072303045e-06, + "loss": 0.8188, + "step": 4742 + }, + { + "epoch": 0.7, + "learning_rate": 4.3226884373204245e-06, + "loss": 0.8232, + "step": 4743 + }, + { + "epoch": 0.7, + "learning_rate": 4.318746104806233e-06, + "loss": 0.7969, + "step": 4744 + }, + { + "epoch": 0.7, + "learning_rate": 4.314805075664901e-06, + "loss": 0.8042, + "step": 4745 + }, + { + "epoch": 0.7, + "learning_rate": 4.310865350800566e-06, + "loss": 0.8174, + "step": 4746 + }, + { + "epoch": 0.7, + "learning_rate": 4.306926931117061e-06, + "loss": 0.7666, + "step": 4747 + }, + { + "epoch": 0.7, + "learning_rate": 4.302989817517931e-06, + "loss": 0.8457, + "step": 4748 + }, + { + "epoch": 0.7, + "learning_rate": 4.299054010906418e-06, + "loss": 0.7935, + "step": 4749 + }, + { + "epoch": 0.7, + "learning_rate": 4.295119512185448e-06, + "loss": 0.7993, + "step": 4750 + }, + { + "epoch": 0.7, + "learning_rate": 4.29118632225767e-06, + "loss": 0.7578, + "step": 4751 + }, + { + "epoch": 0.7, + "learning_rate": 4.2872544420254145e-06, + "loss": 0.8071, + "step": 4752 + }, + { + "epoch": 0.7, + "learning_rate": 4.283323872390728e-06, + "loss": 0.7607, + "step": 4753 + }, + { + "epoch": 0.7, + "learning_rate": 4.279394614255343e-06, + "loss": 0.7617, + "step": 4754 + }, + { + "epoch": 0.7, + "learning_rate": 4.275466668520692e-06, + "loss": 0.7163, + "step": 4755 + }, + { + "epoch": 0.7, + "learning_rate": 4.2715400360879174e-06, + "loss": 0.3066, + "step": 4756 + }, + { + "epoch": 0.7, + "learning_rate": 4.267614717857847e-06, + "loss": 0.8062, + "step": 4757 + }, + { + "epoch": 0.7, + "learning_rate": 4.26369071473102e-06, + "loss": 0.7871, + "step": 4758 + }, + { + "epoch": 0.7, + "learning_rate": 4.259768027607663e-06, + "loss": 0.7627, + "step": 4759 + }, + { + "epoch": 0.7, + "learning_rate": 4.255846657387701e-06, + "loss": 0.8174, + "step": 4760 + }, + { + "epoch": 0.7, + "learning_rate": 4.251926604970769e-06, + "loss": 0.8169, + "step": 4761 + }, + { + "epoch": 0.7, + "learning_rate": 4.248007871256185e-06, + "loss": 0.7529, + "step": 4762 + }, + { + "epoch": 0.7, + "learning_rate": 4.244090457142972e-06, + "loss": 0.8384, + "step": 4763 + }, + { + "epoch": 0.7, + "learning_rate": 4.240174363529852e-06, + "loss": 0.8145, + "step": 4764 + }, + { + "epoch": 0.7, + "learning_rate": 4.236259591315235e-06, + "loss": 0.8364, + "step": 4765 + }, + { + "epoch": 0.7, + "learning_rate": 4.232346141397242e-06, + "loss": 0.8457, + "step": 4766 + }, + { + "epoch": 0.7, + "learning_rate": 4.228434014673679e-06, + "loss": 0.3204, + "step": 4767 + }, + { + "epoch": 0.71, + "learning_rate": 4.2245232120420474e-06, + "loss": 0.7886, + "step": 4768 + }, + { + "epoch": 0.71, + "learning_rate": 4.220613734399558e-06, + "loss": 0.7998, + "step": 4769 + }, + { + "epoch": 0.71, + "learning_rate": 4.216705582643105e-06, + "loss": 0.3048, + "step": 4770 + }, + { + "epoch": 0.71, + "learning_rate": 4.212798757669278e-06, + "loss": 0.8506, + "step": 4771 + }, + { + "epoch": 0.71, + "learning_rate": 4.208893260374376e-06, + "loss": 0.2631, + "step": 4772 + }, + { + "epoch": 0.71, + "learning_rate": 4.204989091654374e-06, + "loss": 0.7832, + "step": 4773 + }, + { + "epoch": 0.71, + "learning_rate": 4.201086252404962e-06, + "loss": 0.7861, + "step": 4774 + }, + { + "epoch": 0.71, + "learning_rate": 4.197184743521512e-06, + "loss": 0.7881, + "step": 4775 + }, + { + "epoch": 0.71, + "learning_rate": 4.19328456589909e-06, + "loss": 0.792, + "step": 4776 + }, + { + "epoch": 0.71, + "learning_rate": 4.189385720432465e-06, + "loss": 0.7563, + "step": 4777 + }, + { + "epoch": 0.71, + "learning_rate": 4.185488208016093e-06, + "loss": 0.8213, + "step": 4778 + }, + { + "epoch": 0.71, + "learning_rate": 4.181592029544134e-06, + "loss": 0.7871, + "step": 4779 + }, + { + "epoch": 0.71, + "learning_rate": 4.177697185910428e-06, + "loss": 0.8057, + "step": 4780 + }, + { + "epoch": 0.71, + "learning_rate": 4.1738036780085175e-06, + "loss": 0.6899, + "step": 4781 + }, + { + "epoch": 0.71, + "learning_rate": 4.1699115067316405e-06, + "loss": 0.8247, + "step": 4782 + }, + { + "epoch": 0.71, + "learning_rate": 4.166020672972723e-06, + "loss": 0.8042, + "step": 4783 + }, + { + "epoch": 0.71, + "learning_rate": 4.162131177624383e-06, + "loss": 0.7979, + "step": 4784 + }, + { + "epoch": 0.71, + "learning_rate": 4.158243021578941e-06, + "loss": 0.8438, + "step": 4785 + }, + { + "epoch": 0.71, + "learning_rate": 4.154356205728396e-06, + "loss": 0.7534, + "step": 4786 + }, + { + "epoch": 0.71, + "learning_rate": 4.150470730964454e-06, + "loss": 0.8193, + "step": 4787 + }, + { + "epoch": 0.71, + "learning_rate": 4.1465865981785055e-06, + "loss": 0.7705, + "step": 4788 + }, + { + "epoch": 0.71, + "learning_rate": 4.14270380826163e-06, + "loss": 0.7053, + "step": 4789 + }, + { + "epoch": 0.71, + "learning_rate": 4.138822362104608e-06, + "loss": 0.8179, + "step": 4790 + }, + { + "epoch": 0.71, + "learning_rate": 4.134942260597903e-06, + "loss": 0.7285, + "step": 4791 + }, + { + "epoch": 0.71, + "learning_rate": 4.131063504631678e-06, + "loss": 0.7891, + "step": 4792 + }, + { + "epoch": 0.71, + "learning_rate": 4.1271860950957835e-06, + "loss": 0.8105, + "step": 4793 + }, + { + "epoch": 0.71, + "learning_rate": 4.123310032879754e-06, + "loss": 0.8135, + "step": 4794 + }, + { + "epoch": 0.71, + "learning_rate": 4.11943531887283e-06, + "loss": 0.7676, + "step": 4795 + }, + { + "epoch": 0.71, + "learning_rate": 4.11556195396393e-06, + "loss": 0.7671, + "step": 4796 + }, + { + "epoch": 0.71, + "learning_rate": 4.111689939041666e-06, + "loss": 0.7344, + "step": 4797 + }, + { + "epoch": 0.71, + "learning_rate": 4.107819274994348e-06, + "loss": 0.8545, + "step": 4798 + }, + { + "epoch": 0.71, + "learning_rate": 4.103949962709961e-06, + "loss": 0.8115, + "step": 4799 + }, + { + "epoch": 0.71, + "learning_rate": 4.1000820030762024e-06, + "loss": 0.7764, + "step": 4800 + }, + { + "epoch": 0.71, + "learning_rate": 4.096215396980429e-06, + "loss": 0.7539, + "step": 4801 + }, + { + "epoch": 0.71, + "learning_rate": 4.0923501453097115e-06, + "loss": 0.7998, + "step": 4802 + }, + { + "epoch": 0.71, + "learning_rate": 4.088486248950806e-06, + "loss": 0.7979, + "step": 4803 + }, + { + "epoch": 0.71, + "learning_rate": 4.084623708790145e-06, + "loss": 0.7808, + "step": 4804 + }, + { + "epoch": 0.71, + "learning_rate": 4.080762525713873e-06, + "loss": 0.7612, + "step": 4805 + }, + { + "epoch": 0.71, + "learning_rate": 4.07690270060779e-06, + "loss": 0.7715, + "step": 4806 + }, + { + "epoch": 0.71, + "learning_rate": 4.073044234357413e-06, + "loss": 0.7783, + "step": 4807 + }, + { + "epoch": 0.71, + "learning_rate": 4.069187127847941e-06, + "loss": 0.7637, + "step": 4808 + }, + { + "epoch": 0.71, + "learning_rate": 4.065331381964252e-06, + "loss": 0.8325, + "step": 4809 + }, + { + "epoch": 0.71, + "learning_rate": 4.061476997590918e-06, + "loss": 0.7329, + "step": 4810 + }, + { + "epoch": 0.71, + "learning_rate": 4.057623975612199e-06, + "loss": 0.8418, + "step": 4811 + }, + { + "epoch": 0.71, + "learning_rate": 4.05377231691204e-06, + "loss": 0.3296, + "step": 4812 + }, + { + "epoch": 0.71, + "learning_rate": 4.049922022374082e-06, + "loss": 0.8232, + "step": 4813 + }, + { + "epoch": 0.71, + "learning_rate": 4.046073092881634e-06, + "loss": 0.7578, + "step": 4814 + }, + { + "epoch": 0.71, + "learning_rate": 4.0422255293177074e-06, + "loss": 0.8237, + "step": 4815 + }, + { + "epoch": 0.71, + "learning_rate": 4.0383793325650025e-06, + "loss": 0.8491, + "step": 4816 + }, + { + "epoch": 0.71, + "learning_rate": 4.034534503505896e-06, + "loss": 0.2971, + "step": 4817 + }, + { + "epoch": 0.71, + "learning_rate": 4.030691043022454e-06, + "loss": 0.834, + "step": 4818 + }, + { + "epoch": 0.71, + "learning_rate": 4.026848951996427e-06, + "loss": 0.7837, + "step": 4819 + }, + { + "epoch": 0.71, + "learning_rate": 4.023008231309257e-06, + "loss": 0.7886, + "step": 4820 + }, + { + "epoch": 0.71, + "learning_rate": 4.019168881842071e-06, + "loss": 0.7847, + "step": 4821 + }, + { + "epoch": 0.71, + "learning_rate": 4.015330904475675e-06, + "loss": 0.877, + "step": 4822 + }, + { + "epoch": 0.71, + "learning_rate": 4.0114943000905645e-06, + "loss": 0.7969, + "step": 4823 + }, + { + "epoch": 0.71, + "learning_rate": 4.0076590695669186e-06, + "loss": 0.7529, + "step": 4824 + }, + { + "epoch": 0.71, + "learning_rate": 4.0038252137846e-06, + "loss": 0.8066, + "step": 4825 + }, + { + "epoch": 0.71, + "learning_rate": 3.99999273362317e-06, + "loss": 0.7876, + "step": 4826 + }, + { + "epoch": 0.71, + "learning_rate": 3.996161629961848e-06, + "loss": 0.8052, + "step": 4827 + }, + { + "epoch": 0.71, + "learning_rate": 3.992331903679559e-06, + "loss": 0.7715, + "step": 4828 + }, + { + "epoch": 0.71, + "learning_rate": 3.988503555654902e-06, + "loss": 0.7695, + "step": 4829 + }, + { + "epoch": 0.71, + "learning_rate": 3.984676586766167e-06, + "loss": 0.7417, + "step": 4830 + }, + { + "epoch": 0.71, + "learning_rate": 3.980850997891321e-06, + "loss": 0.8101, + "step": 4831 + }, + { + "epoch": 0.71, + "learning_rate": 3.977026789908015e-06, + "loss": 0.7712, + "step": 4832 + }, + { + "epoch": 0.71, + "learning_rate": 3.973203963693589e-06, + "loss": 0.8442, + "step": 4833 + }, + { + "epoch": 0.71, + "learning_rate": 3.969382520125058e-06, + "loss": 0.8062, + "step": 4834 + }, + { + "epoch": 0.72, + "learning_rate": 3.9655624600791285e-06, + "loss": 0.769, + "step": 4835 + }, + { + "epoch": 0.72, + "learning_rate": 3.9617437844321834e-06, + "loss": 0.77, + "step": 4836 + }, + { + "epoch": 0.72, + "learning_rate": 3.957926494060285e-06, + "loss": 0.8794, + "step": 4837 + }, + { + "epoch": 0.72, + "learning_rate": 3.954110589839185e-06, + "loss": 0.7939, + "step": 4838 + }, + { + "epoch": 0.72, + "learning_rate": 3.950296072644323e-06, + "loss": 0.3143, + "step": 4839 + }, + { + "epoch": 0.72, + "learning_rate": 3.946482943350797e-06, + "loss": 0.2867, + "step": 4840 + }, + { + "epoch": 0.72, + "learning_rate": 3.942671202833412e-06, + "loss": 0.8433, + "step": 4841 + }, + { + "epoch": 0.72, + "learning_rate": 3.9388608519666375e-06, + "loss": 0.8135, + "step": 4842 + }, + { + "epoch": 0.72, + "learning_rate": 3.935051891624636e-06, + "loss": 0.7412, + "step": 4843 + }, + { + "epoch": 0.72, + "learning_rate": 3.931244322681243e-06, + "loss": 0.7769, + "step": 4844 + }, + { + "epoch": 0.72, + "learning_rate": 3.927438146009974e-06, + "loss": 0.7695, + "step": 4845 + }, + { + "epoch": 0.72, + "learning_rate": 3.923633362484036e-06, + "loss": 0.8428, + "step": 4846 + }, + { + "epoch": 0.72, + "learning_rate": 3.919829972976304e-06, + "loss": 0.8311, + "step": 4847 + }, + { + "epoch": 0.72, + "learning_rate": 3.916027978359335e-06, + "loss": 0.7734, + "step": 4848 + }, + { + "epoch": 0.72, + "learning_rate": 3.9122273795053745e-06, + "loss": 0.812, + "step": 4849 + }, + { + "epoch": 0.72, + "learning_rate": 3.9084281772863375e-06, + "loss": 0.7866, + "step": 4850 + }, + { + "epoch": 0.72, + "learning_rate": 3.90463037257383e-06, + "loss": 0.7871, + "step": 4851 + }, + { + "epoch": 0.72, + "learning_rate": 3.900833966239126e-06, + "loss": 0.8042, + "step": 4852 + }, + { + "epoch": 0.72, + "learning_rate": 3.89703895915318e-06, + "loss": 0.791, + "step": 4853 + }, + { + "epoch": 0.72, + "learning_rate": 3.8932453521866365e-06, + "loss": 0.75, + "step": 4854 + }, + { + "epoch": 0.72, + "learning_rate": 3.889453146209804e-06, + "loss": 0.8086, + "step": 4855 + }, + { + "epoch": 0.72, + "learning_rate": 3.885662342092684e-06, + "loss": 0.8311, + "step": 4856 + }, + { + "epoch": 0.72, + "learning_rate": 3.881872940704946e-06, + "loss": 0.8394, + "step": 4857 + }, + { + "epoch": 0.72, + "learning_rate": 3.8780849429159365e-06, + "loss": 0.8647, + "step": 4858 + }, + { + "epoch": 0.72, + "learning_rate": 3.874298349594692e-06, + "loss": 0.8687, + "step": 4859 + }, + { + "epoch": 0.72, + "learning_rate": 3.870513161609915e-06, + "loss": 0.7441, + "step": 4860 + }, + { + "epoch": 0.72, + "learning_rate": 3.866729379829986e-06, + "loss": 0.7886, + "step": 4861 + }, + { + "epoch": 0.72, + "learning_rate": 3.862947005122975e-06, + "loss": 0.8325, + "step": 4862 + }, + { + "epoch": 0.72, + "learning_rate": 3.859166038356612e-06, + "loss": 0.8423, + "step": 4863 + }, + { + "epoch": 0.72, + "learning_rate": 3.855386480398322e-06, + "loss": 0.7197, + "step": 4864 + }, + { + "epoch": 0.72, + "learning_rate": 3.851608332115192e-06, + "loss": 0.2643, + "step": 4865 + }, + { + "epoch": 0.72, + "learning_rate": 3.8478315943739895e-06, + "loss": 0.7544, + "step": 4866 + }, + { + "epoch": 0.72, + "learning_rate": 3.844056268041165e-06, + "loss": 0.7896, + "step": 4867 + }, + { + "epoch": 0.72, + "learning_rate": 3.840282353982836e-06, + "loss": 0.7661, + "step": 4868 + }, + { + "epoch": 0.72, + "learning_rate": 3.8365098530648045e-06, + "loss": 0.7969, + "step": 4869 + }, + { + "epoch": 0.72, + "learning_rate": 3.832738766152544e-06, + "loss": 0.7881, + "step": 4870 + }, + { + "epoch": 0.72, + "learning_rate": 3.828969094111197e-06, + "loss": 0.7676, + "step": 4871 + }, + { + "epoch": 0.72, + "learning_rate": 3.825200837805595e-06, + "loss": 0.7783, + "step": 4872 + }, + { + "epoch": 0.72, + "learning_rate": 3.8214339981002364e-06, + "loss": 0.7827, + "step": 4873 + }, + { + "epoch": 0.72, + "learning_rate": 3.817668575859292e-06, + "loss": 0.8203, + "step": 4874 + }, + { + "epoch": 0.72, + "learning_rate": 3.8139045719466197e-06, + "loss": 0.7717, + "step": 4875 + }, + { + "epoch": 0.72, + "learning_rate": 3.8101419872257327e-06, + "loss": 0.8018, + "step": 4876 + }, + { + "epoch": 0.72, + "learning_rate": 3.8063808225598407e-06, + "loss": 0.793, + "step": 4877 + }, + { + "epoch": 0.72, + "learning_rate": 3.802621078811811e-06, + "loss": 0.7563, + "step": 4878 + }, + { + "epoch": 0.72, + "learning_rate": 3.7988627568441884e-06, + "loss": 0.7729, + "step": 4879 + }, + { + "epoch": 0.72, + "learning_rate": 3.795105857519199e-06, + "loss": 0.7812, + "step": 4880 + }, + { + "epoch": 0.72, + "learning_rate": 3.791350381698735e-06, + "loss": 0.8208, + "step": 4881 + }, + { + "epoch": 0.72, + "learning_rate": 3.7875963302443597e-06, + "loss": 0.7122, + "step": 4882 + }, + { + "epoch": 0.72, + "learning_rate": 3.7838437040173216e-06, + "loss": 0.8086, + "step": 4883 + }, + { + "epoch": 0.72, + "learning_rate": 3.7800925038785274e-06, + "loss": 0.7158, + "step": 4884 + }, + { + "epoch": 0.72, + "learning_rate": 3.7763427306885725e-06, + "loss": 0.8252, + "step": 4885 + }, + { + "epoch": 0.72, + "learning_rate": 3.7725943853077105e-06, + "loss": 0.8081, + "step": 4886 + }, + { + "epoch": 0.72, + "learning_rate": 3.768847468595871e-06, + "loss": 0.8169, + "step": 4887 + }, + { + "epoch": 0.72, + "learning_rate": 3.7651019814126656e-06, + "loss": 0.2904, + "step": 4888 + }, + { + "epoch": 0.72, + "learning_rate": 3.7613579246173624e-06, + "loss": 0.7896, + "step": 4889 + }, + { + "epoch": 0.72, + "learning_rate": 3.7576152990689217e-06, + "loss": 0.751, + "step": 4890 + }, + { + "epoch": 0.72, + "learning_rate": 3.7538741056259478e-06, + "loss": 0.7598, + "step": 4891 + }, + { + "epoch": 0.72, + "learning_rate": 3.7501343451467386e-06, + "loss": 0.793, + "step": 4892 + }, + { + "epoch": 0.72, + "learning_rate": 3.746396018489261e-06, + "loss": 0.7158, + "step": 4893 + }, + { + "epoch": 0.72, + "learning_rate": 3.7426591265111445e-06, + "loss": 0.769, + "step": 4894 + }, + { + "epoch": 0.72, + "learning_rate": 3.738923670069694e-06, + "loss": 0.7925, + "step": 4895 + }, + { + "epoch": 0.72, + "learning_rate": 3.73518965002188e-06, + "loss": 0.7817, + "step": 4896 + }, + { + "epoch": 0.72, + "learning_rate": 3.7314570672243523e-06, + "loss": 0.8032, + "step": 4897 + }, + { + "epoch": 0.72, + "learning_rate": 3.7277259225334284e-06, + "loss": 0.8467, + "step": 4898 + }, + { + "epoch": 0.72, + "learning_rate": 3.7239962168050935e-06, + "loss": 0.7554, + "step": 4899 + }, + { + "epoch": 0.72, + "learning_rate": 3.7202679508950015e-06, + "loss": 0.7896, + "step": 4900 + }, + { + "epoch": 0.72, + "learning_rate": 3.716541125658475e-06, + "loss": 0.7593, + "step": 4901 + }, + { + "epoch": 0.72, + "learning_rate": 3.712815741950511e-06, + "loss": 0.8154, + "step": 4902 + }, + { + "epoch": 0.73, + "learning_rate": 3.7090918006257825e-06, + "loss": 0.7847, + "step": 4903 + }, + { + "epoch": 0.73, + "learning_rate": 3.7053693025386074e-06, + "loss": 0.7388, + "step": 4904 + }, + { + "epoch": 0.73, + "learning_rate": 3.701648248542995e-06, + "loss": 0.8159, + "step": 4905 + }, + { + "epoch": 0.73, + "learning_rate": 3.6979286394926204e-06, + "loss": 0.7676, + "step": 4906 + }, + { + "epoch": 0.73, + "learning_rate": 3.6942104762408183e-06, + "loss": 0.8027, + "step": 4907 + }, + { + "epoch": 0.73, + "learning_rate": 3.6904937596405975e-06, + "loss": 0.7598, + "step": 4908 + }, + { + "epoch": 0.73, + "learning_rate": 3.68677849054463e-06, + "loss": 0.77, + "step": 4909 + }, + { + "epoch": 0.73, + "learning_rate": 3.683064669805263e-06, + "loss": 0.7695, + "step": 4910 + }, + { + "epoch": 0.73, + "learning_rate": 3.6793522982745135e-06, + "loss": 0.8418, + "step": 4911 + }, + { + "epoch": 0.73, + "learning_rate": 3.6756413768040487e-06, + "loss": 0.8062, + "step": 4912 + }, + { + "epoch": 0.73, + "learning_rate": 3.671931906245224e-06, + "loss": 0.7471, + "step": 4913 + }, + { + "epoch": 0.73, + "learning_rate": 3.6682238874490463e-06, + "loss": 0.791, + "step": 4914 + }, + { + "epoch": 0.73, + "learning_rate": 3.664517321266199e-06, + "loss": 0.769, + "step": 4915 + }, + { + "epoch": 0.73, + "learning_rate": 3.6608122085470367e-06, + "loss": 0.7366, + "step": 4916 + }, + { + "epoch": 0.73, + "learning_rate": 3.6571085501415583e-06, + "loss": 0.7505, + "step": 4917 + }, + { + "epoch": 0.73, + "learning_rate": 3.6534063468994554e-06, + "loss": 0.8271, + "step": 4918 + }, + { + "epoch": 0.73, + "learning_rate": 3.649705599670067e-06, + "loss": 0.7744, + "step": 4919 + }, + { + "epoch": 0.73, + "learning_rate": 3.6460063093024113e-06, + "loss": 0.8013, + "step": 4920 + }, + { + "epoch": 0.73, + "learning_rate": 3.6423084766451622e-06, + "loss": 0.8442, + "step": 4921 + }, + { + "epoch": 0.73, + "learning_rate": 3.6386121025466626e-06, + "loss": 0.7827, + "step": 4922 + }, + { + "epoch": 0.73, + "learning_rate": 3.634917187854925e-06, + "loss": 0.7656, + "step": 4923 + }, + { + "epoch": 0.73, + "learning_rate": 3.6312237334176216e-06, + "loss": 0.8169, + "step": 4924 + }, + { + "epoch": 0.73, + "learning_rate": 3.6275317400820884e-06, + "loss": 0.811, + "step": 4925 + }, + { + "epoch": 0.73, + "learning_rate": 3.6238412086953356e-06, + "loss": 0.7471, + "step": 4926 + }, + { + "epoch": 0.73, + "learning_rate": 3.620152140104025e-06, + "loss": 0.7209, + "step": 4927 + }, + { + "epoch": 0.73, + "learning_rate": 3.6164645351544956e-06, + "loss": 0.8311, + "step": 4928 + }, + { + "epoch": 0.73, + "learning_rate": 3.612778394692741e-06, + "loss": 0.7852, + "step": 4929 + }, + { + "epoch": 0.73, + "learning_rate": 3.6090937195644205e-06, + "loss": 0.811, + "step": 4930 + }, + { + "epoch": 0.73, + "learning_rate": 3.6054105106148642e-06, + "loss": 0.8174, + "step": 4931 + }, + { + "epoch": 0.73, + "learning_rate": 3.6017287686890545e-06, + "loss": 0.8066, + "step": 4932 + }, + { + "epoch": 0.73, + "learning_rate": 3.5980484946316507e-06, + "loss": 0.8232, + "step": 4933 + }, + { + "epoch": 0.73, + "learning_rate": 3.594369689286963e-06, + "loss": 0.7471, + "step": 4934 + }, + { + "epoch": 0.73, + "learning_rate": 3.590692353498968e-06, + "loss": 0.8433, + "step": 4935 + }, + { + "epoch": 0.73, + "learning_rate": 3.5870164881113135e-06, + "loss": 0.7837, + "step": 4936 + }, + { + "epoch": 0.73, + "learning_rate": 3.583342093967299e-06, + "loss": 0.7036, + "step": 4937 + }, + { + "epoch": 0.73, + "learning_rate": 3.5796691719098886e-06, + "loss": 0.748, + "step": 4938 + }, + { + "epoch": 0.73, + "learning_rate": 3.5759977227817167e-06, + "loss": 0.8052, + "step": 4939 + }, + { + "epoch": 0.73, + "learning_rate": 3.572327747425066e-06, + "loss": 0.7959, + "step": 4940 + }, + { + "epoch": 0.73, + "learning_rate": 3.5686592466818992e-06, + "loss": 0.8159, + "step": 4941 + }, + { + "epoch": 0.73, + "learning_rate": 3.564992221393825e-06, + "loss": 0.8018, + "step": 4942 + }, + { + "epoch": 0.73, + "learning_rate": 3.5613266724021156e-06, + "loss": 0.792, + "step": 4943 + }, + { + "epoch": 0.73, + "learning_rate": 3.5576626005477153e-06, + "loss": 0.8066, + "step": 4944 + }, + { + "epoch": 0.73, + "learning_rate": 3.5540000066712156e-06, + "loss": 0.7627, + "step": 4945 + }, + { + "epoch": 0.73, + "learning_rate": 3.5503388916128824e-06, + "loss": 0.7559, + "step": 4946 + }, + { + "epoch": 0.73, + "learning_rate": 3.546679256212633e-06, + "loss": 0.7583, + "step": 4947 + }, + { + "epoch": 0.73, + "learning_rate": 3.5430211013100424e-06, + "loss": 0.312, + "step": 4948 + }, + { + "epoch": 0.73, + "learning_rate": 3.5393644277443596e-06, + "loss": 0.8179, + "step": 4949 + }, + { + "epoch": 0.73, + "learning_rate": 3.535709236354482e-06, + "loss": 0.7549, + "step": 4950 + }, + { + "epoch": 0.73, + "learning_rate": 3.532055527978967e-06, + "loss": 0.7515, + "step": 4951 + }, + { + "epoch": 0.73, + "learning_rate": 3.5284033034560415e-06, + "loss": 0.7607, + "step": 4952 + }, + { + "epoch": 0.73, + "learning_rate": 3.5247525636235802e-06, + "loss": 0.7568, + "step": 4953 + }, + { + "epoch": 0.73, + "learning_rate": 3.5211033093191282e-06, + "loss": 0.7407, + "step": 4954 + }, + { + "epoch": 0.73, + "learning_rate": 3.5174555413798805e-06, + "loss": 0.3083, + "step": 4955 + }, + { + "epoch": 0.73, + "learning_rate": 3.513809260642694e-06, + "loss": 0.8574, + "step": 4956 + }, + { + "epoch": 0.73, + "learning_rate": 3.510164467944089e-06, + "loss": 0.7705, + "step": 4957 + }, + { + "epoch": 0.73, + "learning_rate": 3.50652116412024e-06, + "loss": 0.3071, + "step": 4958 + }, + { + "epoch": 0.73, + "learning_rate": 3.502879350006977e-06, + "loss": 0.7603, + "step": 4959 + }, + { + "epoch": 0.73, + "learning_rate": 3.4992390264397967e-06, + "loss": 0.7588, + "step": 4960 + }, + { + "epoch": 0.73, + "learning_rate": 3.495600194253843e-06, + "loss": 0.7881, + "step": 4961 + }, + { + "epoch": 0.73, + "learning_rate": 3.491962854283932e-06, + "loss": 0.7671, + "step": 4962 + }, + { + "epoch": 0.73, + "learning_rate": 3.488327007364525e-06, + "loss": 0.7529, + "step": 4963 + }, + { + "epoch": 0.73, + "learning_rate": 3.48469265432974e-06, + "loss": 0.3048, + "step": 4964 + }, + { + "epoch": 0.73, + "learning_rate": 3.4810597960133665e-06, + "loss": 0.7915, + "step": 4965 + }, + { + "epoch": 0.73, + "learning_rate": 3.477428433248833e-06, + "loss": 0.813, + "step": 4966 + }, + { + "epoch": 0.73, + "learning_rate": 3.473798566869244e-06, + "loss": 0.8452, + "step": 4967 + }, + { + "epoch": 0.73, + "learning_rate": 3.4701701977073386e-06, + "loss": 0.7891, + "step": 4968 + }, + { + "epoch": 0.73, + "learning_rate": 3.4665433265955307e-06, + "loss": 0.8306, + "step": 4969 + }, + { + "epoch": 0.73, + "learning_rate": 3.4629179543658852e-06, + "loss": 0.8164, + "step": 4970 + }, + { + "epoch": 0.74, + "learning_rate": 3.459294081850121e-06, + "loss": 0.8223, + "step": 4971 + }, + { + "epoch": 0.74, + "learning_rate": 3.4556717098796124e-06, + "loss": 0.7715, + "step": 4972 + }, + { + "epoch": 0.74, + "learning_rate": 3.452050839285388e-06, + "loss": 0.8062, + "step": 4973 + }, + { + "epoch": 0.74, + "learning_rate": 3.448431470898138e-06, + "loss": 0.7852, + "step": 4974 + }, + { + "epoch": 0.74, + "learning_rate": 3.444813605548213e-06, + "loss": 0.8203, + "step": 4975 + }, + { + "epoch": 0.74, + "learning_rate": 3.4411972440655963e-06, + "loss": 0.3362, + "step": 4976 + }, + { + "epoch": 0.74, + "learning_rate": 3.437582387279946e-06, + "loss": 0.7495, + "step": 4977 + }, + { + "epoch": 0.74, + "learning_rate": 3.4339690360205757e-06, + "loss": 0.7983, + "step": 4978 + }, + { + "epoch": 0.74, + "learning_rate": 3.430357191116439e-06, + "loss": 0.8174, + "step": 4979 + }, + { + "epoch": 0.74, + "learning_rate": 3.426746853396162e-06, + "loss": 0.7959, + "step": 4980 + }, + { + "epoch": 0.74, + "learning_rate": 3.423138023688003e-06, + "loss": 0.7651, + "step": 4981 + }, + { + "epoch": 0.74, + "learning_rate": 3.419530702819893e-06, + "loss": 0.3052, + "step": 4982 + }, + { + "epoch": 0.74, + "learning_rate": 3.4159248916194144e-06, + "loss": 0.8281, + "step": 4983 + }, + { + "epoch": 0.74, + "learning_rate": 3.412320590913796e-06, + "loss": 0.7441, + "step": 4984 + }, + { + "epoch": 0.74, + "learning_rate": 3.4087178015299226e-06, + "loss": 0.7983, + "step": 4985 + }, + { + "epoch": 0.74, + "learning_rate": 3.405116524294331e-06, + "loss": 0.7471, + "step": 4986 + }, + { + "epoch": 0.74, + "learning_rate": 3.4015167600332166e-06, + "loss": 0.8047, + "step": 4987 + }, + { + "epoch": 0.74, + "learning_rate": 3.3979185095724298e-06, + "loss": 0.731, + "step": 4988 + }, + { + "epoch": 0.74, + "learning_rate": 3.3943217737374556e-06, + "loss": 0.7197, + "step": 4989 + }, + { + "epoch": 0.74, + "learning_rate": 3.390726553353455e-06, + "loss": 0.3218, + "step": 4990 + }, + { + "epoch": 0.74, + "learning_rate": 3.387132849245224e-06, + "loss": 0.752, + "step": 4991 + }, + { + "epoch": 0.74, + "learning_rate": 3.383540662237219e-06, + "loss": 0.7485, + "step": 4992 + }, + { + "epoch": 0.74, + "learning_rate": 3.379949993153554e-06, + "loss": 0.8232, + "step": 4993 + }, + { + "epoch": 0.74, + "learning_rate": 3.376360842817975e-06, + "loss": 0.7322, + "step": 4994 + }, + { + "epoch": 0.74, + "learning_rate": 3.3727732120539005e-06, + "loss": 0.8027, + "step": 4995 + }, + { + "epoch": 0.74, + "learning_rate": 3.369187101684387e-06, + "loss": 0.8384, + "step": 4996 + }, + { + "epoch": 0.74, + "learning_rate": 3.3656025125321512e-06, + "loss": 0.8135, + "step": 4997 + }, + { + "epoch": 0.74, + "learning_rate": 3.3620194454195565e-06, + "loss": 0.8037, + "step": 4998 + }, + { + "epoch": 0.74, + "learning_rate": 3.358437901168611e-06, + "loss": 0.8135, + "step": 4999 + }, + { + "epoch": 0.74, + "learning_rate": 3.354857880600988e-06, + "loss": 0.752, + "step": 5000 + }, + { + "epoch": 0.74, + "learning_rate": 3.351279384538e-06, + "loss": 0.7583, + "step": 5001 + }, + { + "epoch": 0.74, + "learning_rate": 3.3477024138006074e-06, + "loss": 0.8208, + "step": 5002 + }, + { + "epoch": 0.74, + "learning_rate": 3.3441269692094346e-06, + "loss": 0.7866, + "step": 5003 + }, + { + "epoch": 0.74, + "learning_rate": 3.3405530515847406e-06, + "loss": 0.8032, + "step": 5004 + }, + { + "epoch": 0.74, + "learning_rate": 3.336980661746446e-06, + "loss": 0.7534, + "step": 5005 + }, + { + "epoch": 0.74, + "learning_rate": 3.3334098005141123e-06, + "loss": 0.8022, + "step": 5006 + }, + { + "epoch": 0.74, + "learning_rate": 3.329840468706952e-06, + "loss": 0.7515, + "step": 5007 + }, + { + "epoch": 0.74, + "learning_rate": 3.3262726671438337e-06, + "loss": 0.7021, + "step": 5008 + }, + { + "epoch": 0.74, + "learning_rate": 3.322706396643264e-06, + "loss": 0.7686, + "step": 5009 + }, + { + "epoch": 0.74, + "learning_rate": 3.3191416580234093e-06, + "loss": 0.8208, + "step": 5010 + }, + { + "epoch": 0.74, + "learning_rate": 3.315578452102076e-06, + "loss": 0.3289, + "step": 5011 + }, + { + "epoch": 0.74, + "learning_rate": 3.3120167796967195e-06, + "loss": 0.8325, + "step": 5012 + }, + { + "epoch": 0.74, + "learning_rate": 3.3084566416244525e-06, + "loss": 0.7817, + "step": 5013 + }, + { + "epoch": 0.74, + "learning_rate": 3.3048980387020245e-06, + "loss": 0.7886, + "step": 5014 + }, + { + "epoch": 0.74, + "learning_rate": 3.3013409717458355e-06, + "loss": 0.7544, + "step": 5015 + }, + { + "epoch": 0.74, + "learning_rate": 3.2977854415719412e-06, + "loss": 0.7554, + "step": 5016 + }, + { + "epoch": 0.74, + "learning_rate": 3.2942314489960314e-06, + "loss": 0.7642, + "step": 5017 + }, + { + "epoch": 0.74, + "learning_rate": 3.290678994833457e-06, + "loss": 0.7856, + "step": 5018 + }, + { + "epoch": 0.74, + "learning_rate": 3.2871280798992065e-06, + "loss": 0.7964, + "step": 5019 + }, + { + "epoch": 0.74, + "learning_rate": 3.283578705007915e-06, + "loss": 0.7539, + "step": 5020 + }, + { + "epoch": 0.74, + "learning_rate": 3.280030870973874e-06, + "loss": 0.7881, + "step": 5021 + }, + { + "epoch": 0.74, + "learning_rate": 3.27648457861101e-06, + "loss": 0.8291, + "step": 5022 + }, + { + "epoch": 0.74, + "learning_rate": 3.2729398287328983e-06, + "loss": 0.7046, + "step": 5023 + }, + { + "epoch": 0.74, + "learning_rate": 3.2693966221527707e-06, + "loss": 0.7695, + "step": 5024 + }, + { + "epoch": 0.74, + "learning_rate": 3.2658549596834875e-06, + "loss": 0.7534, + "step": 5025 + }, + { + "epoch": 0.74, + "learning_rate": 3.262314842137573e-06, + "loss": 0.813, + "step": 5026 + }, + { + "epoch": 0.74, + "learning_rate": 3.258776270327184e-06, + "loss": 0.7988, + "step": 5027 + }, + { + "epoch": 0.74, + "learning_rate": 3.2552392450641248e-06, + "loss": 0.7861, + "step": 5028 + }, + { + "epoch": 0.74, + "learning_rate": 3.2517037671598516e-06, + "loss": 0.8105, + "step": 5029 + }, + { + "epoch": 0.74, + "learning_rate": 3.2481698374254556e-06, + "loss": 0.7598, + "step": 5030 + }, + { + "epoch": 0.74, + "learning_rate": 3.2446374566716854e-06, + "loss": 0.7637, + "step": 5031 + }, + { + "epoch": 0.74, + "learning_rate": 3.241106625708923e-06, + "loss": 0.8032, + "step": 5032 + }, + { + "epoch": 0.74, + "learning_rate": 3.237577345347196e-06, + "loss": 0.8555, + "step": 5033 + }, + { + "epoch": 0.74, + "learning_rate": 3.2340496163961855e-06, + "loss": 0.8022, + "step": 5034 + }, + { + "epoch": 0.74, + "learning_rate": 3.230523439665206e-06, + "loss": 0.7725, + "step": 5035 + }, + { + "epoch": 0.74, + "learning_rate": 3.2269988159632203e-06, + "loss": 0.7944, + "step": 5036 + }, + { + "epoch": 0.74, + "learning_rate": 3.2234757460988386e-06, + "loss": 0.8003, + "step": 5037 + }, + { + "epoch": 0.75, + "learning_rate": 3.2199542308803055e-06, + "loss": 0.791, + "step": 5038 + }, + { + "epoch": 0.75, + "learning_rate": 3.216434271115524e-06, + "loss": 0.7695, + "step": 5039 + }, + { + "epoch": 0.75, + "learning_rate": 3.2129158676120176e-06, + "loss": 0.812, + "step": 5040 + }, + { + "epoch": 0.75, + "learning_rate": 3.209399021176971e-06, + "loss": 0.7393, + "step": 5041 + }, + { + "epoch": 0.75, + "learning_rate": 3.205883732617212e-06, + "loss": 0.7607, + "step": 5042 + }, + { + "epoch": 0.75, + "learning_rate": 3.202370002739198e-06, + "loss": 0.7368, + "step": 5043 + }, + { + "epoch": 0.75, + "learning_rate": 3.1988578323490427e-06, + "loss": 0.7446, + "step": 5044 + }, + { + "epoch": 0.75, + "learning_rate": 3.1953472222524918e-06, + "loss": 0.7959, + "step": 5045 + }, + { + "epoch": 0.75, + "learning_rate": 3.191838173254934e-06, + "loss": 0.8032, + "step": 5046 + }, + { + "epoch": 0.75, + "learning_rate": 3.1883306861614104e-06, + "loss": 0.7417, + "step": 5047 + }, + { + "epoch": 0.75, + "learning_rate": 3.1848247617765915e-06, + "loss": 0.8062, + "step": 5048 + }, + { + "epoch": 0.75, + "learning_rate": 3.1813204009047902e-06, + "loss": 0.7866, + "step": 5049 + }, + { + "epoch": 0.75, + "learning_rate": 3.177817604349973e-06, + "loss": 0.8833, + "step": 5050 + }, + { + "epoch": 0.75, + "learning_rate": 3.17431637291573e-06, + "loss": 0.8301, + "step": 5051 + }, + { + "epoch": 0.75, + "learning_rate": 3.170816707405312e-06, + "loss": 0.751, + "step": 5052 + }, + { + "epoch": 0.75, + "learning_rate": 3.167318608621587e-06, + "loss": 0.7412, + "step": 5053 + }, + { + "epoch": 0.75, + "learning_rate": 3.1638220773670825e-06, + "loss": 0.2949, + "step": 5054 + }, + { + "epoch": 0.75, + "learning_rate": 3.160327114443963e-06, + "loss": 0.7739, + "step": 5055 + }, + { + "epoch": 0.75, + "learning_rate": 3.1568337206540246e-06, + "loss": 0.7354, + "step": 5056 + }, + { + "epoch": 0.75, + "learning_rate": 3.1533418967987172e-06, + "loss": 0.8042, + "step": 5057 + }, + { + "epoch": 0.75, + "learning_rate": 3.1498516436791113e-06, + "loss": 0.7046, + "step": 5058 + }, + { + "epoch": 0.75, + "learning_rate": 3.1463629620959347e-06, + "loss": 0.7563, + "step": 5059 + }, + { + "epoch": 0.75, + "learning_rate": 3.142875852849551e-06, + "loss": 0.7852, + "step": 5060 + }, + { + "epoch": 0.75, + "learning_rate": 3.1393903167399553e-06, + "loss": 0.79, + "step": 5061 + }, + { + "epoch": 0.75, + "learning_rate": 3.13590635456679e-06, + "loss": 0.8071, + "step": 5062 + }, + { + "epoch": 0.75, + "learning_rate": 3.1324239671293276e-06, + "loss": 0.7227, + "step": 5063 + }, + { + "epoch": 0.75, + "learning_rate": 3.128943155226489e-06, + "loss": 0.7939, + "step": 5064 + }, + { + "epoch": 0.75, + "learning_rate": 3.125463919656836e-06, + "loss": 0.7935, + "step": 5065 + }, + { + "epoch": 0.75, + "learning_rate": 3.1219862612185493e-06, + "loss": 0.8486, + "step": 5066 + }, + { + "epoch": 0.75, + "learning_rate": 3.118510180709471e-06, + "loss": 0.811, + "step": 5067 + }, + { + "epoch": 0.75, + "learning_rate": 3.115035678927063e-06, + "loss": 0.8115, + "step": 5068 + }, + { + "epoch": 0.75, + "learning_rate": 3.1115627566684415e-06, + "loss": 0.7703, + "step": 5069 + }, + { + "epoch": 0.75, + "learning_rate": 3.1080914147303465e-06, + "loss": 0.7925, + "step": 5070 + }, + { + "epoch": 0.75, + "learning_rate": 3.10462165390916e-06, + "loss": 0.8257, + "step": 5071 + }, + { + "epoch": 0.75, + "learning_rate": 3.1011534750009033e-06, + "loss": 0.7495, + "step": 5072 + }, + { + "epoch": 0.75, + "learning_rate": 3.097686878801237e-06, + "loss": 0.7876, + "step": 5073 + }, + { + "epoch": 0.75, + "learning_rate": 3.0942218661054533e-06, + "loss": 0.835, + "step": 5074 + }, + { + "epoch": 0.75, + "learning_rate": 3.090758437708482e-06, + "loss": 0.6992, + "step": 5075 + }, + { + "epoch": 0.75, + "learning_rate": 3.087296594404887e-06, + "loss": 0.8052, + "step": 5076 + }, + { + "epoch": 0.75, + "learning_rate": 3.083836336988876e-06, + "loss": 0.7588, + "step": 5077 + }, + { + "epoch": 0.75, + "learning_rate": 3.080377666254294e-06, + "loss": 0.749, + "step": 5078 + }, + { + "epoch": 0.75, + "learning_rate": 3.0769205829946048e-06, + "loss": 0.8179, + "step": 5079 + }, + { + "epoch": 0.75, + "learning_rate": 3.0734650880029293e-06, + "loss": 0.8066, + "step": 5080 + }, + { + "epoch": 0.75, + "learning_rate": 3.070011182072008e-06, + "loss": 0.7256, + "step": 5081 + }, + { + "epoch": 0.75, + "learning_rate": 3.0665588659942314e-06, + "loss": 0.7949, + "step": 5082 + }, + { + "epoch": 0.75, + "learning_rate": 3.0631081405616136e-06, + "loss": 0.7676, + "step": 5083 + }, + { + "epoch": 0.75, + "learning_rate": 3.059659006565804e-06, + "loss": 0.8296, + "step": 5084 + }, + { + "epoch": 0.75, + "learning_rate": 3.0562114647980966e-06, + "loss": 0.7935, + "step": 5085 + }, + { + "epoch": 0.75, + "learning_rate": 3.0527655160494117e-06, + "loss": 0.7964, + "step": 5086 + }, + { + "epoch": 0.75, + "learning_rate": 3.0493211611103034e-06, + "loss": 0.6865, + "step": 5087 + }, + { + "epoch": 0.75, + "learning_rate": 3.0458784007709685e-06, + "loss": 0.811, + "step": 5088 + }, + { + "epoch": 0.75, + "learning_rate": 3.0424372358212285e-06, + "loss": 0.7698, + "step": 5089 + }, + { + "epoch": 0.75, + "learning_rate": 3.038997667050546e-06, + "loss": 0.7578, + "step": 5090 + }, + { + "epoch": 0.75, + "learning_rate": 3.035559695248015e-06, + "loss": 0.7925, + "step": 5091 + }, + { + "epoch": 0.75, + "learning_rate": 3.032123321202357e-06, + "loss": 0.2915, + "step": 5092 + }, + { + "epoch": 0.75, + "learning_rate": 3.0286885457019398e-06, + "loss": 0.7632, + "step": 5093 + }, + { + "epoch": 0.75, + "learning_rate": 3.025255369534751e-06, + "loss": 0.7993, + "step": 5094 + }, + { + "epoch": 0.75, + "learning_rate": 3.021823793488423e-06, + "loss": 0.7222, + "step": 5095 + }, + { + "epoch": 0.75, + "learning_rate": 3.0183938183502147e-06, + "loss": 0.8047, + "step": 5096 + }, + { + "epoch": 0.75, + "learning_rate": 3.014965444907013e-06, + "loss": 0.7876, + "step": 5097 + }, + { + "epoch": 0.75, + "learning_rate": 3.01153867394535e-06, + "loss": 0.7446, + "step": 5098 + }, + { + "epoch": 0.75, + "learning_rate": 3.0081135062513813e-06, + "loss": 0.8237, + "step": 5099 + }, + { + "epoch": 0.75, + "learning_rate": 3.0046899426108924e-06, + "loss": 0.771, + "step": 5100 + }, + { + "epoch": 0.75, + "learning_rate": 3.0012679838093107e-06, + "loss": 0.8257, + "step": 5101 + }, + { + "epoch": 0.75, + "learning_rate": 2.997847630631685e-06, + "loss": 0.7832, + "step": 5102 + }, + { + "epoch": 0.75, + "learning_rate": 2.9944288838627055e-06, + "loss": 0.7952, + "step": 5103 + }, + { + "epoch": 0.75, + "learning_rate": 2.991011744286686e-06, + "loss": 0.7588, + "step": 5104 + }, + { + "epoch": 0.75, + "learning_rate": 2.987596212687571e-06, + "loss": 0.8457, + "step": 5105 + }, + { + "epoch": 0.76, + "learning_rate": 2.9841822898489457e-06, + "loss": 0.8008, + "step": 5106 + }, + { + "epoch": 0.76, + "learning_rate": 2.9807699765540144e-06, + "loss": 0.7964, + "step": 5107 + }, + { + "epoch": 0.76, + "learning_rate": 2.977359273585624e-06, + "loss": 0.8057, + "step": 5108 + }, + { + "epoch": 0.76, + "learning_rate": 2.9739501817262416e-06, + "loss": 0.7964, + "step": 5109 + }, + { + "epoch": 0.76, + "learning_rate": 2.970542701757967e-06, + "loss": 0.7842, + "step": 5110 + }, + { + "epoch": 0.76, + "learning_rate": 2.9671368344625375e-06, + "loss": 0.7793, + "step": 5111 + }, + { + "epoch": 0.76, + "learning_rate": 2.9637325806213115e-06, + "loss": 0.7993, + "step": 5112 + }, + { + "epoch": 0.76, + "learning_rate": 2.9603299410152774e-06, + "loss": 0.7925, + "step": 5113 + }, + { + "epoch": 0.76, + "learning_rate": 2.9569289164250647e-06, + "loss": 0.8555, + "step": 5114 + }, + { + "epoch": 0.76, + "learning_rate": 2.9535295076309156e-06, + "loss": 0.7612, + "step": 5115 + }, + { + "epoch": 0.76, + "learning_rate": 2.9501317154127184e-06, + "loss": 0.7402, + "step": 5116 + }, + { + "epoch": 0.76, + "learning_rate": 2.9467355405499788e-06, + "loss": 0.7944, + "step": 5117 + }, + { + "epoch": 0.76, + "learning_rate": 2.9433409838218307e-06, + "loss": 0.7773, + "step": 5118 + }, + { + "epoch": 0.76, + "learning_rate": 2.9399480460070486e-06, + "loss": 0.791, + "step": 5119 + }, + { + "epoch": 0.76, + "learning_rate": 2.9365567278840214e-06, + "loss": 0.814, + "step": 5120 + }, + { + "epoch": 0.76, + "learning_rate": 2.933167030230779e-06, + "loss": 0.3159, + "step": 5121 + }, + { + "epoch": 0.76, + "learning_rate": 2.9297789538249712e-06, + "loss": 0.7793, + "step": 5122 + }, + { + "epoch": 0.76, + "learning_rate": 2.9263924994438754e-06, + "loss": 0.8452, + "step": 5123 + }, + { + "epoch": 0.76, + "learning_rate": 2.923007667864405e-06, + "loss": 0.8423, + "step": 5124 + }, + { + "epoch": 0.76, + "learning_rate": 2.919624459863093e-06, + "loss": 0.8438, + "step": 5125 + }, + { + "epoch": 0.76, + "learning_rate": 2.916242876216101e-06, + "loss": 0.7173, + "step": 5126 + }, + { + "epoch": 0.76, + "learning_rate": 2.912862917699225e-06, + "loss": 0.8447, + "step": 5127 + }, + { + "epoch": 0.76, + "learning_rate": 2.9094845850878773e-06, + "loss": 0.8413, + "step": 5128 + }, + { + "epoch": 0.76, + "learning_rate": 2.9061078791571105e-06, + "loss": 0.855, + "step": 5129 + }, + { + "epoch": 0.76, + "learning_rate": 2.902732800681586e-06, + "loss": 0.7705, + "step": 5130 + }, + { + "epoch": 0.76, + "learning_rate": 2.8993593504356065e-06, + "loss": 0.7852, + "step": 5131 + }, + { + "epoch": 0.76, + "learning_rate": 2.8959875291931018e-06, + "loss": 0.7969, + "step": 5132 + }, + { + "epoch": 0.76, + "learning_rate": 2.892617337727619e-06, + "loss": 0.7305, + "step": 5133 + }, + { + "epoch": 0.76, + "learning_rate": 2.8892487768123356e-06, + "loss": 0.8018, + "step": 5134 + }, + { + "epoch": 0.76, + "learning_rate": 2.88588184722005e-06, + "loss": 0.769, + "step": 5135 + }, + { + "epoch": 0.76, + "learning_rate": 2.8825165497231964e-06, + "loss": 0.7891, + "step": 5136 + }, + { + "epoch": 0.76, + "learning_rate": 2.879152885093832e-06, + "loss": 0.7793, + "step": 5137 + }, + { + "epoch": 0.76, + "learning_rate": 2.8757908541036338e-06, + "loss": 0.3008, + "step": 5138 + }, + { + "epoch": 0.76, + "learning_rate": 2.8724304575239048e-06, + "loss": 0.728, + "step": 5139 + }, + { + "epoch": 0.76, + "learning_rate": 2.869071696125574e-06, + "loss": 0.7544, + "step": 5140 + }, + { + "epoch": 0.76, + "learning_rate": 2.865714570679199e-06, + "loss": 0.8218, + "step": 5141 + }, + { + "epoch": 0.76, + "learning_rate": 2.8623590819549653e-06, + "loss": 0.7622, + "step": 5142 + }, + { + "epoch": 0.76, + "learning_rate": 2.8590052307226646e-06, + "loss": 0.3176, + "step": 5143 + }, + { + "epoch": 0.76, + "learning_rate": 2.8556530177517326e-06, + "loss": 0.8496, + "step": 5144 + }, + { + "epoch": 0.76, + "learning_rate": 2.8523024438112236e-06, + "loss": 0.8081, + "step": 5145 + }, + { + "epoch": 0.76, + "learning_rate": 2.848953509669813e-06, + "loss": 0.7837, + "step": 5146 + }, + { + "epoch": 0.76, + "learning_rate": 2.8456062160957986e-06, + "loss": 0.792, + "step": 5147 + }, + { + "epoch": 0.76, + "learning_rate": 2.8422605638571042e-06, + "loss": 0.8628, + "step": 5148 + }, + { + "epoch": 0.76, + "learning_rate": 2.838916553721278e-06, + "loss": 0.8232, + "step": 5149 + }, + { + "epoch": 0.76, + "learning_rate": 2.8355741864554964e-06, + "loss": 0.8213, + "step": 5150 + }, + { + "epoch": 0.76, + "learning_rate": 2.832233462826548e-06, + "loss": 0.7749, + "step": 5151 + }, + { + "epoch": 0.76, + "learning_rate": 2.828894383600851e-06, + "loss": 0.7876, + "step": 5152 + }, + { + "epoch": 0.76, + "learning_rate": 2.8255569495444403e-06, + "loss": 0.7207, + "step": 5153 + }, + { + "epoch": 0.76, + "learning_rate": 2.822221161422983e-06, + "loss": 0.8135, + "step": 5154 + }, + { + "epoch": 0.76, + "learning_rate": 2.818887020001769e-06, + "loss": 0.7925, + "step": 5155 + }, + { + "epoch": 0.76, + "learning_rate": 2.8155545260456917e-06, + "loss": 0.8149, + "step": 5156 + }, + { + "epoch": 0.76, + "learning_rate": 2.8122236803192915e-06, + "loss": 0.8169, + "step": 5157 + }, + { + "epoch": 0.76, + "learning_rate": 2.8088944835867104e-06, + "loss": 0.7788, + "step": 5158 + }, + { + "epoch": 0.76, + "learning_rate": 2.805566936611728e-06, + "loss": 0.6833, + "step": 5159 + }, + { + "epoch": 0.76, + "learning_rate": 2.8022410401577347e-06, + "loss": 0.7808, + "step": 5160 + }, + { + "epoch": 0.76, + "learning_rate": 2.798916794987744e-06, + "loss": 0.8027, + "step": 5161 + }, + { + "epoch": 0.76, + "learning_rate": 2.795594201864398e-06, + "loss": 0.7876, + "step": 5162 + }, + { + "epoch": 0.76, + "learning_rate": 2.792273261549949e-06, + "loss": 0.3162, + "step": 5163 + }, + { + "epoch": 0.76, + "learning_rate": 2.7889539748062746e-06, + "loss": 0.7554, + "step": 5164 + }, + { + "epoch": 0.76, + "learning_rate": 2.7856363423948774e-06, + "loss": 0.7561, + "step": 5165 + }, + { + "epoch": 0.76, + "learning_rate": 2.782320365076874e-06, + "loss": 0.7983, + "step": 5166 + }, + { + "epoch": 0.76, + "learning_rate": 2.779006043613006e-06, + "loss": 0.7915, + "step": 5167 + }, + { + "epoch": 0.76, + "learning_rate": 2.775693378763633e-06, + "loss": 0.8374, + "step": 5168 + }, + { + "epoch": 0.76, + "learning_rate": 2.772382371288731e-06, + "loss": 0.2827, + "step": 5169 + }, + { + "epoch": 0.76, + "learning_rate": 2.7690730219479054e-06, + "loss": 0.7905, + "step": 5170 + }, + { + "epoch": 0.76, + "learning_rate": 2.7657653315003686e-06, + "loss": 0.7812, + "step": 5171 + }, + { + "epoch": 0.76, + "learning_rate": 2.762459300704966e-06, + "loss": 0.79, + "step": 5172 + }, + { + "epoch": 0.77, + "learning_rate": 2.7591549303201513e-06, + "loss": 0.7939, + "step": 5173 + }, + { + "epoch": 0.77, + "learning_rate": 2.7558522211039995e-06, + "loss": 0.7695, + "step": 5174 + }, + { + "epoch": 0.77, + "learning_rate": 2.752551173814212e-06, + "loss": 0.7676, + "step": 5175 + }, + { + "epoch": 0.77, + "learning_rate": 2.7492517892080982e-06, + "loss": 0.8047, + "step": 5176 + }, + { + "epoch": 0.77, + "learning_rate": 2.7459540680425912e-06, + "loss": 0.7954, + "step": 5177 + }, + { + "epoch": 0.77, + "learning_rate": 2.742658011074246e-06, + "loss": 0.7651, + "step": 5178 + }, + { + "epoch": 0.77, + "learning_rate": 2.7393636190592278e-06, + "loss": 0.8008, + "step": 5179 + }, + { + "epoch": 0.77, + "learning_rate": 2.7360708927533285e-06, + "loss": 0.8042, + "step": 5180 + }, + { + "epoch": 0.77, + "learning_rate": 2.7327798329119525e-06, + "loss": 0.7573, + "step": 5181 + }, + { + "epoch": 0.77, + "learning_rate": 2.729490440290118e-06, + "loss": 0.7104, + "step": 5182 + }, + { + "epoch": 0.77, + "learning_rate": 2.7262027156424733e-06, + "loss": 0.8345, + "step": 5183 + }, + { + "epoch": 0.77, + "learning_rate": 2.722916659723268e-06, + "loss": 0.8013, + "step": 5184 + }, + { + "epoch": 0.77, + "learning_rate": 2.7196322732863855e-06, + "loss": 0.7754, + "step": 5185 + }, + { + "epoch": 0.77, + "learning_rate": 2.716349557085315e-06, + "loss": 0.7617, + "step": 5186 + }, + { + "epoch": 0.77, + "learning_rate": 2.7130685118731615e-06, + "loss": 0.3508, + "step": 5187 + }, + { + "epoch": 0.77, + "learning_rate": 2.7097891384026562e-06, + "loss": 0.7578, + "step": 5188 + }, + { + "epoch": 0.77, + "learning_rate": 2.706511437426139e-06, + "loss": 0.7852, + "step": 5189 + }, + { + "epoch": 0.77, + "learning_rate": 2.703235409695566e-06, + "loss": 0.8286, + "step": 5190 + }, + { + "epoch": 0.77, + "learning_rate": 2.6999610559625156e-06, + "loss": 0.7773, + "step": 5191 + }, + { + "epoch": 0.77, + "learning_rate": 2.6966883769781737e-06, + "loss": 0.7495, + "step": 5192 + }, + { + "epoch": 0.77, + "learning_rate": 2.6934173734933524e-06, + "loss": 0.8276, + "step": 5193 + }, + { + "epoch": 0.77, + "learning_rate": 2.6901480462584707e-06, + "loss": 0.7549, + "step": 5194 + }, + { + "epoch": 0.77, + "learning_rate": 2.6868803960235624e-06, + "loss": 0.7471, + "step": 5195 + }, + { + "epoch": 0.77, + "learning_rate": 2.6836144235382864e-06, + "loss": 0.7424, + "step": 5196 + }, + { + "epoch": 0.77, + "learning_rate": 2.6803501295519085e-06, + "loss": 0.7783, + "step": 5197 + }, + { + "epoch": 0.77, + "learning_rate": 2.6770875148133058e-06, + "loss": 0.7803, + "step": 5198 + }, + { + "epoch": 0.77, + "learning_rate": 2.673826580070984e-06, + "loss": 0.2963, + "step": 5199 + }, + { + "epoch": 0.77, + "learning_rate": 2.670567326073047e-06, + "loss": 0.8359, + "step": 5200 + }, + { + "epoch": 0.77, + "learning_rate": 2.6673097535672287e-06, + "loss": 0.8389, + "step": 5201 + }, + { + "epoch": 0.77, + "learning_rate": 2.664053863300866e-06, + "loss": 0.8208, + "step": 5202 + }, + { + "epoch": 0.77, + "learning_rate": 2.6607996560209103e-06, + "loss": 0.7578, + "step": 5203 + }, + { + "epoch": 0.77, + "learning_rate": 2.6575471324739376e-06, + "loss": 0.8281, + "step": 5204 + }, + { + "epoch": 0.77, + "learning_rate": 2.6542962934061224e-06, + "loss": 0.8257, + "step": 5205 + }, + { + "epoch": 0.77, + "learning_rate": 2.6510471395632707e-06, + "loss": 0.8384, + "step": 5206 + }, + { + "epoch": 0.77, + "learning_rate": 2.6477996716907796e-06, + "loss": 0.7529, + "step": 5207 + }, + { + "epoch": 0.77, + "learning_rate": 2.6445538905336764e-06, + "loss": 0.79, + "step": 5208 + }, + { + "epoch": 0.77, + "learning_rate": 2.6413097968365996e-06, + "loss": 0.7061, + "step": 5209 + }, + { + "epoch": 0.77, + "learning_rate": 2.638067391343797e-06, + "loss": 0.8027, + "step": 5210 + }, + { + "epoch": 0.77, + "learning_rate": 2.6348266747991236e-06, + "loss": 0.7212, + "step": 5211 + }, + { + "epoch": 0.77, + "learning_rate": 2.631587647946061e-06, + "loss": 0.8237, + "step": 5212 + }, + { + "epoch": 0.77, + "learning_rate": 2.6283503115276875e-06, + "loss": 0.7061, + "step": 5213 + }, + { + "epoch": 0.77, + "learning_rate": 2.625114666286709e-06, + "loss": 0.8281, + "step": 5214 + }, + { + "epoch": 0.77, + "learning_rate": 2.621880712965431e-06, + "loss": 0.2712, + "step": 5215 + }, + { + "epoch": 0.77, + "learning_rate": 2.618648452305773e-06, + "loss": 0.8101, + "step": 5216 + }, + { + "epoch": 0.77, + "learning_rate": 2.6154178850492752e-06, + "loss": 0.7773, + "step": 5217 + }, + { + "epoch": 0.77, + "learning_rate": 2.6121890119370753e-06, + "loss": 0.8066, + "step": 5218 + }, + { + "epoch": 0.77, + "learning_rate": 2.608961833709941e-06, + "loss": 0.7383, + "step": 5219 + }, + { + "epoch": 0.77, + "learning_rate": 2.6057363511082255e-06, + "loss": 0.7227, + "step": 5220 + }, + { + "epoch": 0.77, + "learning_rate": 2.6025125648719153e-06, + "loss": 0.8301, + "step": 5221 + }, + { + "epoch": 0.77, + "learning_rate": 2.5992904757406025e-06, + "loss": 0.812, + "step": 5222 + }, + { + "epoch": 0.77, + "learning_rate": 2.5960700844534827e-06, + "loss": 0.7827, + "step": 5223 + }, + { + "epoch": 0.77, + "learning_rate": 2.592851391749368e-06, + "loss": 0.8062, + "step": 5224 + }, + { + "epoch": 0.77, + "learning_rate": 2.589634398366676e-06, + "loss": 0.7417, + "step": 5225 + }, + { + "epoch": 0.77, + "learning_rate": 2.586419105043442e-06, + "loss": 0.7417, + "step": 5226 + }, + { + "epoch": 0.77, + "learning_rate": 2.5832055125173095e-06, + "loss": 0.7827, + "step": 5227 + }, + { + "epoch": 0.77, + "learning_rate": 2.5799936215255216e-06, + "loss": 0.3033, + "step": 5228 + }, + { + "epoch": 0.77, + "learning_rate": 2.5767834328049444e-06, + "loss": 0.7671, + "step": 5229 + }, + { + "epoch": 0.77, + "learning_rate": 2.5735749470920446e-06, + "loss": 0.8218, + "step": 5230 + }, + { + "epoch": 0.77, + "learning_rate": 2.570368165122902e-06, + "loss": 0.8291, + "step": 5231 + }, + { + "epoch": 0.77, + "learning_rate": 2.5671630876332132e-06, + "loss": 0.7861, + "step": 5232 + }, + { + "epoch": 0.77, + "learning_rate": 2.563959715358262e-06, + "loss": 0.8169, + "step": 5233 + }, + { + "epoch": 0.77, + "learning_rate": 2.560758049032963e-06, + "loss": 0.8218, + "step": 5234 + }, + { + "epoch": 0.77, + "learning_rate": 2.557558089391827e-06, + "loss": 0.79, + "step": 5235 + }, + { + "epoch": 0.77, + "learning_rate": 2.5543598371689826e-06, + "loss": 0.7617, + "step": 5236 + }, + { + "epoch": 0.77, + "learning_rate": 2.551163293098159e-06, + "loss": 0.6978, + "step": 5237 + }, + { + "epoch": 0.77, + "learning_rate": 2.5479684579126905e-06, + "loss": 0.7612, + "step": 5238 + }, + { + "epoch": 0.77, + "learning_rate": 2.5447753323455294e-06, + "loss": 0.7866, + "step": 5239 + }, + { + "epoch": 0.77, + "learning_rate": 2.541583917129237e-06, + "loss": 0.7832, + "step": 5240 + }, + { + "epoch": 0.78, + "learning_rate": 2.5383942129959637e-06, + "loss": 0.7827, + "step": 5241 + }, + { + "epoch": 0.78, + "learning_rate": 2.5352062206774896e-06, + "loss": 0.8159, + "step": 5242 + }, + { + "epoch": 0.78, + "learning_rate": 2.532019940905186e-06, + "loss": 0.7749, + "step": 5243 + }, + { + "epoch": 0.78, + "learning_rate": 2.528835374410045e-06, + "loss": 0.8394, + "step": 5244 + }, + { + "epoch": 0.78, + "learning_rate": 2.5256525219226533e-06, + "loss": 0.7227, + "step": 5245 + }, + { + "epoch": 0.78, + "learning_rate": 2.5224713841732084e-06, + "loss": 0.7734, + "step": 5246 + }, + { + "epoch": 0.78, + "learning_rate": 2.51929196189152e-06, + "loss": 0.7539, + "step": 5247 + }, + { + "epoch": 0.78, + "learning_rate": 2.516114255806995e-06, + "loss": 0.7886, + "step": 5248 + }, + { + "epoch": 0.78, + "learning_rate": 2.5129382666486555e-06, + "loss": 0.8052, + "step": 5249 + }, + { + "epoch": 0.78, + "learning_rate": 2.5097639951451247e-06, + "loss": 0.7881, + "step": 5250 + }, + { + "epoch": 0.78, + "learning_rate": 2.5065914420246295e-06, + "loss": 0.8076, + "step": 5251 + }, + { + "epoch": 0.78, + "learning_rate": 2.5034206080150093e-06, + "loss": 0.8037, + "step": 5252 + }, + { + "epoch": 0.78, + "learning_rate": 2.500251493843705e-06, + "loss": 0.7676, + "step": 5253 + }, + { + "epoch": 0.78, + "learning_rate": 2.497084100237759e-06, + "loss": 0.7354, + "step": 5254 + }, + { + "epoch": 0.78, + "learning_rate": 2.493918427923829e-06, + "loss": 0.8027, + "step": 5255 + }, + { + "epoch": 0.78, + "learning_rate": 2.490754477628168e-06, + "loss": 0.8213, + "step": 5256 + }, + { + "epoch": 0.78, + "learning_rate": 2.4875922500766414e-06, + "loss": 0.8037, + "step": 5257 + }, + { + "epoch": 0.78, + "learning_rate": 2.4844317459947167e-06, + "loss": 0.8247, + "step": 5258 + }, + { + "epoch": 0.78, + "learning_rate": 2.4812729661074587e-06, + "loss": 0.7461, + "step": 5259 + }, + { + "epoch": 0.78, + "learning_rate": 2.4781159111395515e-06, + "loss": 0.7031, + "step": 5260 + }, + { + "epoch": 0.78, + "learning_rate": 2.474960581815269e-06, + "loss": 0.79, + "step": 5261 + }, + { + "epoch": 0.78, + "learning_rate": 2.471806978858501e-06, + "loss": 0.8115, + "step": 5262 + }, + { + "epoch": 0.78, + "learning_rate": 2.468655102992733e-06, + "loss": 0.7749, + "step": 5263 + }, + { + "epoch": 0.78, + "learning_rate": 2.4655049549410535e-06, + "loss": 0.8013, + "step": 5264 + }, + { + "epoch": 0.78, + "learning_rate": 2.462356535426166e-06, + "loss": 0.7495, + "step": 5265 + }, + { + "epoch": 0.78, + "learning_rate": 2.459209845170365e-06, + "loss": 0.7295, + "step": 5266 + }, + { + "epoch": 0.78, + "learning_rate": 2.4560648848955495e-06, + "loss": 0.8188, + "step": 5267 + }, + { + "epoch": 0.78, + "learning_rate": 2.4529216553232326e-06, + "loss": 0.811, + "step": 5268 + }, + { + "epoch": 0.78, + "learning_rate": 2.449780157174515e-06, + "loss": 0.3337, + "step": 5269 + }, + { + "epoch": 0.78, + "learning_rate": 2.4466403911701152e-06, + "loss": 0.2997, + "step": 5270 + }, + { + "epoch": 0.78, + "learning_rate": 2.443502358030344e-06, + "loss": 0.7476, + "step": 5271 + }, + { + "epoch": 0.78, + "learning_rate": 2.4403660584751134e-06, + "loss": 0.77, + "step": 5272 + }, + { + "epoch": 0.78, + "learning_rate": 2.4372314932239495e-06, + "loss": 0.7837, + "step": 5273 + }, + { + "epoch": 0.78, + "learning_rate": 2.4340986629959694e-06, + "loss": 0.7539, + "step": 5274 + }, + { + "epoch": 0.78, + "learning_rate": 2.430967568509892e-06, + "loss": 0.8335, + "step": 5275 + }, + { + "epoch": 0.78, + "learning_rate": 2.427838210484049e-06, + "loss": 0.7461, + "step": 5276 + }, + { + "epoch": 0.78, + "learning_rate": 2.42471058963636e-06, + "loss": 0.7881, + "step": 5277 + }, + { + "epoch": 0.78, + "learning_rate": 2.421584706684359e-06, + "loss": 0.8232, + "step": 5278 + }, + { + "epoch": 0.78, + "learning_rate": 2.4184605623451707e-06, + "loss": 0.7959, + "step": 5279 + }, + { + "epoch": 0.78, + "learning_rate": 2.4153381573355227e-06, + "loss": 0.791, + "step": 5280 + }, + { + "epoch": 0.78, + "learning_rate": 2.4122174923717525e-06, + "loss": 0.72, + "step": 5281 + }, + { + "epoch": 0.78, + "learning_rate": 2.409098568169784e-06, + "loss": 0.7661, + "step": 5282 + }, + { + "epoch": 0.78, + "learning_rate": 2.4059813854451586e-06, + "loss": 0.645, + "step": 5283 + }, + { + "epoch": 0.78, + "learning_rate": 2.4028659449130033e-06, + "loss": 0.8179, + "step": 5284 + }, + { + "epoch": 0.78, + "learning_rate": 2.3997522472880496e-06, + "loss": 0.7764, + "step": 5285 + }, + { + "epoch": 0.78, + "learning_rate": 2.3966402932846365e-06, + "loss": 0.7668, + "step": 5286 + }, + { + "epoch": 0.78, + "learning_rate": 2.393530083616694e-06, + "loss": 0.8491, + "step": 5287 + }, + { + "epoch": 0.78, + "learning_rate": 2.3904216189977534e-06, + "loss": 0.7695, + "step": 5288 + }, + { + "epoch": 0.78, + "learning_rate": 2.3873149001409533e-06, + "loss": 0.6934, + "step": 5289 + }, + { + "epoch": 0.78, + "learning_rate": 2.384209927759019e-06, + "loss": 0.8281, + "step": 5290 + }, + { + "epoch": 0.78, + "learning_rate": 2.3811067025642907e-06, + "loss": 0.769, + "step": 5291 + }, + { + "epoch": 0.78, + "learning_rate": 2.378005225268689e-06, + "loss": 0.7527, + "step": 5292 + }, + { + "epoch": 0.78, + "learning_rate": 2.3749054965837492e-06, + "loss": 0.8052, + "step": 5293 + }, + { + "epoch": 0.78, + "learning_rate": 2.371807517220601e-06, + "loss": 0.834, + "step": 5294 + }, + { + "epoch": 0.78, + "learning_rate": 2.3687112878899676e-06, + "loss": 0.8179, + "step": 5295 + }, + { + "epoch": 0.78, + "learning_rate": 2.365616809302184e-06, + "loss": 0.791, + "step": 5296 + }, + { + "epoch": 0.78, + "learning_rate": 2.3625240821671613e-06, + "loss": 0.7739, + "step": 5297 + }, + { + "epoch": 0.78, + "learning_rate": 2.3594331071944277e-06, + "loss": 0.748, + "step": 5298 + }, + { + "epoch": 0.78, + "learning_rate": 2.3563438850931076e-06, + "loss": 0.8105, + "step": 5299 + }, + { + "epoch": 0.78, + "learning_rate": 2.353256416571916e-06, + "loss": 0.7778, + "step": 5300 + }, + { + "epoch": 0.78, + "learning_rate": 2.3501707023391683e-06, + "loss": 0.8105, + "step": 5301 + }, + { + "epoch": 0.78, + "learning_rate": 2.3470867431027754e-06, + "loss": 0.8218, + "step": 5302 + }, + { + "epoch": 0.78, + "learning_rate": 2.3440045395702514e-06, + "loss": 0.8418, + "step": 5303 + }, + { + "epoch": 0.78, + "learning_rate": 2.3409240924487086e-06, + "loss": 0.3137, + "step": 5304 + }, + { + "epoch": 0.78, + "learning_rate": 2.3378454024448427e-06, + "loss": 0.7036, + "step": 5305 + }, + { + "epoch": 0.78, + "learning_rate": 2.334768470264963e-06, + "loss": 0.7334, + "step": 5306 + }, + { + "epoch": 0.78, + "learning_rate": 2.331693296614963e-06, + "loss": 0.7544, + "step": 5307 + }, + { + "epoch": 0.78, + "learning_rate": 2.3286198822003414e-06, + "loss": 0.8057, + "step": 5308 + }, + { + "epoch": 0.79, + "learning_rate": 2.325548227726194e-06, + "loss": 0.7686, + "step": 5309 + }, + { + "epoch": 0.79, + "learning_rate": 2.322478333897199e-06, + "loss": 0.8433, + "step": 5310 + }, + { + "epoch": 0.79, + "learning_rate": 2.3194102014176447e-06, + "loss": 0.3177, + "step": 5311 + }, + { + "epoch": 0.79, + "learning_rate": 2.3163438309914145e-06, + "loss": 0.811, + "step": 5312 + }, + { + "epoch": 0.79, + "learning_rate": 2.3132792233219814e-06, + "loss": 0.8066, + "step": 5313 + }, + { + "epoch": 0.79, + "learning_rate": 2.3102163791124167e-06, + "loss": 0.8569, + "step": 5314 + }, + { + "epoch": 0.79, + "learning_rate": 2.3071552990653844e-06, + "loss": 0.7578, + "step": 5315 + }, + { + "epoch": 0.79, + "learning_rate": 2.3040959838831488e-06, + "loss": 0.7959, + "step": 5316 + }, + { + "epoch": 0.79, + "learning_rate": 2.301038434267573e-06, + "loss": 0.7544, + "step": 5317 + }, + { + "epoch": 0.79, + "learning_rate": 2.2979826509200974e-06, + "loss": 0.7852, + "step": 5318 + }, + { + "epoch": 0.79, + "learning_rate": 2.2949286345417777e-06, + "loss": 0.7593, + "step": 5319 + }, + { + "epoch": 0.79, + "learning_rate": 2.2918763858332503e-06, + "loss": 0.7231, + "step": 5320 + }, + { + "epoch": 0.79, + "learning_rate": 2.2888259054947548e-06, + "loss": 0.7749, + "step": 5321 + }, + { + "epoch": 0.79, + "learning_rate": 2.285777194226121e-06, + "loss": 0.7417, + "step": 5322 + }, + { + "epoch": 0.79, + "learning_rate": 2.2827302527267693e-06, + "loss": 0.7822, + "step": 5323 + }, + { + "epoch": 0.79, + "learning_rate": 2.2796850816957227e-06, + "loss": 0.7739, + "step": 5324 + }, + { + "epoch": 0.79, + "learning_rate": 2.2766416818315897e-06, + "loss": 0.8413, + "step": 5325 + }, + { + "epoch": 0.79, + "learning_rate": 2.2736000538325807e-06, + "loss": 0.8018, + "step": 5326 + }, + { + "epoch": 0.79, + "learning_rate": 2.2705601983964933e-06, + "loss": 0.7129, + "step": 5327 + }, + { + "epoch": 0.79, + "learning_rate": 2.2675221162207153e-06, + "loss": 0.7744, + "step": 5328 + }, + { + "epoch": 0.79, + "learning_rate": 2.2644858080022403e-06, + "loss": 0.748, + "step": 5329 + }, + { + "epoch": 0.79, + "learning_rate": 2.2614512744376436e-06, + "loss": 0.7461, + "step": 5330 + }, + { + "epoch": 0.79, + "learning_rate": 2.258418516223094e-06, + "loss": 0.8281, + "step": 5331 + }, + { + "epoch": 0.79, + "learning_rate": 2.2553875340543617e-06, + "loss": 0.8276, + "step": 5332 + }, + { + "epoch": 0.79, + "learning_rate": 2.252358328626799e-06, + "loss": 0.7476, + "step": 5333 + }, + { + "epoch": 0.79, + "learning_rate": 2.249330900635359e-06, + "loss": 0.2892, + "step": 5334 + }, + { + "epoch": 0.79, + "learning_rate": 2.246305250774583e-06, + "loss": 0.7056, + "step": 5335 + }, + { + "epoch": 0.79, + "learning_rate": 2.2432813797386e-06, + "loss": 0.7764, + "step": 5336 + }, + { + "epoch": 0.79, + "learning_rate": 2.2402592882211418e-06, + "loss": 0.7983, + "step": 5337 + }, + { + "epoch": 0.79, + "learning_rate": 2.2372389769155235e-06, + "loss": 0.8442, + "step": 5338 + }, + { + "epoch": 0.79, + "learning_rate": 2.23422044651465e-06, + "loss": 0.7842, + "step": 5339 + }, + { + "epoch": 0.79, + "learning_rate": 2.2312036977110283e-06, + "loss": 0.7661, + "step": 5340 + }, + { + "epoch": 0.79, + "learning_rate": 2.2281887311967454e-06, + "loss": 0.7544, + "step": 5341 + }, + { + "epoch": 0.79, + "learning_rate": 2.2251755476634883e-06, + "loss": 0.8242, + "step": 5342 + }, + { + "epoch": 0.79, + "learning_rate": 2.222164147802528e-06, + "loss": 0.7856, + "step": 5343 + }, + { + "epoch": 0.79, + "learning_rate": 2.2191545323047257e-06, + "loss": 0.7627, + "step": 5344 + }, + { + "epoch": 0.79, + "learning_rate": 2.216146701860544e-06, + "loss": 0.7959, + "step": 5345 + }, + { + "epoch": 0.79, + "learning_rate": 2.213140657160021e-06, + "loss": 0.811, + "step": 5346 + }, + { + "epoch": 0.79, + "learning_rate": 2.2101363988928006e-06, + "loss": 0.8335, + "step": 5347 + }, + { + "epoch": 0.79, + "learning_rate": 2.207133927748104e-06, + "loss": 0.7583, + "step": 5348 + }, + { + "epoch": 0.79, + "learning_rate": 2.2041332444147447e-06, + "loss": 0.7324, + "step": 5349 + }, + { + "epoch": 0.79, + "learning_rate": 2.2011343495811353e-06, + "loss": 0.749, + "step": 5350 + }, + { + "epoch": 0.79, + "learning_rate": 2.1981372439352687e-06, + "loss": 0.832, + "step": 5351 + }, + { + "epoch": 0.79, + "learning_rate": 2.1951419281647267e-06, + "loss": 0.7856, + "step": 5352 + }, + { + "epoch": 0.79, + "learning_rate": 2.1921484029566887e-06, + "loss": 0.7979, + "step": 5353 + }, + { + "epoch": 0.79, + "learning_rate": 2.189156668997915e-06, + "loss": 0.7729, + "step": 5354 + }, + { + "epoch": 0.79, + "learning_rate": 2.1861667269747623e-06, + "loss": 0.8037, + "step": 5355 + }, + { + "epoch": 0.79, + "learning_rate": 2.1831785775731705e-06, + "loss": 0.7949, + "step": 5356 + }, + { + "epoch": 0.79, + "learning_rate": 2.1801922214786663e-06, + "loss": 0.749, + "step": 5357 + }, + { + "epoch": 0.79, + "learning_rate": 2.1772076593763757e-06, + "loss": 0.7788, + "step": 5358 + }, + { + "epoch": 0.79, + "learning_rate": 2.174224891951e-06, + "loss": 0.8296, + "step": 5359 + }, + { + "epoch": 0.79, + "learning_rate": 2.1712439198868408e-06, + "loss": 0.8228, + "step": 5360 + }, + { + "epoch": 0.79, + "learning_rate": 2.1682647438677782e-06, + "loss": 0.7993, + "step": 5361 + }, + { + "epoch": 0.79, + "learning_rate": 2.165287364577282e-06, + "loss": 0.7549, + "step": 5362 + }, + { + "epoch": 0.79, + "learning_rate": 2.1623117826984187e-06, + "loss": 0.8096, + "step": 5363 + }, + { + "epoch": 0.79, + "learning_rate": 2.1593379989138306e-06, + "loss": 0.7407, + "step": 5364 + }, + { + "epoch": 0.79, + "learning_rate": 2.1563660139057506e-06, + "loss": 0.7783, + "step": 5365 + }, + { + "epoch": 0.79, + "learning_rate": 2.1533958283560064e-06, + "loss": 0.7456, + "step": 5366 + }, + { + "epoch": 0.79, + "learning_rate": 2.1504274429460024e-06, + "loss": 0.7339, + "step": 5367 + }, + { + "epoch": 0.79, + "learning_rate": 2.1474608583567426e-06, + "loss": 0.7998, + "step": 5368 + }, + { + "epoch": 0.79, + "learning_rate": 2.1444960752687994e-06, + "loss": 0.7998, + "step": 5369 + }, + { + "epoch": 0.79, + "learning_rate": 2.141533094362347e-06, + "loss": 0.7773, + "step": 5370 + }, + { + "epoch": 0.79, + "learning_rate": 2.138571916317146e-06, + "loss": 0.7891, + "step": 5371 + }, + { + "epoch": 0.79, + "learning_rate": 2.135612541812534e-06, + "loss": 0.8076, + "step": 5372 + }, + { + "epoch": 0.79, + "learning_rate": 2.1326549715274467e-06, + "loss": 0.7964, + "step": 5373 + }, + { + "epoch": 0.79, + "learning_rate": 2.1296992061403898e-06, + "loss": 0.2772, + "step": 5374 + }, + { + "epoch": 0.79, + "learning_rate": 2.126745246329469e-06, + "loss": 0.7656, + "step": 5375 + }, + { + "epoch": 0.8, + "learning_rate": 2.1237930927723736e-06, + "loss": 0.7178, + "step": 5376 + }, + { + "epoch": 0.8, + "learning_rate": 2.1208427461463753e-06, + "loss": 0.2944, + "step": 5377 + }, + { + "epoch": 0.8, + "learning_rate": 2.117894207128327e-06, + "loss": 0.7744, + "step": 5378 + }, + { + "epoch": 0.8, + "learning_rate": 2.1149474763946777e-06, + "loss": 0.7881, + "step": 5379 + }, + { + "epoch": 0.8, + "learning_rate": 2.1120025546214516e-06, + "loss": 0.7671, + "step": 5380 + }, + { + "epoch": 0.8, + "learning_rate": 2.1090594424842694e-06, + "loss": 0.7373, + "step": 5381 + }, + { + "epoch": 0.8, + "learning_rate": 2.1061181406583184e-06, + "loss": 0.7583, + "step": 5382 + }, + { + "epoch": 0.8, + "learning_rate": 2.103178649818387e-06, + "loss": 0.8013, + "step": 5383 + }, + { + "epoch": 0.8, + "learning_rate": 2.1002409706388462e-06, + "loss": 0.7656, + "step": 5384 + }, + { + "epoch": 0.8, + "learning_rate": 2.097305103793643e-06, + "loss": 0.7451, + "step": 5385 + }, + { + "epoch": 0.8, + "learning_rate": 2.0943710499563164e-06, + "loss": 0.834, + "step": 5386 + }, + { + "epoch": 0.8, + "learning_rate": 2.0914388097999803e-06, + "loss": 0.8037, + "step": 5387 + }, + { + "epoch": 0.8, + "learning_rate": 2.088508383997344e-06, + "loss": 0.3103, + "step": 5388 + }, + { + "epoch": 0.8, + "learning_rate": 2.085579773220697e-06, + "loss": 0.7646, + "step": 5389 + }, + { + "epoch": 0.8, + "learning_rate": 2.0826529781419092e-06, + "loss": 0.7974, + "step": 5390 + }, + { + "epoch": 0.8, + "learning_rate": 2.079727999432434e-06, + "loss": 0.7861, + "step": 5391 + }, + { + "epoch": 0.8, + "learning_rate": 2.0768048377633065e-06, + "loss": 0.7798, + "step": 5392 + }, + { + "epoch": 0.8, + "learning_rate": 2.073883493805152e-06, + "loss": 0.7129, + "step": 5393 + }, + { + "epoch": 0.8, + "learning_rate": 2.070963968228179e-06, + "loss": 0.7593, + "step": 5394 + }, + { + "epoch": 0.8, + "learning_rate": 2.0680462617021644e-06, + "loss": 0.7261, + "step": 5395 + }, + { + "epoch": 0.8, + "learning_rate": 2.065130374896486e-06, + "loss": 0.7983, + "step": 5396 + }, + { + "epoch": 0.8, + "learning_rate": 2.0622163084800904e-06, + "loss": 0.8486, + "step": 5397 + }, + { + "epoch": 0.8, + "learning_rate": 2.059304063121518e-06, + "loss": 0.771, + "step": 5398 + }, + { + "epoch": 0.8, + "learning_rate": 2.0563936394888827e-06, + "loss": 0.8208, + "step": 5399 + }, + { + "epoch": 0.8, + "learning_rate": 2.0534850382498807e-06, + "loss": 0.772, + "step": 5400 + }, + { + "epoch": 0.8, + "learning_rate": 2.050578260071798e-06, + "loss": 0.814, + "step": 5401 + }, + { + "epoch": 0.8, + "learning_rate": 2.047673305621496e-06, + "loss": 0.3116, + "step": 5402 + }, + { + "epoch": 0.8, + "learning_rate": 2.0447701755654138e-06, + "loss": 0.8193, + "step": 5403 + }, + { + "epoch": 0.8, + "learning_rate": 2.0418688705695846e-06, + "loss": 0.8052, + "step": 5404 + }, + { + "epoch": 0.8, + "learning_rate": 2.038969391299609e-06, + "loss": 0.7812, + "step": 5405 + }, + { + "epoch": 0.8, + "learning_rate": 2.0360717384206785e-06, + "loss": 0.7871, + "step": 5406 + }, + { + "epoch": 0.8, + "learning_rate": 2.033175912597566e-06, + "loss": 0.7563, + "step": 5407 + }, + { + "epoch": 0.8, + "learning_rate": 2.030281914494612e-06, + "loss": 0.7905, + "step": 5408 + }, + { + "epoch": 0.8, + "learning_rate": 2.027389744775755e-06, + "loss": 0.7832, + "step": 5409 + }, + { + "epoch": 0.8, + "learning_rate": 2.0244994041045016e-06, + "loss": 0.7686, + "step": 5410 + }, + { + "epoch": 0.8, + "learning_rate": 2.021610893143947e-06, + "loss": 0.8218, + "step": 5411 + }, + { + "epoch": 0.8, + "learning_rate": 2.018724212556762e-06, + "loss": 0.8506, + "step": 5412 + }, + { + "epoch": 0.8, + "learning_rate": 2.0158393630051944e-06, + "loss": 0.77, + "step": 5413 + }, + { + "epoch": 0.8, + "learning_rate": 2.0129563451510814e-06, + "loss": 0.7788, + "step": 5414 + }, + { + "epoch": 0.8, + "learning_rate": 2.0100751596558333e-06, + "loss": 0.8203, + "step": 5415 + }, + { + "epoch": 0.8, + "learning_rate": 2.0071958071804385e-06, + "loss": 0.2914, + "step": 5416 + }, + { + "epoch": 0.8, + "learning_rate": 2.004318288385472e-06, + "loss": 0.8472, + "step": 5417 + }, + { + "epoch": 0.8, + "learning_rate": 2.0014426039310786e-06, + "loss": 0.8354, + "step": 5418 + }, + { + "epoch": 0.8, + "learning_rate": 1.9985687544769936e-06, + "loss": 0.8335, + "step": 5419 + }, + { + "epoch": 0.8, + "learning_rate": 1.995696740682521e-06, + "loss": 0.7471, + "step": 5420 + }, + { + "epoch": 0.8, + "learning_rate": 1.992826563206548e-06, + "loss": 0.75, + "step": 5421 + }, + { + "epoch": 0.8, + "learning_rate": 1.989958222707543e-06, + "loss": 0.8286, + "step": 5422 + }, + { + "epoch": 0.8, + "learning_rate": 1.9870917198435467e-06, + "loss": 0.7847, + "step": 5423 + }, + { + "epoch": 0.8, + "learning_rate": 1.9842270552721864e-06, + "loss": 0.7783, + "step": 5424 + }, + { + "epoch": 0.8, + "learning_rate": 1.9813642296506606e-06, + "loss": 0.7847, + "step": 5425 + }, + { + "epoch": 0.8, + "learning_rate": 1.9785032436357467e-06, + "loss": 0.7915, + "step": 5426 + }, + { + "epoch": 0.8, + "learning_rate": 1.9756440978838056e-06, + "loss": 0.873, + "step": 5427 + }, + { + "epoch": 0.8, + "learning_rate": 1.9727867930507706e-06, + "loss": 0.7578, + "step": 5428 + }, + { + "epoch": 0.8, + "learning_rate": 1.969931329792152e-06, + "loss": 0.8101, + "step": 5429 + }, + { + "epoch": 0.8, + "learning_rate": 1.967077708763043e-06, + "loss": 0.2897, + "step": 5430 + }, + { + "epoch": 0.8, + "learning_rate": 1.9642259306181088e-06, + "loss": 0.7529, + "step": 5431 + }, + { + "epoch": 0.8, + "learning_rate": 1.9613759960115986e-06, + "loss": 0.7695, + "step": 5432 + }, + { + "epoch": 0.8, + "learning_rate": 1.9585279055973296e-06, + "loss": 0.3184, + "step": 5433 + }, + { + "epoch": 0.8, + "learning_rate": 1.9556816600286997e-06, + "loss": 0.7334, + "step": 5434 + }, + { + "epoch": 0.8, + "learning_rate": 1.9528372599586896e-06, + "loss": 0.74, + "step": 5435 + }, + { + "epoch": 0.8, + "learning_rate": 1.949994706039845e-06, + "loss": 0.8037, + "step": 5436 + }, + { + "epoch": 0.8, + "learning_rate": 1.9471539989243005e-06, + "loss": 0.8149, + "step": 5437 + }, + { + "epoch": 0.8, + "learning_rate": 1.944315139263758e-06, + "loss": 0.7754, + "step": 5438 + }, + { + "epoch": 0.8, + "learning_rate": 1.9414781277094963e-06, + "loss": 0.752, + "step": 5439 + }, + { + "epoch": 0.8, + "learning_rate": 1.938642964912376e-06, + "loss": 0.7939, + "step": 5440 + }, + { + "epoch": 0.8, + "learning_rate": 1.93580965152283e-06, + "loss": 0.7964, + "step": 5441 + }, + { + "epoch": 0.8, + "learning_rate": 1.932978188190863e-06, + "loss": 0.7891, + "step": 5442 + }, + { + "epoch": 0.8, + "learning_rate": 1.9301485755660633e-06, + "loss": 0.752, + "step": 5443 + }, + { + "epoch": 0.81, + "learning_rate": 1.9273208142975865e-06, + "loss": 0.7598, + "step": 5444 + }, + { + "epoch": 0.81, + "learning_rate": 1.9244949050341723e-06, + "loss": 0.2965, + "step": 5445 + }, + { + "epoch": 0.81, + "learning_rate": 1.9216708484241275e-06, + "loss": 0.8491, + "step": 5446 + }, + { + "epoch": 0.81, + "learning_rate": 1.9188486451153353e-06, + "loss": 0.7271, + "step": 5447 + }, + { + "epoch": 0.81, + "learning_rate": 1.9160282957552614e-06, + "loss": 0.7578, + "step": 5448 + }, + { + "epoch": 0.81, + "learning_rate": 1.913209800990935e-06, + "loss": 0.814, + "step": 5449 + }, + { + "epoch": 0.81, + "learning_rate": 1.9103931614689653e-06, + "loss": 0.2769, + "step": 5450 + }, + { + "epoch": 0.81, + "learning_rate": 1.9075783778355383e-06, + "loss": 0.792, + "step": 5451 + }, + { + "epoch": 0.81, + "learning_rate": 1.9047654507364087e-06, + "loss": 0.8071, + "step": 5452 + }, + { + "epoch": 0.81, + "learning_rate": 1.9019543808169117e-06, + "loss": 0.8286, + "step": 5453 + }, + { + "epoch": 0.81, + "learning_rate": 1.8991451687219509e-06, + "loss": 0.7764, + "step": 5454 + }, + { + "epoch": 0.81, + "learning_rate": 1.8963378150960032e-06, + "loss": 0.8623, + "step": 5455 + }, + { + "epoch": 0.81, + "learning_rate": 1.8935323205831257e-06, + "loss": 0.7637, + "step": 5456 + }, + { + "epoch": 0.81, + "learning_rate": 1.8907286858269413e-06, + "loss": 0.8691, + "step": 5457 + }, + { + "epoch": 0.81, + "learning_rate": 1.8879269114706556e-06, + "loss": 0.8208, + "step": 5458 + }, + { + "epoch": 0.81, + "learning_rate": 1.8851269981570343e-06, + "loss": 0.7764, + "step": 5459 + }, + { + "epoch": 0.81, + "learning_rate": 1.8823289465284244e-06, + "loss": 0.8184, + "step": 5460 + }, + { + "epoch": 0.81, + "learning_rate": 1.8795327572267519e-06, + "loss": 0.7676, + "step": 5461 + }, + { + "epoch": 0.81, + "learning_rate": 1.8767384308935033e-06, + "loss": 0.7202, + "step": 5462 + }, + { + "epoch": 0.81, + "learning_rate": 1.8739459681697425e-06, + "loss": 0.7822, + "step": 5463 + }, + { + "epoch": 0.81, + "learning_rate": 1.871155369696105e-06, + "loss": 0.7642, + "step": 5464 + }, + { + "epoch": 0.81, + "learning_rate": 1.8683666361128028e-06, + "loss": 0.8599, + "step": 5465 + }, + { + "epoch": 0.81, + "learning_rate": 1.865579768059621e-06, + "loss": 0.2931, + "step": 5466 + }, + { + "epoch": 0.81, + "learning_rate": 1.8627947661759027e-06, + "loss": 0.7778, + "step": 5467 + }, + { + "epoch": 0.81, + "learning_rate": 1.8600116311005823e-06, + "loss": 0.8354, + "step": 5468 + }, + { + "epoch": 0.81, + "learning_rate": 1.857230363472149e-06, + "loss": 0.8364, + "step": 5469 + }, + { + "epoch": 0.81, + "learning_rate": 1.8544509639286756e-06, + "loss": 0.8257, + "step": 5470 + }, + { + "epoch": 0.81, + "learning_rate": 1.8516734331078068e-06, + "loss": 0.7739, + "step": 5471 + }, + { + "epoch": 0.81, + "learning_rate": 1.8488977716467438e-06, + "loss": 0.7886, + "step": 5472 + }, + { + "epoch": 0.81, + "learning_rate": 1.8461239801822761e-06, + "loss": 0.7627, + "step": 5473 + }, + { + "epoch": 0.81, + "learning_rate": 1.8433520593507515e-06, + "loss": 0.731, + "step": 5474 + }, + { + "epoch": 0.81, + "learning_rate": 1.8405820097881e-06, + "loss": 0.7949, + "step": 5475 + }, + { + "epoch": 0.81, + "learning_rate": 1.8378138321298122e-06, + "loss": 0.8213, + "step": 5476 + }, + { + "epoch": 0.81, + "learning_rate": 1.8350475270109536e-06, + "loss": 0.7666, + "step": 5477 + }, + { + "epoch": 0.81, + "learning_rate": 1.8322830950661607e-06, + "loss": 0.7671, + "step": 5478 + }, + { + "epoch": 0.81, + "learning_rate": 1.8295205369296443e-06, + "loss": 0.7891, + "step": 5479 + }, + { + "epoch": 0.81, + "learning_rate": 1.8267598532351727e-06, + "loss": 0.3225, + "step": 5480 + }, + { + "epoch": 0.81, + "learning_rate": 1.8240010446160973e-06, + "loss": 0.8071, + "step": 5481 + }, + { + "epoch": 0.81, + "learning_rate": 1.8212441117053293e-06, + "loss": 0.7747, + "step": 5482 + }, + { + "epoch": 0.81, + "learning_rate": 1.8184890551353574e-06, + "loss": 0.7954, + "step": 5483 + }, + { + "epoch": 0.81, + "learning_rate": 1.8157358755382427e-06, + "loss": 0.7402, + "step": 5484 + }, + { + "epoch": 0.81, + "learning_rate": 1.8129845735455986e-06, + "loss": 0.7861, + "step": 5485 + }, + { + "epoch": 0.81, + "learning_rate": 1.8102351497886262e-06, + "loss": 0.7637, + "step": 5486 + }, + { + "epoch": 0.81, + "learning_rate": 1.8074876048980838e-06, + "loss": 0.7195, + "step": 5487 + }, + { + "epoch": 0.81, + "learning_rate": 1.8047419395043086e-06, + "loss": 0.8354, + "step": 5488 + }, + { + "epoch": 0.81, + "learning_rate": 1.801998154237199e-06, + "loss": 0.7866, + "step": 5489 + }, + { + "epoch": 0.81, + "learning_rate": 1.7992562497262211e-06, + "loss": 0.835, + "step": 5490 + }, + { + "epoch": 0.81, + "learning_rate": 1.7965162266004177e-06, + "loss": 0.6855, + "step": 5491 + }, + { + "epoch": 0.81, + "learning_rate": 1.7937780854883936e-06, + "loss": 0.8066, + "step": 5492 + }, + { + "epoch": 0.81, + "learning_rate": 1.7910418270183195e-06, + "loss": 0.8335, + "step": 5493 + }, + { + "epoch": 0.81, + "learning_rate": 1.7883074518179445e-06, + "loss": 0.8145, + "step": 5494 + }, + { + "epoch": 0.81, + "learning_rate": 1.7855749605145722e-06, + "loss": 0.8291, + "step": 5495 + }, + { + "epoch": 0.81, + "learning_rate": 1.7828443537350871e-06, + "loss": 0.8022, + "step": 5496 + }, + { + "epoch": 0.81, + "learning_rate": 1.7801156321059332e-06, + "loss": 0.7556, + "step": 5497 + }, + { + "epoch": 0.81, + "learning_rate": 1.7773887962531211e-06, + "loss": 0.7251, + "step": 5498 + }, + { + "epoch": 0.81, + "learning_rate": 1.774663846802236e-06, + "loss": 0.7866, + "step": 5499 + }, + { + "epoch": 0.81, + "learning_rate": 1.771940784378422e-06, + "loss": 0.7554, + "step": 5500 + }, + { + "epoch": 0.81, + "learning_rate": 1.7692196096063985e-06, + "loss": 0.7939, + "step": 5501 + }, + { + "epoch": 0.81, + "learning_rate": 1.766500323110445e-06, + "loss": 0.7886, + "step": 5502 + }, + { + "epoch": 0.81, + "learning_rate": 1.763782925514409e-06, + "loss": 0.3323, + "step": 5503 + }, + { + "epoch": 0.81, + "learning_rate": 1.7610674174417108e-06, + "loss": 0.7373, + "step": 5504 + }, + { + "epoch": 0.81, + "learning_rate": 1.758353799515329e-06, + "loss": 0.8066, + "step": 5505 + }, + { + "epoch": 0.81, + "learning_rate": 1.7556420723578106e-06, + "loss": 0.7627, + "step": 5506 + }, + { + "epoch": 0.81, + "learning_rate": 1.752932236591275e-06, + "loss": 0.7305, + "step": 5507 + }, + { + "epoch": 0.81, + "learning_rate": 1.750224292837398e-06, + "loss": 0.8496, + "step": 5508 + }, + { + "epoch": 0.81, + "learning_rate": 1.7475182417174318e-06, + "loss": 0.7549, + "step": 5509 + }, + { + "epoch": 0.81, + "learning_rate": 1.7448140838521854e-06, + "loss": 0.7715, + "step": 5510 + }, + { + "epoch": 0.81, + "learning_rate": 1.7421118198620345e-06, + "loss": 0.8311, + "step": 5511 + }, + { + "epoch": 0.82, + "learning_rate": 1.739411450366929e-06, + "loss": 0.835, + "step": 5512 + }, + { + "epoch": 0.82, + "learning_rate": 1.7367129759863754e-06, + "loss": 0.7773, + "step": 5513 + }, + { + "epoch": 0.82, + "learning_rate": 1.7340163973394441e-06, + "loss": 0.7231, + "step": 5514 + }, + { + "epoch": 0.82, + "learning_rate": 1.7313217150447802e-06, + "loss": 0.7866, + "step": 5515 + }, + { + "epoch": 0.82, + "learning_rate": 1.7286289297205826e-06, + "loss": 0.77, + "step": 5516 + }, + { + "epoch": 0.82, + "learning_rate": 1.7259380419846272e-06, + "loss": 0.8276, + "step": 5517 + }, + { + "epoch": 0.82, + "learning_rate": 1.7232490524542434e-06, + "loss": 0.7126, + "step": 5518 + }, + { + "epoch": 0.82, + "learning_rate": 1.7205619617463276e-06, + "loss": 0.8501, + "step": 5519 + }, + { + "epoch": 0.82, + "learning_rate": 1.717876770477347e-06, + "loss": 0.772, + "step": 5520 + }, + { + "epoch": 0.82, + "learning_rate": 1.715193479263325e-06, + "loss": 0.8374, + "step": 5521 + }, + { + "epoch": 0.82, + "learning_rate": 1.7125120887198566e-06, + "loss": 0.7822, + "step": 5522 + }, + { + "epoch": 0.82, + "learning_rate": 1.7098325994620934e-06, + "loss": 0.8091, + "step": 5523 + }, + { + "epoch": 0.82, + "learning_rate": 1.7071550121047543e-06, + "loss": 0.7297, + "step": 5524 + }, + { + "epoch": 0.82, + "learning_rate": 1.7044793272621241e-06, + "loss": 0.7759, + "step": 5525 + }, + { + "epoch": 0.82, + "learning_rate": 1.7018055455480475e-06, + "loss": 0.79, + "step": 5526 + }, + { + "epoch": 0.82, + "learning_rate": 1.6991336675759318e-06, + "loss": 0.7886, + "step": 5527 + }, + { + "epoch": 0.82, + "learning_rate": 1.6964636939587541e-06, + "loss": 0.8394, + "step": 5528 + }, + { + "epoch": 0.82, + "learning_rate": 1.693795625309045e-06, + "loss": 0.731, + "step": 5529 + }, + { + "epoch": 0.82, + "learning_rate": 1.6911294622389075e-06, + "loss": 0.7354, + "step": 5530 + }, + { + "epoch": 0.82, + "learning_rate": 1.6884652053600027e-06, + "loss": 0.7266, + "step": 5531 + }, + { + "epoch": 0.82, + "learning_rate": 1.6858028552835503e-06, + "loss": 0.7314, + "step": 5532 + }, + { + "epoch": 0.82, + "learning_rate": 1.6831424126203444e-06, + "loss": 0.8154, + "step": 5533 + }, + { + "epoch": 0.82, + "learning_rate": 1.6804838779807264e-06, + "loss": 0.7798, + "step": 5534 + }, + { + "epoch": 0.82, + "learning_rate": 1.6778272519746174e-06, + "loss": 0.8193, + "step": 5535 + }, + { + "epoch": 0.82, + "learning_rate": 1.67517253521148e-06, + "loss": 0.7671, + "step": 5536 + }, + { + "epoch": 0.82, + "learning_rate": 1.6725197283003548e-06, + "loss": 0.7051, + "step": 5537 + }, + { + "epoch": 0.82, + "learning_rate": 1.6698688318498423e-06, + "loss": 0.7578, + "step": 5538 + }, + { + "epoch": 0.82, + "learning_rate": 1.6672198464680989e-06, + "loss": 0.8105, + "step": 5539 + }, + { + "epoch": 0.82, + "learning_rate": 1.6645727727628447e-06, + "loss": 0.2991, + "step": 5540 + }, + { + "epoch": 0.82, + "learning_rate": 1.6619276113413607e-06, + "loss": 0.7324, + "step": 5541 + }, + { + "epoch": 0.82, + "learning_rate": 1.6592843628104915e-06, + "loss": 0.7573, + "step": 5542 + }, + { + "epoch": 0.82, + "learning_rate": 1.6566430277766478e-06, + "loss": 0.7935, + "step": 5543 + }, + { + "epoch": 0.82, + "learning_rate": 1.6540036068457833e-06, + "loss": 0.7944, + "step": 5544 + }, + { + "epoch": 0.82, + "learning_rate": 1.6513661006234315e-06, + "loss": 0.8633, + "step": 5545 + }, + { + "epoch": 0.82, + "learning_rate": 1.648730509714681e-06, + "loss": 0.7925, + "step": 5546 + }, + { + "epoch": 0.82, + "learning_rate": 1.6460968347241756e-06, + "loss": 0.7358, + "step": 5547 + }, + { + "epoch": 0.82, + "learning_rate": 1.64346507625613e-06, + "loss": 0.7842, + "step": 5548 + }, + { + "epoch": 0.82, + "learning_rate": 1.6408352349143032e-06, + "loss": 0.731, + "step": 5549 + }, + { + "epoch": 0.82, + "learning_rate": 1.638207311302029e-06, + "loss": 0.7881, + "step": 5550 + }, + { + "epoch": 0.82, + "learning_rate": 1.6355813060221993e-06, + "loss": 0.7544, + "step": 5551 + }, + { + "epoch": 0.82, + "learning_rate": 1.6329572196772581e-06, + "loss": 0.813, + "step": 5552 + }, + { + "epoch": 0.82, + "learning_rate": 1.6303350528692163e-06, + "loss": 0.7783, + "step": 5553 + }, + { + "epoch": 0.82, + "learning_rate": 1.6277148061996385e-06, + "loss": 0.8193, + "step": 5554 + }, + { + "epoch": 0.82, + "learning_rate": 1.6250964802696545e-06, + "loss": 0.751, + "step": 5555 + }, + { + "epoch": 0.82, + "learning_rate": 1.6224800756799576e-06, + "loss": 0.7842, + "step": 5556 + }, + { + "epoch": 0.82, + "learning_rate": 1.6198655930307816e-06, + "loss": 0.8345, + "step": 5557 + }, + { + "epoch": 0.82, + "learning_rate": 1.6172530329219416e-06, + "loss": 0.7988, + "step": 5558 + }, + { + "epoch": 0.82, + "learning_rate": 1.6146423959527947e-06, + "loss": 0.7832, + "step": 5559 + }, + { + "epoch": 0.82, + "learning_rate": 1.6120336827222705e-06, + "loss": 0.7607, + "step": 5560 + }, + { + "epoch": 0.82, + "learning_rate": 1.6094268938288472e-06, + "loss": 0.8115, + "step": 5561 + }, + { + "epoch": 0.82, + "learning_rate": 1.606822029870564e-06, + "loss": 0.8403, + "step": 5562 + }, + { + "epoch": 0.82, + "learning_rate": 1.6042190914450217e-06, + "loss": 0.8169, + "step": 5563 + }, + { + "epoch": 0.82, + "learning_rate": 1.6016180791493741e-06, + "loss": 0.7715, + "step": 5564 + }, + { + "epoch": 0.82, + "learning_rate": 1.5990189935803402e-06, + "loss": 0.7661, + "step": 5565 + }, + { + "epoch": 0.82, + "learning_rate": 1.5964218353341898e-06, + "loss": 0.7642, + "step": 5566 + }, + { + "epoch": 0.82, + "learning_rate": 1.593826605006753e-06, + "loss": 0.9038, + "step": 5567 + }, + { + "epoch": 0.82, + "learning_rate": 1.5912333031934224e-06, + "loss": 0.7373, + "step": 5568 + }, + { + "epoch": 0.82, + "learning_rate": 1.588641930489141e-06, + "loss": 0.812, + "step": 5569 + }, + { + "epoch": 0.82, + "learning_rate": 1.586052487488411e-06, + "loss": 0.8438, + "step": 5570 + }, + { + "epoch": 0.82, + "learning_rate": 1.5834649747852958e-06, + "loss": 0.7412, + "step": 5571 + }, + { + "epoch": 0.82, + "learning_rate": 1.58087939297341e-06, + "loss": 0.7969, + "step": 5572 + }, + { + "epoch": 0.82, + "learning_rate": 1.5782957426459334e-06, + "loss": 0.772, + "step": 5573 + }, + { + "epoch": 0.82, + "learning_rate": 1.575714024395595e-06, + "loss": 0.7412, + "step": 5574 + }, + { + "epoch": 0.82, + "learning_rate": 1.5731342388146809e-06, + "loss": 0.79, + "step": 5575 + }, + { + "epoch": 0.82, + "learning_rate": 1.5705563864950412e-06, + "loss": 0.7881, + "step": 5576 + }, + { + "epoch": 0.82, + "learning_rate": 1.5679804680280719e-06, + "loss": 0.8198, + "step": 5577 + }, + { + "epoch": 0.82, + "learning_rate": 1.5654064840047367e-06, + "loss": 0.7461, + "step": 5578 + }, + { + "epoch": 0.83, + "learning_rate": 1.5628344350155477e-06, + "loss": 0.7715, + "step": 5579 + }, + { + "epoch": 0.83, + "learning_rate": 1.5602643216505719e-06, + "loss": 0.7563, + "step": 5580 + }, + { + "epoch": 0.83, + "learning_rate": 1.5576961444994398e-06, + "loss": 0.7612, + "step": 5581 + }, + { + "epoch": 0.83, + "learning_rate": 1.5551299041513324e-06, + "loss": 0.8257, + "step": 5582 + }, + { + "epoch": 0.83, + "learning_rate": 1.552565601194984e-06, + "loss": 0.792, + "step": 5583 + }, + { + "epoch": 0.83, + "learning_rate": 1.5500032362186923e-06, + "loss": 0.7207, + "step": 5584 + }, + { + "epoch": 0.83, + "learning_rate": 1.5474428098103022e-06, + "loss": 0.771, + "step": 5585 + }, + { + "epoch": 0.83, + "learning_rate": 1.5448843225572218e-06, + "loss": 0.2783, + "step": 5586 + }, + { + "epoch": 0.83, + "learning_rate": 1.5423277750464071e-06, + "loss": 0.7393, + "step": 5587 + }, + { + "epoch": 0.83, + "learning_rate": 1.5397731678643713e-06, + "loss": 0.8271, + "step": 5588 + }, + { + "epoch": 0.83, + "learning_rate": 1.5372205015971853e-06, + "loss": 0.7637, + "step": 5589 + }, + { + "epoch": 0.83, + "learning_rate": 1.534669776830473e-06, + "loss": 0.7202, + "step": 5590 + }, + { + "epoch": 0.83, + "learning_rate": 1.5321209941494075e-06, + "loss": 0.728, + "step": 5591 + }, + { + "epoch": 0.83, + "learning_rate": 1.5295741541387287e-06, + "loss": 0.3461, + "step": 5592 + }, + { + "epoch": 0.83, + "learning_rate": 1.5270292573827173e-06, + "loss": 0.7505, + "step": 5593 + }, + { + "epoch": 0.83, + "learning_rate": 1.524486304465218e-06, + "loss": 0.8208, + "step": 5594 + }, + { + "epoch": 0.83, + "learning_rate": 1.5219452959696269e-06, + "loss": 0.7656, + "step": 5595 + }, + { + "epoch": 0.83, + "learning_rate": 1.5194062324788872e-06, + "loss": 0.7856, + "step": 5596 + }, + { + "epoch": 0.83, + "learning_rate": 1.5168691145755087e-06, + "loss": 0.7949, + "step": 5597 + }, + { + "epoch": 0.83, + "learning_rate": 1.5143339428415426e-06, + "loss": 0.7969, + "step": 5598 + }, + { + "epoch": 0.83, + "learning_rate": 1.5118007178586024e-06, + "loss": 0.7837, + "step": 5599 + }, + { + "epoch": 0.83, + "learning_rate": 1.509269440207851e-06, + "loss": 0.7715, + "step": 5600 + }, + { + "epoch": 0.83, + "learning_rate": 1.506740110470002e-06, + "loss": 0.8726, + "step": 5601 + }, + { + "epoch": 0.83, + "learning_rate": 1.5042127292253284e-06, + "loss": 0.7983, + "step": 5602 + }, + { + "epoch": 0.83, + "learning_rate": 1.5016872970536523e-06, + "loss": 0.835, + "step": 5603 + }, + { + "epoch": 0.83, + "learning_rate": 1.4991638145343467e-06, + "loss": 0.8218, + "step": 5604 + }, + { + "epoch": 0.83, + "learning_rate": 1.4966422822463443e-06, + "loss": 0.748, + "step": 5605 + }, + { + "epoch": 0.83, + "learning_rate": 1.4941227007681214e-06, + "loss": 0.7817, + "step": 5606 + }, + { + "epoch": 0.83, + "learning_rate": 1.4916050706777185e-06, + "loss": 0.7485, + "step": 5607 + }, + { + "epoch": 0.83, + "learning_rate": 1.4890893925527118e-06, + "loss": 0.7837, + "step": 5608 + }, + { + "epoch": 0.83, + "learning_rate": 1.4865756669702425e-06, + "loss": 0.3191, + "step": 5609 + }, + { + "epoch": 0.83, + "learning_rate": 1.4840638945070051e-06, + "loss": 0.8633, + "step": 5610 + }, + { + "epoch": 0.83, + "learning_rate": 1.4815540757392354e-06, + "loss": 0.7073, + "step": 5611 + }, + { + "epoch": 0.83, + "learning_rate": 1.4790462112427317e-06, + "loss": 0.8418, + "step": 5612 + }, + { + "epoch": 0.83, + "learning_rate": 1.476540301592837e-06, + "loss": 0.7847, + "step": 5613 + }, + { + "epoch": 0.83, + "learning_rate": 1.474036347364446e-06, + "loss": 0.7708, + "step": 5614 + }, + { + "epoch": 0.83, + "learning_rate": 1.4715343491320122e-06, + "loss": 0.293, + "step": 5615 + }, + { + "epoch": 0.83, + "learning_rate": 1.4690343074695312e-06, + "loss": 0.7832, + "step": 5616 + }, + { + "epoch": 0.83, + "learning_rate": 1.466536222950552e-06, + "loss": 0.7935, + "step": 5617 + }, + { + "epoch": 0.83, + "learning_rate": 1.4640400961481815e-06, + "loss": 0.7983, + "step": 5618 + }, + { + "epoch": 0.83, + "learning_rate": 1.4615459276350663e-06, + "loss": 0.7676, + "step": 5619 + }, + { + "epoch": 0.83, + "learning_rate": 1.4590537179834174e-06, + "loss": 0.7134, + "step": 5620 + }, + { + "epoch": 0.83, + "learning_rate": 1.4565634677649786e-06, + "loss": 0.7539, + "step": 5621 + }, + { + "epoch": 0.83, + "learning_rate": 1.4540751775510598e-06, + "loss": 0.7915, + "step": 5622 + }, + { + "epoch": 0.83, + "learning_rate": 1.451588847912516e-06, + "loss": 0.7993, + "step": 5623 + }, + { + "epoch": 0.83, + "learning_rate": 1.4491044794197528e-06, + "loss": 0.7412, + "step": 5624 + }, + { + "epoch": 0.83, + "learning_rate": 1.4466220726427227e-06, + "loss": 0.7661, + "step": 5625 + }, + { + "epoch": 0.83, + "learning_rate": 1.444141628150928e-06, + "loss": 0.8511, + "step": 5626 + }, + { + "epoch": 0.83, + "learning_rate": 1.4416631465134278e-06, + "loss": 0.7388, + "step": 5627 + }, + { + "epoch": 0.83, + "learning_rate": 1.4391866282988266e-06, + "loss": 0.7715, + "step": 5628 + }, + { + "epoch": 0.83, + "learning_rate": 1.4367120740752772e-06, + "loss": 0.8032, + "step": 5629 + }, + { + "epoch": 0.83, + "learning_rate": 1.4342394844104824e-06, + "loss": 0.8262, + "step": 5630 + }, + { + "epoch": 0.83, + "learning_rate": 1.4317688598716928e-06, + "loss": 0.7729, + "step": 5631 + }, + { + "epoch": 0.83, + "learning_rate": 1.429300201025713e-06, + "loss": 0.2886, + "step": 5632 + }, + { + "epoch": 0.83, + "learning_rate": 1.4268335084388974e-06, + "loss": 0.8257, + "step": 5633 + }, + { + "epoch": 0.83, + "learning_rate": 1.424368782677138e-06, + "loss": 0.7505, + "step": 5634 + }, + { + "epoch": 0.83, + "learning_rate": 1.4219060243058879e-06, + "loss": 0.7231, + "step": 5635 + }, + { + "epoch": 0.83, + "learning_rate": 1.419445233890142e-06, + "loss": 0.8228, + "step": 5636 + }, + { + "epoch": 0.83, + "learning_rate": 1.4169864119944498e-06, + "loss": 0.813, + "step": 5637 + }, + { + "epoch": 0.83, + "learning_rate": 1.4145295591829023e-06, + "loss": 0.7581, + "step": 5638 + }, + { + "epoch": 0.83, + "learning_rate": 1.4120746760191407e-06, + "loss": 0.7222, + "step": 5639 + }, + { + "epoch": 0.83, + "learning_rate": 1.409621763066359e-06, + "loss": 0.7217, + "step": 5640 + }, + { + "epoch": 0.83, + "learning_rate": 1.4071708208872925e-06, + "loss": 0.7637, + "step": 5641 + }, + { + "epoch": 0.83, + "learning_rate": 1.4047218500442305e-06, + "loss": 0.8003, + "step": 5642 + }, + { + "epoch": 0.83, + "learning_rate": 1.402274851099006e-06, + "loss": 0.7646, + "step": 5643 + }, + { + "epoch": 0.83, + "learning_rate": 1.3998298246129983e-06, + "loss": 0.7227, + "step": 5644 + }, + { + "epoch": 0.83, + "learning_rate": 1.3973867711471378e-06, + "loss": 0.2899, + "step": 5645 + }, + { + "epoch": 0.83, + "learning_rate": 1.3949456912619075e-06, + "loss": 0.7505, + "step": 5646 + }, + { + "epoch": 0.84, + "learning_rate": 1.3925065855173204e-06, + "loss": 0.7539, + "step": 5647 + }, + { + "epoch": 0.84, + "learning_rate": 1.3900694544729554e-06, + "loss": 0.7778, + "step": 5648 + }, + { + "epoch": 0.84, + "learning_rate": 1.3876342986879243e-06, + "loss": 0.7241, + "step": 5649 + }, + { + "epoch": 0.84, + "learning_rate": 1.385201118720898e-06, + "loss": 0.8696, + "step": 5650 + }, + { + "epoch": 0.84, + "learning_rate": 1.3827699151300845e-06, + "loss": 0.7954, + "step": 5651 + }, + { + "epoch": 0.84, + "learning_rate": 1.3803406884732396e-06, + "loss": 0.7217, + "step": 5652 + }, + { + "epoch": 0.84, + "learning_rate": 1.3779134393076732e-06, + "loss": 0.7156, + "step": 5653 + }, + { + "epoch": 0.84, + "learning_rate": 1.375488168190232e-06, + "loss": 0.75, + "step": 5654 + }, + { + "epoch": 0.84, + "learning_rate": 1.3730648756773136e-06, + "loss": 0.7632, + "step": 5655 + }, + { + "epoch": 0.84, + "learning_rate": 1.3706435623248627e-06, + "loss": 0.7954, + "step": 5656 + }, + { + "epoch": 0.84, + "learning_rate": 1.3682242286883662e-06, + "loss": 0.7886, + "step": 5657 + }, + { + "epoch": 0.84, + "learning_rate": 1.3658068753228614e-06, + "loss": 0.7773, + "step": 5658 + }, + { + "epoch": 0.84, + "learning_rate": 1.363391502782927e-06, + "loss": 0.3052, + "step": 5659 + }, + { + "epoch": 0.84, + "learning_rate": 1.3609781116226883e-06, + "loss": 0.7891, + "step": 5660 + }, + { + "epoch": 0.84, + "learning_rate": 1.3585667023958193e-06, + "loss": 0.752, + "step": 5661 + }, + { + "epoch": 0.84, + "learning_rate": 1.3561572756555342e-06, + "loss": 0.7944, + "step": 5662 + }, + { + "epoch": 0.84, + "learning_rate": 1.3537498319545984e-06, + "loss": 0.7271, + "step": 5663 + }, + { + "epoch": 0.84, + "learning_rate": 1.3513443718453166e-06, + "loss": 0.7319, + "step": 5664 + }, + { + "epoch": 0.84, + "learning_rate": 1.3489408958795392e-06, + "loss": 0.8252, + "step": 5665 + }, + { + "epoch": 0.84, + "learning_rate": 1.3465394046086677e-06, + "loss": 0.7651, + "step": 5666 + }, + { + "epoch": 0.84, + "learning_rate": 1.3441398985836407e-06, + "loss": 0.8662, + "step": 5667 + }, + { + "epoch": 0.84, + "learning_rate": 1.3417423783549422e-06, + "loss": 0.7349, + "step": 5668 + }, + { + "epoch": 0.84, + "learning_rate": 1.3393468444726066e-06, + "loss": 0.7051, + "step": 5669 + }, + { + "epoch": 0.84, + "learning_rate": 1.3369532974862053e-06, + "loss": 0.8188, + "step": 5670 + }, + { + "epoch": 0.84, + "learning_rate": 1.3345617379448616e-06, + "loss": 0.8003, + "step": 5671 + }, + { + "epoch": 0.84, + "learning_rate": 1.3321721663972353e-06, + "loss": 0.8008, + "step": 5672 + }, + { + "epoch": 0.84, + "learning_rate": 1.3297845833915323e-06, + "loss": 0.7192, + "step": 5673 + }, + { + "epoch": 0.84, + "learning_rate": 1.3273989894755067e-06, + "loss": 0.7393, + "step": 5674 + }, + { + "epoch": 0.84, + "learning_rate": 1.3250153851964498e-06, + "loss": 0.7886, + "step": 5675 + }, + { + "epoch": 0.84, + "learning_rate": 1.3226337711012016e-06, + "loss": 0.7783, + "step": 5676 + }, + { + "epoch": 0.84, + "learning_rate": 1.3202541477361441e-06, + "loss": 0.7542, + "step": 5677 + }, + { + "epoch": 0.84, + "learning_rate": 1.3178765156471973e-06, + "loss": 0.8096, + "step": 5678 + }, + { + "epoch": 0.84, + "learning_rate": 1.3155008753798348e-06, + "loss": 0.8267, + "step": 5679 + }, + { + "epoch": 0.84, + "learning_rate": 1.3131272274790653e-06, + "loss": 0.8086, + "step": 5680 + }, + { + "epoch": 0.84, + "learning_rate": 1.3107555724894394e-06, + "loss": 0.7769, + "step": 5681 + }, + { + "epoch": 0.84, + "learning_rate": 1.3083859109550601e-06, + "loss": 0.8179, + "step": 5682 + }, + { + "epoch": 0.84, + "learning_rate": 1.306018243419559e-06, + "loss": 0.7939, + "step": 5683 + }, + { + "epoch": 0.84, + "learning_rate": 1.303652570426125e-06, + "loss": 0.7988, + "step": 5684 + }, + { + "epoch": 0.84, + "learning_rate": 1.3012888925174794e-06, + "loss": 0.7603, + "step": 5685 + }, + { + "epoch": 0.84, + "learning_rate": 1.2989272102358851e-06, + "loss": 0.7754, + "step": 5686 + }, + { + "epoch": 0.84, + "learning_rate": 1.2965675241231568e-06, + "loss": 0.7842, + "step": 5687 + }, + { + "epoch": 0.84, + "learning_rate": 1.2942098347206412e-06, + "loss": 0.7656, + "step": 5688 + }, + { + "epoch": 0.84, + "learning_rate": 1.2918541425692343e-06, + "loss": 0.7202, + "step": 5689 + }, + { + "epoch": 0.84, + "learning_rate": 1.2895004482093675e-06, + "loss": 0.8267, + "step": 5690 + }, + { + "epoch": 0.84, + "learning_rate": 1.2871487521810166e-06, + "loss": 0.8013, + "step": 5691 + }, + { + "epoch": 0.84, + "learning_rate": 1.2847990550237021e-06, + "loss": 0.8071, + "step": 5692 + }, + { + "epoch": 0.84, + "learning_rate": 1.282451357276483e-06, + "loss": 0.7769, + "step": 5693 + }, + { + "epoch": 0.84, + "learning_rate": 1.2801056594779548e-06, + "loss": 0.7568, + "step": 5694 + }, + { + "epoch": 0.84, + "learning_rate": 1.277761962166265e-06, + "loss": 0.7617, + "step": 5695 + }, + { + "epoch": 0.84, + "learning_rate": 1.2754202658790915e-06, + "loss": 0.7925, + "step": 5696 + }, + { + "epoch": 0.84, + "learning_rate": 1.2730805711536641e-06, + "loss": 0.8242, + "step": 5697 + }, + { + "epoch": 0.84, + "learning_rate": 1.2707428785267396e-06, + "loss": 0.73, + "step": 5698 + }, + { + "epoch": 0.84, + "learning_rate": 1.268407188534626e-06, + "loss": 0.8027, + "step": 5699 + }, + { + "epoch": 0.84, + "learning_rate": 1.266073501713172e-06, + "loss": 0.7056, + "step": 5700 + }, + { + "epoch": 0.84, + "learning_rate": 1.2637418185977602e-06, + "loss": 0.8203, + "step": 5701 + }, + { + "epoch": 0.84, + "learning_rate": 1.2614121397233191e-06, + "loss": 0.7686, + "step": 5702 + }, + { + "epoch": 0.84, + "learning_rate": 1.2590844656243107e-06, + "loss": 0.8271, + "step": 5703 + }, + { + "epoch": 0.84, + "learning_rate": 1.2567587968347461e-06, + "loss": 0.3098, + "step": 5704 + }, + { + "epoch": 0.84, + "learning_rate": 1.2544351338881721e-06, + "loss": 0.7764, + "step": 5705 + }, + { + "epoch": 0.84, + "learning_rate": 1.2521134773176745e-06, + "loss": 0.7769, + "step": 5706 + }, + { + "epoch": 0.84, + "learning_rate": 1.2497938276558786e-06, + "loss": 0.7822, + "step": 5707 + }, + { + "epoch": 0.84, + "learning_rate": 1.2474761854349483e-06, + "loss": 0.814, + "step": 5708 + }, + { + "epoch": 0.84, + "learning_rate": 1.2451605511865894e-06, + "loss": 0.7588, + "step": 5709 + }, + { + "epoch": 0.84, + "learning_rate": 1.2428469254420528e-06, + "loss": 0.8267, + "step": 5710 + }, + { + "epoch": 0.84, + "learning_rate": 1.2405353087321126e-06, + "loss": 0.7461, + "step": 5711 + }, + { + "epoch": 0.84, + "learning_rate": 1.2382257015870957e-06, + "loss": 0.7803, + "step": 5712 + }, + { + "epoch": 0.84, + "learning_rate": 1.2359181045368674e-06, + "loss": 0.772, + "step": 5713 + }, + { + "epoch": 0.85, + "learning_rate": 1.233612518110824e-06, + "loss": 0.7715, + "step": 5714 + }, + { + "epoch": 0.85, + "learning_rate": 1.2313089428379066e-06, + "loss": 0.7388, + "step": 5715 + }, + { + "epoch": 0.85, + "learning_rate": 1.2290073792465895e-06, + "loss": 0.791, + "step": 5716 + }, + { + "epoch": 0.85, + "learning_rate": 1.2267078278648937e-06, + "loss": 0.8086, + "step": 5717 + }, + { + "epoch": 0.85, + "learning_rate": 1.2244102892203758e-06, + "loss": 0.8208, + "step": 5718 + }, + { + "epoch": 0.85, + "learning_rate": 1.222114763840121e-06, + "loss": 0.7734, + "step": 5719 + }, + { + "epoch": 0.85, + "learning_rate": 1.2198212522507679e-06, + "loss": 0.7583, + "step": 5720 + }, + { + "epoch": 0.85, + "learning_rate": 1.2175297549784803e-06, + "loss": 0.8667, + "step": 5721 + }, + { + "epoch": 0.85, + "learning_rate": 1.2152402725489686e-06, + "loss": 0.7705, + "step": 5722 + }, + { + "epoch": 0.85, + "learning_rate": 1.2129528054874807e-06, + "loss": 0.7886, + "step": 5723 + }, + { + "epoch": 0.85, + "learning_rate": 1.2106673543187909e-06, + "loss": 0.7734, + "step": 5724 + }, + { + "epoch": 0.85, + "learning_rate": 1.2083839195672253e-06, + "loss": 0.7988, + "step": 5725 + }, + { + "epoch": 0.85, + "learning_rate": 1.2061025017566374e-06, + "loss": 0.8042, + "step": 5726 + }, + { + "epoch": 0.85, + "learning_rate": 1.2038231014104273e-06, + "loss": 0.7222, + "step": 5727 + }, + { + "epoch": 0.85, + "learning_rate": 1.2015457190515223e-06, + "loss": 0.2944, + "step": 5728 + }, + { + "epoch": 0.85, + "learning_rate": 1.199270355202391e-06, + "loss": 0.7024, + "step": 5729 + }, + { + "epoch": 0.85, + "learning_rate": 1.1969970103850426e-06, + "loss": 0.7695, + "step": 5730 + }, + { + "epoch": 0.85, + "learning_rate": 1.1947256851210176e-06, + "loss": 0.8535, + "step": 5731 + }, + { + "epoch": 0.85, + "learning_rate": 1.1924563799313937e-06, + "loss": 0.7988, + "step": 5732 + }, + { + "epoch": 0.85, + "learning_rate": 1.190189095336791e-06, + "loss": 0.7998, + "step": 5733 + }, + { + "epoch": 0.85, + "learning_rate": 1.1879238318573573e-06, + "loss": 0.8311, + "step": 5734 + }, + { + "epoch": 0.85, + "learning_rate": 1.1856605900127848e-06, + "loss": 0.7168, + "step": 5735 + }, + { + "epoch": 0.85, + "learning_rate": 1.183399370322297e-06, + "loss": 0.7241, + "step": 5736 + }, + { + "epoch": 0.85, + "learning_rate": 1.1811401733046523e-06, + "loss": 0.7915, + "step": 5737 + }, + { + "epoch": 0.85, + "learning_rate": 1.1788829994781525e-06, + "loss": 0.8457, + "step": 5738 + }, + { + "epoch": 0.85, + "learning_rate": 1.1766278493606253e-06, + "loss": 0.8369, + "step": 5739 + }, + { + "epoch": 0.85, + "learning_rate": 1.1743747234694437e-06, + "loss": 0.7769, + "step": 5740 + }, + { + "epoch": 0.85, + "learning_rate": 1.1721236223215092e-06, + "loss": 0.3274, + "step": 5741 + }, + { + "epoch": 0.85, + "learning_rate": 1.1698745464332595e-06, + "loss": 0.7261, + "step": 5742 + }, + { + "epoch": 0.85, + "learning_rate": 1.1676274963206747e-06, + "loss": 0.8081, + "step": 5743 + }, + { + "epoch": 0.85, + "learning_rate": 1.1653824724992601e-06, + "loss": 0.769, + "step": 5744 + }, + { + "epoch": 0.85, + "learning_rate": 1.163139475484063e-06, + "loss": 0.8022, + "step": 5745 + }, + { + "epoch": 0.85, + "learning_rate": 1.1608985057896638e-06, + "loss": 0.8184, + "step": 5746 + }, + { + "epoch": 0.85, + "learning_rate": 1.1586595639301768e-06, + "loss": 0.7544, + "step": 5747 + }, + { + "epoch": 0.85, + "learning_rate": 1.1564226504192532e-06, + "loss": 0.79, + "step": 5748 + }, + { + "epoch": 0.85, + "learning_rate": 1.1541877657700784e-06, + "loss": 0.7715, + "step": 5749 + }, + { + "epoch": 0.85, + "learning_rate": 1.1519549104953686e-06, + "loss": 0.7866, + "step": 5750 + }, + { + "epoch": 0.85, + "learning_rate": 1.1497240851073798e-06, + "loss": 0.2767, + "step": 5751 + }, + { + "epoch": 0.85, + "learning_rate": 1.1474952901178982e-06, + "loss": 0.6943, + "step": 5752 + }, + { + "epoch": 0.85, + "learning_rate": 1.1452685260382501e-06, + "loss": 0.2902, + "step": 5753 + }, + { + "epoch": 0.85, + "learning_rate": 1.143043793379287e-06, + "loss": 0.6895, + "step": 5754 + }, + { + "epoch": 0.85, + "learning_rate": 1.1408210926513997e-06, + "loss": 0.7529, + "step": 5755 + }, + { + "epoch": 0.85, + "learning_rate": 1.1386004243645143e-06, + "loss": 0.769, + "step": 5756 + }, + { + "epoch": 0.85, + "learning_rate": 1.1363817890280892e-06, + "loss": 0.7773, + "step": 5757 + }, + { + "epoch": 0.85, + "learning_rate": 1.1341651871511106e-06, + "loss": 0.832, + "step": 5758 + }, + { + "epoch": 0.85, + "learning_rate": 1.1319506192421092e-06, + "loss": 0.7554, + "step": 5759 + }, + { + "epoch": 0.85, + "learning_rate": 1.129738085809139e-06, + "loss": 0.7905, + "step": 5760 + }, + { + "epoch": 0.85, + "learning_rate": 1.1275275873597957e-06, + "loss": 0.7832, + "step": 5761 + }, + { + "epoch": 0.85, + "learning_rate": 1.1253191244012007e-06, + "loss": 0.8184, + "step": 5762 + }, + { + "epoch": 0.85, + "learning_rate": 1.1231126974400108e-06, + "loss": 0.2966, + "step": 5763 + }, + { + "epoch": 0.85, + "learning_rate": 1.1209083069824212e-06, + "loss": 0.8213, + "step": 5764 + }, + { + "epoch": 0.85, + "learning_rate": 1.1187059535341505e-06, + "loss": 0.6914, + "step": 5765 + }, + { + "epoch": 0.85, + "learning_rate": 1.1165056376004558e-06, + "loss": 0.7466, + "step": 5766 + }, + { + "epoch": 0.85, + "learning_rate": 1.1143073596861276e-06, + "loss": 0.771, + "step": 5767 + }, + { + "epoch": 0.85, + "learning_rate": 1.1121111202954836e-06, + "loss": 0.7603, + "step": 5768 + }, + { + "epoch": 0.85, + "learning_rate": 1.1099169199323823e-06, + "loss": 0.6914, + "step": 5769 + }, + { + "epoch": 0.85, + "learning_rate": 1.107724759100205e-06, + "loss": 0.731, + "step": 5770 + }, + { + "epoch": 0.85, + "learning_rate": 1.1055346383018683e-06, + "loss": 0.7549, + "step": 5771 + }, + { + "epoch": 0.85, + "learning_rate": 1.1033465580398273e-06, + "loss": 0.8149, + "step": 5772 + }, + { + "epoch": 0.85, + "learning_rate": 1.1011605188160579e-06, + "loss": 0.834, + "step": 5773 + }, + { + "epoch": 0.85, + "learning_rate": 1.0989765211320792e-06, + "loss": 0.7944, + "step": 5774 + }, + { + "epoch": 0.85, + "learning_rate": 1.096794565488929e-06, + "loss": 0.79, + "step": 5775 + }, + { + "epoch": 0.85, + "learning_rate": 1.0946146523871882e-06, + "loss": 0.311, + "step": 5776 + }, + { + "epoch": 0.85, + "learning_rate": 1.0924367823269644e-06, + "loss": 0.7939, + "step": 5777 + }, + { + "epoch": 0.85, + "learning_rate": 1.090260955807897e-06, + "loss": 0.7593, + "step": 5778 + }, + { + "epoch": 0.85, + "learning_rate": 1.0880871733291532e-06, + "loss": 0.7773, + "step": 5779 + }, + { + "epoch": 0.85, + "learning_rate": 1.0859154353894385e-06, + "loss": 0.8018, + "step": 5780 + }, + { + "epoch": 0.85, + "learning_rate": 1.0837457424869823e-06, + "loss": 0.7925, + "step": 5781 + }, + { + "epoch": 0.86, + "learning_rate": 1.0815780951195521e-06, + "loss": 0.7, + "step": 5782 + }, + { + "epoch": 0.86, + "learning_rate": 1.0794124937844341e-06, + "loss": 0.7456, + "step": 5783 + }, + { + "epoch": 0.86, + "learning_rate": 1.077248938978458e-06, + "loss": 0.749, + "step": 5784 + }, + { + "epoch": 0.86, + "learning_rate": 1.0750874311979786e-06, + "loss": 0.8091, + "step": 5785 + }, + { + "epoch": 0.86, + "learning_rate": 1.0729279709388796e-06, + "loss": 0.8003, + "step": 5786 + }, + { + "epoch": 0.86, + "learning_rate": 1.0707705586965812e-06, + "loss": 0.7329, + "step": 5787 + }, + { + "epoch": 0.86, + "learning_rate": 1.0686151949660217e-06, + "loss": 0.7739, + "step": 5788 + }, + { + "epoch": 0.86, + "learning_rate": 1.0664618802416814e-06, + "loss": 0.7427, + "step": 5789 + }, + { + "epoch": 0.86, + "learning_rate": 1.0643106150175664e-06, + "loss": 0.7935, + "step": 5790 + }, + { + "epoch": 0.86, + "learning_rate": 1.0621613997872115e-06, + "loss": 0.2903, + "step": 5791 + }, + { + "epoch": 0.86, + "learning_rate": 1.0600142350436816e-06, + "loss": 0.7617, + "step": 5792 + }, + { + "epoch": 0.86, + "learning_rate": 1.05786912127957e-06, + "loss": 0.7217, + "step": 5793 + }, + { + "epoch": 0.86, + "learning_rate": 1.0557260589870022e-06, + "loss": 0.6851, + "step": 5794 + }, + { + "epoch": 0.86, + "learning_rate": 1.0535850486576372e-06, + "loss": 0.8232, + "step": 5795 + }, + { + "epoch": 0.86, + "learning_rate": 1.0514460907826473e-06, + "loss": 0.8003, + "step": 5796 + }, + { + "epoch": 0.86, + "learning_rate": 1.0493091858527538e-06, + "loss": 0.7231, + "step": 5797 + }, + { + "epoch": 0.86, + "learning_rate": 1.0471743343581907e-06, + "loss": 0.8018, + "step": 5798 + }, + { + "epoch": 0.86, + "learning_rate": 1.0450415367887324e-06, + "loss": 0.8027, + "step": 5799 + }, + { + "epoch": 0.86, + "learning_rate": 1.0429107936336803e-06, + "loss": 0.7744, + "step": 5800 + }, + { + "epoch": 0.86, + "learning_rate": 1.0407821053818535e-06, + "loss": 0.7485, + "step": 5801 + }, + { + "epoch": 0.86, + "learning_rate": 1.0386554725216158e-06, + "loss": 0.8154, + "step": 5802 + }, + { + "epoch": 0.86, + "learning_rate": 1.0365308955408459e-06, + "loss": 0.7368, + "step": 5803 + }, + { + "epoch": 0.86, + "learning_rate": 1.0344083749269608e-06, + "loss": 0.75, + "step": 5804 + }, + { + "epoch": 0.86, + "learning_rate": 1.0322879111669004e-06, + "loss": 0.8105, + "step": 5805 + }, + { + "epoch": 0.86, + "learning_rate": 1.0301695047471326e-06, + "loss": 0.7246, + "step": 5806 + }, + { + "epoch": 0.86, + "learning_rate": 1.0280531561536567e-06, + "loss": 0.7603, + "step": 5807 + }, + { + "epoch": 0.86, + "learning_rate": 1.025938865871996e-06, + "loss": 0.8384, + "step": 5808 + }, + { + "epoch": 0.86, + "learning_rate": 1.0238266343872028e-06, + "loss": 0.7969, + "step": 5809 + }, + { + "epoch": 0.86, + "learning_rate": 1.0217164621838605e-06, + "loss": 0.812, + "step": 5810 + }, + { + "epoch": 0.86, + "learning_rate": 1.019608349746074e-06, + "loss": 0.7847, + "step": 5811 + }, + { + "epoch": 0.86, + "learning_rate": 1.017502297557481e-06, + "loss": 0.813, + "step": 5812 + }, + { + "epoch": 0.86, + "learning_rate": 1.015398306101245e-06, + "loss": 0.8301, + "step": 5813 + }, + { + "epoch": 0.86, + "learning_rate": 1.0132963758600533e-06, + "loss": 0.7202, + "step": 5814 + }, + { + "epoch": 0.86, + "learning_rate": 1.0111965073161268e-06, + "loss": 0.7275, + "step": 5815 + }, + { + "epoch": 0.86, + "learning_rate": 1.0090987009512055e-06, + "loss": 0.7729, + "step": 5816 + }, + { + "epoch": 0.86, + "learning_rate": 1.0070029572465657e-06, + "loss": 0.8179, + "step": 5817 + }, + { + "epoch": 0.86, + "learning_rate": 1.0049092766830015e-06, + "loss": 0.7588, + "step": 5818 + }, + { + "epoch": 0.86, + "learning_rate": 1.0028176597408378e-06, + "loss": 0.7119, + "step": 5819 + }, + { + "epoch": 0.86, + "learning_rate": 1.0007281068999286e-06, + "loss": 0.8101, + "step": 5820 + }, + { + "epoch": 0.86, + "learning_rate": 9.986406186396503e-07, + "loss": 0.7974, + "step": 5821 + }, + { + "epoch": 0.86, + "learning_rate": 9.965551954389042e-07, + "loss": 0.8188, + "step": 5822 + }, + { + "epoch": 0.86, + "learning_rate": 9.944718377761242e-07, + "loss": 0.7812, + "step": 5823 + }, + { + "epoch": 0.86, + "learning_rate": 9.923905461292638e-07, + "loss": 0.8091, + "step": 5824 + }, + { + "epoch": 0.86, + "learning_rate": 9.903113209758098e-07, + "loss": 0.7393, + "step": 5825 + }, + { + "epoch": 0.86, + "learning_rate": 9.88234162792767e-07, + "loss": 0.792, + "step": 5826 + }, + { + "epoch": 0.86, + "learning_rate": 9.861590720566684e-07, + "loss": 0.8486, + "step": 5827 + }, + { + "epoch": 0.86, + "learning_rate": 9.840860492435778e-07, + "loss": 0.8369, + "step": 5828 + }, + { + "epoch": 0.86, + "learning_rate": 9.820150948290797e-07, + "loss": 0.8442, + "step": 5829 + }, + { + "epoch": 0.86, + "learning_rate": 9.799462092882806e-07, + "loss": 0.7832, + "step": 5830 + }, + { + "epoch": 0.86, + "learning_rate": 9.77879393095823e-07, + "loss": 0.7456, + "step": 5831 + }, + { + "epoch": 0.86, + "learning_rate": 9.758146467258645e-07, + "loss": 0.8096, + "step": 5832 + }, + { + "epoch": 0.86, + "learning_rate": 9.737519706520938e-07, + "loss": 0.7871, + "step": 5833 + }, + { + "epoch": 0.86, + "learning_rate": 9.716913653477223e-07, + "loss": 0.8062, + "step": 5834 + }, + { + "epoch": 0.86, + "learning_rate": 9.696328312854842e-07, + "loss": 0.7554, + "step": 5835 + }, + { + "epoch": 0.86, + "learning_rate": 9.67576368937645e-07, + "loss": 0.7324, + "step": 5836 + }, + { + "epoch": 0.86, + "learning_rate": 9.655219787759862e-07, + "loss": 0.3182, + "step": 5837 + }, + { + "epoch": 0.86, + "learning_rate": 9.634696612718242e-07, + "loss": 0.7974, + "step": 5838 + }, + { + "epoch": 0.86, + "learning_rate": 9.614194168959912e-07, + "loss": 0.731, + "step": 5839 + }, + { + "epoch": 0.86, + "learning_rate": 9.593712461188442e-07, + "loss": 0.7739, + "step": 5840 + }, + { + "epoch": 0.86, + "learning_rate": 9.57325149410273e-07, + "loss": 0.7676, + "step": 5841 + }, + { + "epoch": 0.86, + "learning_rate": 9.552811272396822e-07, + "loss": 0.7988, + "step": 5842 + }, + { + "epoch": 0.86, + "learning_rate": 9.532391800760033e-07, + "loss": 0.7715, + "step": 5843 + }, + { + "epoch": 0.86, + "learning_rate": 9.511993083876958e-07, + "loss": 0.8203, + "step": 5844 + }, + { + "epoch": 0.86, + "learning_rate": 9.491615126427356e-07, + "loss": 0.8081, + "step": 5845 + }, + { + "epoch": 0.86, + "learning_rate": 9.471257933086308e-07, + "loss": 0.3162, + "step": 5846 + }, + { + "epoch": 0.86, + "learning_rate": 9.450921508524057e-07, + "loss": 0.8179, + "step": 5847 + }, + { + "epoch": 0.86, + "learning_rate": 9.430605857406117e-07, + "loss": 0.8052, + "step": 5848 + }, + { + "epoch": 0.86, + "learning_rate": 9.410310984393245e-07, + "loss": 0.7676, + "step": 5849 + }, + { + "epoch": 0.87, + "learning_rate": 9.390036894141397e-07, + "loss": 0.7744, + "step": 5850 + }, + { + "epoch": 0.87, + "learning_rate": 9.369783591301818e-07, + "loss": 0.7524, + "step": 5851 + }, + { + "epoch": 0.87, + "learning_rate": 9.349551080520913e-07, + "loss": 0.8037, + "step": 5852 + }, + { + "epoch": 0.87, + "learning_rate": 9.329339366440349e-07, + "loss": 0.8159, + "step": 5853 + }, + { + "epoch": 0.87, + "learning_rate": 9.309148453697059e-07, + "loss": 0.8135, + "step": 5854 + }, + { + "epoch": 0.87, + "learning_rate": 9.28897834692315e-07, + "loss": 0.7656, + "step": 5855 + }, + { + "epoch": 0.87, + "learning_rate": 9.268829050745964e-07, + "loss": 0.8179, + "step": 5856 + }, + { + "epoch": 0.87, + "learning_rate": 9.248700569788105e-07, + "loss": 0.7876, + "step": 5857 + }, + { + "epoch": 0.87, + "learning_rate": 9.228592908667344e-07, + "loss": 0.8032, + "step": 5858 + }, + { + "epoch": 0.87, + "learning_rate": 9.208506071996759e-07, + "loss": 0.7739, + "step": 5859 + }, + { + "epoch": 0.87, + "learning_rate": 9.188440064384541e-07, + "loss": 0.7295, + "step": 5860 + }, + { + "epoch": 0.87, + "learning_rate": 9.168394890434184e-07, + "loss": 0.7568, + "step": 5861 + }, + { + "epoch": 0.87, + "learning_rate": 9.148370554744402e-07, + "loss": 0.2955, + "step": 5862 + }, + { + "epoch": 0.87, + "learning_rate": 9.128367061909072e-07, + "loss": 0.7964, + "step": 5863 + }, + { + "epoch": 0.87, + "learning_rate": 9.10838441651738e-07, + "loss": 0.8022, + "step": 5864 + }, + { + "epoch": 0.87, + "learning_rate": 9.088422623153603e-07, + "loss": 0.7583, + "step": 5865 + }, + { + "epoch": 0.87, + "learning_rate": 9.068481686397324e-07, + "loss": 0.7651, + "step": 5866 + }, + { + "epoch": 0.87, + "learning_rate": 9.048561610823359e-07, + "loss": 0.7886, + "step": 5867 + }, + { + "epoch": 0.87, + "learning_rate": 9.028662401001664e-07, + "loss": 0.8315, + "step": 5868 + }, + { + "epoch": 0.87, + "learning_rate": 9.008784061497455e-07, + "loss": 0.8071, + "step": 5869 + }, + { + "epoch": 0.87, + "learning_rate": 8.988926596871128e-07, + "loss": 0.7822, + "step": 5870 + }, + { + "epoch": 0.87, + "learning_rate": 8.969090011678328e-07, + "loss": 0.7505, + "step": 5871 + }, + { + "epoch": 0.87, + "learning_rate": 8.949274310469936e-07, + "loss": 0.8237, + "step": 5872 + }, + { + "epoch": 0.87, + "learning_rate": 8.929479497791926e-07, + "loss": 0.7402, + "step": 5873 + }, + { + "epoch": 0.87, + "learning_rate": 8.909705578185601e-07, + "loss": 0.7593, + "step": 5874 + }, + { + "epoch": 0.87, + "learning_rate": 8.889952556187376e-07, + "loss": 0.7891, + "step": 5875 + }, + { + "epoch": 0.87, + "learning_rate": 8.87022043632898e-07, + "loss": 0.75, + "step": 5876 + }, + { + "epoch": 0.87, + "learning_rate": 8.850509223137249e-07, + "loss": 0.8203, + "step": 5877 + }, + { + "epoch": 0.87, + "learning_rate": 8.830818921134255e-07, + "loss": 0.3445, + "step": 5878 + }, + { + "epoch": 0.87, + "learning_rate": 8.811149534837271e-07, + "loss": 0.8071, + "step": 5879 + }, + { + "epoch": 0.87, + "learning_rate": 8.791501068758823e-07, + "loss": 0.8247, + "step": 5880 + }, + { + "epoch": 0.87, + "learning_rate": 8.771873527406549e-07, + "loss": 0.7788, + "step": 5881 + }, + { + "epoch": 0.87, + "learning_rate": 8.752266915283347e-07, + "loss": 0.7407, + "step": 5882 + }, + { + "epoch": 0.87, + "learning_rate": 8.732681236887264e-07, + "loss": 0.7358, + "step": 5883 + }, + { + "epoch": 0.87, + "learning_rate": 8.713116496711605e-07, + "loss": 0.7544, + "step": 5884 + }, + { + "epoch": 0.87, + "learning_rate": 8.693572699244879e-07, + "loss": 0.7217, + "step": 5885 + }, + { + "epoch": 0.87, + "learning_rate": 8.674049848970667e-07, + "loss": 0.6897, + "step": 5886 + }, + { + "epoch": 0.87, + "learning_rate": 8.654547950367898e-07, + "loss": 0.7949, + "step": 5887 + }, + { + "epoch": 0.87, + "learning_rate": 8.635067007910581e-07, + "loss": 0.7861, + "step": 5888 + }, + { + "epoch": 0.87, + "learning_rate": 8.615607026068018e-07, + "loss": 0.8174, + "step": 5889 + }, + { + "epoch": 0.87, + "learning_rate": 8.596168009304617e-07, + "loss": 0.8027, + "step": 5890 + }, + { + "epoch": 0.87, + "learning_rate": 8.576749962079989e-07, + "loss": 0.7666, + "step": 5891 + }, + { + "epoch": 0.87, + "learning_rate": 8.55735288884899e-07, + "loss": 0.8389, + "step": 5892 + }, + { + "epoch": 0.87, + "learning_rate": 8.537976794061587e-07, + "loss": 0.7559, + "step": 5893 + }, + { + "epoch": 0.87, + "learning_rate": 8.51862168216303e-07, + "loss": 0.812, + "step": 5894 + }, + { + "epoch": 0.87, + "learning_rate": 8.49928755759365e-07, + "loss": 0.7319, + "step": 5895 + }, + { + "epoch": 0.87, + "learning_rate": 8.479974424789017e-07, + "loss": 0.7295, + "step": 5896 + }, + { + "epoch": 0.87, + "learning_rate": 8.460682288179911e-07, + "loss": 0.8198, + "step": 5897 + }, + { + "epoch": 0.87, + "learning_rate": 8.441411152192247e-07, + "loss": 0.8232, + "step": 5898 + }, + { + "epoch": 0.87, + "learning_rate": 8.422161021247122e-07, + "loss": 0.7812, + "step": 5899 + }, + { + "epoch": 0.87, + "learning_rate": 8.402931899760869e-07, + "loss": 0.7466, + "step": 5900 + }, + { + "epoch": 0.87, + "learning_rate": 8.383723792144916e-07, + "loss": 0.7334, + "step": 5901 + }, + { + "epoch": 0.87, + "learning_rate": 8.36453670280597e-07, + "loss": 0.7417, + "step": 5902 + }, + { + "epoch": 0.87, + "learning_rate": 8.345370636145844e-07, + "loss": 0.7808, + "step": 5903 + }, + { + "epoch": 0.87, + "learning_rate": 8.326225596561521e-07, + "loss": 0.7832, + "step": 5904 + }, + { + "epoch": 0.87, + "learning_rate": 8.307101588445232e-07, + "loss": 0.749, + "step": 5905 + }, + { + "epoch": 0.87, + "learning_rate": 8.287998616184312e-07, + "loss": 0.7578, + "step": 5906 + }, + { + "epoch": 0.87, + "learning_rate": 8.268916684161276e-07, + "loss": 0.8193, + "step": 5907 + }, + { + "epoch": 0.87, + "learning_rate": 8.249855796753881e-07, + "loss": 0.7778, + "step": 5908 + }, + { + "epoch": 0.87, + "learning_rate": 8.23081595833497e-07, + "loss": 0.7588, + "step": 5909 + }, + { + "epoch": 0.87, + "learning_rate": 8.211797173272617e-07, + "loss": 0.3357, + "step": 5910 + }, + { + "epoch": 0.87, + "learning_rate": 8.192799445930044e-07, + "loss": 0.751, + "step": 5911 + }, + { + "epoch": 0.87, + "learning_rate": 8.173822780665608e-07, + "loss": 0.8003, + "step": 5912 + }, + { + "epoch": 0.87, + "learning_rate": 8.154867181832915e-07, + "loss": 0.7783, + "step": 5913 + }, + { + "epoch": 0.87, + "learning_rate": 8.135932653780642e-07, + "loss": 0.8052, + "step": 5914 + }, + { + "epoch": 0.87, + "learning_rate": 8.117019200852716e-07, + "loss": 0.2882, + "step": 5915 + }, + { + "epoch": 0.87, + "learning_rate": 8.098126827388187e-07, + "loss": 0.8281, + "step": 5916 + }, + { + "epoch": 0.88, + "learning_rate": 8.079255537721253e-07, + "loss": 0.7578, + "step": 5917 + }, + { + "epoch": 0.88, + "learning_rate": 8.060405336181343e-07, + "loss": 0.7861, + "step": 5918 + }, + { + "epoch": 0.88, + "learning_rate": 8.041576227092963e-07, + "loss": 0.7979, + "step": 5919 + }, + { + "epoch": 0.88, + "learning_rate": 8.022768214775811e-07, + "loss": 0.7412, + "step": 5920 + }, + { + "epoch": 0.88, + "learning_rate": 8.003981303544795e-07, + "loss": 0.7905, + "step": 5921 + }, + { + "epoch": 0.88, + "learning_rate": 7.985215497709909e-07, + "loss": 0.7256, + "step": 5922 + }, + { + "epoch": 0.88, + "learning_rate": 7.966470801576354e-07, + "loss": 0.791, + "step": 5923 + }, + { + "epoch": 0.88, + "learning_rate": 7.947747219444468e-07, + "loss": 0.8286, + "step": 5924 + }, + { + "epoch": 0.88, + "learning_rate": 7.929044755609728e-07, + "loss": 0.7588, + "step": 5925 + }, + { + "epoch": 0.88, + "learning_rate": 7.91036341436282e-07, + "loss": 0.8105, + "step": 5926 + }, + { + "epoch": 0.88, + "learning_rate": 7.891703199989509e-07, + "loss": 0.8237, + "step": 5927 + }, + { + "epoch": 0.88, + "learning_rate": 7.873064116770802e-07, + "loss": 0.8149, + "step": 5928 + }, + { + "epoch": 0.88, + "learning_rate": 7.854446168982777e-07, + "loss": 0.7783, + "step": 5929 + }, + { + "epoch": 0.88, + "learning_rate": 7.835849360896697e-07, + "loss": 0.8716, + "step": 5930 + }, + { + "epoch": 0.88, + "learning_rate": 7.817273696778994e-07, + "loss": 0.7754, + "step": 5931 + }, + { + "epoch": 0.88, + "learning_rate": 7.798719180891223e-07, + "loss": 0.7246, + "step": 5932 + }, + { + "epoch": 0.88, + "learning_rate": 7.780185817490082e-07, + "loss": 0.7905, + "step": 5933 + }, + { + "epoch": 0.88, + "learning_rate": 7.761673610827447e-07, + "loss": 0.7549, + "step": 5934 + }, + { + "epoch": 0.88, + "learning_rate": 7.743182565150286e-07, + "loss": 0.7891, + "step": 5935 + }, + { + "epoch": 0.88, + "learning_rate": 7.724712684700819e-07, + "loss": 0.772, + "step": 5936 + }, + { + "epoch": 0.88, + "learning_rate": 7.706263973716266e-07, + "loss": 0.7954, + "step": 5937 + }, + { + "epoch": 0.88, + "learning_rate": 7.687836436429086e-07, + "loss": 0.7485, + "step": 5938 + }, + { + "epoch": 0.88, + "learning_rate": 7.669430077066887e-07, + "loss": 0.793, + "step": 5939 + }, + { + "epoch": 0.88, + "learning_rate": 7.651044899852367e-07, + "loss": 0.7812, + "step": 5940 + }, + { + "epoch": 0.88, + "learning_rate": 7.632680909003398e-07, + "loss": 0.7783, + "step": 5941 + }, + { + "epoch": 0.88, + "learning_rate": 7.614338108732944e-07, + "loss": 0.7822, + "step": 5942 + }, + { + "epoch": 0.88, + "learning_rate": 7.59601650324917e-07, + "loss": 0.3201, + "step": 5943 + }, + { + "epoch": 0.88, + "learning_rate": 7.577716096755383e-07, + "loss": 0.7563, + "step": 5944 + }, + { + "epoch": 0.88, + "learning_rate": 7.559436893449968e-07, + "loss": 0.7319, + "step": 5945 + }, + { + "epoch": 0.88, + "learning_rate": 7.541178897526447e-07, + "loss": 0.6865, + "step": 5946 + }, + { + "epoch": 0.88, + "learning_rate": 7.522942113173559e-07, + "loss": 0.6982, + "step": 5947 + }, + { + "epoch": 0.88, + "learning_rate": 7.504726544575069e-07, + "loss": 0.7954, + "step": 5948 + }, + { + "epoch": 0.88, + "learning_rate": 7.48653219590999e-07, + "loss": 0.7578, + "step": 5949 + }, + { + "epoch": 0.88, + "learning_rate": 7.468359071352338e-07, + "loss": 0.8062, + "step": 5950 + }, + { + "epoch": 0.88, + "learning_rate": 7.450207175071356e-07, + "loss": 0.7671, + "step": 5951 + }, + { + "epoch": 0.88, + "learning_rate": 7.43207651123139e-07, + "loss": 0.7773, + "step": 5952 + }, + { + "epoch": 0.88, + "learning_rate": 7.413967083991925e-07, + "loss": 0.7466, + "step": 5953 + }, + { + "epoch": 0.88, + "learning_rate": 7.395878897507525e-07, + "loss": 0.7881, + "step": 5954 + }, + { + "epoch": 0.88, + "learning_rate": 7.377811955927928e-07, + "loss": 0.7695, + "step": 5955 + }, + { + "epoch": 0.88, + "learning_rate": 7.359766263397994e-07, + "loss": 0.8096, + "step": 5956 + }, + { + "epoch": 0.88, + "learning_rate": 7.341741824057713e-07, + "loss": 0.7554, + "step": 5957 + }, + { + "epoch": 0.88, + "learning_rate": 7.323738642042178e-07, + "loss": 0.8398, + "step": 5958 + }, + { + "epoch": 0.88, + "learning_rate": 7.305756721481605e-07, + "loss": 0.6831, + "step": 5959 + }, + { + "epoch": 0.88, + "learning_rate": 7.28779606650134e-07, + "loss": 0.8091, + "step": 5960 + }, + { + "epoch": 0.88, + "learning_rate": 7.269856681221854e-07, + "loss": 0.8125, + "step": 5961 + }, + { + "epoch": 0.88, + "learning_rate": 7.251938569758777e-07, + "loss": 0.8403, + "step": 5962 + }, + { + "epoch": 0.88, + "learning_rate": 7.234041736222752e-07, + "loss": 0.813, + "step": 5963 + }, + { + "epoch": 0.88, + "learning_rate": 7.216166184719653e-07, + "loss": 0.7739, + "step": 5964 + }, + { + "epoch": 0.88, + "learning_rate": 7.198311919350387e-07, + "loss": 0.7349, + "step": 5965 + }, + { + "epoch": 0.88, + "learning_rate": 7.180478944211055e-07, + "loss": 0.7695, + "step": 5966 + }, + { + "epoch": 0.88, + "learning_rate": 7.162667263392819e-07, + "loss": 0.75, + "step": 5967 + }, + { + "epoch": 0.88, + "learning_rate": 7.144876880981955e-07, + "loss": 0.8022, + "step": 5968 + }, + { + "epoch": 0.88, + "learning_rate": 7.127107801059896e-07, + "loss": 0.6821, + "step": 5969 + }, + { + "epoch": 0.88, + "learning_rate": 7.109360027703139e-07, + "loss": 0.7891, + "step": 5970 + }, + { + "epoch": 0.88, + "learning_rate": 7.091633564983314e-07, + "loss": 0.7637, + "step": 5971 + }, + { + "epoch": 0.88, + "learning_rate": 7.073928416967179e-07, + "loss": 0.7622, + "step": 5972 + }, + { + "epoch": 0.88, + "learning_rate": 7.056244587716565e-07, + "loss": 0.7705, + "step": 5973 + }, + { + "epoch": 0.88, + "learning_rate": 7.03858208128847e-07, + "loss": 0.7974, + "step": 5974 + }, + { + "epoch": 0.88, + "learning_rate": 7.020940901734918e-07, + "loss": 0.7871, + "step": 5975 + }, + { + "epoch": 0.88, + "learning_rate": 7.003321053103107e-07, + "loss": 0.75, + "step": 5976 + }, + { + "epoch": 0.88, + "learning_rate": 6.985722539435313e-07, + "loss": 0.7803, + "step": 5977 + }, + { + "epoch": 0.88, + "learning_rate": 6.96814536476893e-07, + "loss": 0.7983, + "step": 5978 + }, + { + "epoch": 0.88, + "learning_rate": 6.950589533136454e-07, + "loss": 0.7969, + "step": 5979 + }, + { + "epoch": 0.88, + "learning_rate": 6.933055048565473e-07, + "loss": 0.287, + "step": 5980 + }, + { + "epoch": 0.88, + "learning_rate": 6.915541915078672e-07, + "loss": 0.8257, + "step": 5981 + }, + { + "epoch": 0.88, + "learning_rate": 6.898050136693879e-07, + "loss": 0.7734, + "step": 5982 + }, + { + "epoch": 0.88, + "learning_rate": 6.880579717423985e-07, + "loss": 0.7437, + "step": 5983 + }, + { + "epoch": 0.88, + "learning_rate": 6.863130661276974e-07, + "loss": 0.7778, + "step": 5984 + }, + { + "epoch": 0.89, + "learning_rate": 6.845702972255974e-07, + "loss": 0.7461, + "step": 5985 + }, + { + "epoch": 0.89, + "learning_rate": 6.828296654359146e-07, + "loss": 0.7461, + "step": 5986 + }, + { + "epoch": 0.89, + "learning_rate": 6.810911711579826e-07, + "loss": 0.7959, + "step": 5987 + }, + { + "epoch": 0.89, + "learning_rate": 6.793548147906393e-07, + "loss": 0.8179, + "step": 5988 + }, + { + "epoch": 0.89, + "learning_rate": 6.776205967322303e-07, + "loss": 0.7847, + "step": 5989 + }, + { + "epoch": 0.89, + "learning_rate": 6.758885173806184e-07, + "loss": 0.8037, + "step": 5990 + }, + { + "epoch": 0.89, + "learning_rate": 6.741585771331672e-07, + "loss": 0.7778, + "step": 5991 + }, + { + "epoch": 0.89, + "learning_rate": 6.724307763867555e-07, + "loss": 0.7534, + "step": 5992 + }, + { + "epoch": 0.89, + "learning_rate": 6.707051155377686e-07, + "loss": 0.8027, + "step": 5993 + }, + { + "epoch": 0.89, + "learning_rate": 6.689815949820999e-07, + "loss": 0.8223, + "step": 5994 + }, + { + "epoch": 0.89, + "learning_rate": 6.672602151151564e-07, + "loss": 0.8374, + "step": 5995 + }, + { + "epoch": 0.89, + "learning_rate": 6.655409763318498e-07, + "loss": 0.2925, + "step": 5996 + }, + { + "epoch": 0.89, + "learning_rate": 6.638238790265983e-07, + "loss": 0.8022, + "step": 5997 + }, + { + "epoch": 0.89, + "learning_rate": 6.621089235933375e-07, + "loss": 0.7437, + "step": 5998 + }, + { + "epoch": 0.89, + "learning_rate": 6.603961104255018e-07, + "loss": 0.6819, + "step": 5999 + }, + { + "epoch": 0.89, + "learning_rate": 6.586854399160425e-07, + "loss": 0.8242, + "step": 6000 + }, + { + "epoch": 0.89, + "learning_rate": 6.569769124574133e-07, + "loss": 0.8198, + "step": 6001 + }, + { + "epoch": 0.89, + "learning_rate": 6.552705284415773e-07, + "loss": 0.7773, + "step": 6002 + }, + { + "epoch": 0.89, + "learning_rate": 6.535662882600091e-07, + "loss": 0.7578, + "step": 6003 + }, + { + "epoch": 0.89, + "learning_rate": 6.518641923036884e-07, + "loss": 0.7607, + "step": 6004 + }, + { + "epoch": 0.89, + "learning_rate": 6.501642409631059e-07, + "loss": 0.7695, + "step": 6005 + }, + { + "epoch": 0.89, + "learning_rate": 6.484664346282555e-07, + "loss": 0.7598, + "step": 6006 + }, + { + "epoch": 0.89, + "learning_rate": 6.46770773688642e-07, + "loss": 0.769, + "step": 6007 + }, + { + "epoch": 0.89, + "learning_rate": 6.45077258533281e-07, + "loss": 0.7266, + "step": 6008 + }, + { + "epoch": 0.89, + "learning_rate": 6.433858895506895e-07, + "loss": 0.7817, + "step": 6009 + }, + { + "epoch": 0.89, + "learning_rate": 6.416966671288949e-07, + "loss": 0.3, + "step": 6010 + }, + { + "epoch": 0.89, + "learning_rate": 6.400095916554361e-07, + "loss": 0.8022, + "step": 6011 + }, + { + "epoch": 0.89, + "learning_rate": 6.383246635173512e-07, + "loss": 0.7744, + "step": 6012 + }, + { + "epoch": 0.89, + "learning_rate": 6.366418831011955e-07, + "loss": 0.7852, + "step": 6013 + }, + { + "epoch": 0.89, + "learning_rate": 6.349612507930236e-07, + "loss": 0.7915, + "step": 6014 + }, + { + "epoch": 0.89, + "learning_rate": 6.332827669783981e-07, + "loss": 0.8276, + "step": 6015 + }, + { + "epoch": 0.89, + "learning_rate": 6.316064320423953e-07, + "loss": 0.8169, + "step": 6016 + }, + { + "epoch": 0.89, + "learning_rate": 6.299322463695912e-07, + "loss": 0.8066, + "step": 6017 + }, + { + "epoch": 0.89, + "learning_rate": 6.282602103440705e-07, + "loss": 0.772, + "step": 6018 + }, + { + "epoch": 0.89, + "learning_rate": 6.265903243494286e-07, + "loss": 0.7725, + "step": 6019 + }, + { + "epoch": 0.89, + "learning_rate": 6.249225887687615e-07, + "loss": 0.7637, + "step": 6020 + }, + { + "epoch": 0.89, + "learning_rate": 6.232570039846786e-07, + "loss": 0.3281, + "step": 6021 + }, + { + "epoch": 0.89, + "learning_rate": 6.215935703792908e-07, + "loss": 0.7891, + "step": 6022 + }, + { + "epoch": 0.89, + "learning_rate": 6.199322883342152e-07, + "loss": 0.7485, + "step": 6023 + }, + { + "epoch": 0.89, + "learning_rate": 6.182731582305801e-07, + "loss": 0.7852, + "step": 6024 + }, + { + "epoch": 0.89, + "learning_rate": 6.166161804490145e-07, + "loss": 0.8208, + "step": 6025 + }, + { + "epoch": 0.89, + "learning_rate": 6.14961355369661e-07, + "loss": 0.7515, + "step": 6026 + }, + { + "epoch": 0.89, + "learning_rate": 6.133086833721569e-07, + "loss": 0.7397, + "step": 6027 + }, + { + "epoch": 0.89, + "learning_rate": 6.116581648356557e-07, + "loss": 0.7378, + "step": 6028 + }, + { + "epoch": 0.89, + "learning_rate": 6.100098001388155e-07, + "loss": 0.7812, + "step": 6029 + }, + { + "epoch": 0.89, + "learning_rate": 6.083635896597951e-07, + "loss": 0.7686, + "step": 6030 + }, + { + "epoch": 0.89, + "learning_rate": 6.067195337762644e-07, + "loss": 0.751, + "step": 6031 + }, + { + "epoch": 0.89, + "learning_rate": 6.050776328653929e-07, + "loss": 0.8057, + "step": 6032 + }, + { + "epoch": 0.89, + "learning_rate": 6.034378873038638e-07, + "loss": 0.7544, + "step": 6033 + }, + { + "epoch": 0.89, + "learning_rate": 6.018002974678616e-07, + "loss": 0.7402, + "step": 6034 + }, + { + "epoch": 0.89, + "learning_rate": 6.001648637330726e-07, + "loss": 0.7646, + "step": 6035 + }, + { + "epoch": 0.89, + "learning_rate": 5.985315864746965e-07, + "loss": 0.7451, + "step": 6036 + }, + { + "epoch": 0.89, + "learning_rate": 5.969004660674294e-07, + "loss": 0.7412, + "step": 6037 + }, + { + "epoch": 0.89, + "learning_rate": 5.952715028854795e-07, + "loss": 0.7871, + "step": 6038 + }, + { + "epoch": 0.89, + "learning_rate": 5.936446973025612e-07, + "loss": 0.7808, + "step": 6039 + }, + { + "epoch": 0.89, + "learning_rate": 5.920200496918837e-07, + "loss": 0.7573, + "step": 6040 + }, + { + "epoch": 0.89, + "learning_rate": 5.903975604261725e-07, + "loss": 0.7998, + "step": 6041 + }, + { + "epoch": 0.89, + "learning_rate": 5.887772298776496e-07, + "loss": 0.8203, + "step": 6042 + }, + { + "epoch": 0.89, + "learning_rate": 5.871590584180497e-07, + "loss": 0.7578, + "step": 6043 + }, + { + "epoch": 0.89, + "learning_rate": 5.855430464186052e-07, + "loss": 0.813, + "step": 6044 + }, + { + "epoch": 0.89, + "learning_rate": 5.839291942500547e-07, + "loss": 0.7197, + "step": 6045 + }, + { + "epoch": 0.89, + "learning_rate": 5.823175022826444e-07, + "loss": 0.8062, + "step": 6046 + }, + { + "epoch": 0.89, + "learning_rate": 5.807079708861252e-07, + "loss": 0.8008, + "step": 6047 + }, + { + "epoch": 0.89, + "learning_rate": 5.791006004297451e-07, + "loss": 0.8086, + "step": 6048 + }, + { + "epoch": 0.89, + "learning_rate": 5.774953912822634e-07, + "loss": 0.7817, + "step": 6049 + }, + { + "epoch": 0.89, + "learning_rate": 5.758923438119413e-07, + "loss": 0.7251, + "step": 6050 + }, + { + "epoch": 0.89, + "learning_rate": 5.742914583865434e-07, + "loss": 0.7737, + "step": 6051 + }, + { + "epoch": 0.9, + "learning_rate": 5.726927353733424e-07, + "loss": 0.751, + "step": 6052 + }, + { + "epoch": 0.9, + "learning_rate": 5.710961751391075e-07, + "loss": 0.8076, + "step": 6053 + }, + { + "epoch": 0.9, + "learning_rate": 5.695017780501188e-07, + "loss": 0.7871, + "step": 6054 + }, + { + "epoch": 0.9, + "learning_rate": 5.679095444721538e-07, + "loss": 0.8203, + "step": 6055 + }, + { + "epoch": 0.9, + "learning_rate": 5.663194747705014e-07, + "loss": 0.792, + "step": 6056 + }, + { + "epoch": 0.9, + "learning_rate": 5.647315693099464e-07, + "loss": 0.7876, + "step": 6057 + }, + { + "epoch": 0.9, + "learning_rate": 5.631458284547797e-07, + "loss": 0.8193, + "step": 6058 + }, + { + "epoch": 0.9, + "learning_rate": 5.615622525688002e-07, + "loss": 0.8027, + "step": 6059 + }, + { + "epoch": 0.9, + "learning_rate": 5.59980842015303e-07, + "loss": 0.7417, + "step": 6060 + }, + { + "epoch": 0.9, + "learning_rate": 5.5840159715709e-07, + "loss": 0.8267, + "step": 6061 + }, + { + "epoch": 0.9, + "learning_rate": 5.568245183564669e-07, + "loss": 0.7915, + "step": 6062 + }, + { + "epoch": 0.9, + "learning_rate": 5.552496059752399e-07, + "loss": 0.8042, + "step": 6063 + }, + { + "epoch": 0.9, + "learning_rate": 5.536768603747222e-07, + "loss": 0.7954, + "step": 6064 + }, + { + "epoch": 0.9, + "learning_rate": 5.521062819157264e-07, + "loss": 0.79, + "step": 6065 + }, + { + "epoch": 0.9, + "learning_rate": 5.505378709585662e-07, + "loss": 0.7383, + "step": 6066 + }, + { + "epoch": 0.9, + "learning_rate": 5.489716278630652e-07, + "loss": 0.7959, + "step": 6067 + }, + { + "epoch": 0.9, + "learning_rate": 5.474075529885425e-07, + "loss": 0.8091, + "step": 6068 + }, + { + "epoch": 0.9, + "learning_rate": 5.458456466938233e-07, + "loss": 0.7944, + "step": 6069 + }, + { + "epoch": 0.9, + "learning_rate": 5.442859093372354e-07, + "loss": 0.8867, + "step": 6070 + }, + { + "epoch": 0.9, + "learning_rate": 5.42728341276606e-07, + "loss": 0.8018, + "step": 6071 + }, + { + "epoch": 0.9, + "learning_rate": 5.411729428692691e-07, + "loss": 0.8062, + "step": 6072 + }, + { + "epoch": 0.9, + "learning_rate": 5.396197144720572e-07, + "loss": 0.7378, + "step": 6073 + }, + { + "epoch": 0.9, + "learning_rate": 5.380686564413063e-07, + "loss": 0.3372, + "step": 6074 + }, + { + "epoch": 0.9, + "learning_rate": 5.365197691328561e-07, + "loss": 0.7446, + "step": 6075 + }, + { + "epoch": 0.9, + "learning_rate": 5.349730529020436e-07, + "loss": 0.7666, + "step": 6076 + }, + { + "epoch": 0.9, + "learning_rate": 5.33428508103715e-07, + "loss": 0.7451, + "step": 6077 + }, + { + "epoch": 0.9, + "learning_rate": 5.318861350922111e-07, + "loss": 0.7031, + "step": 6078 + }, + { + "epoch": 0.9, + "learning_rate": 5.303459342213779e-07, + "loss": 0.8159, + "step": 6079 + }, + { + "epoch": 0.9, + "learning_rate": 5.28807905844565e-07, + "loss": 0.8384, + "step": 6080 + }, + { + "epoch": 0.9, + "learning_rate": 5.272720503146201e-07, + "loss": 0.8135, + "step": 6081 + }, + { + "epoch": 0.9, + "learning_rate": 5.257383679838912e-07, + "loss": 0.7637, + "step": 6082 + }, + { + "epoch": 0.9, + "learning_rate": 5.242068592042349e-07, + "loss": 0.8433, + "step": 6083 + }, + { + "epoch": 0.9, + "learning_rate": 5.226775243269999e-07, + "loss": 0.7788, + "step": 6084 + }, + { + "epoch": 0.9, + "learning_rate": 5.211503637030435e-07, + "loss": 0.8115, + "step": 6085 + }, + { + "epoch": 0.9, + "learning_rate": 5.19625377682722e-07, + "loss": 0.7979, + "step": 6086 + }, + { + "epoch": 0.9, + "learning_rate": 5.181025666158889e-07, + "loss": 0.7798, + "step": 6087 + }, + { + "epoch": 0.9, + "learning_rate": 5.165819308519049e-07, + "loss": 0.8052, + "step": 6088 + }, + { + "epoch": 0.9, + "learning_rate": 5.150634707396263e-07, + "loss": 0.793, + "step": 6089 + }, + { + "epoch": 0.9, + "learning_rate": 5.135471866274167e-07, + "loss": 0.7505, + "step": 6090 + }, + { + "epoch": 0.9, + "learning_rate": 5.120330788631334e-07, + "loss": 0.7764, + "step": 6091 + }, + { + "epoch": 0.9, + "learning_rate": 5.105211477941374e-07, + "loss": 0.7827, + "step": 6092 + }, + { + "epoch": 0.9, + "learning_rate": 5.090113937672925e-07, + "loss": 0.3127, + "step": 6093 + }, + { + "epoch": 0.9, + "learning_rate": 5.075038171289603e-07, + "loss": 0.7983, + "step": 6094 + }, + { + "epoch": 0.9, + "learning_rate": 5.059984182250022e-07, + "loss": 0.8105, + "step": 6095 + }, + { + "epoch": 0.9, + "learning_rate": 5.044951974007838e-07, + "loss": 0.7097, + "step": 6096 + }, + { + "epoch": 0.9, + "learning_rate": 5.029941550011663e-07, + "loss": 0.7954, + "step": 6097 + }, + { + "epoch": 0.9, + "learning_rate": 5.014952913705162e-07, + "loss": 0.7783, + "step": 6098 + }, + { + "epoch": 0.9, + "learning_rate": 4.999986068526941e-07, + "loss": 0.8042, + "step": 6099 + }, + { + "epoch": 0.9, + "learning_rate": 4.985041017910653e-07, + "loss": 0.7812, + "step": 6100 + }, + { + "epoch": 0.9, + "learning_rate": 4.970117765284943e-07, + "loss": 0.791, + "step": 6101 + }, + { + "epoch": 0.9, + "learning_rate": 4.955216314073452e-07, + "loss": 0.7891, + "step": 6102 + }, + { + "epoch": 0.9, + "learning_rate": 4.940336667694834e-07, + "loss": 0.729, + "step": 6103 + }, + { + "epoch": 0.9, + "learning_rate": 4.925478829562668e-07, + "loss": 0.7476, + "step": 6104 + }, + { + "epoch": 0.9, + "learning_rate": 4.910642803085631e-07, + "loss": 0.7998, + "step": 6105 + }, + { + "epoch": 0.9, + "learning_rate": 4.895828591667351e-07, + "loss": 0.7959, + "step": 6106 + }, + { + "epoch": 0.9, + "learning_rate": 4.881036198706446e-07, + "loss": 0.7612, + "step": 6107 + }, + { + "epoch": 0.9, + "learning_rate": 4.866265627596522e-07, + "loss": 0.8198, + "step": 6108 + }, + { + "epoch": 0.9, + "learning_rate": 4.851516881726181e-07, + "loss": 0.7729, + "step": 6109 + }, + { + "epoch": 0.9, + "learning_rate": 4.83678996447906e-07, + "loss": 0.7974, + "step": 6110 + }, + { + "epoch": 0.9, + "learning_rate": 4.822084879233746e-07, + "loss": 0.7256, + "step": 6111 + }, + { + "epoch": 0.9, + "learning_rate": 4.807401629363806e-07, + "loss": 0.7822, + "step": 6112 + }, + { + "epoch": 0.9, + "learning_rate": 4.792740218237835e-07, + "loss": 0.7954, + "step": 6113 + }, + { + "epoch": 0.9, + "learning_rate": 4.778100649219398e-07, + "loss": 0.7803, + "step": 6114 + }, + { + "epoch": 0.9, + "learning_rate": 4.763482925667051e-07, + "loss": 0.7588, + "step": 6115 + }, + { + "epoch": 0.9, + "learning_rate": 4.74888705093437e-07, + "loss": 0.7715, + "step": 6116 + }, + { + "epoch": 0.9, + "learning_rate": 4.7343130283698193e-07, + "loss": 0.8179, + "step": 6117 + }, + { + "epoch": 0.9, + "learning_rate": 4.7197608613169685e-07, + "loss": 0.8228, + "step": 6118 + }, + { + "epoch": 0.9, + "learning_rate": 4.705230553114326e-07, + "loss": 0.7744, + "step": 6119 + }, + { + "epoch": 0.91, + "learning_rate": 4.6907221070953803e-07, + "loss": 0.7969, + "step": 6120 + }, + { + "epoch": 0.91, + "learning_rate": 4.6762355265885793e-07, + "loss": 0.8008, + "step": 6121 + }, + { + "epoch": 0.91, + "learning_rate": 4.661770814917399e-07, + "loss": 0.7207, + "step": 6122 + }, + { + "epoch": 0.91, + "learning_rate": 4.6473279754002844e-07, + "loss": 0.7788, + "step": 6123 + }, + { + "epoch": 0.91, + "learning_rate": 4.6329070113506847e-07, + "loss": 0.7812, + "step": 6124 + }, + { + "epoch": 0.91, + "learning_rate": 4.618507926076954e-07, + "loss": 0.8198, + "step": 6125 + }, + { + "epoch": 0.91, + "learning_rate": 4.604130722882516e-07, + "loss": 0.7129, + "step": 6126 + }, + { + "epoch": 0.91, + "learning_rate": 4.5897754050657104e-07, + "loss": 0.295, + "step": 6127 + }, + { + "epoch": 0.91, + "learning_rate": 4.575441975919914e-07, + "loss": 0.7334, + "step": 6128 + }, + { + "epoch": 0.91, + "learning_rate": 4.56113043873343e-07, + "loss": 0.7563, + "step": 6129 + }, + { + "epoch": 0.91, + "learning_rate": 4.546840796789553e-07, + "loss": 0.751, + "step": 6130 + }, + { + "epoch": 0.91, + "learning_rate": 4.532573053366585e-07, + "loss": 0.7827, + "step": 6131 + }, + { + "epoch": 0.91, + "learning_rate": 4.518327211737761e-07, + "loss": 0.7075, + "step": 6132 + }, + { + "epoch": 0.91, + "learning_rate": 4.504103275171323e-07, + "loss": 0.7427, + "step": 6133 + }, + { + "epoch": 0.91, + "learning_rate": 4.4899012469304725e-07, + "loss": 0.705, + "step": 6134 + }, + { + "epoch": 0.91, + "learning_rate": 4.4757211302733806e-07, + "loss": 0.7104, + "step": 6135 + }, + { + "epoch": 0.91, + "learning_rate": 4.4615629284532005e-07, + "loss": 0.7373, + "step": 6136 + }, + { + "epoch": 0.91, + "learning_rate": 4.447426644718067e-07, + "loss": 0.7725, + "step": 6137 + }, + { + "epoch": 0.91, + "learning_rate": 4.433312282311064e-07, + "loss": 0.8159, + "step": 6138 + }, + { + "epoch": 0.91, + "learning_rate": 4.4192198444702685e-07, + "loss": 0.814, + "step": 6139 + }, + { + "epoch": 0.91, + "learning_rate": 4.4051493344286934e-07, + "loss": 0.7642, + "step": 6140 + }, + { + "epoch": 0.91, + "learning_rate": 4.3911007554143683e-07, + "loss": 0.7241, + "step": 6141 + }, + { + "epoch": 0.91, + "learning_rate": 4.3770741106502704e-07, + "loss": 0.7485, + "step": 6142 + }, + { + "epoch": 0.91, + "learning_rate": 4.3630694033543255e-07, + "loss": 0.7529, + "step": 6143 + }, + { + "epoch": 0.91, + "learning_rate": 4.3490866367394525e-07, + "loss": 0.7314, + "step": 6144 + }, + { + "epoch": 0.91, + "learning_rate": 4.3351258140135186e-07, + "loss": 0.7632, + "step": 6145 + }, + { + "epoch": 0.91, + "learning_rate": 4.3211869383793735e-07, + "loss": 0.8027, + "step": 6146 + }, + { + "epoch": 0.91, + "learning_rate": 4.3072700130348255e-07, + "loss": 0.7036, + "step": 6147 + }, + { + "epoch": 0.91, + "learning_rate": 4.2933750411726425e-07, + "loss": 0.2764, + "step": 6148 + }, + { + "epoch": 0.91, + "learning_rate": 4.279502025980564e-07, + "loss": 0.7617, + "step": 6149 + }, + { + "epoch": 0.91, + "learning_rate": 4.2656509706412774e-07, + "loss": 0.7266, + "step": 6150 + }, + { + "epoch": 0.91, + "learning_rate": 4.2518218783324404e-07, + "loss": 0.7661, + "step": 6151 + }, + { + "epoch": 0.91, + "learning_rate": 4.2380147522266937e-07, + "loss": 0.7979, + "step": 6152 + }, + { + "epoch": 0.91, + "learning_rate": 4.2242295954915913e-07, + "loss": 0.791, + "step": 6153 + }, + { + "epoch": 0.91, + "learning_rate": 4.210466411289704e-07, + "loss": 0.8145, + "step": 6154 + }, + { + "epoch": 0.91, + "learning_rate": 4.1967252027785066e-07, + "loss": 0.748, + "step": 6155 + }, + { + "epoch": 0.91, + "learning_rate": 4.1830059731104657e-07, + "loss": 0.7529, + "step": 6156 + }, + { + "epoch": 0.91, + "learning_rate": 4.169308725433008e-07, + "loss": 0.7529, + "step": 6157 + }, + { + "epoch": 0.91, + "learning_rate": 4.1556334628884973e-07, + "loss": 0.8018, + "step": 6158 + }, + { + "epoch": 0.91, + "learning_rate": 4.1419801886142584e-07, + "loss": 0.8169, + "step": 6159 + }, + { + "epoch": 0.91, + "learning_rate": 4.128348905742585e-07, + "loss": 0.7593, + "step": 6160 + }, + { + "epoch": 0.91, + "learning_rate": 4.1147396174007094e-07, + "loss": 0.7651, + "step": 6161 + }, + { + "epoch": 0.91, + "learning_rate": 4.1011523267108333e-07, + "loss": 0.8247, + "step": 6162 + }, + { + "epoch": 0.91, + "learning_rate": 4.087587036790119e-07, + "loss": 0.7837, + "step": 6163 + }, + { + "epoch": 0.91, + "learning_rate": 4.0740437507506226e-07, + "loss": 0.7563, + "step": 6164 + }, + { + "epoch": 0.91, + "learning_rate": 4.060522471699435e-07, + "loss": 0.7656, + "step": 6165 + }, + { + "epoch": 0.91, + "learning_rate": 4.0470232027385424e-07, + "loss": 0.8237, + "step": 6166 + }, + { + "epoch": 0.91, + "learning_rate": 4.0335459469649117e-07, + "loss": 0.7764, + "step": 6167 + }, + { + "epoch": 0.91, + "learning_rate": 4.0200907074704367e-07, + "loss": 0.7329, + "step": 6168 + }, + { + "epoch": 0.91, + "learning_rate": 4.0066574873419697e-07, + "loss": 0.793, + "step": 6169 + }, + { + "epoch": 0.91, + "learning_rate": 3.9932462896613124e-07, + "loss": 0.8481, + "step": 6170 + }, + { + "epoch": 0.91, + "learning_rate": 3.979857117505226e-07, + "loss": 0.2988, + "step": 6171 + }, + { + "epoch": 0.91, + "learning_rate": 3.9664899739453753e-07, + "loss": 0.7847, + "step": 6172 + }, + { + "epoch": 0.91, + "learning_rate": 3.9531448620484304e-07, + "loss": 0.7886, + "step": 6173 + }, + { + "epoch": 0.91, + "learning_rate": 3.9398217848759637e-07, + "loss": 0.7856, + "step": 6174 + }, + { + "epoch": 0.91, + "learning_rate": 3.926520745484541e-07, + "loss": 0.7461, + "step": 6175 + }, + { + "epoch": 0.91, + "learning_rate": 3.913241746925589e-07, + "loss": 0.7744, + "step": 6176 + }, + { + "epoch": 0.91, + "learning_rate": 3.899984792245548e-07, + "loss": 0.7983, + "step": 6177 + }, + { + "epoch": 0.91, + "learning_rate": 3.8867498844857964e-07, + "loss": 0.8662, + "step": 6178 + }, + { + "epoch": 0.91, + "learning_rate": 3.873537026682617e-07, + "loss": 0.7827, + "step": 6179 + }, + { + "epoch": 0.91, + "learning_rate": 3.8603462218672837e-07, + "loss": 0.79, + "step": 6180 + }, + { + "epoch": 0.91, + "learning_rate": 3.847177473065955e-07, + "loss": 0.7422, + "step": 6181 + }, + { + "epoch": 0.91, + "learning_rate": 3.8340307832997693e-07, + "loss": 0.7661, + "step": 6182 + }, + { + "epoch": 0.91, + "learning_rate": 3.820906155584803e-07, + "loss": 0.7427, + "step": 6183 + }, + { + "epoch": 0.91, + "learning_rate": 3.8078035929320467e-07, + "loss": 0.7788, + "step": 6184 + }, + { + "epoch": 0.91, + "learning_rate": 3.7947230983474304e-07, + "loss": 0.7656, + "step": 6185 + }, + { + "epoch": 0.91, + "learning_rate": 3.781664674831875e-07, + "loss": 0.7627, + "step": 6186 + }, + { + "epoch": 0.91, + "learning_rate": 3.7686283253811516e-07, + "loss": 0.7876, + "step": 6187 + }, + { + "epoch": 0.92, + "learning_rate": 3.755614052986056e-07, + "loss": 0.7729, + "step": 6188 + }, + { + "epoch": 0.92, + "learning_rate": 3.742621860632245e-07, + "loss": 0.7896, + "step": 6189 + }, + { + "epoch": 0.92, + "learning_rate": 3.729651751300334e-07, + "loss": 0.8398, + "step": 6190 + }, + { + "epoch": 0.92, + "learning_rate": 3.71670372796592e-07, + "loss": 0.7622, + "step": 6191 + }, + { + "epoch": 0.92, + "learning_rate": 3.703777793599461e-07, + "loss": 0.689, + "step": 6192 + }, + { + "epoch": 0.92, + "learning_rate": 3.690873951166385e-07, + "loss": 0.8022, + "step": 6193 + }, + { + "epoch": 0.92, + "learning_rate": 3.6779922036270234e-07, + "loss": 0.7554, + "step": 6194 + }, + { + "epoch": 0.92, + "learning_rate": 3.66513255393669e-07, + "loss": 0.3113, + "step": 6195 + }, + { + "epoch": 0.92, + "learning_rate": 3.652295005045603e-07, + "loss": 0.8052, + "step": 6196 + }, + { + "epoch": 0.92, + "learning_rate": 3.639479559898895e-07, + "loss": 0.7974, + "step": 6197 + }, + { + "epoch": 0.92, + "learning_rate": 3.626686221436648e-07, + "loss": 0.8062, + "step": 6198 + }, + { + "epoch": 0.92, + "learning_rate": 3.613914992593825e-07, + "loss": 0.7905, + "step": 6199 + }, + { + "epoch": 0.92, + "learning_rate": 3.6011658763003944e-07, + "loss": 0.7998, + "step": 6200 + }, + { + "epoch": 0.92, + "learning_rate": 3.588438875481226e-07, + "loss": 0.7329, + "step": 6201 + }, + { + "epoch": 0.92, + "learning_rate": 3.575733993056063e-07, + "loss": 0.7671, + "step": 6202 + }, + { + "epoch": 0.92, + "learning_rate": 3.56305123193964e-07, + "loss": 0.7915, + "step": 6203 + }, + { + "epoch": 0.92, + "learning_rate": 3.550390595041564e-07, + "loss": 0.7764, + "step": 6204 + }, + { + "epoch": 0.92, + "learning_rate": 3.5377520852664217e-07, + "loss": 0.8149, + "step": 6205 + }, + { + "epoch": 0.92, + "learning_rate": 3.525135705513694e-07, + "loss": 0.7881, + "step": 6206 + }, + { + "epoch": 0.92, + "learning_rate": 3.512541458677754e-07, + "loss": 0.7866, + "step": 6207 + }, + { + "epoch": 0.92, + "learning_rate": 3.4999693476479577e-07, + "loss": 0.7769, + "step": 6208 + }, + { + "epoch": 0.92, + "learning_rate": 3.4874193753085426e-07, + "loss": 0.792, + "step": 6209 + }, + { + "epoch": 0.92, + "learning_rate": 3.474891544538683e-07, + "loss": 0.7847, + "step": 6210 + }, + { + "epoch": 0.92, + "learning_rate": 3.46238585821248e-07, + "loss": 0.7295, + "step": 6211 + }, + { + "epoch": 0.92, + "learning_rate": 3.4499023191989055e-07, + "loss": 0.7559, + "step": 6212 + }, + { + "epoch": 0.92, + "learning_rate": 3.437440930361924e-07, + "loss": 0.7974, + "step": 6213 + }, + { + "epoch": 0.92, + "learning_rate": 3.425001694560381e-07, + "loss": 0.8213, + "step": 6214 + }, + { + "epoch": 0.92, + "learning_rate": 3.412584614648018e-07, + "loss": 0.7485, + "step": 6215 + }, + { + "epoch": 0.92, + "learning_rate": 3.4001896934735436e-07, + "loss": 0.8198, + "step": 6216 + }, + { + "epoch": 0.92, + "learning_rate": 3.3878169338805276e-07, + "loss": 0.7524, + "step": 6217 + }, + { + "epoch": 0.92, + "learning_rate": 3.3754663387075116e-07, + "loss": 0.7925, + "step": 6218 + }, + { + "epoch": 0.92, + "learning_rate": 3.363137910787906e-07, + "loss": 0.2733, + "step": 6219 + }, + { + "epoch": 0.92, + "learning_rate": 3.3508316529500596e-07, + "loss": 0.7168, + "step": 6220 + }, + { + "epoch": 0.92, + "learning_rate": 3.3385475680172366e-07, + "loss": 0.8062, + "step": 6221 + }, + { + "epoch": 0.92, + "learning_rate": 3.3262856588076044e-07, + "loss": 0.7134, + "step": 6222 + }, + { + "epoch": 0.92, + "learning_rate": 3.314045928134224e-07, + "loss": 0.7749, + "step": 6223 + }, + { + "epoch": 0.92, + "learning_rate": 3.3018283788051386e-07, + "loss": 0.8545, + "step": 6224 + }, + { + "epoch": 0.92, + "learning_rate": 3.289633013623206e-07, + "loss": 0.7837, + "step": 6225 + }, + { + "epoch": 0.92, + "learning_rate": 3.277459835386276e-07, + "loss": 0.7725, + "step": 6226 + }, + { + "epoch": 0.92, + "learning_rate": 3.265308846887061e-07, + "loss": 0.7607, + "step": 6227 + }, + { + "epoch": 0.92, + "learning_rate": 3.253180050913185e-07, + "loss": 0.8027, + "step": 6228 + }, + { + "epoch": 0.92, + "learning_rate": 3.241073450247223e-07, + "loss": 0.812, + "step": 6229 + }, + { + "epoch": 0.92, + "learning_rate": 3.2289890476665975e-07, + "loss": 0.8452, + "step": 6230 + }, + { + "epoch": 0.92, + "learning_rate": 3.216926845943702e-07, + "loss": 0.7432, + "step": 6231 + }, + { + "epoch": 0.92, + "learning_rate": 3.2048868478457673e-07, + "loss": 0.7498, + "step": 6232 + }, + { + "epoch": 0.92, + "learning_rate": 3.192869056134984e-07, + "loss": 0.7471, + "step": 6233 + }, + { + "epoch": 0.92, + "learning_rate": 3.180873473568447e-07, + "loss": 0.7617, + "step": 6234 + }, + { + "epoch": 0.92, + "learning_rate": 3.168900102898109e-07, + "loss": 0.7681, + "step": 6235 + }, + { + "epoch": 0.92, + "learning_rate": 3.1569489468708746e-07, + "loss": 0.7959, + "step": 6236 + }, + { + "epoch": 0.92, + "learning_rate": 3.145020008228539e-07, + "loss": 0.7051, + "step": 6237 + }, + { + "epoch": 0.92, + "learning_rate": 3.133113289707779e-07, + "loss": 0.7495, + "step": 6238 + }, + { + "epoch": 0.92, + "learning_rate": 3.12122879404021e-07, + "loss": 0.8184, + "step": 6239 + }, + { + "epoch": 0.92, + "learning_rate": 3.1093665239523175e-07, + "loss": 0.7485, + "step": 6240 + }, + { + "epoch": 0.92, + "learning_rate": 3.097526482165503e-07, + "loss": 0.7705, + "step": 6241 + }, + { + "epoch": 0.92, + "learning_rate": 3.0857086713960706e-07, + "loss": 0.7476, + "step": 6242 + }, + { + "epoch": 0.92, + "learning_rate": 3.073913094355219e-07, + "loss": 0.8213, + "step": 6243 + }, + { + "epoch": 0.92, + "learning_rate": 3.0621397537490494e-07, + "loss": 0.7749, + "step": 6244 + }, + { + "epoch": 0.92, + "learning_rate": 3.050388652278558e-07, + "loss": 0.7725, + "step": 6245 + }, + { + "epoch": 0.92, + "learning_rate": 3.038659792639631e-07, + "loss": 0.8232, + "step": 6246 + }, + { + "epoch": 0.92, + "learning_rate": 3.0269531775230733e-07, + "loss": 0.7781, + "step": 6247 + }, + { + "epoch": 0.92, + "learning_rate": 3.0152688096145687e-07, + "loss": 0.8115, + "step": 6248 + }, + { + "epoch": 0.92, + "learning_rate": 3.003606691594696e-07, + "loss": 0.7764, + "step": 6249 + }, + { + "epoch": 0.92, + "learning_rate": 2.991966826138959e-07, + "loss": 0.7812, + "step": 6250 + }, + { + "epoch": 0.92, + "learning_rate": 2.9803492159177103e-07, + "loss": 0.7275, + "step": 6251 + }, + { + "epoch": 0.92, + "learning_rate": 2.968753863596241e-07, + "loss": 0.7544, + "step": 6252 + }, + { + "epoch": 0.92, + "learning_rate": 2.9571807718347e-07, + "loss": 0.8687, + "step": 6253 + }, + { + "epoch": 0.92, + "learning_rate": 2.945629943288131e-07, + "loss": 0.7991, + "step": 6254 + }, + { + "epoch": 0.93, + "learning_rate": 2.9341013806065243e-07, + "loss": 0.7544, + "step": 6255 + }, + { + "epoch": 0.93, + "learning_rate": 2.9225950864346876e-07, + "loss": 0.7642, + "step": 6256 + }, + { + "epoch": 0.93, + "learning_rate": 2.911111063412353e-07, + "loss": 0.7695, + "step": 6257 + }, + { + "epoch": 0.93, + "learning_rate": 2.8996493141741686e-07, + "loss": 0.7661, + "step": 6258 + }, + { + "epoch": 0.93, + "learning_rate": 2.888209841349632e-07, + "loss": 0.7734, + "step": 6259 + }, + { + "epoch": 0.93, + "learning_rate": 2.8767926475631426e-07, + "loss": 0.8086, + "step": 6260 + }, + { + "epoch": 0.93, + "learning_rate": 2.865397735434006e-07, + "loss": 0.7383, + "step": 6261 + }, + { + "epoch": 0.93, + "learning_rate": 2.8540251075763857e-07, + "loss": 0.8062, + "step": 6262 + }, + { + "epoch": 0.93, + "learning_rate": 2.8426747665993517e-07, + "loss": 0.291, + "step": 6263 + }, + { + "epoch": 0.93, + "learning_rate": 2.831346715106864e-07, + "loss": 0.791, + "step": 6264 + }, + { + "epoch": 0.93, + "learning_rate": 2.8200409556977894e-07, + "loss": 0.8237, + "step": 6265 + }, + { + "epoch": 0.93, + "learning_rate": 2.8087574909657965e-07, + "loss": 0.7979, + "step": 6266 + }, + { + "epoch": 0.93, + "learning_rate": 2.7974963234995266e-07, + "loss": 0.7534, + "step": 6267 + }, + { + "epoch": 0.93, + "learning_rate": 2.7862574558825017e-07, + "loss": 0.8022, + "step": 6268 + }, + { + "epoch": 0.93, + "learning_rate": 2.7750408906930704e-07, + "loss": 0.8599, + "step": 6269 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638466305045073e-07, + "loss": 0.7578, + "step": 6270 + }, + { + "epoch": 0.93, + "learning_rate": 2.752674677884948e-07, + "loss": 0.7822, + "step": 6271 + }, + { + "epoch": 0.93, + "learning_rate": 2.74152503539743e-07, + "loss": 0.7349, + "step": 6272 + }, + { + "epoch": 0.93, + "learning_rate": 2.7303977055998743e-07, + "loss": 0.7554, + "step": 6273 + }, + { + "epoch": 0.93, + "learning_rate": 2.719292691045061e-07, + "loss": 0.8823, + "step": 6274 + }, + { + "epoch": 0.93, + "learning_rate": 2.708209994280675e-07, + "loss": 0.7241, + "step": 6275 + }, + { + "epoch": 0.93, + "learning_rate": 2.697149617849237e-07, + "loss": 0.7056, + "step": 6276 + }, + { + "epoch": 0.93, + "learning_rate": 2.6861115642882073e-07, + "loss": 0.7197, + "step": 6277 + }, + { + "epoch": 0.93, + "learning_rate": 2.675095836129915e-07, + "loss": 0.8042, + "step": 6278 + }, + { + "epoch": 0.93, + "learning_rate": 2.6641024359015056e-07, + "loss": 0.7329, + "step": 6279 + }, + { + "epoch": 0.93, + "learning_rate": 2.653131366125061e-07, + "loss": 0.769, + "step": 6280 + }, + { + "epoch": 0.93, + "learning_rate": 2.6421826293175357e-07, + "loss": 0.7993, + "step": 6281 + }, + { + "epoch": 0.93, + "learning_rate": 2.6312562279907416e-07, + "loss": 0.7617, + "step": 6282 + }, + { + "epoch": 0.93, + "learning_rate": 2.620352164651374e-07, + "loss": 0.3093, + "step": 6283 + }, + { + "epoch": 0.93, + "learning_rate": 2.6094704418009984e-07, + "loss": 0.7939, + "step": 6284 + }, + { + "epoch": 0.93, + "learning_rate": 2.5986110619360626e-07, + "loss": 0.7505, + "step": 6285 + }, + { + "epoch": 0.93, + "learning_rate": 2.587774027547918e-07, + "loss": 0.8442, + "step": 6286 + }, + { + "epoch": 0.93, + "learning_rate": 2.576959341122698e-07, + "loss": 0.7954, + "step": 6287 + }, + { + "epoch": 0.93, + "learning_rate": 2.566167005141529e-07, + "loss": 0.7607, + "step": 6288 + }, + { + "epoch": 0.93, + "learning_rate": 2.555397022080297e-07, + "loss": 0.708, + "step": 6289 + }, + { + "epoch": 0.93, + "learning_rate": 2.544649394409848e-07, + "loss": 0.7588, + "step": 6290 + }, + { + "epoch": 0.93, + "learning_rate": 2.5339241245958767e-07, + "loss": 0.7881, + "step": 6291 + }, + { + "epoch": 0.93, + "learning_rate": 2.523221215098881e-07, + "loss": 0.7671, + "step": 6292 + }, + { + "epoch": 0.93, + "learning_rate": 2.5125406683743417e-07, + "loss": 0.8325, + "step": 6293 + }, + { + "epoch": 0.93, + "learning_rate": 2.5018824868725087e-07, + "loss": 0.7759, + "step": 6294 + }, + { + "epoch": 0.93, + "learning_rate": 2.491246673038572e-07, + "loss": 0.7559, + "step": 6295 + }, + { + "epoch": 0.93, + "learning_rate": 2.480633229312557e-07, + "loss": 0.7769, + "step": 6296 + }, + { + "epoch": 0.93, + "learning_rate": 2.4700421581293375e-07, + "loss": 0.8223, + "step": 6297 + }, + { + "epoch": 0.93, + "learning_rate": 2.4594734619187155e-07, + "loss": 0.7598, + "step": 6298 + }, + { + "epoch": 0.93, + "learning_rate": 2.4489271431053066e-07, + "loss": 0.7798, + "step": 6299 + }, + { + "epoch": 0.93, + "learning_rate": 2.438403204108597e-07, + "loss": 0.79, + "step": 6300 + }, + { + "epoch": 0.93, + "learning_rate": 2.427901647342967e-07, + "loss": 0.7676, + "step": 6301 + }, + { + "epoch": 0.93, + "learning_rate": 2.4174224752176345e-07, + "loss": 0.7876, + "step": 6302 + }, + { + "epoch": 0.93, + "learning_rate": 2.40696569013672e-07, + "loss": 0.7393, + "step": 6303 + }, + { + "epoch": 0.93, + "learning_rate": 2.396531294499149e-07, + "loss": 0.8276, + "step": 6304 + }, + { + "epoch": 0.93, + "learning_rate": 2.386119290698752e-07, + "loss": 0.8145, + "step": 6305 + }, + { + "epoch": 0.93, + "learning_rate": 2.3757296811242281e-07, + "loss": 0.7842, + "step": 6306 + }, + { + "epoch": 0.93, + "learning_rate": 2.3653624681591048e-07, + "loss": 0.7954, + "step": 6307 + }, + { + "epoch": 0.93, + "learning_rate": 2.3550176541818015e-07, + "loss": 0.7358, + "step": 6308 + }, + { + "epoch": 0.93, + "learning_rate": 2.3446952415655977e-07, + "loss": 0.7168, + "step": 6309 + }, + { + "epoch": 0.93, + "learning_rate": 2.334395232678599e-07, + "loss": 0.7649, + "step": 6310 + }, + { + "epoch": 0.93, + "learning_rate": 2.3241176298838153e-07, + "loss": 0.7651, + "step": 6311 + }, + { + "epoch": 0.93, + "learning_rate": 2.3138624355391049e-07, + "loss": 0.3406, + "step": 6312 + }, + { + "epoch": 0.93, + "learning_rate": 2.3036296519971413e-07, + "loss": 0.6938, + "step": 6313 + }, + { + "epoch": 0.93, + "learning_rate": 2.2934192816055355e-07, + "loss": 0.7646, + "step": 6314 + }, + { + "epoch": 0.93, + "learning_rate": 2.283231326706681e-07, + "loss": 0.3262, + "step": 6315 + }, + { + "epoch": 0.93, + "learning_rate": 2.273065789637896e-07, + "loss": 0.769, + "step": 6316 + }, + { + "epoch": 0.93, + "learning_rate": 2.2629226727312936e-07, + "loss": 0.7549, + "step": 6317 + }, + { + "epoch": 0.93, + "learning_rate": 2.2528019783138678e-07, + "loss": 0.2981, + "step": 6318 + }, + { + "epoch": 0.93, + "learning_rate": 2.2427037087074942e-07, + "loss": 0.7847, + "step": 6319 + }, + { + "epoch": 0.93, + "learning_rate": 2.2326278662288648e-07, + "loss": 0.7861, + "step": 6320 + }, + { + "epoch": 0.93, + "learning_rate": 2.2225744531895632e-07, + "loss": 0.7388, + "step": 6321 + }, + { + "epoch": 0.93, + "learning_rate": 2.2125434718959892e-07, + "loss": 0.7183, + "step": 6322 + }, + { + "epoch": 0.94, + "learning_rate": 2.2025349246494021e-07, + "loss": 0.8096, + "step": 6323 + }, + { + "epoch": 0.94, + "learning_rate": 2.192548813745965e-07, + "loss": 0.7681, + "step": 6324 + }, + { + "epoch": 0.94, + "learning_rate": 2.182585141476623e-07, + "loss": 0.7969, + "step": 6325 + }, + { + "epoch": 0.94, + "learning_rate": 2.1726439101272145e-07, + "loss": 0.7485, + "step": 6326 + }, + { + "epoch": 0.94, + "learning_rate": 2.1627251219784262e-07, + "loss": 0.7944, + "step": 6327 + }, + { + "epoch": 0.94, + "learning_rate": 2.1528287793057934e-07, + "loss": 0.3145, + "step": 6328 + }, + { + "epoch": 0.94, + "learning_rate": 2.142954884379689e-07, + "loss": 0.7344, + "step": 6329 + }, + { + "epoch": 0.94, + "learning_rate": 2.133103439465356e-07, + "loss": 0.7314, + "step": 6330 + }, + { + "epoch": 0.94, + "learning_rate": 2.123274446822865e-07, + "loss": 0.7466, + "step": 6331 + }, + { + "epoch": 0.94, + "learning_rate": 2.1134679087071676e-07, + "loss": 0.3016, + "step": 6332 + }, + { + "epoch": 0.94, + "learning_rate": 2.1036838273680305e-07, + "loss": 0.3137, + "step": 6333 + }, + { + "epoch": 0.94, + "learning_rate": 2.0939222050500806e-07, + "loss": 0.3126, + "step": 6334 + }, + { + "epoch": 0.94, + "learning_rate": 2.0841830439928045e-07, + "loss": 0.686, + "step": 6335 + }, + { + "epoch": 0.94, + "learning_rate": 2.074466346430515e-07, + "loss": 0.771, + "step": 6336 + }, + { + "epoch": 0.94, + "learning_rate": 2.0647721145923948e-07, + "loss": 0.7632, + "step": 6337 + }, + { + "epoch": 0.94, + "learning_rate": 2.0551003507024546e-07, + "loss": 0.7988, + "step": 6338 + }, + { + "epoch": 0.94, + "learning_rate": 2.0454510569795416e-07, + "loss": 0.8389, + "step": 6339 + }, + { + "epoch": 0.94, + "learning_rate": 2.0358242356373735e-07, + "loss": 0.772, + "step": 6340 + }, + { + "epoch": 0.94, + "learning_rate": 2.0262198888845064e-07, + "loss": 0.7661, + "step": 6341 + }, + { + "epoch": 0.94, + "learning_rate": 2.016638018924344e-07, + "loss": 0.749, + "step": 6342 + }, + { + "epoch": 0.94, + "learning_rate": 2.0070786279550836e-07, + "loss": 0.7695, + "step": 6343 + }, + { + "epoch": 0.94, + "learning_rate": 1.9975417181698487e-07, + "loss": 0.7046, + "step": 6344 + }, + { + "epoch": 0.94, + "learning_rate": 1.9880272917565447e-07, + "loss": 0.8198, + "step": 6345 + }, + { + "epoch": 0.94, + "learning_rate": 1.9785353508979476e-07, + "loss": 0.7427, + "step": 6346 + }, + { + "epoch": 0.94, + "learning_rate": 1.9690658977716382e-07, + "loss": 0.7441, + "step": 6347 + }, + { + "epoch": 0.94, + "learning_rate": 1.9596189345501003e-07, + "loss": 0.7583, + "step": 6348 + }, + { + "epoch": 0.94, + "learning_rate": 1.95019446340059e-07, + "loss": 0.771, + "step": 6349 + }, + { + "epoch": 0.94, + "learning_rate": 1.9407924864852657e-07, + "loss": 0.7466, + "step": 6350 + }, + { + "epoch": 0.94, + "learning_rate": 1.9314130059610693e-07, + "loss": 0.7778, + "step": 6351 + }, + { + "epoch": 0.94, + "learning_rate": 1.9220560239798235e-07, + "loss": 0.7456, + "step": 6352 + }, + { + "epoch": 0.94, + "learning_rate": 1.9127215426881673e-07, + "loss": 0.7061, + "step": 6353 + }, + { + "epoch": 0.94, + "learning_rate": 1.9034095642275763e-07, + "loss": 0.7964, + "step": 6354 + }, + { + "epoch": 0.94, + "learning_rate": 1.894120090734397e-07, + "loss": 0.8223, + "step": 6355 + }, + { + "epoch": 0.94, + "learning_rate": 1.8848531243397471e-07, + "loss": 0.7925, + "step": 6356 + }, + { + "epoch": 0.94, + "learning_rate": 1.8756086671696594e-07, + "loss": 0.7881, + "step": 6357 + }, + { + "epoch": 0.94, + "learning_rate": 1.8663867213449373e-07, + "loss": 0.7837, + "step": 6358 + }, + { + "epoch": 0.94, + "learning_rate": 1.857187288981266e-07, + "loss": 0.6865, + "step": 6359 + }, + { + "epoch": 0.94, + "learning_rate": 1.8480103721891462e-07, + "loss": 0.7979, + "step": 6360 + }, + { + "epoch": 0.94, + "learning_rate": 1.838855973073883e-07, + "loss": 0.7568, + "step": 6361 + }, + { + "epoch": 0.94, + "learning_rate": 1.8297240937356742e-07, + "loss": 0.2937, + "step": 6362 + }, + { + "epoch": 0.94, + "learning_rate": 1.8206147362695214e-07, + "loss": 0.7988, + "step": 6363 + }, + { + "epoch": 0.94, + "learning_rate": 1.8115279027652533e-07, + "loss": 0.7593, + "step": 6364 + }, + { + "epoch": 0.94, + "learning_rate": 1.8024635953075353e-07, + "loss": 0.7808, + "step": 6365 + }, + { + "epoch": 0.94, + "learning_rate": 1.793421815975871e-07, + "loss": 0.8208, + "step": 6366 + }, + { + "epoch": 0.94, + "learning_rate": 1.784402566844601e-07, + "loss": 0.8086, + "step": 6367 + }, + { + "epoch": 0.94, + "learning_rate": 1.775405849982892e-07, + "loss": 0.7974, + "step": 6368 + }, + { + "epoch": 0.94, + "learning_rate": 1.7664316674547155e-07, + "loss": 0.771, + "step": 6369 + }, + { + "epoch": 0.94, + "learning_rate": 1.7574800213189137e-07, + "loss": 0.7505, + "step": 6370 + }, + { + "epoch": 0.94, + "learning_rate": 1.7485509136291322e-07, + "loss": 0.7974, + "step": 6371 + }, + { + "epoch": 0.94, + "learning_rate": 1.7396443464338663e-07, + "loss": 0.2888, + "step": 6372 + }, + { + "epoch": 0.94, + "learning_rate": 1.7307603217764257e-07, + "loss": 0.7651, + "step": 6373 + }, + { + "epoch": 0.94, + "learning_rate": 1.7218988416949355e-07, + "loss": 0.3091, + "step": 6374 + }, + { + "epoch": 0.94, + "learning_rate": 1.713059908222392e-07, + "loss": 0.7417, + "step": 6375 + }, + { + "epoch": 0.94, + "learning_rate": 1.704243523386573e-07, + "loss": 0.7471, + "step": 6376 + }, + { + "epoch": 0.94, + "learning_rate": 1.6954496892101047e-07, + "loss": 0.814, + "step": 6377 + }, + { + "epoch": 0.94, + "learning_rate": 1.6866784077104402e-07, + "loss": 0.7554, + "step": 6378 + }, + { + "epoch": 0.94, + "learning_rate": 1.677929680899848e-07, + "loss": 0.7637, + "step": 6379 + }, + { + "epoch": 0.94, + "learning_rate": 1.6692035107854332e-07, + "loss": 0.7402, + "step": 6380 + }, + { + "epoch": 0.94, + "learning_rate": 1.660499899369139e-07, + "loss": 0.7583, + "step": 6381 + }, + { + "epoch": 0.94, + "learning_rate": 1.6518188486476794e-07, + "loss": 0.7783, + "step": 6382 + }, + { + "epoch": 0.94, + "learning_rate": 1.6431603606126722e-07, + "loss": 0.8237, + "step": 6383 + }, + { + "epoch": 0.94, + "learning_rate": 1.6345244372504842e-07, + "loss": 0.8003, + "step": 6384 + }, + { + "epoch": 0.94, + "learning_rate": 1.6259110805423528e-07, + "loss": 0.7998, + "step": 6385 + }, + { + "epoch": 0.94, + "learning_rate": 1.6173202924643305e-07, + "loss": 0.7896, + "step": 6386 + }, + { + "epoch": 0.94, + "learning_rate": 1.6087520749872633e-07, + "loss": 0.3005, + "step": 6387 + }, + { + "epoch": 0.94, + "learning_rate": 1.600206430076856e-07, + "loss": 0.772, + "step": 6388 + }, + { + "epoch": 0.94, + "learning_rate": 1.5916833596936188e-07, + "loss": 0.8271, + "step": 6389 + }, + { + "epoch": 0.94, + "learning_rate": 1.583182865792876e-07, + "loss": 0.8267, + "step": 6390 + }, + { + "epoch": 0.95, + "learning_rate": 1.5747049503248013e-07, + "loss": 0.7622, + "step": 6391 + }, + { + "epoch": 0.95, + "learning_rate": 1.5662496152343275e-07, + "loss": 0.7642, + "step": 6392 + }, + { + "epoch": 0.95, + "learning_rate": 1.557816862461292e-07, + "loss": 0.7627, + "step": 6393 + }, + { + "epoch": 0.95, + "learning_rate": 1.549406693940292e-07, + "loss": 0.856, + "step": 6394 + }, + { + "epoch": 0.95, + "learning_rate": 1.5410191116007388e-07, + "loss": 0.7471, + "step": 6395 + }, + { + "epoch": 0.95, + "learning_rate": 1.532654117366894e-07, + "loss": 0.7017, + "step": 6396 + }, + { + "epoch": 0.95, + "learning_rate": 1.5243117131578445e-07, + "loss": 0.7603, + "step": 6397 + }, + { + "epoch": 0.95, + "learning_rate": 1.5159919008874368e-07, + "loss": 0.7812, + "step": 6398 + }, + { + "epoch": 0.95, + "learning_rate": 1.5076946824644002e-07, + "loss": 0.748, + "step": 6399 + }, + { + "epoch": 0.95, + "learning_rate": 1.4994200597922337e-07, + "loss": 0.8384, + "step": 6400 + }, + { + "epoch": 0.95, + "learning_rate": 1.491168034769297e-07, + "loss": 0.7876, + "step": 6401 + }, + { + "epoch": 0.95, + "learning_rate": 1.4829386092887199e-07, + "loss": 0.7344, + "step": 6402 + }, + { + "epoch": 0.95, + "learning_rate": 1.4747317852384702e-07, + "loss": 0.7803, + "step": 6403 + }, + { + "epoch": 0.95, + "learning_rate": 1.4665475645013417e-07, + "loss": 0.79, + "step": 6404 + }, + { + "epoch": 0.95, + "learning_rate": 1.458385948954899e-07, + "loss": 0.7031, + "step": 6405 + }, + { + "epoch": 0.95, + "learning_rate": 1.4502469404715892e-07, + "loss": 0.7744, + "step": 6406 + }, + { + "epoch": 0.95, + "learning_rate": 1.442130540918607e-07, + "loss": 0.2791, + "step": 6407 + }, + { + "epoch": 0.95, + "learning_rate": 1.4340367521579969e-07, + "loss": 0.7959, + "step": 6408 + }, + { + "epoch": 0.95, + "learning_rate": 1.4259655760466063e-07, + "loss": 0.7827, + "step": 6409 + }, + { + "epoch": 0.95, + "learning_rate": 1.4179170144360876e-07, + "loss": 0.8018, + "step": 6410 + }, + { + "epoch": 0.95, + "learning_rate": 1.4098910691729195e-07, + "loss": 0.7383, + "step": 6411 + }, + { + "epoch": 0.95, + "learning_rate": 1.4018877420983956e-07, + "loss": 0.7388, + "step": 6412 + }, + { + "epoch": 0.95, + "learning_rate": 1.3939070350485918e-07, + "loss": 0.8081, + "step": 6413 + }, + { + "epoch": 0.95, + "learning_rate": 1.385948949854432e-07, + "loss": 0.7114, + "step": 6414 + }, + { + "epoch": 0.95, + "learning_rate": 1.378013488341612e-07, + "loss": 0.7842, + "step": 6415 + }, + { + "epoch": 0.95, + "learning_rate": 1.3701006523306747e-07, + "loss": 0.7432, + "step": 6416 + }, + { + "epoch": 0.95, + "learning_rate": 1.3622104436369465e-07, + "loss": 0.7568, + "step": 6417 + }, + { + "epoch": 0.95, + "learning_rate": 1.3543428640705568e-07, + "loss": 0.8184, + "step": 6418 + }, + { + "epoch": 0.95, + "learning_rate": 1.3464979154364844e-07, + "loss": 0.7354, + "step": 6419 + }, + { + "epoch": 0.95, + "learning_rate": 1.338675599534478e-07, + "loss": 0.7827, + "step": 6420 + }, + { + "epoch": 0.95, + "learning_rate": 1.3308759181590912e-07, + "loss": 0.7749, + "step": 6421 + }, + { + "epoch": 0.95, + "learning_rate": 1.323098873099715e-07, + "loss": 0.7783, + "step": 6422 + }, + { + "epoch": 0.95, + "learning_rate": 1.3153444661405335e-07, + "loss": 0.7617, + "step": 6423 + }, + { + "epoch": 0.95, + "learning_rate": 1.307612699060523e-07, + "loss": 0.7549, + "step": 6424 + }, + { + "epoch": 0.95, + "learning_rate": 1.2999035736334874e-07, + "loss": 0.7759, + "step": 6425 + }, + { + "epoch": 0.95, + "learning_rate": 1.2922170916280118e-07, + "loss": 0.8345, + "step": 6426 + }, + { + "epoch": 0.95, + "learning_rate": 1.2845532548075301e-07, + "loss": 0.7354, + "step": 6427 + }, + { + "epoch": 0.95, + "learning_rate": 1.2769120649302247e-07, + "loss": 0.7207, + "step": 6428 + }, + { + "epoch": 0.95, + "learning_rate": 1.2692935237491154e-07, + "loss": 0.8125, + "step": 6429 + }, + { + "epoch": 0.95, + "learning_rate": 1.2616976330120268e-07, + "loss": 0.8394, + "step": 6430 + }, + { + "epoch": 0.95, + "learning_rate": 1.2541243944615755e-07, + "loss": 0.7607, + "step": 6431 + }, + { + "epoch": 0.95, + "learning_rate": 1.2465738098352053e-07, + "loss": 0.7373, + "step": 6432 + }, + { + "epoch": 0.95, + "learning_rate": 1.2390458808651085e-07, + "loss": 0.7422, + "step": 6433 + }, + { + "epoch": 0.95, + "learning_rate": 1.2315406092783478e-07, + "loss": 0.8135, + "step": 6434 + }, + { + "epoch": 0.95, + "learning_rate": 1.2240579967967348e-07, + "loss": 0.7725, + "step": 6435 + }, + { + "epoch": 0.95, + "learning_rate": 1.2165980451369186e-07, + "loss": 0.7603, + "step": 6436 + }, + { + "epoch": 0.95, + "learning_rate": 1.2091607560103304e-07, + "loss": 0.7656, + "step": 6437 + }, + { + "epoch": 0.95, + "learning_rate": 1.2017461311231938e-07, + "loss": 0.7905, + "step": 6438 + }, + { + "epoch": 0.95, + "learning_rate": 1.1943541721765596e-07, + "loss": 0.7446, + "step": 6439 + }, + { + "epoch": 0.95, + "learning_rate": 1.186984880866271e-07, + "loss": 0.7905, + "step": 6440 + }, + { + "epoch": 0.95, + "learning_rate": 1.1796382588829425e-07, + "loss": 0.811, + "step": 6441 + }, + { + "epoch": 0.95, + "learning_rate": 1.1723143079120369e-07, + "loss": 0.793, + "step": 6442 + }, + { + "epoch": 0.95, + "learning_rate": 1.1650130296337548e-07, + "loss": 0.687, + "step": 6443 + }, + { + "epoch": 0.95, + "learning_rate": 1.157734425723156e-07, + "loss": 0.7534, + "step": 6444 + }, + { + "epoch": 0.95, + "learning_rate": 1.1504784978500606e-07, + "loss": 0.7969, + "step": 6445 + }, + { + "epoch": 0.95, + "learning_rate": 1.1432452476790922e-07, + "loss": 0.8535, + "step": 6446 + }, + { + "epoch": 0.95, + "learning_rate": 1.1360346768696907e-07, + "loss": 0.7739, + "step": 6447 + }, + { + "epoch": 0.95, + "learning_rate": 1.1288467870760766e-07, + "loss": 0.7832, + "step": 6448 + }, + { + "epoch": 0.95, + "learning_rate": 1.1216815799472647e-07, + "loss": 0.7959, + "step": 6449 + }, + { + "epoch": 0.95, + "learning_rate": 1.1145390571270731e-07, + "loss": 0.7817, + "step": 6450 + }, + { + "epoch": 0.95, + "learning_rate": 1.1074192202541134e-07, + "loss": 0.8169, + "step": 6451 + }, + { + "epoch": 0.95, + "learning_rate": 1.1003220709617901e-07, + "loss": 0.7349, + "step": 6452 + }, + { + "epoch": 0.95, + "learning_rate": 1.093247610878323e-07, + "loss": 0.8022, + "step": 6453 + }, + { + "epoch": 0.95, + "learning_rate": 1.0861958416266805e-07, + "loss": 0.7656, + "step": 6454 + }, + { + "epoch": 0.95, + "learning_rate": 1.0791667648246796e-07, + "loss": 0.7686, + "step": 6455 + }, + { + "epoch": 0.95, + "learning_rate": 1.0721603820848748e-07, + "loss": 0.8315, + "step": 6456 + }, + { + "epoch": 0.95, + "learning_rate": 1.0651766950146913e-07, + "loss": 0.7412, + "step": 6457 + }, + { + "epoch": 0.96, + "learning_rate": 1.058215705216259e-07, + "loss": 0.7661, + "step": 6458 + }, + { + "epoch": 0.96, + "learning_rate": 1.0512774142865667e-07, + "loss": 0.7651, + "step": 6459 + }, + { + "epoch": 0.96, + "learning_rate": 1.0443618238173636e-07, + "loss": 0.2893, + "step": 6460 + }, + { + "epoch": 0.96, + "learning_rate": 1.0374689353952027e-07, + "loss": 0.834, + "step": 6461 + }, + { + "epoch": 0.96, + "learning_rate": 1.0305987506014193e-07, + "loss": 0.7563, + "step": 6462 + }, + { + "epoch": 0.96, + "learning_rate": 1.0237512710121522e-07, + "loss": 0.8105, + "step": 6463 + }, + { + "epoch": 0.96, + "learning_rate": 1.0169264981983229e-07, + "loss": 0.8193, + "step": 6464 + }, + { + "epoch": 0.96, + "learning_rate": 1.0101244337256567e-07, + "loss": 0.8037, + "step": 6465 + }, + { + "epoch": 0.96, + "learning_rate": 1.0033450791546495e-07, + "loss": 0.7891, + "step": 6466 + }, + { + "epoch": 0.96, + "learning_rate": 9.965884360405909e-08, + "loss": 0.7817, + "step": 6467 + }, + { + "epoch": 0.96, + "learning_rate": 9.898545059335852e-08, + "loss": 0.8115, + "step": 6468 + }, + { + "epoch": 0.96, + "learning_rate": 9.831432903784744e-08, + "loss": 0.7871, + "step": 6469 + }, + { + "epoch": 0.96, + "learning_rate": 9.764547909149602e-08, + "loss": 0.7695, + "step": 6470 + }, + { + "epoch": 0.96, + "learning_rate": 9.697890090774597e-08, + "loss": 0.3335, + "step": 6471 + }, + { + "epoch": 0.96, + "learning_rate": 9.631459463952275e-08, + "loss": 0.752, + "step": 6472 + }, + { + "epoch": 0.96, + "learning_rate": 9.565256043922999e-08, + "loss": 0.7808, + "step": 6473 + }, + { + "epoch": 0.96, + "learning_rate": 9.499279845874843e-08, + "loss": 0.7437, + "step": 6474 + }, + { + "epoch": 0.96, + "learning_rate": 9.433530884943698e-08, + "loss": 0.7686, + "step": 6475 + }, + { + "epoch": 0.96, + "learning_rate": 9.368009176213722e-08, + "loss": 0.7593, + "step": 6476 + }, + { + "epoch": 0.96, + "learning_rate": 9.302714734716334e-08, + "loss": 0.8677, + "step": 6477 + }, + { + "epoch": 0.96, + "learning_rate": 9.237647575431553e-08, + "loss": 0.7539, + "step": 6478 + }, + { + "epoch": 0.96, + "learning_rate": 9.172807713286547e-08, + "loss": 0.7817, + "step": 6479 + }, + { + "epoch": 0.96, + "learning_rate": 9.108195163156642e-08, + "loss": 0.7944, + "step": 6480 + }, + { + "epoch": 0.96, + "learning_rate": 9.0438099398652e-08, + "loss": 0.749, + "step": 6481 + }, + { + "epoch": 0.96, + "learning_rate": 8.979652058183185e-08, + "loss": 0.8467, + "step": 6482 + }, + { + "epoch": 0.96, + "learning_rate": 8.91572153282938e-08, + "loss": 0.7822, + "step": 6483 + }, + { + "epoch": 0.96, + "learning_rate": 8.852018378470606e-08, + "loss": 0.7695, + "step": 6484 + }, + { + "epoch": 0.96, + "learning_rate": 8.788542609721284e-08, + "loss": 0.7852, + "step": 6485 + }, + { + "epoch": 0.96, + "learning_rate": 8.725294241143988e-08, + "loss": 0.8262, + "step": 6486 + }, + { + "epoch": 0.96, + "learning_rate": 8.66227328724878e-08, + "loss": 0.771, + "step": 6487 + }, + { + "epoch": 0.96, + "learning_rate": 8.599479762493756e-08, + "loss": 0.769, + "step": 6488 + }, + { + "epoch": 0.96, + "learning_rate": 8.536913681284731e-08, + "loss": 0.728, + "step": 6489 + }, + { + "epoch": 0.96, + "learning_rate": 8.474575057975332e-08, + "loss": 0.8247, + "step": 6490 + }, + { + "epoch": 0.96, + "learning_rate": 8.41246390686734e-08, + "loss": 0.7329, + "step": 6491 + }, + { + "epoch": 0.96, + "learning_rate": 8.350580242209805e-08, + "loss": 0.7534, + "step": 6492 + }, + { + "epoch": 0.96, + "learning_rate": 8.288924078199922e-08, + "loss": 0.7881, + "step": 6493 + }, + { + "epoch": 0.96, + "learning_rate": 8.227495428982601e-08, + "loss": 0.7446, + "step": 6494 + }, + { + "epoch": 0.96, + "learning_rate": 8.16629430865068e-08, + "loss": 0.7646, + "step": 6495 + }, + { + "epoch": 0.96, + "learning_rate": 8.105320731244703e-08, + "loss": 0.8066, + "step": 6496 + }, + { + "epoch": 0.96, + "learning_rate": 8.044574710752928e-08, + "loss": 0.7993, + "step": 6497 + }, + { + "epoch": 0.96, + "learning_rate": 7.984056261111428e-08, + "loss": 0.8042, + "step": 6498 + }, + { + "epoch": 0.96, + "learning_rate": 7.923765396204208e-08, + "loss": 0.7056, + "step": 6499 + }, + { + "epoch": 0.96, + "learning_rate": 7.863702129863093e-08, + "loss": 0.769, + "step": 6500 + }, + { + "epoch": 0.96, + "learning_rate": 7.803866475867394e-08, + "loss": 0.8174, + "step": 6501 + }, + { + "epoch": 0.96, + "learning_rate": 7.744258447944464e-08, + "loss": 0.7393, + "step": 6502 + }, + { + "epoch": 0.96, + "learning_rate": 7.684878059769363e-08, + "loss": 0.7969, + "step": 6503 + }, + { + "epoch": 0.96, + "learning_rate": 7.625725324965084e-08, + "loss": 0.3057, + "step": 6504 + }, + { + "epoch": 0.96, + "learning_rate": 7.566800257101881e-08, + "loss": 0.7441, + "step": 6505 + }, + { + "epoch": 0.96, + "learning_rate": 7.508102869698386e-08, + "loss": 0.8076, + "step": 6506 + }, + { + "epoch": 0.96, + "learning_rate": 7.449633176220717e-08, + "loss": 0.811, + "step": 6507 + }, + { + "epoch": 0.96, + "learning_rate": 7.391391190082808e-08, + "loss": 0.7769, + "step": 6508 + }, + { + "epoch": 0.96, + "learning_rate": 7.333376924646197e-08, + "loss": 0.8193, + "step": 6509 + }, + { + "epoch": 0.96, + "learning_rate": 7.275590393220456e-08, + "loss": 0.6816, + "step": 6510 + }, + { + "epoch": 0.96, + "learning_rate": 7.218031609062538e-08, + "loss": 0.8101, + "step": 6511 + }, + { + "epoch": 0.96, + "learning_rate": 7.160700585377655e-08, + "loss": 0.7935, + "step": 6512 + }, + { + "epoch": 0.96, + "learning_rate": 7.103597335318402e-08, + "loss": 0.7583, + "step": 6513 + }, + { + "epoch": 0.96, + "learning_rate": 7.046721871985074e-08, + "loss": 0.7754, + "step": 6514 + }, + { + "epoch": 0.96, + "learning_rate": 6.990074208425901e-08, + "loss": 0.8286, + "step": 6515 + }, + { + "epoch": 0.96, + "learning_rate": 6.93365435763682e-08, + "loss": 0.7974, + "step": 6516 + }, + { + "epoch": 0.96, + "learning_rate": 6.877462332561479e-08, + "loss": 0.8149, + "step": 6517 + }, + { + "epoch": 0.96, + "learning_rate": 6.821498146091232e-08, + "loss": 0.7617, + "step": 6518 + }, + { + "epoch": 0.96, + "learning_rate": 6.765761811065141e-08, + "loss": 0.7524, + "step": 6519 + }, + { + "epoch": 0.96, + "learning_rate": 6.710253340270089e-08, + "loss": 0.7656, + "step": 6520 + }, + { + "epoch": 0.96, + "learning_rate": 6.654972746440557e-08, + "loss": 0.8335, + "step": 6521 + }, + { + "epoch": 0.96, + "learning_rate": 6.599920042258956e-08, + "loss": 0.832, + "step": 6522 + }, + { + "epoch": 0.96, + "learning_rate": 6.545095240355182e-08, + "loss": 0.7529, + "step": 6523 + }, + { + "epoch": 0.96, + "learning_rate": 6.49049835330684e-08, + "loss": 0.8022, + "step": 6524 + }, + { + "epoch": 0.96, + "learning_rate": 6.436129393639579e-08, + "loss": 0.79, + "step": 6525 + }, + { + "epoch": 0.97, + "learning_rate": 6.381988373826531e-08, + "loss": 0.7129, + "step": 6526 + }, + { + "epoch": 0.97, + "learning_rate": 6.328075306288317e-08, + "loss": 0.7383, + "step": 6527 + }, + { + "epoch": 0.97, + "learning_rate": 6.274390203393488e-08, + "loss": 0.7769, + "step": 6528 + }, + { + "epoch": 0.97, + "learning_rate": 6.220933077458413e-08, + "loss": 0.2939, + "step": 6529 + }, + { + "epoch": 0.97, + "learning_rate": 6.167703940747172e-08, + "loss": 0.7524, + "step": 6530 + }, + { + "epoch": 0.97, + "learning_rate": 6.114702805471107e-08, + "loss": 0.7871, + "step": 6531 + }, + { + "epoch": 0.97, + "learning_rate": 6.061929683789714e-08, + "loss": 0.7622, + "step": 6532 + }, + { + "epoch": 0.97, + "learning_rate": 6.009384587809975e-08, + "loss": 0.7842, + "step": 6533 + }, + { + "epoch": 0.97, + "learning_rate": 5.95706752958658e-08, + "loss": 0.752, + "step": 6534 + }, + { + "epoch": 0.97, + "learning_rate": 5.90497852112204e-08, + "loss": 0.7686, + "step": 6535 + }, + { + "epoch": 0.97, + "learning_rate": 5.8531175743661294e-08, + "loss": 0.7539, + "step": 6536 + }, + { + "epoch": 0.97, + "learning_rate": 5.801484701216997e-08, + "loss": 0.8169, + "step": 6537 + }, + { + "epoch": 0.97, + "learning_rate": 5.750079913519835e-08, + "loss": 0.8032, + "step": 6538 + }, + { + "epoch": 0.97, + "learning_rate": 5.698903223067653e-08, + "loss": 0.8345, + "step": 6539 + }, + { + "epoch": 0.97, + "learning_rate": 5.647954641601505e-08, + "loss": 0.7178, + "step": 6540 + }, + { + "epoch": 0.97, + "learning_rate": 5.597234180809597e-08, + "loss": 0.8022, + "step": 6541 + }, + { + "epoch": 0.97, + "learning_rate": 5.5467418523281745e-08, + "loss": 0.7534, + "step": 6542 + }, + { + "epoch": 0.97, + "learning_rate": 5.496477667740974e-08, + "loss": 0.8022, + "step": 6543 + }, + { + "epoch": 0.97, + "learning_rate": 5.446441638579214e-08, + "loss": 0.7896, + "step": 6544 + }, + { + "epoch": 0.97, + "learning_rate": 5.3966337763223795e-08, + "loss": 0.749, + "step": 6545 + }, + { + "epoch": 0.97, + "learning_rate": 5.347054092396886e-08, + "loss": 0.7861, + "step": 6546 + }, + { + "epoch": 0.97, + "learning_rate": 5.297702598177301e-08, + "loss": 0.7598, + "step": 6547 + }, + { + "epoch": 0.97, + "learning_rate": 5.2485793049855685e-08, + "loss": 0.7993, + "step": 6548 + }, + { + "epoch": 0.97, + "learning_rate": 5.19968422409145e-08, + "loss": 0.7905, + "step": 6549 + }, + { + "epoch": 0.97, + "learning_rate": 5.1510173667121966e-08, + "loss": 0.7837, + "step": 6550 + }, + { + "epoch": 0.97, + "learning_rate": 5.102578744012876e-08, + "loss": 0.7705, + "step": 6551 + }, + { + "epoch": 0.97, + "learning_rate": 5.054368367106044e-08, + "loss": 0.7354, + "step": 6552 + }, + { + "epoch": 0.97, + "learning_rate": 5.006386247051964e-08, + "loss": 0.7798, + "step": 6553 + }, + { + "epoch": 0.97, + "learning_rate": 4.958632394858498e-08, + "loss": 0.7798, + "step": 6554 + }, + { + "epoch": 0.97, + "learning_rate": 4.911106821481215e-08, + "loss": 0.7529, + "step": 6555 + }, + { + "epoch": 0.97, + "learning_rate": 4.863809537823283e-08, + "loss": 0.7256, + "step": 6556 + }, + { + "epoch": 0.97, + "learning_rate": 4.8167405547353555e-08, + "loss": 0.7456, + "step": 6557 + }, + { + "epoch": 0.97, + "learning_rate": 4.769899883016016e-08, + "loss": 0.7539, + "step": 6558 + }, + { + "epoch": 0.97, + "learning_rate": 4.723287533411003e-08, + "loss": 0.7954, + "step": 6559 + }, + { + "epoch": 0.97, + "learning_rate": 4.676903516614206e-08, + "loss": 0.7646, + "step": 6560 + }, + { + "epoch": 0.97, + "learning_rate": 4.630747843266781e-08, + "loss": 0.7085, + "step": 6561 + }, + { + "epoch": 0.97, + "learning_rate": 4.5848205239575894e-08, + "loss": 0.8296, + "step": 6562 + }, + { + "epoch": 0.97, + "learning_rate": 4.5391215692232036e-08, + "loss": 0.3153, + "step": 6563 + }, + { + "epoch": 0.97, + "learning_rate": 4.493650989547682e-08, + "loss": 0.7773, + "step": 6564 + }, + { + "epoch": 0.97, + "learning_rate": 4.4484087953625685e-08, + "loss": 0.7788, + "step": 6565 + }, + { + "epoch": 0.97, + "learning_rate": 4.403394997047339e-08, + "loss": 0.7085, + "step": 6566 + }, + { + "epoch": 0.97, + "learning_rate": 4.358609604928732e-08, + "loss": 0.7871, + "step": 6567 + }, + { + "epoch": 0.97, + "learning_rate": 4.314052629281418e-08, + "loss": 0.7964, + "step": 6568 + }, + { + "epoch": 0.97, + "learning_rate": 4.269724080327553e-08, + "loss": 0.7744, + "step": 6569 + }, + { + "epoch": 0.97, + "learning_rate": 4.2256239682365585e-08, + "loss": 0.8027, + "step": 6570 + }, + { + "epoch": 0.97, + "learning_rate": 4.1817523031258965e-08, + "loss": 0.8398, + "step": 6571 + }, + { + "epoch": 0.97, + "learning_rate": 4.1381090950605165e-08, + "loss": 0.7637, + "step": 6572 + }, + { + "epoch": 0.97, + "learning_rate": 4.094694354052742e-08, + "loss": 0.2905, + "step": 6573 + }, + { + "epoch": 0.97, + "learning_rate": 4.0515080900627165e-08, + "loss": 0.769, + "step": 6574 + }, + { + "epoch": 0.97, + "learning_rate": 4.0085503129980716e-08, + "loss": 0.7798, + "step": 6575 + }, + { + "epoch": 0.97, + "learning_rate": 3.9658210327139236e-08, + "loss": 0.8623, + "step": 6576 + }, + { + "epoch": 0.97, + "learning_rate": 3.923320259013208e-08, + "loss": 0.7192, + "step": 6577 + }, + { + "epoch": 0.97, + "learning_rate": 3.881048001646237e-08, + "loss": 0.8096, + "step": 6578 + }, + { + "epoch": 0.97, + "learning_rate": 3.8390042703110305e-08, + "loss": 0.7175, + "step": 6579 + }, + { + "epoch": 0.97, + "learning_rate": 3.797189074652874e-08, + "loss": 0.8086, + "step": 6580 + }, + { + "epoch": 0.97, + "learning_rate": 3.755602424265203e-08, + "loss": 0.7505, + "step": 6581 + }, + { + "epoch": 0.97, + "learning_rate": 3.714244328688388e-08, + "loss": 0.8018, + "step": 6582 + }, + { + "epoch": 0.97, + "learning_rate": 3.6731147974108374e-08, + "loss": 0.7827, + "step": 6583 + }, + { + "epoch": 0.97, + "learning_rate": 3.632213839868226e-08, + "loss": 0.8228, + "step": 6584 + }, + { + "epoch": 0.97, + "learning_rate": 3.5915414654440486e-08, + "loss": 0.7563, + "step": 6585 + }, + { + "epoch": 0.97, + "learning_rate": 3.551097683468951e-08, + "loss": 0.3293, + "step": 6586 + }, + { + "epoch": 0.97, + "learning_rate": 3.5108825032217355e-08, + "loss": 0.769, + "step": 6587 + }, + { + "epoch": 0.97, + "learning_rate": 3.470895933928131e-08, + "loss": 0.7651, + "step": 6588 + }, + { + "epoch": 0.97, + "learning_rate": 3.431137984761912e-08, + "loss": 0.7905, + "step": 6589 + }, + { + "epoch": 0.97, + "learning_rate": 3.391608664844115e-08, + "loss": 0.7834, + "step": 6590 + }, + { + "epoch": 0.97, + "learning_rate": 3.3523079832434854e-08, + "loss": 0.8145, + "step": 6591 + }, + { + "epoch": 0.97, + "learning_rate": 3.313235948976146e-08, + "loss": 0.8135, + "step": 6592 + }, + { + "epoch": 0.98, + "learning_rate": 3.2743925710058135e-08, + "loss": 0.7651, + "step": 6593 + }, + { + "epoch": 0.98, + "learning_rate": 3.235777858244027e-08, + "loss": 0.7534, + "step": 6594 + }, + { + "epoch": 0.98, + "learning_rate": 3.197391819549478e-08, + "loss": 0.7622, + "step": 6595 + }, + { + "epoch": 0.98, + "learning_rate": 3.1592344637284554e-08, + "loss": 0.7559, + "step": 6596 + }, + { + "epoch": 0.98, + "learning_rate": 3.1213057995350684e-08, + "loss": 0.7549, + "step": 6597 + }, + { + "epoch": 0.98, + "learning_rate": 3.0836058356708e-08, + "loss": 0.3024, + "step": 6598 + }, + { + "epoch": 0.98, + "learning_rate": 3.04613458078451e-08, + "loss": 0.7876, + "step": 6599 + }, + { + "epoch": 0.98, + "learning_rate": 3.008892043472655e-08, + "loss": 0.7988, + "step": 6600 + }, + { + "epoch": 0.98, + "learning_rate": 2.9718782322794015e-08, + "loss": 0.7134, + "step": 6601 + }, + { + "epoch": 0.98, + "learning_rate": 2.9350931556964e-08, + "loss": 0.8179, + "step": 6602 + }, + { + "epoch": 0.98, + "learning_rate": 2.8985368221625677e-08, + "loss": 0.8008, + "step": 6603 + }, + { + "epoch": 0.98, + "learning_rate": 2.8622092400647507e-08, + "loss": 0.8076, + "step": 6604 + }, + { + "epoch": 0.98, + "learning_rate": 2.8261104177368382e-08, + "loss": 0.7412, + "step": 6605 + }, + { + "epoch": 0.98, + "learning_rate": 2.79024036346065e-08, + "loss": 0.7563, + "step": 6606 + }, + { + "epoch": 0.98, + "learning_rate": 2.7545990854653816e-08, + "loss": 0.7407, + "step": 6607 + }, + { + "epoch": 0.98, + "learning_rate": 2.719186591927603e-08, + "loss": 0.7712, + "step": 6608 + }, + { + "epoch": 0.98, + "learning_rate": 2.6840028909715933e-08, + "loss": 0.7686, + "step": 6609 + }, + { + "epoch": 0.98, + "learning_rate": 2.649047990669118e-08, + "loss": 0.7129, + "step": 6610 + }, + { + "epoch": 0.98, + "learning_rate": 2.6143218990393182e-08, + "loss": 0.7358, + "step": 6611 + }, + { + "epoch": 0.98, + "learning_rate": 2.5798246240489323e-08, + "loss": 0.6992, + "step": 6612 + }, + { + "epoch": 0.98, + "learning_rate": 2.5455561736122958e-08, + "loss": 0.7476, + "step": 6613 + }, + { + "epoch": 0.98, + "learning_rate": 2.5115165555911204e-08, + "loss": 0.7441, + "step": 6614 + }, + { + "epoch": 0.98, + "learning_rate": 2.4777057777946034e-08, + "loss": 0.7959, + "step": 6615 + }, + { + "epoch": 0.98, + "learning_rate": 2.4441238479795404e-08, + "loss": 0.3098, + "step": 6616 + }, + { + "epoch": 0.98, + "learning_rate": 2.4107707738501017e-08, + "loss": 0.8208, + "step": 6617 + }, + { + "epoch": 0.98, + "learning_rate": 2.377646563058056e-08, + "loss": 0.7783, + "step": 6618 + }, + { + "epoch": 0.98, + "learning_rate": 2.344751223202768e-08, + "loss": 0.7393, + "step": 6619 + }, + { + "epoch": 0.98, + "learning_rate": 2.3120847618308683e-08, + "loss": 0.7729, + "step": 6620 + }, + { + "epoch": 0.98, + "learning_rate": 2.2796471864364733e-08, + "loss": 0.7524, + "step": 6621 + }, + { + "epoch": 0.98, + "learning_rate": 2.2474385044615188e-08, + "loss": 0.7485, + "step": 6622 + }, + { + "epoch": 0.98, + "learning_rate": 2.2154587232949833e-08, + "loss": 0.8364, + "step": 6623 + }, + { + "epoch": 0.98, + "learning_rate": 2.183707850273664e-08, + "loss": 0.7905, + "step": 6624 + }, + { + "epoch": 0.98, + "learning_rate": 2.1521858926817353e-08, + "loss": 0.7266, + "step": 6625 + }, + { + "epoch": 0.98, + "learning_rate": 2.1208928577508557e-08, + "loss": 0.7734, + "step": 6626 + }, + { + "epoch": 0.98, + "learning_rate": 2.0898287526601725e-08, + "loss": 0.8643, + "step": 6627 + }, + { + "epoch": 0.98, + "learning_rate": 2.0589935845363174e-08, + "loss": 0.8057, + "step": 6628 + }, + { + "epoch": 0.98, + "learning_rate": 2.028387360453188e-08, + "loss": 0.7412, + "step": 6629 + }, + { + "epoch": 0.98, + "learning_rate": 1.9980100874326115e-08, + "loss": 0.7607, + "step": 6630 + }, + { + "epoch": 0.98, + "learning_rate": 1.967861772443458e-08, + "loss": 0.8091, + "step": 6631 + }, + { + "epoch": 0.98, + "learning_rate": 1.9379424224024168e-08, + "loss": 0.7188, + "step": 6632 + }, + { + "epoch": 0.98, + "learning_rate": 1.908252044173331e-08, + "loss": 0.7764, + "step": 6633 + }, + { + "epoch": 0.98, + "learning_rate": 1.8787906445676406e-08, + "loss": 0.7563, + "step": 6634 + }, + { + "epoch": 0.98, + "learning_rate": 1.8495582303443837e-08, + "loss": 0.7837, + "step": 6635 + }, + { + "epoch": 0.98, + "learning_rate": 1.8205548082099733e-08, + "loss": 0.7939, + "step": 6636 + }, + { + "epoch": 0.98, + "learning_rate": 1.791780384818087e-08, + "loss": 0.7944, + "step": 6637 + }, + { + "epoch": 0.98, + "learning_rate": 1.763234966770333e-08, + "loss": 0.793, + "step": 6638 + }, + { + "epoch": 0.98, + "learning_rate": 1.7349185606151396e-08, + "loss": 0.7402, + "step": 6639 + }, + { + "epoch": 0.98, + "learning_rate": 1.7068311728490882e-08, + "loss": 0.3091, + "step": 6640 + }, + { + "epoch": 0.98, + "learning_rate": 1.678972809915802e-08, + "loss": 0.8281, + "step": 6641 + }, + { + "epoch": 0.98, + "learning_rate": 1.6513434782062797e-08, + "loss": 0.7969, + "step": 6642 + }, + { + "epoch": 0.98, + "learning_rate": 1.623943184059229e-08, + "loss": 0.7749, + "step": 6643 + }, + { + "epoch": 0.98, + "learning_rate": 1.5967719337608434e-08, + "loss": 0.7607, + "step": 6644 + }, + { + "epoch": 0.98, + "learning_rate": 1.569829733544581e-08, + "loss": 0.7676, + "step": 6645 + }, + { + "epoch": 0.98, + "learning_rate": 1.543116589591387e-08, + "loss": 0.7402, + "step": 6646 + }, + { + "epoch": 0.98, + "learning_rate": 1.5166325080295808e-08, + "loss": 0.771, + "step": 6647 + }, + { + "epoch": 0.98, + "learning_rate": 1.4903774949354133e-08, + "loss": 0.769, + "step": 6648 + }, + { + "epoch": 0.98, + "learning_rate": 1.4643515563318444e-08, + "loss": 0.8008, + "step": 6649 + }, + { + "epoch": 0.98, + "learning_rate": 1.4385546981897647e-08, + "loss": 0.7935, + "step": 6650 + }, + { + "epoch": 0.98, + "learning_rate": 1.4129869264275508e-08, + "loss": 0.8071, + "step": 6651 + }, + { + "epoch": 0.98, + "learning_rate": 1.3876482469106223e-08, + "loss": 0.7979, + "step": 6652 + }, + { + "epoch": 0.98, + "learning_rate": 1.3625386654522176e-08, + "loss": 0.813, + "step": 6653 + }, + { + "epoch": 0.98, + "learning_rate": 1.3376581878129514e-08, + "loss": 0.8315, + "step": 6654 + }, + { + "epoch": 0.98, + "learning_rate": 1.3130068197007018e-08, + "loss": 0.771, + "step": 6655 + }, + { + "epoch": 0.98, + "learning_rate": 1.288584566770945e-08, + "loss": 0.7192, + "step": 6656 + }, + { + "epoch": 0.98, + "learning_rate": 1.264391434626533e-08, + "loss": 0.8042, + "step": 6657 + }, + { + "epoch": 0.98, + "learning_rate": 1.240427428817692e-08, + "loss": 0.7749, + "step": 6658 + }, + { + "epoch": 0.98, + "learning_rate": 1.216692554842358e-08, + "loss": 0.7104, + "step": 6659 + }, + { + "epoch": 0.98, + "learning_rate": 1.1931868181455087e-08, + "loss": 0.7832, + "step": 6660 + }, + { + "epoch": 0.99, + "learning_rate": 1.1699102241198302e-08, + "loss": 0.7612, + "step": 6661 + }, + { + "epoch": 0.99, + "learning_rate": 1.1468627781052732e-08, + "loss": 0.7988, + "step": 6662 + }, + { + "epoch": 0.99, + "learning_rate": 1.1240444853894972e-08, + "loss": 0.3474, + "step": 6663 + }, + { + "epoch": 0.99, + "learning_rate": 1.1014553512072036e-08, + "loss": 0.7666, + "step": 6664 + }, + { + "epoch": 0.99, + "learning_rate": 1.0790953807406912e-08, + "loss": 0.8384, + "step": 6665 + }, + { + "epoch": 0.99, + "learning_rate": 1.056964579119968e-08, + "loss": 0.8027, + "step": 6666 + }, + { + "epoch": 0.99, + "learning_rate": 1.0350629514218613e-08, + "loss": 0.8325, + "step": 6667 + }, + { + "epoch": 0.99, + "learning_rate": 1.01339050267113e-08, + "loss": 0.7983, + "step": 6668 + }, + { + "epoch": 0.99, + "learning_rate": 9.919472378397966e-09, + "loss": 0.7739, + "step": 6669 + }, + { + "epoch": 0.99, + "learning_rate": 9.707331618472593e-09, + "loss": 0.7607, + "step": 6670 + }, + { + "epoch": 0.99, + "learning_rate": 9.49748279560514e-09, + "loss": 0.7524, + "step": 6671 + }, + { + "epoch": 0.99, + "learning_rate": 9.289925957935985e-09, + "loss": 0.2792, + "step": 6672 + }, + { + "epoch": 0.99, + "learning_rate": 9.084661153083706e-09, + "loss": 0.7017, + "step": 6673 + }, + { + "epoch": 0.99, + "learning_rate": 8.881688428139523e-09, + "loss": 0.7578, + "step": 6674 + }, + { + "epoch": 0.99, + "learning_rate": 8.681007829667298e-09, + "loss": 0.2969, + "step": 6675 + }, + { + "epoch": 0.99, + "learning_rate": 8.482619403707981e-09, + "loss": 0.8018, + "step": 6676 + }, + { + "epoch": 0.99, + "learning_rate": 8.286523195774054e-09, + "loss": 0.8223, + "step": 6677 + }, + { + "epoch": 0.99, + "learning_rate": 8.092719250853975e-09, + "loss": 0.7388, + "step": 6678 + }, + { + "epoch": 0.99, + "learning_rate": 7.901207613408845e-09, + "loss": 0.7886, + "step": 6679 + }, + { + "epoch": 0.99, + "learning_rate": 7.71198832737463e-09, + "loss": 0.7383, + "step": 6680 + }, + { + "epoch": 0.99, + "learning_rate": 7.525061436162162e-09, + "loss": 0.3213, + "step": 6681 + }, + { + "epoch": 0.99, + "learning_rate": 7.340426982654914e-09, + "loss": 0.7485, + "step": 6682 + }, + { + "epoch": 0.99, + "learning_rate": 7.1580850092112245e-09, + "loss": 0.7407, + "step": 6683 + }, + { + "epoch": 0.99, + "learning_rate": 6.978035557664298e-09, + "loss": 0.7461, + "step": 6684 + }, + { + "epoch": 0.99, + "learning_rate": 6.800278669317762e-09, + "loss": 0.7129, + "step": 6685 + }, + { + "epoch": 0.99, + "learning_rate": 6.624814384954548e-09, + "loss": 0.7417, + "step": 6686 + }, + { + "epoch": 0.99, + "learning_rate": 6.451642744828013e-09, + "loss": 0.7598, + "step": 6687 + }, + { + "epoch": 0.99, + "learning_rate": 6.2807637886663775e-09, + "loss": 0.793, + "step": 6688 + }, + { + "epoch": 0.99, + "learning_rate": 6.112177555672727e-09, + "loss": 0.7817, + "step": 6689 + }, + { + "epoch": 0.99, + "learning_rate": 5.945884084522791e-09, + "loss": 0.751, + "step": 6690 + }, + { + "epoch": 0.99, + "learning_rate": 5.781883413367162e-09, + "loss": 0.7556, + "step": 6691 + }, + { + "epoch": 0.99, + "learning_rate": 5.6201755798313e-09, + "loss": 0.7979, + "step": 6692 + }, + { + "epoch": 0.99, + "learning_rate": 5.460760621012196e-09, + "loss": 0.7749, + "step": 6693 + }, + { + "epoch": 0.99, + "learning_rate": 5.303638573482817e-09, + "loss": 0.7456, + "step": 6694 + }, + { + "epoch": 0.99, + "learning_rate": 5.148809473289884e-09, + "loss": 0.7827, + "step": 6695 + }, + { + "epoch": 0.99, + "learning_rate": 4.996273355953873e-09, + "loss": 0.8062, + "step": 6696 + }, + { + "epoch": 0.99, + "learning_rate": 4.8460302564679044e-09, + "loss": 0.7576, + "step": 6697 + }, + { + "epoch": 0.99, + "learning_rate": 4.69808020930218e-09, + "loss": 0.834, + "step": 6698 + }, + { + "epoch": 0.99, + "learning_rate": 4.55242324839622e-09, + "loss": 0.771, + "step": 6699 + }, + { + "epoch": 0.99, + "learning_rate": 4.4090594071688475e-09, + "loss": 0.7163, + "step": 6700 + }, + { + "epoch": 0.99, + "learning_rate": 4.267988718508198e-09, + "loss": 0.7192, + "step": 6701 + }, + { + "epoch": 0.99, + "learning_rate": 4.129211214778384e-09, + "loss": 0.8091, + "step": 6702 + }, + { + "epoch": 0.99, + "learning_rate": 3.99272692781838e-09, + "loss": 0.8008, + "step": 6703 + }, + { + "epoch": 0.99, + "learning_rate": 3.858535888938697e-09, + "loss": 0.7808, + "step": 6704 + }, + { + "epoch": 0.99, + "learning_rate": 3.726638128925819e-09, + "loss": 0.7251, + "step": 6705 + }, + { + "epoch": 0.99, + "learning_rate": 3.597033678038875e-09, + "loss": 0.7505, + "step": 6706 + }, + { + "epoch": 0.99, + "learning_rate": 3.469722566010747e-09, + "loss": 0.8062, + "step": 6707 + }, + { + "epoch": 0.99, + "learning_rate": 3.344704822049183e-09, + "loss": 0.7891, + "step": 6708 + }, + { + "epoch": 0.99, + "learning_rate": 3.2219804748367946e-09, + "loss": 0.7788, + "step": 6709 + }, + { + "epoch": 0.99, + "learning_rate": 3.1015495525255067e-09, + "loss": 0.8438, + "step": 6710 + }, + { + "epoch": 0.99, + "learning_rate": 2.9834120827465506e-09, + "loss": 0.7676, + "step": 6711 + }, + { + "epoch": 0.99, + "learning_rate": 2.8675680926015805e-09, + "loss": 0.7769, + "step": 6712 + }, + { + "epoch": 0.99, + "learning_rate": 2.7540176086671145e-09, + "loss": 0.75, + "step": 6713 + }, + { + "epoch": 0.99, + "learning_rate": 2.642760656994536e-09, + "loss": 0.792, + "step": 6714 + }, + { + "epoch": 0.99, + "learning_rate": 2.5337972631067633e-09, + "loss": 0.7378, + "step": 6715 + }, + { + "epoch": 0.99, + "learning_rate": 2.4271274520015766e-09, + "loss": 0.7749, + "step": 6716 + }, + { + "epoch": 0.99, + "learning_rate": 2.3227512481516224e-09, + "loss": 0.7227, + "step": 6717 + }, + { + "epoch": 0.99, + "learning_rate": 2.2206686755033013e-09, + "loss": 0.7925, + "step": 6718 + }, + { + "epoch": 0.99, + "learning_rate": 2.1208797574745477e-09, + "loss": 0.7852, + "step": 6719 + }, + { + "epoch": 0.99, + "learning_rate": 2.02338451695816e-09, + "loss": 0.7393, + "step": 6720 + }, + { + "epoch": 0.99, + "learning_rate": 1.928182976322912e-09, + "loss": 0.7754, + "step": 6721 + }, + { + "epoch": 0.99, + "learning_rate": 1.835275157409111e-09, + "loss": 0.6978, + "step": 6722 + }, + { + "epoch": 0.99, + "learning_rate": 1.7446610815308184e-09, + "loss": 0.7847, + "step": 6723 + }, + { + "epoch": 0.99, + "learning_rate": 1.6563407694758504e-09, + "loss": 0.7842, + "step": 6724 + }, + { + "epoch": 0.99, + "learning_rate": 1.5703142415079974e-09, + "loss": 0.7227, + "step": 6725 + }, + { + "epoch": 0.99, + "learning_rate": 1.486581517361474e-09, + "loss": 0.79, + "step": 6726 + }, + { + "epoch": 0.99, + "learning_rate": 1.4051426162464687e-09, + "loss": 0.8047, + "step": 6727 + }, + { + "epoch": 0.99, + "learning_rate": 1.3259975568469253e-09, + "loss": 0.7544, + "step": 6728 + }, + { + "epoch": 1.0, + "learning_rate": 1.2491463573194307e-09, + "loss": 0.7383, + "step": 6729 + }, + { + "epoch": 1.0, + "learning_rate": 1.174589035295437e-09, + "loss": 0.7627, + "step": 6730 + }, + { + "epoch": 1.0, + "learning_rate": 1.10232560787904e-09, + "loss": 0.7925, + "step": 6731 + }, + { + "epoch": 1.0, + "learning_rate": 1.0323560916492003e-09, + "loss": 0.793, + "step": 6732 + }, + { + "epoch": 1.0, + "learning_rate": 9.646805026586325e-10, + "loss": 0.7388, + "step": 6733 + }, + { + "epoch": 1.0, + "learning_rate": 8.992988564315852e-10, + "loss": 0.772, + "step": 6734 + }, + { + "epoch": 1.0, + "learning_rate": 8.362111679682817e-10, + "loss": 0.7251, + "step": 6735 + }, + { + "epoch": 1.0, + "learning_rate": 7.754174517426994e-10, + "loss": 0.7778, + "step": 6736 + }, + { + "epoch": 1.0, + "learning_rate": 7.1691772170146e-10, + "loss": 0.2495, + "step": 6737 + }, + { + "epoch": 1.0, + "learning_rate": 6.607119912649395e-10, + "loss": 0.7803, + "step": 6738 + }, + { + "epoch": 1.0, + "learning_rate": 6.068002733294887e-10, + "loss": 0.7378, + "step": 6739 + }, + { + "epoch": 1.0, + "learning_rate": 5.551825802607713e-10, + "loss": 0.7583, + "step": 6740 + }, + { + "epoch": 1.0, + "learning_rate": 5.058589239026468e-10, + "loss": 0.7554, + "step": 6741 + }, + { + "epoch": 1.0, + "learning_rate": 4.588293155693979e-10, + "loss": 0.769, + "step": 6742 + }, + { + "epoch": 1.0, + "learning_rate": 4.140937660512823e-10, + "loss": 0.7812, + "step": 6743 + }, + { + "epoch": 1.0, + "learning_rate": 3.716522856112015e-10, + "loss": 0.7744, + "step": 6744 + }, + { + "epoch": 1.0, + "learning_rate": 3.3150488398581147e-10, + "loss": 0.8354, + "step": 6745 + }, + { + "epoch": 1.0, + "learning_rate": 2.9365157038663273e-10, + "loss": 0.2828, + "step": 6746 + }, + { + "epoch": 1.0, + "learning_rate": 2.5809235349560924e-10, + "loss": 0.7642, + "step": 6747 + }, + { + "epoch": 1.0, + "learning_rate": 2.2482724147177005e-10, + "loss": 0.7759, + "step": 6748 + }, + { + "epoch": 1.0, + "learning_rate": 1.9385624194789843e-10, + "loss": 0.7446, + "step": 6749 + }, + { + "epoch": 1.0, + "learning_rate": 1.6517936202720132e-10, + "loss": 0.7466, + "step": 6750 + }, + { + "epoch": 1.0, + "learning_rate": 1.3879660828997055e-10, + "loss": 0.8369, + "step": 6751 + }, + { + "epoch": 1.0, + "learning_rate": 1.1470798678803186e-10, + "loss": 0.7944, + "step": 6752 + }, + { + "epoch": 1.0, + "learning_rate": 9.291350304918568e-11, + "loss": 0.7598, + "step": 6753 + }, + { + "epoch": 1.0, + "learning_rate": 7.341316207165606e-11, + "loss": 0.8462, + "step": 6754 + }, + { + "epoch": 1.0, + "learning_rate": 5.620696832964179e-11, + "loss": 0.7642, + "step": 6755 + }, + { + "epoch": 1.0, + "learning_rate": 4.1294925769985726e-11, + "loss": 0.7534, + "step": 6756 + }, + { + "epoch": 1.0, + "learning_rate": 2.867703781550546e-11, + "loss": 0.7983, + "step": 6757 + }, + { + "epoch": 1.0, + "learning_rate": 1.8353307359442184e-11, + "loss": 0.7275, + "step": 6758 + }, + { + "epoch": 1.0, + "learning_rate": 1.0323736772122062e-11, + "loss": 0.7441, + "step": 6759 + }, + { + "epoch": 1.0, + "learning_rate": 4.5883278931846405e-12, + "loss": 0.7891, + "step": 6760 + }, + { + "epoch": 1.0, + "learning_rate": 1.1470820393544302e-12, + "loss": 0.3315, + "step": 6761 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.533, + "step": 6762 + }, + { + "epoch": 1.0, + "step": 6762, + "total_flos": 3565969442308096.0, + "train_loss": 0.801395031518042, + "train_runtime": 44734.6205, + "train_samples_per_second": 19.347, + "train_steps_per_second": 0.151 + } + ], + "max_steps": 6762, + "num_train_epochs": 1, + "total_flos": 3565969442308096.0, + "trial_name": null, + "trial_params": null +}