diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,60216 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.807411566535654, + "global_step": 20000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999953209807224e-05, + "loss": 0.037, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 4.999906419614449e-05, + "loss": 0.0443, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 4.999859629421673e-05, + "loss": 0.0456, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 4.999812839228898e-05, + "loss": 0.0554, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 4.999766049036122e-05, + "loss": 0.0675, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997192588433464e-05, + "loss": 0.0875, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 4.999672468650571e-05, + "loss": 0.0372, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 4.9996256784577956e-05, + "loss": 0.0759, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 4.9995788882650195e-05, + "loss": 0.0565, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 4.999532098072244e-05, + "loss": 0.0625, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.999485307879469e-05, + "loss": 0.0819, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 4.999438517686693e-05, + "loss": 0.0702, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 4.999391727493917e-05, + "loss": 0.0602, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 4.999344937301142e-05, + "loss": 0.0636, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 4.9992981471083664e-05, + "loss": 0.0527, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999251356915591e-05, + "loss": 0.0542, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 4.999204566722815e-05, + "loss": 0.0554, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991577765300395e-05, + "loss": 0.097, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991109863372634e-05, + "loss": 0.0558, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 4.999064196144489e-05, + "loss": 0.0606, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990174059517126e-05, + "loss": 0.0645, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 4.998970615758937e-05, + "loss": 0.0463, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 4.998923825566161e-05, + "loss": 0.062, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988770353733864e-05, + "loss": 0.0648, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 4.99883024518061e-05, + "loss": 0.0652, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 4.998783454987835e-05, + "loss": 0.0588, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 4.998736664795059e-05, + "loss": 0.0534, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986898746022834e-05, + "loss": 0.0591, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 4.998643084409508e-05, + "loss": 0.0734, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985962942167326e-05, + "loss": 0.0554, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985495040239565e-05, + "loss": 0.0694, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 4.998502713831181e-05, + "loss": 0.0483, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984559236384057e-05, + "loss": 0.06, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 4.99840913344563e-05, + "loss": 0.0609, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 4.998362343252854e-05, + "loss": 0.0651, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.998315553060079e-05, + "loss": 0.0729, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 4.998268762867303e-05, + "loss": 0.0757, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 4.998221972674528e-05, + "loss": 0.0727, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 4.998175182481752e-05, + "loss": 0.0758, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981283922889764e-05, + "loss": 0.0541, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.998081602096201e-05, + "loss": 0.0508, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980348119034256e-05, + "loss": 0.061, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979880217106495e-05, + "loss": 0.065, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 4.997941231517874e-05, + "loss": 0.0799, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 4.997894441325098e-05, + "loss": 0.0496, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.997847651132323e-05, + "loss": 0.083, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 4.997800860939547e-05, + "loss": 0.0735, + "step": 94 + }, + { + "epoch": 0.01, + "learning_rate": 4.997754070746772e-05, + "loss": 0.0632, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 4.997707280553996e-05, + "loss": 0.0583, + "step": 98 + }, + { + "epoch": 0.01, + "learning_rate": 4.99766049036122e-05, + "loss": 0.0603, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.997613700168445e-05, + "loss": 0.0608, + "step": 102 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975669099756695e-05, + "loss": 0.0535, + "step": 104 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975201197828934e-05, + "loss": 0.0497, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 4.997473329590118e-05, + "loss": 0.0643, + "step": 108 + }, + { + "epoch": 0.02, + "learning_rate": 4.9974265393973426e-05, + "loss": 0.0945, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 4.997379749204567e-05, + "loss": 0.0629, + "step": 112 + }, + { + "epoch": 0.02, + "learning_rate": 4.997332959011791e-05, + "loss": 0.0662, + "step": 114 + }, + { + "epoch": 0.02, + "learning_rate": 4.997286168819016e-05, + "loss": 0.0684, + "step": 116 + }, + { + "epoch": 0.02, + "learning_rate": 4.99723937862624e-05, + "loss": 0.0457, + "step": 118 + }, + { + "epoch": 0.02, + "learning_rate": 4.997192588433465e-05, + "loss": 0.0592, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 4.997145798240689e-05, + "loss": 0.0606, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970990080479134e-05, + "loss": 0.0658, + "step": 124 + }, + { + "epoch": 0.02, + "learning_rate": 4.997052217855138e-05, + "loss": 0.0649, + "step": 126 + }, + { + "epoch": 0.02, + "learning_rate": 4.9970054276623626e-05, + "loss": 0.082, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969586374695865e-05, + "loss": 0.064, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 4.996911847276811e-05, + "loss": 0.0648, + "step": 132 + }, + { + "epoch": 0.02, + "learning_rate": 4.996865057084035e-05, + "loss": 0.0601, + "step": 134 + }, + { + "epoch": 0.02, + "learning_rate": 4.99681826689126e-05, + "loss": 0.0887, + "step": 136 + }, + { + "epoch": 0.02, + "learning_rate": 4.996771476698484e-05, + "loss": 0.0643, + "step": 138 + }, + { + "epoch": 0.02, + "learning_rate": 4.996724686505709e-05, + "loss": 0.0608, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 4.996677896312933e-05, + "loss": 0.0526, + "step": 142 + }, + { + "epoch": 0.02, + "learning_rate": 4.996631106120158e-05, + "loss": 0.0693, + "step": 144 + }, + { + "epoch": 0.02, + "learning_rate": 4.996584315927382e-05, + "loss": 0.0642, + "step": 146 + }, + { + "epoch": 0.02, + "learning_rate": 4.9965375257346064e-05, + "loss": 0.0506, + "step": 148 + }, + { + "epoch": 0.02, + "learning_rate": 4.9964907355418303e-05, + "loss": 0.0759, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 4.996443945349055e-05, + "loss": 0.0666, + "step": 152 + }, + { + "epoch": 0.02, + "learning_rate": 4.9963971551562795e-05, + "loss": 0.0838, + "step": 154 + }, + { + "epoch": 0.02, + "learning_rate": 4.996350364963504e-05, + "loss": 0.0516, + "step": 156 + }, + { + "epoch": 0.02, + "learning_rate": 4.996303574770728e-05, + "loss": 0.0821, + "step": 158 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962567845779526e-05, + "loss": 0.0562, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 4.996209994385177e-05, + "loss": 0.0587, + "step": 162 + }, + { + "epoch": 0.02, + "learning_rate": 4.996163204192402e-05, + "loss": 0.0565, + "step": 164 + }, + { + "epoch": 0.02, + "learning_rate": 4.996116413999626e-05, + "loss": 0.0792, + "step": 166 + }, + { + "epoch": 0.02, + "learning_rate": 4.99606962380685e-05, + "loss": 0.0581, + "step": 168 + }, + { + "epoch": 0.02, + "learning_rate": 4.996022833614075e-05, + "loss": 0.0609, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959760434212995e-05, + "loss": 0.0733, + "step": 172 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959292532285234e-05, + "loss": 0.0535, + "step": 174 + }, + { + "epoch": 0.02, + "learning_rate": 4.995882463035748e-05, + "loss": 0.0691, + "step": 176 + }, + { + "epoch": 0.02, + "learning_rate": 4.9958356728429726e-05, + "loss": 0.0531, + "step": 178 + }, + { + "epoch": 0.03, + "learning_rate": 4.995788882650197e-05, + "loss": 0.0678, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 4.995742092457421e-05, + "loss": 0.071, + "step": 182 + }, + { + "epoch": 0.03, + "learning_rate": 4.995695302264646e-05, + "loss": 0.0658, + "step": 184 + }, + { + "epoch": 0.03, + "learning_rate": 4.9956485120718696e-05, + "loss": 0.0588, + "step": 186 + }, + { + "epoch": 0.03, + "learning_rate": 4.995601721879095e-05, + "loss": 0.076, + "step": 188 + }, + { + "epoch": 0.03, + "learning_rate": 4.995554931686319e-05, + "loss": 0.082, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 4.9955081414935434e-05, + "loss": 0.0749, + "step": 192 + }, + { + "epoch": 0.03, + "learning_rate": 4.995461351300767e-05, + "loss": 0.0777, + "step": 194 + }, + { + "epoch": 0.03, + "learning_rate": 4.9954145611079926e-05, + "loss": 0.0482, + "step": 196 + }, + { + "epoch": 0.03, + "learning_rate": 4.9953677709152165e-05, + "loss": 0.0717, + "step": 198 + }, + { + "epoch": 0.03, + "learning_rate": 4.995320980722441e-05, + "loss": 0.0564, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 4.995274190529665e-05, + "loss": 0.0572, + "step": 202 + }, + { + "epoch": 0.03, + "learning_rate": 4.9952274003368896e-05, + "loss": 0.0558, + "step": 204 + }, + { + "epoch": 0.03, + "learning_rate": 4.995180610144114e-05, + "loss": 0.0597, + "step": 206 + }, + { + "epoch": 0.03, + "learning_rate": 4.995133819951339e-05, + "loss": 0.0753, + "step": 208 + }, + { + "epoch": 0.03, + "learning_rate": 4.995087029758563e-05, + "loss": 0.0674, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 4.995040239565787e-05, + "loss": 0.0579, + "step": 212 + }, + { + "epoch": 0.03, + "learning_rate": 4.994993449373012e-05, + "loss": 0.0707, + "step": 214 + }, + { + "epoch": 0.03, + "learning_rate": 4.9949466591802364e-05, + "loss": 0.0689, + "step": 216 + }, + { + "epoch": 0.03, + "learning_rate": 4.9948998689874604e-05, + "loss": 0.052, + "step": 218 + }, + { + "epoch": 0.03, + "learning_rate": 4.994853078794685e-05, + "loss": 0.0635, + "step": 220 + }, + { + "epoch": 0.03, + "learning_rate": 4.9948062886019095e-05, + "loss": 0.0776, + "step": 222 + }, + { + "epoch": 0.03, + "learning_rate": 4.994759498409134e-05, + "loss": 0.0745, + "step": 224 + }, + { + "epoch": 0.03, + "learning_rate": 4.994712708216358e-05, + "loss": 0.0715, + "step": 226 + }, + { + "epoch": 0.03, + "learning_rate": 4.9946659180235826e-05, + "loss": 0.0822, + "step": 228 + }, + { + "epoch": 0.03, + "learning_rate": 4.9946191278308065e-05, + "loss": 0.0503, + "step": 230 + }, + { + "epoch": 0.03, + "learning_rate": 4.994572337638032e-05, + "loss": 0.0659, + "step": 232 + }, + { + "epoch": 0.03, + "learning_rate": 4.994525547445256e-05, + "loss": 0.0601, + "step": 234 + }, + { + "epoch": 0.03, + "learning_rate": 4.99447875725248e-05, + "loss": 0.0695, + "step": 236 + }, + { + "epoch": 0.03, + "learning_rate": 4.994431967059704e-05, + "loss": 0.0656, + "step": 238 + }, + { + "epoch": 0.03, + "learning_rate": 4.9943851768669295e-05, + "loss": 0.0464, + "step": 240 + }, + { + "epoch": 0.03, + "learning_rate": 4.9943383866741534e-05, + "loss": 0.0717, + "step": 242 + }, + { + "epoch": 0.03, + "learning_rate": 4.994291596481378e-05, + "loss": 0.0721, + "step": 244 + }, + { + "epoch": 0.03, + "learning_rate": 4.994244806288602e-05, + "loss": 0.0659, + "step": 246 + }, + { + "epoch": 0.03, + "learning_rate": 4.9941980160958265e-05, + "loss": 0.0624, + "step": 248 + }, + { + "epoch": 0.04, + "learning_rate": 4.994151225903051e-05, + "loss": 0.0821, + "step": 250 + }, + { + "epoch": 0.04, + "learning_rate": 4.994104435710276e-05, + "loss": 0.0659, + "step": 252 + }, + { + "epoch": 0.04, + "learning_rate": 4.9940576455174996e-05, + "loss": 0.0722, + "step": 254 + }, + { + "epoch": 0.04, + "learning_rate": 4.994010855324724e-05, + "loss": 0.0651, + "step": 256 + }, + { + "epoch": 0.04, + "learning_rate": 4.993964065131949e-05, + "loss": 0.063, + "step": 258 + }, + { + "epoch": 0.04, + "learning_rate": 4.993917274939173e-05, + "loss": 0.0858, + "step": 260 + }, + { + "epoch": 0.04, + "learning_rate": 4.993870484746397e-05, + "loss": 0.065, + "step": 262 + }, + { + "epoch": 0.04, + "learning_rate": 4.993823694553621e-05, + "loss": 0.0508, + "step": 264 + }, + { + "epoch": 0.04, + "learning_rate": 4.9937769043608465e-05, + "loss": 0.0733, + "step": 266 + }, + { + "epoch": 0.04, + "learning_rate": 4.9937301141680704e-05, + "loss": 0.0532, + "step": 268 + }, + { + "epoch": 0.04, + "learning_rate": 4.993683323975295e-05, + "loss": 0.0611, + "step": 270 + }, + { + "epoch": 0.04, + "learning_rate": 4.993636533782519e-05, + "loss": 0.0675, + "step": 272 + }, + { + "epoch": 0.04, + "learning_rate": 4.993589743589744e-05, + "loss": 0.0732, + "step": 274 + }, + { + "epoch": 0.04, + "learning_rate": 4.993542953396968e-05, + "loss": 0.0837, + "step": 276 + }, + { + "epoch": 0.04, + "learning_rate": 4.993496163204193e-05, + "loss": 0.0673, + "step": 278 + }, + { + "epoch": 0.04, + "learning_rate": 4.9934493730114166e-05, + "loss": 0.0694, + "step": 280 + }, + { + "epoch": 0.04, + "learning_rate": 4.993402582818641e-05, + "loss": 0.0588, + "step": 282 + }, + { + "epoch": 0.04, + "learning_rate": 4.993355792625866e-05, + "loss": 0.0712, + "step": 284 + }, + { + "epoch": 0.04, + "learning_rate": 4.9933090024330904e-05, + "loss": 0.0729, + "step": 286 + }, + { + "epoch": 0.04, + "learning_rate": 4.993262212240314e-05, + "loss": 0.0628, + "step": 288 + }, + { + "epoch": 0.04, + "learning_rate": 4.993215422047539e-05, + "loss": 0.0781, + "step": 290 + }, + { + "epoch": 0.04, + "learning_rate": 4.9931686318547634e-05, + "loss": 0.0719, + "step": 292 + }, + { + "epoch": 0.04, + "learning_rate": 4.993121841661988e-05, + "loss": 0.0735, + "step": 294 + }, + { + "epoch": 0.04, + "learning_rate": 4.993075051469212e-05, + "loss": 0.0831, + "step": 296 + }, + { + "epoch": 0.04, + "learning_rate": 4.9930282612764365e-05, + "loss": 0.0536, + "step": 298 + }, + { + "epoch": 0.04, + "learning_rate": 4.992981471083661e-05, + "loss": 0.0818, + "step": 300 + }, + { + "epoch": 0.04, + "learning_rate": 4.992934680890886e-05, + "loss": 0.0652, + "step": 302 + }, + { + "epoch": 0.04, + "learning_rate": 4.9928878906981096e-05, + "loss": 0.0744, + "step": 304 + }, + { + "epoch": 0.04, + "learning_rate": 4.992841100505334e-05, + "loss": 0.0641, + "step": 306 + }, + { + "epoch": 0.04, + "learning_rate": 4.992794310312559e-05, + "loss": 0.0605, + "step": 308 + }, + { + "epoch": 0.04, + "learning_rate": 4.9927475201197834e-05, + "loss": 0.0653, + "step": 310 + }, + { + "epoch": 0.04, + "learning_rate": 4.992700729927007e-05, + "loss": 0.0743, + "step": 312 + }, + { + "epoch": 0.04, + "learning_rate": 4.992653939734232e-05, + "loss": 0.0614, + "step": 314 + }, + { + "epoch": 0.04, + "learning_rate": 4.992607149541456e-05, + "loss": 0.0706, + "step": 316 + }, + { + "epoch": 0.04, + "learning_rate": 4.992560359348681e-05, + "loss": 0.0712, + "step": 318 + }, + { + "epoch": 0.04, + "learning_rate": 4.992513569155905e-05, + "loss": 0.0667, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 4.9924667789631296e-05, + "loss": 0.0829, + "step": 322 + }, + { + "epoch": 0.05, + "learning_rate": 4.9924199887703535e-05, + "loss": 0.0632, + "step": 324 + }, + { + "epoch": 0.05, + "learning_rate": 4.992373198577579e-05, + "loss": 0.0668, + "step": 326 + }, + { + "epoch": 0.05, + "learning_rate": 4.992326408384803e-05, + "loss": 0.074, + "step": 328 + }, + { + "epoch": 0.05, + "learning_rate": 4.992279618192027e-05, + "loss": 0.0648, + "step": 330 + }, + { + "epoch": 0.05, + "learning_rate": 4.992232827999251e-05, + "loss": 0.0703, + "step": 332 + }, + { + "epoch": 0.05, + "learning_rate": 4.992186037806476e-05, + "loss": 0.0689, + "step": 334 + }, + { + "epoch": 0.05, + "learning_rate": 4.9921392476137004e-05, + "loss": 0.0587, + "step": 336 + }, + { + "epoch": 0.05, + "learning_rate": 4.992092457420925e-05, + "loss": 0.0486, + "step": 338 + }, + { + "epoch": 0.05, + "learning_rate": 4.992045667228149e-05, + "loss": 0.0688, + "step": 340 + }, + { + "epoch": 0.05, + "learning_rate": 4.9919988770353735e-05, + "loss": 0.0746, + "step": 342 + }, + { + "epoch": 0.05, + "learning_rate": 4.991952086842598e-05, + "loss": 0.064, + "step": 344 + }, + { + "epoch": 0.05, + "learning_rate": 4.991905296649823e-05, + "loss": 0.0703, + "step": 346 + }, + { + "epoch": 0.05, + "learning_rate": 4.9918585064570466e-05, + "loss": 0.0582, + "step": 348 + }, + { + "epoch": 0.05, + "learning_rate": 4.991811716264271e-05, + "loss": 0.0926, + "step": 350 + }, + { + "epoch": 0.05, + "learning_rate": 4.991764926071496e-05, + "loss": 0.073, + "step": 352 + }, + { + "epoch": 0.05, + "learning_rate": 4.9917181358787204e-05, + "loss": 0.0732, + "step": 354 + }, + { + "epoch": 0.05, + "learning_rate": 4.991671345685944e-05, + "loss": 0.0916, + "step": 356 + }, + { + "epoch": 0.05, + "learning_rate": 4.991624555493169e-05, + "loss": 0.055, + "step": 358 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915777653003934e-05, + "loss": 0.0583, + "step": 360 + }, + { + "epoch": 0.05, + "learning_rate": 4.991530975107618e-05, + "loss": 0.0512, + "step": 362 + }, + { + "epoch": 0.05, + "learning_rate": 4.991484184914842e-05, + "loss": 0.0686, + "step": 364 + }, + { + "epoch": 0.05, + "learning_rate": 4.9914373947220665e-05, + "loss": 0.0654, + "step": 366 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913906045292905e-05, + "loss": 0.0615, + "step": 368 + }, + { + "epoch": 0.05, + "learning_rate": 4.991343814336516e-05, + "loss": 0.0862, + "step": 370 + }, + { + "epoch": 0.05, + "learning_rate": 4.9912970241437396e-05, + "loss": 0.0641, + "step": 372 + }, + { + "epoch": 0.05, + "learning_rate": 4.991250233950964e-05, + "loss": 0.0601, + "step": 374 + }, + { + "epoch": 0.05, + "learning_rate": 4.991203443758188e-05, + "loss": 0.0584, + "step": 376 + }, + { + "epoch": 0.05, + "learning_rate": 4.991156653565413e-05, + "loss": 0.0663, + "step": 378 + }, + { + "epoch": 0.05, + "learning_rate": 4.991109863372637e-05, + "loss": 0.0858, + "step": 380 + }, + { + "epoch": 0.05, + "learning_rate": 4.991063073179862e-05, + "loss": 0.0638, + "step": 382 + }, + { + "epoch": 0.05, + "learning_rate": 4.991016282987086e-05, + "loss": 0.0453, + "step": 384 + }, + { + "epoch": 0.05, + "learning_rate": 4.9909694927943104e-05, + "loss": 0.0616, + "step": 386 + }, + { + "epoch": 0.05, + "learning_rate": 4.990922702601535e-05, + "loss": 0.0885, + "step": 388 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908759124087596e-05, + "loss": 0.078, + "step": 390 + }, + { + "epoch": 0.06, + "learning_rate": 4.9908291222159835e-05, + "loss": 0.0484, + "step": 392 + }, + { + "epoch": 0.06, + "learning_rate": 4.990782332023208e-05, + "loss": 0.0595, + "step": 394 + }, + { + "epoch": 0.06, + "learning_rate": 4.990735541830433e-05, + "loss": 0.0726, + "step": 396 + }, + { + "epoch": 0.06, + "learning_rate": 4.990688751637657e-05, + "loss": 0.0838, + "step": 398 + }, + { + "epoch": 0.06, + "learning_rate": 4.990641961444881e-05, + "loss": 0.0652, + "step": 400 + }, + { + "epoch": 0.06, + "learning_rate": 4.990595171252106e-05, + "loss": 0.0673, + "step": 402 + }, + { + "epoch": 0.06, + "learning_rate": 4.9905483810593304e-05, + "loss": 0.055, + "step": 404 + }, + { + "epoch": 0.06, + "learning_rate": 4.990501590866555e-05, + "loss": 0.0759, + "step": 406 + }, + { + "epoch": 0.06, + "learning_rate": 4.990454800673779e-05, + "loss": 0.0616, + "step": 408 + }, + { + "epoch": 0.06, + "learning_rate": 4.9904080104810035e-05, + "loss": 0.0933, + "step": 410 + }, + { + "epoch": 0.06, + "learning_rate": 4.9903612202882274e-05, + "loss": 0.0647, + "step": 412 + }, + { + "epoch": 0.06, + "learning_rate": 4.990314430095453e-05, + "loss": 0.0527, + "step": 414 + }, + { + "epoch": 0.06, + "learning_rate": 4.9902676399026766e-05, + "loss": 0.0578, + "step": 416 + }, + { + "epoch": 0.06, + "learning_rate": 4.990220849709901e-05, + "loss": 0.0808, + "step": 418 + }, + { + "epoch": 0.06, + "learning_rate": 4.990174059517125e-05, + "loss": 0.0793, + "step": 420 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901272693243504e-05, + "loss": 0.0648, + "step": 422 + }, + { + "epoch": 0.06, + "learning_rate": 4.990080479131574e-05, + "loss": 0.0716, + "step": 424 + }, + { + "epoch": 0.06, + "learning_rate": 4.990033688938799e-05, + "loss": 0.0825, + "step": 426 + }, + { + "epoch": 0.06, + "learning_rate": 4.989986898746023e-05, + "loss": 0.0908, + "step": 428 + }, + { + "epoch": 0.06, + "learning_rate": 4.9899401085532474e-05, + "loss": 0.0688, + "step": 430 + }, + { + "epoch": 0.06, + "learning_rate": 4.989893318360472e-05, + "loss": 0.0669, + "step": 432 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898465281676965e-05, + "loss": 0.0735, + "step": 434 + }, + { + "epoch": 0.06, + "learning_rate": 4.9897997379749205e-05, + "loss": 0.0817, + "step": 436 + }, + { + "epoch": 0.06, + "learning_rate": 4.989752947782145e-05, + "loss": 0.0792, + "step": 438 + }, + { + "epoch": 0.06, + "learning_rate": 4.9897061575893696e-05, + "loss": 0.0509, + "step": 440 + }, + { + "epoch": 0.06, + "learning_rate": 4.989659367396594e-05, + "loss": 0.0809, + "step": 442 + }, + { + "epoch": 0.06, + "learning_rate": 4.989612577203818e-05, + "loss": 0.0654, + "step": 444 + }, + { + "epoch": 0.06, + "learning_rate": 4.989565787011043e-05, + "loss": 0.0608, + "step": 446 + }, + { + "epoch": 0.06, + "learning_rate": 4.989518996818267e-05, + "loss": 0.0928, + "step": 448 + }, + { + "epoch": 0.06, + "learning_rate": 4.989472206625492e-05, + "loss": 0.0554, + "step": 450 + }, + { + "epoch": 0.06, + "learning_rate": 4.989425416432716e-05, + "loss": 0.0559, + "step": 452 + }, + { + "epoch": 0.06, + "learning_rate": 4.9893786262399404e-05, + "loss": 0.0871, + "step": 454 + }, + { + "epoch": 0.06, + "learning_rate": 4.989331836047165e-05, + "loss": 0.0724, + "step": 456 + }, + { + "epoch": 0.06, + "learning_rate": 4.9892850458543896e-05, + "loss": 0.0672, + "step": 458 + }, + { + "epoch": 0.06, + "learning_rate": 4.9892382556616135e-05, + "loss": 0.0602, + "step": 460 + }, + { + "epoch": 0.06, + "learning_rate": 4.989191465468838e-05, + "loss": 0.0616, + "step": 462 + }, + { + "epoch": 0.07, + "learning_rate": 4.989144675276062e-05, + "loss": 0.0732, + "step": 464 + }, + { + "epoch": 0.07, + "learning_rate": 4.989097885083287e-05, + "loss": 0.0668, + "step": 466 + }, + { + "epoch": 0.07, + "learning_rate": 4.989051094890511e-05, + "loss": 0.0848, + "step": 468 + }, + { + "epoch": 0.07, + "learning_rate": 4.989004304697736e-05, + "loss": 0.0656, + "step": 470 + }, + { + "epoch": 0.07, + "learning_rate": 4.98895751450496e-05, + "loss": 0.0609, + "step": 472 + }, + { + "epoch": 0.07, + "learning_rate": 4.988910724312185e-05, + "loss": 0.067, + "step": 474 + }, + { + "epoch": 0.07, + "learning_rate": 4.988863934119409e-05, + "loss": 0.0814, + "step": 476 + }, + { + "epoch": 0.07, + "learning_rate": 4.9888171439266335e-05, + "loss": 0.0736, + "step": 478 + }, + { + "epoch": 0.07, + "learning_rate": 4.9887703537338574e-05, + "loss": 0.0638, + "step": 480 + }, + { + "epoch": 0.07, + "learning_rate": 4.988723563541082e-05, + "loss": 0.0587, + "step": 482 + }, + { + "epoch": 0.07, + "learning_rate": 4.9886767733483066e-05, + "loss": 0.056, + "step": 484 + }, + { + "epoch": 0.07, + "learning_rate": 4.988629983155531e-05, + "loss": 0.0593, + "step": 486 + }, + { + "epoch": 0.07, + "learning_rate": 4.988583192962755e-05, + "loss": 0.0738, + "step": 488 + }, + { + "epoch": 0.07, + "learning_rate": 4.98853640276998e-05, + "loss": 0.0849, + "step": 490 + }, + { + "epoch": 0.07, + "learning_rate": 4.988489612577204e-05, + "loss": 0.0673, + "step": 492 + }, + { + "epoch": 0.07, + "learning_rate": 4.988442822384429e-05, + "loss": 0.0864, + "step": 494 + }, + { + "epoch": 0.07, + "learning_rate": 4.988396032191653e-05, + "loss": 0.0738, + "step": 496 + }, + { + "epoch": 0.07, + "learning_rate": 4.9883492419988774e-05, + "loss": 0.0748, + "step": 498 + }, + { + "epoch": 0.07, + "learning_rate": 4.988302451806102e-05, + "loss": 0.0529, + "step": 500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9882556616133265e-05, + "loss": 0.0544, + "step": 502 + }, + { + "epoch": 0.07, + "learning_rate": 4.9882088714205505e-05, + "loss": 0.077, + "step": 504 + }, + { + "epoch": 0.07, + "learning_rate": 4.988162081227775e-05, + "loss": 0.0706, + "step": 506 + }, + { + "epoch": 0.07, + "learning_rate": 4.988115291034999e-05, + "loss": 0.0729, + "step": 508 + }, + { + "epoch": 0.07, + "learning_rate": 4.9880685008422236e-05, + "loss": 0.0812, + "step": 510 + }, + { + "epoch": 0.07, + "learning_rate": 4.988021710649448e-05, + "loss": 0.067, + "step": 512 + }, + { + "epoch": 0.07, + "learning_rate": 4.987974920456672e-05, + "loss": 0.0565, + "step": 514 + }, + { + "epoch": 0.07, + "learning_rate": 4.9879281302638967e-05, + "loss": 0.0597, + "step": 516 + }, + { + "epoch": 0.07, + "learning_rate": 4.987881340071121e-05, + "loss": 0.095, + "step": 518 + }, + { + "epoch": 0.07, + "learning_rate": 4.987834549878346e-05, + "loss": 0.0729, + "step": 520 + }, + { + "epoch": 0.07, + "learning_rate": 4.98778775968557e-05, + "loss": 0.0743, + "step": 522 + }, + { + "epoch": 0.07, + "learning_rate": 4.9877409694927943e-05, + "loss": 0.0656, + "step": 524 + }, + { + "epoch": 0.07, + "learning_rate": 4.987694179300019e-05, + "loss": 0.0656, + "step": 526 + }, + { + "epoch": 0.07, + "learning_rate": 4.9876473891072435e-05, + "loss": 0.0761, + "step": 528 + }, + { + "epoch": 0.07, + "learning_rate": 4.9876005989144674e-05, + "loss": 0.0566, + "step": 530 + }, + { + "epoch": 0.07, + "learning_rate": 4.987553808721692e-05, + "loss": 0.0812, + "step": 532 + }, + { + "epoch": 0.07, + "learning_rate": 4.9875070185289166e-05, + "loss": 0.0558, + "step": 534 + }, + { + "epoch": 0.08, + "learning_rate": 4.987460228336141e-05, + "loss": 0.08, + "step": 536 + }, + { + "epoch": 0.08, + "learning_rate": 4.987413438143365e-05, + "loss": 0.0568, + "step": 538 + }, + { + "epoch": 0.08, + "learning_rate": 4.98736664795059e-05, + "loss": 0.059, + "step": 540 + }, + { + "epoch": 0.08, + "learning_rate": 4.9873198577578136e-05, + "loss": 0.0801, + "step": 542 + }, + { + "epoch": 0.08, + "learning_rate": 4.987273067565039e-05, + "loss": 0.0843, + "step": 544 + }, + { + "epoch": 0.08, + "learning_rate": 4.987226277372263e-05, + "loss": 0.0663, + "step": 546 + }, + { + "epoch": 0.08, + "learning_rate": 4.9871794871794874e-05, + "loss": 0.0791, + "step": 548 + }, + { + "epoch": 0.08, + "learning_rate": 4.987132696986711e-05, + "loss": 0.0614, + "step": 550 + }, + { + "epoch": 0.08, + "learning_rate": 4.9870859067939366e-05, + "loss": 0.0644, + "step": 552 + }, + { + "epoch": 0.08, + "learning_rate": 4.9870391166011605e-05, + "loss": 0.0819, + "step": 554 + }, + { + "epoch": 0.08, + "learning_rate": 4.986992326408385e-05, + "loss": 0.0612, + "step": 556 + }, + { + "epoch": 0.08, + "learning_rate": 4.986945536215609e-05, + "loss": 0.0671, + "step": 558 + }, + { + "epoch": 0.08, + "learning_rate": 4.9868987460228336e-05, + "loss": 0.0697, + "step": 560 + }, + { + "epoch": 0.08, + "learning_rate": 4.986851955830058e-05, + "loss": 0.0684, + "step": 562 + }, + { + "epoch": 0.08, + "learning_rate": 4.986805165637283e-05, + "loss": 0.0503, + "step": 564 + }, + { + "epoch": 0.08, + "learning_rate": 4.986758375444507e-05, + "loss": 0.1045, + "step": 566 + }, + { + "epoch": 0.08, + "learning_rate": 4.986711585251731e-05, + "loss": 0.0755, + "step": 568 + }, + { + "epoch": 0.08, + "learning_rate": 4.986664795058956e-05, + "loss": 0.0808, + "step": 570 + }, + { + "epoch": 0.08, + "learning_rate": 4.9866180048661805e-05, + "loss": 0.0615, + "step": 572 + }, + { + "epoch": 0.08, + "learning_rate": 4.9865712146734044e-05, + "loss": 0.0703, + "step": 574 + }, + { + "epoch": 0.08, + "learning_rate": 4.986524424480629e-05, + "loss": 0.0816, + "step": 576 + }, + { + "epoch": 0.08, + "learning_rate": 4.9864776342878536e-05, + "loss": 0.1, + "step": 578 + }, + { + "epoch": 0.08, + "learning_rate": 4.986430844095078e-05, + "loss": 0.0772, + "step": 580 + }, + { + "epoch": 0.08, + "learning_rate": 4.986384053902302e-05, + "loss": 0.0676, + "step": 582 + }, + { + "epoch": 0.08, + "learning_rate": 4.9863372637095267e-05, + "loss": 0.065, + "step": 584 + }, + { + "epoch": 0.08, + "learning_rate": 4.986290473516751e-05, + "loss": 0.0518, + "step": 586 + }, + { + "epoch": 0.08, + "learning_rate": 4.986243683323976e-05, + "loss": 0.0837, + "step": 588 + }, + { + "epoch": 0.08, + "learning_rate": 4.9861968931312e-05, + "loss": 0.0935, + "step": 590 + }, + { + "epoch": 0.08, + "learning_rate": 4.9861501029384243e-05, + "loss": 0.0638, + "step": 592 + }, + { + "epoch": 0.08, + "learning_rate": 4.986103312745648e-05, + "loss": 0.0594, + "step": 594 + }, + { + "epoch": 0.08, + "learning_rate": 4.9860565225528735e-05, + "loss": 0.0801, + "step": 596 + }, + { + "epoch": 0.08, + "learning_rate": 4.9860097323600974e-05, + "loss": 0.083, + "step": 598 + }, + { + "epoch": 0.08, + "learning_rate": 4.985962942167322e-05, + "loss": 0.0821, + "step": 600 + }, + { + "epoch": 0.08, + "learning_rate": 4.985916151974546e-05, + "loss": 0.0725, + "step": 602 + }, + { + "epoch": 0.08, + "learning_rate": 4.985869361781771e-05, + "loss": 0.0954, + "step": 604 + }, + { + "epoch": 0.09, + "learning_rate": 4.985822571588995e-05, + "loss": 0.0728, + "step": 606 + }, + { + "epoch": 0.09, + "learning_rate": 4.98577578139622e-05, + "loss": 0.0852, + "step": 608 + }, + { + "epoch": 0.09, + "learning_rate": 4.9857289912034436e-05, + "loss": 0.0623, + "step": 610 + }, + { + "epoch": 0.09, + "learning_rate": 4.985682201010668e-05, + "loss": 0.0678, + "step": 612 + }, + { + "epoch": 0.09, + "learning_rate": 4.985635410817893e-05, + "loss": 0.0754, + "step": 614 + }, + { + "epoch": 0.09, + "learning_rate": 4.9855886206251174e-05, + "loss": 0.0643, + "step": 616 + }, + { + "epoch": 0.09, + "learning_rate": 4.985541830432341e-05, + "loss": 0.0607, + "step": 618 + }, + { + "epoch": 0.09, + "learning_rate": 4.985495040239566e-05, + "loss": 0.0761, + "step": 620 + }, + { + "epoch": 0.09, + "learning_rate": 4.9854482500467905e-05, + "loss": 0.0524, + "step": 622 + }, + { + "epoch": 0.09, + "learning_rate": 4.985401459854015e-05, + "loss": 0.0739, + "step": 624 + }, + { + "epoch": 0.09, + "learning_rate": 4.985354669661239e-05, + "loss": 0.0671, + "step": 626 + }, + { + "epoch": 0.09, + "learning_rate": 4.9853078794684636e-05, + "loss": 0.1179, + "step": 628 + }, + { + "epoch": 0.09, + "learning_rate": 4.985261089275688e-05, + "loss": 0.0855, + "step": 630 + }, + { + "epoch": 0.09, + "learning_rate": 4.985214299082913e-05, + "loss": 0.0785, + "step": 632 + }, + { + "epoch": 0.09, + "learning_rate": 4.985167508890137e-05, + "loss": 0.085, + "step": 634 + }, + { + "epoch": 0.09, + "learning_rate": 4.985120718697361e-05, + "loss": 0.0555, + "step": 636 + }, + { + "epoch": 0.09, + "learning_rate": 4.985073928504586e-05, + "loss": 0.0648, + "step": 638 + }, + { + "epoch": 0.09, + "learning_rate": 4.9850271383118105e-05, + "loss": 0.0983, + "step": 640 + }, + { + "epoch": 0.09, + "learning_rate": 4.9849803481190344e-05, + "loss": 0.065, + "step": 642 + }, + { + "epoch": 0.09, + "learning_rate": 4.984933557926259e-05, + "loss": 0.0807, + "step": 644 + }, + { + "epoch": 0.09, + "learning_rate": 4.984886767733483e-05, + "loss": 0.081, + "step": 646 + }, + { + "epoch": 0.09, + "learning_rate": 4.984839977540708e-05, + "loss": 0.067, + "step": 648 + }, + { + "epoch": 0.09, + "learning_rate": 4.984793187347932e-05, + "loss": 0.0582, + "step": 650 + }, + { + "epoch": 0.09, + "learning_rate": 4.9847463971551567e-05, + "loss": 0.061, + "step": 652 + }, + { + "epoch": 0.09, + "learning_rate": 4.9846996069623806e-05, + "loss": 0.0615, + "step": 654 + }, + { + "epoch": 0.09, + "learning_rate": 4.984652816769605e-05, + "loss": 0.0744, + "step": 656 + }, + { + "epoch": 0.09, + "learning_rate": 4.98460602657683e-05, + "loss": 0.0589, + "step": 658 + }, + { + "epoch": 0.09, + "learning_rate": 4.9845592363840543e-05, + "loss": 0.0687, + "step": 660 + }, + { + "epoch": 0.09, + "learning_rate": 4.984512446191278e-05, + "loss": 0.0741, + "step": 662 + }, + { + "epoch": 0.09, + "learning_rate": 4.984465655998503e-05, + "loss": 0.0739, + "step": 664 + }, + { + "epoch": 0.09, + "learning_rate": 4.9844188658057274e-05, + "loss": 0.0726, + "step": 666 + }, + { + "epoch": 0.09, + "learning_rate": 4.984372075612952e-05, + "loss": 0.0584, + "step": 668 + }, + { + "epoch": 0.09, + "learning_rate": 4.984325285420176e-05, + "loss": 0.0637, + "step": 670 + }, + { + "epoch": 0.09, + "learning_rate": 4.9842784952274005e-05, + "loss": 0.0623, + "step": 672 + }, + { + "epoch": 0.09, + "learning_rate": 4.984231705034625e-05, + "loss": 0.0817, + "step": 674 + }, + { + "epoch": 0.09, + "learning_rate": 4.98418491484185e-05, + "loss": 0.0579, + "step": 676 + }, + { + "epoch": 0.1, + "learning_rate": 4.9841381246490736e-05, + "loss": 0.0798, + "step": 678 + }, + { + "epoch": 0.1, + "learning_rate": 4.984091334456298e-05, + "loss": 0.08, + "step": 680 + }, + { + "epoch": 0.1, + "learning_rate": 4.984044544263523e-05, + "loss": 0.0769, + "step": 682 + }, + { + "epoch": 0.1, + "learning_rate": 4.9839977540707474e-05, + "loss": 0.0791, + "step": 684 + }, + { + "epoch": 0.1, + "learning_rate": 4.983950963877971e-05, + "loss": 0.0704, + "step": 686 + }, + { + "epoch": 0.1, + "learning_rate": 4.983904173685196e-05, + "loss": 0.0652, + "step": 688 + }, + { + "epoch": 0.1, + "learning_rate": 4.98385738349242e-05, + "loss": 0.0667, + "step": 690 + }, + { + "epoch": 0.1, + "learning_rate": 4.983810593299645e-05, + "loss": 0.1066, + "step": 692 + }, + { + "epoch": 0.1, + "learning_rate": 4.983763803106869e-05, + "loss": 0.0738, + "step": 694 + }, + { + "epoch": 0.1, + "learning_rate": 4.9837170129140936e-05, + "loss": 0.0779, + "step": 696 + }, + { + "epoch": 0.1, + "learning_rate": 4.9836702227213175e-05, + "loss": 0.0779, + "step": 698 + }, + { + "epoch": 0.1, + "learning_rate": 4.983623432528543e-05, + "loss": 0.0608, + "step": 700 + }, + { + "epoch": 0.1, + "learning_rate": 4.983576642335767e-05, + "loss": 0.082, + "step": 702 + }, + { + "epoch": 0.1, + "learning_rate": 4.983529852142991e-05, + "loss": 0.0693, + "step": 704 + }, + { + "epoch": 0.1, + "learning_rate": 4.983483061950215e-05, + "loss": 0.0889, + "step": 706 + }, + { + "epoch": 0.1, + "learning_rate": 4.98343627175744e-05, + "loss": 0.0858, + "step": 708 + }, + { + "epoch": 0.1, + "learning_rate": 4.9833894815646644e-05, + "loss": 0.0742, + "step": 710 + }, + { + "epoch": 0.1, + "learning_rate": 4.983342691371889e-05, + "loss": 0.0764, + "step": 712 + }, + { + "epoch": 0.1, + "learning_rate": 4.983295901179113e-05, + "loss": 0.0766, + "step": 714 + }, + { + "epoch": 0.1, + "learning_rate": 4.9832491109863375e-05, + "loss": 0.0643, + "step": 716 + }, + { + "epoch": 0.1, + "learning_rate": 4.983202320793562e-05, + "loss": 0.0594, + "step": 718 + }, + { + "epoch": 0.1, + "learning_rate": 4.9831555306007867e-05, + "loss": 0.0705, + "step": 720 + }, + { + "epoch": 0.1, + "learning_rate": 4.9831087404080106e-05, + "loss": 0.0614, + "step": 722 + }, + { + "epoch": 0.1, + "learning_rate": 4.983061950215235e-05, + "loss": 0.0734, + "step": 724 + }, + { + "epoch": 0.1, + "learning_rate": 4.98301516002246e-05, + "loss": 0.0642, + "step": 726 + }, + { + "epoch": 0.1, + "learning_rate": 4.9829683698296843e-05, + "loss": 0.0855, + "step": 728 + }, + { + "epoch": 0.1, + "learning_rate": 4.982921579636908e-05, + "loss": 0.0674, + "step": 730 + }, + { + "epoch": 0.1, + "learning_rate": 4.982874789444133e-05, + "loss": 0.0818, + "step": 732 + }, + { + "epoch": 0.1, + "learning_rate": 4.9828279992513574e-05, + "loss": 0.0786, + "step": 734 + }, + { + "epoch": 0.1, + "learning_rate": 4.982781209058582e-05, + "loss": 0.0543, + "step": 736 + }, + { + "epoch": 0.1, + "learning_rate": 4.982734418865806e-05, + "loss": 0.0741, + "step": 738 + }, + { + "epoch": 0.1, + "learning_rate": 4.9826876286730305e-05, + "loss": 0.0657, + "step": 740 + }, + { + "epoch": 0.1, + "learning_rate": 4.9826408384802545e-05, + "loss": 0.0634, + "step": 742 + }, + { + "epoch": 0.1, + "learning_rate": 4.98259404828748e-05, + "loss": 0.0747, + "step": 744 + }, + { + "epoch": 0.1, + "learning_rate": 4.9825472580947036e-05, + "loss": 0.0765, + "step": 746 + }, + { + "epoch": 0.1, + "learning_rate": 4.982500467901928e-05, + "loss": 0.0785, + "step": 748 + }, + { + "epoch": 0.11, + "learning_rate": 4.982453677709152e-05, + "loss": 0.0719, + "step": 750 + }, + { + "epoch": 0.11, + "learning_rate": 4.9824068875163774e-05, + "loss": 0.0752, + "step": 752 + }, + { + "epoch": 0.11, + "learning_rate": 4.982360097323601e-05, + "loss": 0.0667, + "step": 754 + }, + { + "epoch": 0.11, + "learning_rate": 4.982313307130826e-05, + "loss": 0.0795, + "step": 756 + }, + { + "epoch": 0.11, + "learning_rate": 4.98226651693805e-05, + "loss": 0.0718, + "step": 758 + }, + { + "epoch": 0.11, + "learning_rate": 4.9822197267452744e-05, + "loss": 0.0618, + "step": 760 + }, + { + "epoch": 0.11, + "learning_rate": 4.982172936552499e-05, + "loss": 0.0672, + "step": 762 + }, + { + "epoch": 0.11, + "learning_rate": 4.982126146359723e-05, + "loss": 0.0586, + "step": 764 + }, + { + "epoch": 0.11, + "learning_rate": 4.9820793561669475e-05, + "loss": 0.0782, + "step": 766 + }, + { + "epoch": 0.11, + "learning_rate": 4.982032565974172e-05, + "loss": 0.086, + "step": 768 + }, + { + "epoch": 0.11, + "learning_rate": 4.981985775781397e-05, + "loss": 0.0726, + "step": 770 + }, + { + "epoch": 0.11, + "learning_rate": 4.9819389855886206e-05, + "loss": 0.0705, + "step": 772 + }, + { + "epoch": 0.11, + "learning_rate": 4.981892195395845e-05, + "loss": 0.0829, + "step": 774 + }, + { + "epoch": 0.11, + "learning_rate": 4.981845405203069e-05, + "loss": 0.0539, + "step": 776 + }, + { + "epoch": 0.11, + "learning_rate": 4.9817986150102944e-05, + "loss": 0.0653, + "step": 778 + }, + { + "epoch": 0.11, + "learning_rate": 4.981751824817518e-05, + "loss": 0.0795, + "step": 780 + }, + { + "epoch": 0.11, + "learning_rate": 4.981705034624743e-05, + "loss": 0.0688, + "step": 782 + }, + { + "epoch": 0.11, + "learning_rate": 4.981658244431967e-05, + "loss": 0.0733, + "step": 784 + }, + { + "epoch": 0.11, + "learning_rate": 4.981611454239192e-05, + "loss": 0.079, + "step": 786 + }, + { + "epoch": 0.11, + "learning_rate": 4.981564664046416e-05, + "loss": 0.0719, + "step": 788 + }, + { + "epoch": 0.11, + "learning_rate": 4.9815178738536406e-05, + "loss": 0.0719, + "step": 790 + }, + { + "epoch": 0.11, + "learning_rate": 4.9814710836608645e-05, + "loss": 0.0795, + "step": 792 + }, + { + "epoch": 0.11, + "learning_rate": 4.981424293468089e-05, + "loss": 0.0839, + "step": 794 + }, + { + "epoch": 0.11, + "learning_rate": 4.981377503275314e-05, + "loss": 0.0543, + "step": 796 + }, + { + "epoch": 0.11, + "learning_rate": 4.981330713082538e-05, + "loss": 0.0901, + "step": 798 + }, + { + "epoch": 0.11, + "learning_rate": 4.981283922889762e-05, + "loss": 0.085, + "step": 800 + }, + { + "epoch": 0.11, + "learning_rate": 4.981237132696987e-05, + "loss": 0.0841, + "step": 802 + }, + { + "epoch": 0.11, + "learning_rate": 4.9811903425042114e-05, + "loss": 0.0873, + "step": 804 + }, + { + "epoch": 0.11, + "learning_rate": 4.981143552311436e-05, + "loss": 0.0738, + "step": 806 + }, + { + "epoch": 0.11, + "learning_rate": 4.98109676211866e-05, + "loss": 0.0911, + "step": 808 + }, + { + "epoch": 0.11, + "learning_rate": 4.9810499719258845e-05, + "loss": 0.0635, + "step": 810 + }, + { + "epoch": 0.11, + "learning_rate": 4.981003181733109e-05, + "loss": 0.0814, + "step": 812 + }, + { + "epoch": 0.11, + "learning_rate": 4.9809563915403336e-05, + "loss": 0.0875, + "step": 814 + }, + { + "epoch": 0.11, + "learning_rate": 4.9809096013475575e-05, + "loss": 0.0586, + "step": 816 + }, + { + "epoch": 0.11, + "learning_rate": 4.980862811154782e-05, + "loss": 0.076, + "step": 818 + }, + { + "epoch": 0.12, + "learning_rate": 4.980816020962006e-05, + "loss": 0.0733, + "step": 820 + }, + { + "epoch": 0.12, + "learning_rate": 4.980769230769231e-05, + "loss": 0.0748, + "step": 822 + }, + { + "epoch": 0.12, + "learning_rate": 4.980722440576455e-05, + "loss": 0.0799, + "step": 824 + }, + { + "epoch": 0.12, + "learning_rate": 4.98067565038368e-05, + "loss": 0.0687, + "step": 826 + }, + { + "epoch": 0.12, + "learning_rate": 4.980628860190904e-05, + "loss": 0.0638, + "step": 828 + }, + { + "epoch": 0.12, + "learning_rate": 4.980582069998129e-05, + "loss": 0.0611, + "step": 830 + }, + { + "epoch": 0.12, + "learning_rate": 4.980535279805353e-05, + "loss": 0.0895, + "step": 832 + }, + { + "epoch": 0.12, + "learning_rate": 4.9804884896125775e-05, + "loss": 0.0886, + "step": 834 + }, + { + "epoch": 0.12, + "learning_rate": 4.9804416994198014e-05, + "loss": 0.0741, + "step": 836 + }, + { + "epoch": 0.12, + "learning_rate": 4.980394909227026e-05, + "loss": 0.0634, + "step": 838 + }, + { + "epoch": 0.12, + "learning_rate": 4.9803481190342506e-05, + "loss": 0.0722, + "step": 840 + }, + { + "epoch": 0.12, + "learning_rate": 4.980301328841475e-05, + "loss": 0.079, + "step": 842 + }, + { + "epoch": 0.12, + "learning_rate": 4.980254538648699e-05, + "loss": 0.0603, + "step": 844 + }, + { + "epoch": 0.12, + "learning_rate": 4.980207748455924e-05, + "loss": 0.0672, + "step": 846 + }, + { + "epoch": 0.12, + "learning_rate": 4.980160958263148e-05, + "loss": 0.0855, + "step": 848 + }, + { + "epoch": 0.12, + "learning_rate": 4.980114168070373e-05, + "loss": 0.0704, + "step": 850 + }, + { + "epoch": 0.12, + "learning_rate": 4.980067377877597e-05, + "loss": 0.0627, + "step": 852 + }, + { + "epoch": 0.12, + "learning_rate": 4.9800205876848214e-05, + "loss": 0.0737, + "step": 854 + }, + { + "epoch": 0.12, + "learning_rate": 4.979973797492046e-05, + "loss": 0.064, + "step": 856 + }, + { + "epoch": 0.12, + "learning_rate": 4.9799270072992706e-05, + "loss": 0.0789, + "step": 858 + }, + { + "epoch": 0.12, + "learning_rate": 4.9798802171064945e-05, + "loss": 0.0821, + "step": 860 + }, + { + "epoch": 0.12, + "learning_rate": 4.979833426913719e-05, + "loss": 0.0795, + "step": 862 + }, + { + "epoch": 0.12, + "learning_rate": 4.979786636720944e-05, + "loss": 0.0738, + "step": 864 + }, + { + "epoch": 0.12, + "learning_rate": 4.979739846528168e-05, + "loss": 0.0792, + "step": 866 + }, + { + "epoch": 0.12, + "learning_rate": 4.979693056335392e-05, + "loss": 0.0799, + "step": 868 + }, + { + "epoch": 0.12, + "learning_rate": 4.979646266142617e-05, + "loss": 0.0891, + "step": 870 + }, + { + "epoch": 0.12, + "learning_rate": 4.979599475949841e-05, + "loss": 0.076, + "step": 872 + }, + { + "epoch": 0.12, + "learning_rate": 4.979552685757066e-05, + "loss": 0.084, + "step": 874 + }, + { + "epoch": 0.12, + "learning_rate": 4.97950589556429e-05, + "loss": 0.1022, + "step": 876 + }, + { + "epoch": 0.12, + "learning_rate": 4.9794591053715145e-05, + "loss": 0.0674, + "step": 878 + }, + { + "epoch": 0.12, + "learning_rate": 4.9794123151787384e-05, + "loss": 0.0637, + "step": 880 + }, + { + "epoch": 0.12, + "learning_rate": 4.9793655249859636e-05, + "loss": 0.0675, + "step": 882 + }, + { + "epoch": 0.12, + "learning_rate": 4.9793187347931876e-05, + "loss": 0.0714, + "step": 884 + }, + { + "epoch": 0.12, + "learning_rate": 4.979271944600412e-05, + "loss": 0.0624, + "step": 886 + }, + { + "epoch": 0.12, + "learning_rate": 4.979225154407636e-05, + "loss": 0.0751, + "step": 888 + }, + { + "epoch": 0.12, + "learning_rate": 4.9791783642148606e-05, + "loss": 0.0722, + "step": 890 + }, + { + "epoch": 0.13, + "learning_rate": 4.979131574022085e-05, + "loss": 0.0787, + "step": 892 + }, + { + "epoch": 0.13, + "learning_rate": 4.97908478382931e-05, + "loss": 0.0885, + "step": 894 + }, + { + "epoch": 0.13, + "learning_rate": 4.979037993636534e-05, + "loss": 0.084, + "step": 896 + }, + { + "epoch": 0.13, + "learning_rate": 4.978991203443758e-05, + "loss": 0.0701, + "step": 898 + }, + { + "epoch": 0.13, + "learning_rate": 4.978944413250983e-05, + "loss": 0.0832, + "step": 900 + }, + { + "epoch": 0.13, + "learning_rate": 4.9788976230582075e-05, + "loss": 0.0629, + "step": 902 + }, + { + "epoch": 0.13, + "learning_rate": 4.9788508328654314e-05, + "loss": 0.0877, + "step": 904 + }, + { + "epoch": 0.13, + "learning_rate": 4.978804042672656e-05, + "loss": 0.0544, + "step": 906 + }, + { + "epoch": 0.13, + "learning_rate": 4.9787572524798806e-05, + "loss": 0.0992, + "step": 908 + }, + { + "epoch": 0.13, + "learning_rate": 4.978710462287105e-05, + "loss": 0.0708, + "step": 910 + }, + { + "epoch": 0.13, + "learning_rate": 4.978663672094329e-05, + "loss": 0.1194, + "step": 912 + }, + { + "epoch": 0.13, + "learning_rate": 4.978616881901554e-05, + "loss": 0.0667, + "step": 914 + }, + { + "epoch": 0.13, + "learning_rate": 4.978570091708778e-05, + "loss": 0.0704, + "step": 916 + }, + { + "epoch": 0.13, + "learning_rate": 4.978523301516003e-05, + "loss": 0.0848, + "step": 918 + }, + { + "epoch": 0.13, + "learning_rate": 4.978476511323227e-05, + "loss": 0.0714, + "step": 920 + }, + { + "epoch": 0.13, + "learning_rate": 4.9784297211304514e-05, + "loss": 0.0827, + "step": 922 + }, + { + "epoch": 0.13, + "learning_rate": 4.978382930937675e-05, + "loss": 0.0753, + "step": 924 + }, + { + "epoch": 0.13, + "learning_rate": 4.9783361407449006e-05, + "loss": 0.0912, + "step": 926 + }, + { + "epoch": 0.13, + "learning_rate": 4.9782893505521245e-05, + "loss": 0.0688, + "step": 928 + }, + { + "epoch": 0.13, + "learning_rate": 4.978242560359349e-05, + "loss": 0.0669, + "step": 930 + }, + { + "epoch": 0.13, + "learning_rate": 4.978195770166573e-05, + "loss": 0.0779, + "step": 932 + }, + { + "epoch": 0.13, + "learning_rate": 4.9781489799737976e-05, + "loss": 0.0745, + "step": 934 + }, + { + "epoch": 0.13, + "learning_rate": 4.978102189781022e-05, + "loss": 0.0952, + "step": 936 + }, + { + "epoch": 0.13, + "learning_rate": 4.978055399588247e-05, + "loss": 0.0925, + "step": 938 + }, + { + "epoch": 0.13, + "learning_rate": 4.978008609395471e-05, + "loss": 0.0822, + "step": 940 + }, + { + "epoch": 0.13, + "learning_rate": 4.977961819202695e-05, + "loss": 0.0715, + "step": 942 + }, + { + "epoch": 0.13, + "learning_rate": 4.97791502900992e-05, + "loss": 0.0817, + "step": 944 + }, + { + "epoch": 0.13, + "learning_rate": 4.9778682388171445e-05, + "loss": 0.0675, + "step": 946 + }, + { + "epoch": 0.13, + "learning_rate": 4.9778214486243684e-05, + "loss": 0.0612, + "step": 948 + }, + { + "epoch": 0.13, + "learning_rate": 4.977774658431593e-05, + "loss": 0.0768, + "step": 950 + }, + { + "epoch": 0.13, + "learning_rate": 4.9777278682388176e-05, + "loss": 0.1107, + "step": 952 + }, + { + "epoch": 0.13, + "learning_rate": 4.977681078046042e-05, + "loss": 0.0712, + "step": 954 + }, + { + "epoch": 0.13, + "learning_rate": 4.977634287853266e-05, + "loss": 0.0712, + "step": 956 + }, + { + "epoch": 0.13, + "learning_rate": 4.9775874976604906e-05, + "loss": 0.0705, + "step": 958 + }, + { + "epoch": 0.13, + "learning_rate": 4.977540707467715e-05, + "loss": 0.0801, + "step": 960 + }, + { + "epoch": 0.14, + "learning_rate": 4.97749391727494e-05, + "loss": 0.0883, + "step": 962 + }, + { + "epoch": 0.14, + "learning_rate": 4.977447127082164e-05, + "loss": 0.0704, + "step": 964 + }, + { + "epoch": 0.14, + "learning_rate": 4.977400336889388e-05, + "loss": 0.0827, + "step": 966 + }, + { + "epoch": 0.14, + "learning_rate": 4.977353546696612e-05, + "loss": 0.0675, + "step": 968 + }, + { + "epoch": 0.14, + "learning_rate": 4.9773067565038375e-05, + "loss": 0.0628, + "step": 970 + }, + { + "epoch": 0.14, + "learning_rate": 4.9772599663110614e-05, + "loss": 0.0976, + "step": 972 + }, + { + "epoch": 0.14, + "learning_rate": 4.977213176118286e-05, + "loss": 0.0806, + "step": 974 + }, + { + "epoch": 0.14, + "learning_rate": 4.97716638592551e-05, + "loss": 0.0684, + "step": 976 + }, + { + "epoch": 0.14, + "learning_rate": 4.977119595732735e-05, + "loss": 0.069, + "step": 978 + }, + { + "epoch": 0.14, + "learning_rate": 4.977072805539959e-05, + "loss": 0.1089, + "step": 980 + }, + { + "epoch": 0.14, + "learning_rate": 4.977026015347184e-05, + "loss": 0.0738, + "step": 982 + }, + { + "epoch": 0.14, + "learning_rate": 4.9769792251544076e-05, + "loss": 0.0895, + "step": 984 + }, + { + "epoch": 0.14, + "learning_rate": 4.976932434961632e-05, + "loss": 0.0637, + "step": 986 + }, + { + "epoch": 0.14, + "learning_rate": 4.976885644768857e-05, + "loss": 0.0724, + "step": 988 + }, + { + "epoch": 0.14, + "learning_rate": 4.9768388545760814e-05, + "loss": 0.0606, + "step": 990 + }, + { + "epoch": 0.14, + "learning_rate": 4.976792064383305e-05, + "loss": 0.0679, + "step": 992 + }, + { + "epoch": 0.14, + "learning_rate": 4.97674527419053e-05, + "loss": 0.071, + "step": 994 + }, + { + "epoch": 0.14, + "learning_rate": 4.9766984839977545e-05, + "loss": 0.0872, + "step": 996 + }, + { + "epoch": 0.14, + "learning_rate": 4.976651693804979e-05, + "loss": 0.0745, + "step": 998 + }, + { + "epoch": 0.14, + "learning_rate": 4.976604903612203e-05, + "loss": 0.0872, + "step": 1000 + }, + { + "epoch": 0.14, + "eval_gen_len": 30.8621, + "eval_loss": 1.0442978143692017, + "eval_meteor": 0.0398, + "eval_runtime": 16.8946, + "eval_samples_per_second": 3.433, + "eval_steps_per_second": 0.474, + "step": 1000 + }, + { + "epoch": 0.14, + "learning_rate": 4.9765581134194276e-05, + "loss": 0.0728, + "step": 1002 + }, + { + "epoch": 0.14, + "learning_rate": 4.976511323226652e-05, + "loss": 0.0724, + "step": 1004 + }, + { + "epoch": 0.14, + "learning_rate": 4.976464533033877e-05, + "loss": 0.0668, + "step": 1006 + }, + { + "epoch": 0.14, + "learning_rate": 4.976417742841101e-05, + "loss": 0.0669, + "step": 1008 + }, + { + "epoch": 0.14, + "learning_rate": 4.976370952648325e-05, + "loss": 0.0745, + "step": 1010 + }, + { + "epoch": 0.14, + "learning_rate": 4.97632416245555e-05, + "loss": 0.0742, + "step": 1012 + }, + { + "epoch": 0.14, + "learning_rate": 4.976277372262774e-05, + "loss": 0.0789, + "step": 1014 + }, + { + "epoch": 0.14, + "learning_rate": 4.9762305820699984e-05, + "loss": 0.0911, + "step": 1016 + }, + { + "epoch": 0.14, + "learning_rate": 4.976183791877222e-05, + "loss": 0.0868, + "step": 1018 + }, + { + "epoch": 0.14, + "learning_rate": 4.976137001684447e-05, + "loss": 0.0685, + "step": 1020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9760902114916715e-05, + "loss": 0.0819, + "step": 1022 + }, + { + "epoch": 0.14, + "learning_rate": 4.976043421298896e-05, + "loss": 0.0797, + "step": 1024 + }, + { + "epoch": 0.14, + "learning_rate": 4.97599663110612e-05, + "loss": 0.0794, + "step": 1026 + }, + { + "epoch": 0.14, + "learning_rate": 4.9759498409133446e-05, + "loss": 0.0893, + "step": 1028 + }, + { + "epoch": 0.14, + "learning_rate": 4.975903050720569e-05, + "loss": 0.1106, + "step": 1030 + }, + { + "epoch": 0.14, + "learning_rate": 4.975856260527794e-05, + "loss": 0.0689, + "step": 1032 + }, + { + "epoch": 0.15, + "learning_rate": 4.9758094703350177e-05, + "loss": 0.0797, + "step": 1034 + }, + { + "epoch": 0.15, + "learning_rate": 4.975762680142242e-05, + "loss": 0.0948, + "step": 1036 + }, + { + "epoch": 0.15, + "learning_rate": 4.975715889949467e-05, + "loss": 0.0694, + "step": 1038 + }, + { + "epoch": 0.15, + "learning_rate": 4.9756690997566914e-05, + "loss": 0.0811, + "step": 1040 + }, + { + "epoch": 0.15, + "learning_rate": 4.9756223095639153e-05, + "loss": 0.0649, + "step": 1042 + }, + { + "epoch": 0.15, + "learning_rate": 4.97557551937114e-05, + "loss": 0.0638, + "step": 1044 + }, + { + "epoch": 0.15, + "learning_rate": 4.9755287291783645e-05, + "loss": 0.0785, + "step": 1046 + }, + { + "epoch": 0.15, + "learning_rate": 4.975481938985589e-05, + "loss": 0.0799, + "step": 1048 + }, + { + "epoch": 0.15, + "learning_rate": 4.975435148792813e-05, + "loss": 0.0764, + "step": 1050 + }, + { + "epoch": 0.15, + "learning_rate": 4.9753883586000376e-05, + "loss": 0.0614, + "step": 1052 + }, + { + "epoch": 0.15, + "learning_rate": 4.9753415684072615e-05, + "loss": 0.096, + "step": 1054 + }, + { + "epoch": 0.15, + "learning_rate": 4.975294778214487e-05, + "loss": 0.0754, + "step": 1056 + }, + { + "epoch": 0.15, + "learning_rate": 4.975247988021711e-05, + "loss": 0.0629, + "step": 1058 + }, + { + "epoch": 0.15, + "learning_rate": 4.975201197828935e-05, + "loss": 0.0691, + "step": 1060 + }, + { + "epoch": 0.15, + "learning_rate": 4.975154407636159e-05, + "loss": 0.0723, + "step": 1062 + }, + { + "epoch": 0.15, + "learning_rate": 4.9751076174433845e-05, + "loss": 0.0716, + "step": 1064 + }, + { + "epoch": 0.15, + "learning_rate": 4.9750608272506084e-05, + "loss": 0.103, + "step": 1066 + }, + { + "epoch": 0.15, + "learning_rate": 4.975014037057833e-05, + "loss": 0.0746, + "step": 1068 + }, + { + "epoch": 0.15, + "learning_rate": 4.974967246865057e-05, + "loss": 0.0882, + "step": 1070 + }, + { + "epoch": 0.15, + "learning_rate": 4.9749204566722815e-05, + "loss": 0.0734, + "step": 1072 + }, + { + "epoch": 0.15, + "learning_rate": 4.974873666479506e-05, + "loss": 0.083, + "step": 1074 + }, + { + "epoch": 0.15, + "learning_rate": 4.974826876286731e-05, + "loss": 0.0744, + "step": 1076 + }, + { + "epoch": 0.15, + "learning_rate": 4.9747800860939546e-05, + "loss": 0.0658, + "step": 1078 + }, + { + "epoch": 0.15, + "learning_rate": 4.974733295901179e-05, + "loss": 0.0993, + "step": 1080 + }, + { + "epoch": 0.15, + "learning_rate": 4.974686505708404e-05, + "loss": 0.092, + "step": 1082 + }, + { + "epoch": 0.15, + "learning_rate": 4.9746397155156284e-05, + "loss": 0.0776, + "step": 1084 + }, + { + "epoch": 0.15, + "learning_rate": 4.974592925322852e-05, + "loss": 0.091, + "step": 1086 + }, + { + "epoch": 0.15, + "learning_rate": 4.974546135130077e-05, + "loss": 0.0613, + "step": 1088 + }, + { + "epoch": 0.15, + "learning_rate": 4.9744993449373015e-05, + "loss": 0.0721, + "step": 1090 + }, + { + "epoch": 0.15, + "learning_rate": 4.974452554744526e-05, + "loss": 0.0725, + "step": 1092 + }, + { + "epoch": 0.15, + "learning_rate": 4.97440576455175e-05, + "loss": 0.0686, + "step": 1094 + }, + { + "epoch": 0.15, + "learning_rate": 4.9743589743589746e-05, + "loss": 0.0711, + "step": 1096 + }, + { + "epoch": 0.15, + "learning_rate": 4.974312184166199e-05, + "loss": 0.0771, + "step": 1098 + }, + { + "epoch": 0.15, + "learning_rate": 4.974265393973424e-05, + "loss": 0.0777, + "step": 1100 + }, + { + "epoch": 0.15, + "learning_rate": 4.974218603780648e-05, + "loss": 0.0696, + "step": 1102 + }, + { + "epoch": 0.15, + "learning_rate": 4.974171813587872e-05, + "loss": 0.0804, + "step": 1104 + }, + { + "epoch": 0.16, + "learning_rate": 4.974125023395096e-05, + "loss": 0.0923, + "step": 1106 + }, + { + "epoch": 0.16, + "learning_rate": 4.9740782332023214e-05, + "loss": 0.0609, + "step": 1108 + }, + { + "epoch": 0.16, + "learning_rate": 4.9740314430095453e-05, + "loss": 0.095, + "step": 1110 + }, + { + "epoch": 0.16, + "learning_rate": 4.97398465281677e-05, + "loss": 0.0916, + "step": 1112 + }, + { + "epoch": 0.16, + "learning_rate": 4.973937862623994e-05, + "loss": 0.0593, + "step": 1114 + }, + { + "epoch": 0.16, + "learning_rate": 4.9738910724312184e-05, + "loss": 0.0868, + "step": 1116 + }, + { + "epoch": 0.16, + "learning_rate": 4.973844282238443e-05, + "loss": 0.0645, + "step": 1118 + }, + { + "epoch": 0.16, + "learning_rate": 4.9737974920456676e-05, + "loss": 0.0774, + "step": 1120 + }, + { + "epoch": 0.16, + "learning_rate": 4.9737507018528915e-05, + "loss": 0.0776, + "step": 1122 + }, + { + "epoch": 0.16, + "learning_rate": 4.973703911660116e-05, + "loss": 0.0825, + "step": 1124 + }, + { + "epoch": 0.16, + "learning_rate": 4.973657121467341e-05, + "loss": 0.0764, + "step": 1126 + }, + { + "epoch": 0.16, + "learning_rate": 4.973610331274565e-05, + "loss": 0.0721, + "step": 1128 + }, + { + "epoch": 0.16, + "learning_rate": 4.973563541081789e-05, + "loss": 0.0665, + "step": 1130 + }, + { + "epoch": 0.16, + "learning_rate": 4.973516750889014e-05, + "loss": 0.0799, + "step": 1132 + }, + { + "epoch": 0.16, + "learning_rate": 4.9734699606962384e-05, + "loss": 0.0929, + "step": 1134 + }, + { + "epoch": 0.16, + "learning_rate": 4.973423170503463e-05, + "loss": 0.0776, + "step": 1136 + }, + { + "epoch": 0.16, + "learning_rate": 4.973376380310687e-05, + "loss": 0.0908, + "step": 1138 + }, + { + "epoch": 0.16, + "learning_rate": 4.9733295901179115e-05, + "loss": 0.0844, + "step": 1140 + }, + { + "epoch": 0.16, + "learning_rate": 4.973282799925136e-05, + "loss": 0.0699, + "step": 1142 + }, + { + "epoch": 0.16, + "learning_rate": 4.973236009732361e-05, + "loss": 0.0734, + "step": 1144 + }, + { + "epoch": 0.16, + "learning_rate": 4.9731892195395846e-05, + "loss": 0.0761, + "step": 1146 + }, + { + "epoch": 0.16, + "learning_rate": 4.973142429346809e-05, + "loss": 0.0918, + "step": 1148 + }, + { + "epoch": 0.16, + "learning_rate": 4.973095639154033e-05, + "loss": 0.0646, + "step": 1150 + }, + { + "epoch": 0.16, + "learning_rate": 4.9730488489612584e-05, + "loss": 0.0588, + "step": 1152 + }, + { + "epoch": 0.16, + "learning_rate": 4.973002058768482e-05, + "loss": 0.113, + "step": 1154 + }, + { + "epoch": 0.16, + "learning_rate": 4.972955268575707e-05, + "loss": 0.0782, + "step": 1156 + }, + { + "epoch": 0.16, + "learning_rate": 4.972908478382931e-05, + "loss": 0.0707, + "step": 1158 + }, + { + "epoch": 0.16, + "learning_rate": 4.972861688190156e-05, + "loss": 0.0718, + "step": 1160 + }, + { + "epoch": 0.16, + "learning_rate": 4.97281489799738e-05, + "loss": 0.0861, + "step": 1162 + }, + { + "epoch": 0.16, + "learning_rate": 4.9727681078046046e-05, + "loss": 0.0731, + "step": 1164 + }, + { + "epoch": 0.16, + "learning_rate": 4.9727213176118285e-05, + "loss": 0.0554, + "step": 1166 + }, + { + "epoch": 0.16, + "learning_rate": 4.972674527419053e-05, + "loss": 0.0887, + "step": 1168 + }, + { + "epoch": 0.16, + "learning_rate": 4.972627737226278e-05, + "loss": 0.07, + "step": 1170 + }, + { + "epoch": 0.16, + "learning_rate": 4.972580947033502e-05, + "loss": 0.0858, + "step": 1172 + }, + { + "epoch": 0.16, + "learning_rate": 4.972534156840726e-05, + "loss": 0.0911, + "step": 1174 + }, + { + "epoch": 0.17, + "learning_rate": 4.972487366647951e-05, + "loss": 0.1087, + "step": 1176 + }, + { + "epoch": 0.17, + "learning_rate": 4.9724405764551754e-05, + "loss": 0.0649, + "step": 1178 + }, + { + "epoch": 0.17, + "learning_rate": 4.9723937862624e-05, + "loss": 0.074, + "step": 1180 + }, + { + "epoch": 0.17, + "learning_rate": 4.972346996069624e-05, + "loss": 0.0718, + "step": 1182 + }, + { + "epoch": 0.17, + "learning_rate": 4.9723002058768484e-05, + "loss": 0.0853, + "step": 1184 + }, + { + "epoch": 0.17, + "learning_rate": 4.972253415684073e-05, + "loss": 0.0804, + "step": 1186 + }, + { + "epoch": 0.17, + "learning_rate": 4.9722066254912976e-05, + "loss": 0.0734, + "step": 1188 + }, + { + "epoch": 0.17, + "learning_rate": 4.9721598352985215e-05, + "loss": 0.0886, + "step": 1190 + }, + { + "epoch": 0.17, + "learning_rate": 4.972113045105746e-05, + "loss": 0.0633, + "step": 1192 + }, + { + "epoch": 0.17, + "learning_rate": 4.972066254912971e-05, + "loss": 0.0832, + "step": 1194 + }, + { + "epoch": 0.17, + "learning_rate": 4.972019464720195e-05, + "loss": 0.064, + "step": 1196 + }, + { + "epoch": 0.17, + "learning_rate": 4.971972674527419e-05, + "loss": 0.093, + "step": 1198 + }, + { + "epoch": 0.17, + "learning_rate": 4.971925884334644e-05, + "loss": 0.0718, + "step": 1200 + }, + { + "epoch": 0.17, + "learning_rate": 4.971879094141868e-05, + "loss": 0.0691, + "step": 1202 + }, + { + "epoch": 0.17, + "learning_rate": 4.971832303949093e-05, + "loss": 0.071, + "step": 1204 + }, + { + "epoch": 0.17, + "learning_rate": 4.971785513756317e-05, + "loss": 0.0604, + "step": 1206 + }, + { + "epoch": 0.17, + "learning_rate": 4.9717387235635415e-05, + "loss": 0.0821, + "step": 1208 + }, + { + "epoch": 0.17, + "learning_rate": 4.9716919333707654e-05, + "loss": 0.0756, + "step": 1210 + }, + { + "epoch": 0.17, + "learning_rate": 4.97164514317799e-05, + "loss": 0.0643, + "step": 1212 + }, + { + "epoch": 0.17, + "learning_rate": 4.9715983529852146e-05, + "loss": 0.1184, + "step": 1214 + }, + { + "epoch": 0.17, + "learning_rate": 4.971551562792439e-05, + "loss": 0.0718, + "step": 1216 + }, + { + "epoch": 0.17, + "learning_rate": 4.971504772599663e-05, + "loss": 0.0878, + "step": 1218 + }, + { + "epoch": 0.17, + "learning_rate": 4.971457982406888e-05, + "loss": 0.0799, + "step": 1220 + }, + { + "epoch": 0.17, + "learning_rate": 4.971411192214112e-05, + "loss": 0.0843, + "step": 1222 + }, + { + "epoch": 0.17, + "learning_rate": 4.971364402021337e-05, + "loss": 0.085, + "step": 1224 + }, + { + "epoch": 0.17, + "learning_rate": 4.971317611828561e-05, + "loss": 0.0735, + "step": 1226 + }, + { + "epoch": 0.17, + "learning_rate": 4.9712708216357854e-05, + "loss": 0.0596, + "step": 1228 + }, + { + "epoch": 0.17, + "learning_rate": 4.97122403144301e-05, + "loss": 0.0913, + "step": 1230 + }, + { + "epoch": 0.17, + "learning_rate": 4.9711772412502346e-05, + "loss": 0.0649, + "step": 1232 + }, + { + "epoch": 0.17, + "learning_rate": 4.9711304510574585e-05, + "loss": 0.0815, + "step": 1234 + }, + { + "epoch": 0.17, + "learning_rate": 4.971083660864683e-05, + "loss": 0.0671, + "step": 1236 + }, + { + "epoch": 0.17, + "learning_rate": 4.971036870671908e-05, + "loss": 0.0909, + "step": 1238 + }, + { + "epoch": 0.17, + "learning_rate": 4.970990080479132e-05, + "loss": 0.0842, + "step": 1240 + }, + { + "epoch": 0.17, + "learning_rate": 4.970943290286356e-05, + "loss": 0.0687, + "step": 1242 + }, + { + "epoch": 0.17, + "learning_rate": 4.970896500093581e-05, + "loss": 0.0708, + "step": 1244 + }, + { + "epoch": 0.17, + "learning_rate": 4.970849709900805e-05, + "loss": 0.0764, + "step": 1246 + }, + { + "epoch": 0.18, + "learning_rate": 4.97080291970803e-05, + "loss": 0.097, + "step": 1248 + }, + { + "epoch": 0.18, + "learning_rate": 4.970756129515254e-05, + "loss": 0.0811, + "step": 1250 + }, + { + "epoch": 0.18, + "learning_rate": 4.9707093393224784e-05, + "loss": 0.0683, + "step": 1252 + }, + { + "epoch": 0.18, + "learning_rate": 4.9706625491297024e-05, + "loss": 0.0878, + "step": 1254 + }, + { + "epoch": 0.18, + "learning_rate": 4.9706157589369276e-05, + "loss": 0.0864, + "step": 1256 + }, + { + "epoch": 0.18, + "learning_rate": 4.9705689687441515e-05, + "loss": 0.0921, + "step": 1258 + }, + { + "epoch": 0.18, + "learning_rate": 4.970522178551376e-05, + "loss": 0.0966, + "step": 1260 + }, + { + "epoch": 0.18, + "learning_rate": 4.9704753883586e-05, + "loss": 0.081, + "step": 1262 + }, + { + "epoch": 0.18, + "learning_rate": 4.9704285981658246e-05, + "loss": 0.1004, + "step": 1264 + }, + { + "epoch": 0.18, + "learning_rate": 4.970381807973049e-05, + "loss": 0.0775, + "step": 1266 + }, + { + "epoch": 0.18, + "learning_rate": 4.970335017780273e-05, + "loss": 0.0964, + "step": 1268 + }, + { + "epoch": 0.18, + "learning_rate": 4.970288227587498e-05, + "loss": 0.0648, + "step": 1270 + }, + { + "epoch": 0.18, + "learning_rate": 4.970241437394722e-05, + "loss": 0.0906, + "step": 1272 + }, + { + "epoch": 0.18, + "learning_rate": 4.970194647201947e-05, + "loss": 0.0682, + "step": 1274 + }, + { + "epoch": 0.18, + "learning_rate": 4.970147857009171e-05, + "loss": 0.0713, + "step": 1276 + }, + { + "epoch": 0.18, + "learning_rate": 4.9701010668163954e-05, + "loss": 0.0702, + "step": 1278 + }, + { + "epoch": 0.18, + "learning_rate": 4.970054276623619e-05, + "loss": 0.0809, + "step": 1280 + }, + { + "epoch": 0.18, + "learning_rate": 4.9700074864308446e-05, + "loss": 0.0708, + "step": 1282 + }, + { + "epoch": 0.18, + "learning_rate": 4.9699606962380685e-05, + "loss": 0.0661, + "step": 1284 + }, + { + "epoch": 0.18, + "learning_rate": 4.969913906045293e-05, + "loss": 0.0763, + "step": 1286 + }, + { + "epoch": 0.18, + "learning_rate": 4.969867115852517e-05, + "loss": 0.0946, + "step": 1288 + }, + { + "epoch": 0.18, + "learning_rate": 4.969820325659742e-05, + "loss": 0.0696, + "step": 1290 + }, + { + "epoch": 0.18, + "learning_rate": 4.969773535466966e-05, + "loss": 0.1217, + "step": 1292 + }, + { + "epoch": 0.18, + "learning_rate": 4.969726745274191e-05, + "loss": 0.0738, + "step": 1294 + }, + { + "epoch": 0.18, + "learning_rate": 4.969679955081415e-05, + "loss": 0.0721, + "step": 1296 + }, + { + "epoch": 0.18, + "learning_rate": 4.969633164888639e-05, + "loss": 0.0921, + "step": 1298 + }, + { + "epoch": 0.18, + "learning_rate": 4.969586374695864e-05, + "loss": 0.0764, + "step": 1300 + }, + { + "epoch": 0.18, + "learning_rate": 4.9695395845030885e-05, + "loss": 0.0703, + "step": 1302 + }, + { + "epoch": 0.18, + "learning_rate": 4.9694927943103124e-05, + "loss": 0.0706, + "step": 1304 + }, + { + "epoch": 0.18, + "learning_rate": 4.969446004117537e-05, + "loss": 0.0656, + "step": 1306 + }, + { + "epoch": 0.18, + "learning_rate": 4.9693992139247616e-05, + "loss": 0.0598, + "step": 1308 + }, + { + "epoch": 0.18, + "learning_rate": 4.969352423731986e-05, + "loss": 0.1019, + "step": 1310 + }, + { + "epoch": 0.18, + "learning_rate": 4.96930563353921e-05, + "loss": 0.0666, + "step": 1312 + }, + { + "epoch": 0.18, + "learning_rate": 4.969258843346435e-05, + "loss": 0.0836, + "step": 1314 + }, + { + "epoch": 0.18, + "learning_rate": 4.969212053153659e-05, + "loss": 0.1002, + "step": 1316 + }, + { + "epoch": 0.19, + "learning_rate": 4.969165262960884e-05, + "loss": 0.0828, + "step": 1318 + }, + { + "epoch": 0.19, + "learning_rate": 4.969118472768108e-05, + "loss": 0.0883, + "step": 1320 + }, + { + "epoch": 0.19, + "learning_rate": 4.9690716825753324e-05, + "loss": 0.0744, + "step": 1322 + }, + { + "epoch": 0.19, + "learning_rate": 4.969024892382557e-05, + "loss": 0.0969, + "step": 1324 + }, + { + "epoch": 0.19, + "learning_rate": 4.9689781021897815e-05, + "loss": 0.0907, + "step": 1326 + }, + { + "epoch": 0.19, + "learning_rate": 4.9689313119970055e-05, + "loss": 0.0732, + "step": 1328 + }, + { + "epoch": 0.19, + "learning_rate": 4.96888452180423e-05, + "loss": 0.0717, + "step": 1330 + }, + { + "epoch": 0.19, + "learning_rate": 4.968837731611454e-05, + "loss": 0.073, + "step": 1332 + }, + { + "epoch": 0.19, + "learning_rate": 4.968790941418679e-05, + "loss": 0.0989, + "step": 1334 + }, + { + "epoch": 0.19, + "learning_rate": 4.968744151225903e-05, + "loss": 0.0718, + "step": 1336 + }, + { + "epoch": 0.19, + "learning_rate": 4.968697361033128e-05, + "loss": 0.0672, + "step": 1338 + }, + { + "epoch": 0.19, + "learning_rate": 4.9686505708403517e-05, + "loss": 0.0547, + "step": 1340 + }, + { + "epoch": 0.19, + "learning_rate": 4.968603780647577e-05, + "loss": 0.0972, + "step": 1342 + }, + { + "epoch": 0.19, + "learning_rate": 4.968556990454801e-05, + "loss": 0.0707, + "step": 1344 + }, + { + "epoch": 0.19, + "learning_rate": 4.9685102002620254e-05, + "loss": 0.0644, + "step": 1346 + }, + { + "epoch": 0.19, + "learning_rate": 4.968463410069249e-05, + "loss": 0.0886, + "step": 1348 + }, + { + "epoch": 0.19, + "learning_rate": 4.968416619876474e-05, + "loss": 0.1001, + "step": 1350 + }, + { + "epoch": 0.19, + "learning_rate": 4.9683698296836985e-05, + "loss": 0.0992, + "step": 1352 + }, + { + "epoch": 0.19, + "learning_rate": 4.968323039490923e-05, + "loss": 0.0732, + "step": 1354 + }, + { + "epoch": 0.19, + "learning_rate": 4.968276249298147e-05, + "loss": 0.086, + "step": 1356 + }, + { + "epoch": 0.19, + "learning_rate": 4.9682294591053716e-05, + "loss": 0.08, + "step": 1358 + }, + { + "epoch": 0.19, + "learning_rate": 4.968182668912596e-05, + "loss": 0.1046, + "step": 1360 + }, + { + "epoch": 0.19, + "learning_rate": 4.968135878719821e-05, + "loss": 0.0528, + "step": 1362 + }, + { + "epoch": 0.19, + "learning_rate": 4.968089088527045e-05, + "loss": 0.0786, + "step": 1364 + }, + { + "epoch": 0.19, + "learning_rate": 4.968042298334269e-05, + "loss": 0.0762, + "step": 1366 + }, + { + "epoch": 0.19, + "learning_rate": 4.967995508141494e-05, + "loss": 0.098, + "step": 1368 + }, + { + "epoch": 0.19, + "learning_rate": 4.9679487179487185e-05, + "loss": 0.0655, + "step": 1370 + }, + { + "epoch": 0.19, + "learning_rate": 4.9679019277559424e-05, + "loss": 0.079, + "step": 1372 + }, + { + "epoch": 0.19, + "learning_rate": 4.967855137563167e-05, + "loss": 0.0746, + "step": 1374 + }, + { + "epoch": 0.19, + "learning_rate": 4.9678083473703916e-05, + "loss": 0.0864, + "step": 1376 + }, + { + "epoch": 0.19, + "learning_rate": 4.967761557177616e-05, + "loss": 0.0659, + "step": 1378 + }, + { + "epoch": 0.19, + "learning_rate": 4.96771476698484e-05, + "loss": 0.0851, + "step": 1380 + }, + { + "epoch": 0.19, + "learning_rate": 4.967667976792065e-05, + "loss": 0.0959, + "step": 1382 + }, + { + "epoch": 0.19, + "learning_rate": 4.9676211865992886e-05, + "loss": 0.103, + "step": 1384 + }, + { + "epoch": 0.19, + "learning_rate": 4.967574396406514e-05, + "loss": 0.0967, + "step": 1386 + }, + { + "epoch": 0.19, + "learning_rate": 4.967527606213738e-05, + "loss": 0.0623, + "step": 1388 + }, + { + "epoch": 0.2, + "learning_rate": 4.9674808160209624e-05, + "loss": 0.0926, + "step": 1390 + }, + { + "epoch": 0.2, + "learning_rate": 4.967434025828186e-05, + "loss": 0.09, + "step": 1392 + }, + { + "epoch": 0.2, + "learning_rate": 4.967387235635411e-05, + "loss": 0.0794, + "step": 1394 + }, + { + "epoch": 0.2, + "learning_rate": 4.9673404454426355e-05, + "loss": 0.096, + "step": 1396 + }, + { + "epoch": 0.2, + "learning_rate": 4.96729365524986e-05, + "loss": 0.0793, + "step": 1398 + }, + { + "epoch": 0.2, + "learning_rate": 4.967246865057084e-05, + "loss": 0.0692, + "step": 1400 + }, + { + "epoch": 0.2, + "learning_rate": 4.9672000748643086e-05, + "loss": 0.0781, + "step": 1402 + }, + { + "epoch": 0.2, + "learning_rate": 4.967153284671533e-05, + "loss": 0.0773, + "step": 1404 + }, + { + "epoch": 0.2, + "learning_rate": 4.967106494478758e-05, + "loss": 0.0641, + "step": 1406 + }, + { + "epoch": 0.2, + "learning_rate": 4.9670597042859817e-05, + "loss": 0.0957, + "step": 1408 + }, + { + "epoch": 0.2, + "learning_rate": 4.967012914093206e-05, + "loss": 0.0772, + "step": 1410 + }, + { + "epoch": 0.2, + "learning_rate": 4.966966123900431e-05, + "loss": 0.0795, + "step": 1412 + }, + { + "epoch": 0.2, + "learning_rate": 4.9669193337076554e-05, + "loss": 0.0783, + "step": 1414 + }, + { + "epoch": 0.2, + "learning_rate": 4.9668725435148793e-05, + "loss": 0.0703, + "step": 1416 + }, + { + "epoch": 0.2, + "learning_rate": 4.966825753322104e-05, + "loss": 0.0983, + "step": 1418 + }, + { + "epoch": 0.2, + "learning_rate": 4.9667789631293285e-05, + "loss": 0.1161, + "step": 1420 + }, + { + "epoch": 0.2, + "learning_rate": 4.966732172936553e-05, + "loss": 0.0694, + "step": 1422 + }, + { + "epoch": 0.2, + "learning_rate": 4.966685382743777e-05, + "loss": 0.0845, + "step": 1424 + }, + { + "epoch": 0.2, + "learning_rate": 4.9666385925510016e-05, + "loss": 0.0769, + "step": 1426 + }, + { + "epoch": 0.2, + "learning_rate": 4.9665918023582255e-05, + "loss": 0.0982, + "step": 1428 + }, + { + "epoch": 0.2, + "learning_rate": 4.966545012165451e-05, + "loss": 0.08, + "step": 1430 + }, + { + "epoch": 0.2, + "learning_rate": 4.966498221972675e-05, + "loss": 0.0748, + "step": 1432 + }, + { + "epoch": 0.2, + "learning_rate": 4.966451431779899e-05, + "loss": 0.1047, + "step": 1434 + }, + { + "epoch": 0.2, + "learning_rate": 4.966404641587123e-05, + "loss": 0.0887, + "step": 1436 + }, + { + "epoch": 0.2, + "learning_rate": 4.9663578513943485e-05, + "loss": 0.0544, + "step": 1438 + }, + { + "epoch": 0.2, + "learning_rate": 4.9663110612015724e-05, + "loss": 0.068, + "step": 1440 + }, + { + "epoch": 0.2, + "learning_rate": 4.966264271008797e-05, + "loss": 0.0825, + "step": 1442 + }, + { + "epoch": 0.2, + "learning_rate": 4.966217480816021e-05, + "loss": 0.0847, + "step": 1444 + }, + { + "epoch": 0.2, + "learning_rate": 4.9661706906232455e-05, + "loss": 0.0638, + "step": 1446 + }, + { + "epoch": 0.2, + "learning_rate": 4.96612390043047e-05, + "loss": 0.0694, + "step": 1448 + }, + { + "epoch": 0.2, + "learning_rate": 4.966077110237695e-05, + "loss": 0.1114, + "step": 1450 + }, + { + "epoch": 0.2, + "learning_rate": 4.9660303200449186e-05, + "loss": 0.084, + "step": 1452 + }, + { + "epoch": 0.2, + "learning_rate": 4.965983529852143e-05, + "loss": 0.0949, + "step": 1454 + }, + { + "epoch": 0.2, + "learning_rate": 4.965936739659368e-05, + "loss": 0.0833, + "step": 1456 + }, + { + "epoch": 0.2, + "learning_rate": 4.9658899494665924e-05, + "loss": 0.0765, + "step": 1458 + }, + { + "epoch": 0.2, + "learning_rate": 4.965843159273816e-05, + "loss": 0.1126, + "step": 1460 + }, + { + "epoch": 0.21, + "learning_rate": 4.965796369081041e-05, + "loss": 0.0751, + "step": 1462 + }, + { + "epoch": 0.21, + "learning_rate": 4.9657495788882655e-05, + "loss": 0.0913, + "step": 1464 + }, + { + "epoch": 0.21, + "learning_rate": 4.96570278869549e-05, + "loss": 0.0793, + "step": 1466 + }, + { + "epoch": 0.21, + "learning_rate": 4.965655998502714e-05, + "loss": 0.0663, + "step": 1468 + }, + { + "epoch": 0.21, + "learning_rate": 4.9656092083099386e-05, + "loss": 0.0732, + "step": 1470 + }, + { + "epoch": 0.21, + "learning_rate": 4.965562418117163e-05, + "loss": 0.0885, + "step": 1472 + }, + { + "epoch": 0.21, + "learning_rate": 4.965515627924388e-05, + "loss": 0.0798, + "step": 1474 + }, + { + "epoch": 0.21, + "learning_rate": 4.9654688377316117e-05, + "loss": 0.083, + "step": 1476 + }, + { + "epoch": 0.21, + "learning_rate": 4.965422047538836e-05, + "loss": 0.0648, + "step": 1478 + }, + { + "epoch": 0.21, + "learning_rate": 4.96537525734606e-05, + "loss": 0.0861, + "step": 1480 + }, + { + "epoch": 0.21, + "learning_rate": 4.9653284671532854e-05, + "loss": 0.0934, + "step": 1482 + }, + { + "epoch": 0.21, + "learning_rate": 4.9652816769605093e-05, + "loss": 0.0993, + "step": 1484 + }, + { + "epoch": 0.21, + "learning_rate": 4.965234886767734e-05, + "loss": 0.0917, + "step": 1486 + }, + { + "epoch": 0.21, + "learning_rate": 4.965188096574958e-05, + "loss": 0.0759, + "step": 1488 + }, + { + "epoch": 0.21, + "learning_rate": 4.965141306382183e-05, + "loss": 0.0847, + "step": 1490 + }, + { + "epoch": 0.21, + "learning_rate": 4.965094516189407e-05, + "loss": 0.0854, + "step": 1492 + }, + { + "epoch": 0.21, + "learning_rate": 4.9650477259966316e-05, + "loss": 0.0781, + "step": 1494 + }, + { + "epoch": 0.21, + "learning_rate": 4.9650009358038555e-05, + "loss": 0.089, + "step": 1496 + }, + { + "epoch": 0.21, + "learning_rate": 4.96495414561108e-05, + "loss": 0.0684, + "step": 1498 + }, + { + "epoch": 0.21, + "learning_rate": 4.964907355418305e-05, + "loss": 0.0786, + "step": 1500 + }, + { + "epoch": 0.21, + "learning_rate": 4.964860565225529e-05, + "loss": 0.089, + "step": 1502 + }, + { + "epoch": 0.21, + "learning_rate": 4.964813775032753e-05, + "loss": 0.0815, + "step": 1504 + }, + { + "epoch": 0.21, + "learning_rate": 4.964766984839978e-05, + "loss": 0.067, + "step": 1506 + }, + { + "epoch": 0.21, + "learning_rate": 4.9647201946472024e-05, + "loss": 0.0887, + "step": 1508 + }, + { + "epoch": 0.21, + "learning_rate": 4.964673404454427e-05, + "loss": 0.0788, + "step": 1510 + }, + { + "epoch": 0.21, + "learning_rate": 4.964626614261651e-05, + "loss": 0.062, + "step": 1512 + }, + { + "epoch": 0.21, + "learning_rate": 4.9645798240688755e-05, + "loss": 0.0913, + "step": 1514 + }, + { + "epoch": 0.21, + "learning_rate": 4.9645330338761e-05, + "loss": 0.0805, + "step": 1516 + }, + { + "epoch": 0.21, + "learning_rate": 4.964486243683325e-05, + "loss": 0.0733, + "step": 1518 + }, + { + "epoch": 0.21, + "learning_rate": 4.9644394534905486e-05, + "loss": 0.0832, + "step": 1520 + }, + { + "epoch": 0.21, + "learning_rate": 4.9643926632977725e-05, + "loss": 0.0691, + "step": 1522 + }, + { + "epoch": 0.21, + "learning_rate": 4.964345873104997e-05, + "loss": 0.0676, + "step": 1524 + }, + { + "epoch": 0.21, + "learning_rate": 4.964299082912222e-05, + "loss": 0.069, + "step": 1526 + }, + { + "epoch": 0.21, + "learning_rate": 4.964252292719446e-05, + "loss": 0.0831, + "step": 1528 + }, + { + "epoch": 0.21, + "learning_rate": 4.96420550252667e-05, + "loss": 0.0967, + "step": 1530 + }, + { + "epoch": 0.22, + "learning_rate": 4.964158712333895e-05, + "loss": 0.0859, + "step": 1532 + }, + { + "epoch": 0.22, + "learning_rate": 4.9641119221411194e-05, + "loss": 0.0698, + "step": 1534 + }, + { + "epoch": 0.22, + "learning_rate": 4.964065131948344e-05, + "loss": 0.0774, + "step": 1536 + }, + { + "epoch": 0.22, + "learning_rate": 4.964018341755568e-05, + "loss": 0.0931, + "step": 1538 + }, + { + "epoch": 0.22, + "learning_rate": 4.9639715515627925e-05, + "loss": 0.0637, + "step": 1540 + }, + { + "epoch": 0.22, + "learning_rate": 4.963924761370017e-05, + "loss": 0.0944, + "step": 1542 + }, + { + "epoch": 0.22, + "learning_rate": 4.9638779711772417e-05, + "loss": 0.0829, + "step": 1544 + }, + { + "epoch": 0.22, + "learning_rate": 4.9638311809844656e-05, + "loss": 0.0705, + "step": 1546 + }, + { + "epoch": 0.22, + "learning_rate": 4.96378439079169e-05, + "loss": 0.0874, + "step": 1548 + }, + { + "epoch": 0.22, + "learning_rate": 4.963737600598915e-05, + "loss": 0.0955, + "step": 1550 + }, + { + "epoch": 0.22, + "learning_rate": 4.9636908104061393e-05, + "loss": 0.0767, + "step": 1552 + }, + { + "epoch": 0.22, + "learning_rate": 4.963644020213363e-05, + "loss": 0.0824, + "step": 1554 + }, + { + "epoch": 0.22, + "learning_rate": 4.963597230020588e-05, + "loss": 0.0621, + "step": 1556 + }, + { + "epoch": 0.22, + "learning_rate": 4.963550439827812e-05, + "loss": 0.0668, + "step": 1558 + }, + { + "epoch": 0.22, + "learning_rate": 4.963503649635037e-05, + "loss": 0.0944, + "step": 1560 + }, + { + "epoch": 0.22, + "learning_rate": 4.963456859442261e-05, + "loss": 0.0821, + "step": 1562 + }, + { + "epoch": 0.22, + "learning_rate": 4.9634100692494855e-05, + "loss": 0.0802, + "step": 1564 + }, + { + "epoch": 0.22, + "learning_rate": 4.9633632790567094e-05, + "loss": 0.0609, + "step": 1566 + }, + { + "epoch": 0.22, + "learning_rate": 4.963316488863935e-05, + "loss": 0.0801, + "step": 1568 + }, + { + "epoch": 0.22, + "learning_rate": 4.9632696986711586e-05, + "loss": 0.0837, + "step": 1570 + }, + { + "epoch": 0.22, + "learning_rate": 4.963222908478383e-05, + "loss": 0.0898, + "step": 1572 + }, + { + "epoch": 0.22, + "learning_rate": 4.963176118285607e-05, + "loss": 0.0818, + "step": 1574 + }, + { + "epoch": 0.22, + "learning_rate": 4.963129328092832e-05, + "loss": 0.0602, + "step": 1576 + }, + { + "epoch": 0.22, + "learning_rate": 4.963082537900056e-05, + "loss": 0.0868, + "step": 1578 + }, + { + "epoch": 0.22, + "learning_rate": 4.963035747707281e-05, + "loss": 0.077, + "step": 1580 + }, + { + "epoch": 0.22, + "learning_rate": 4.962988957514505e-05, + "loss": 0.1011, + "step": 1582 + }, + { + "epoch": 0.22, + "learning_rate": 4.9629421673217294e-05, + "loss": 0.0658, + "step": 1584 + }, + { + "epoch": 0.22, + "learning_rate": 4.962895377128954e-05, + "loss": 0.0791, + "step": 1586 + }, + { + "epoch": 0.22, + "learning_rate": 4.9628485869361786e-05, + "loss": 0.0889, + "step": 1588 + }, + { + "epoch": 0.22, + "learning_rate": 4.9628017967434025e-05, + "loss": 0.0783, + "step": 1590 + }, + { + "epoch": 0.22, + "learning_rate": 4.962755006550627e-05, + "loss": 0.0944, + "step": 1592 + }, + { + "epoch": 0.22, + "learning_rate": 4.962708216357852e-05, + "loss": 0.0814, + "step": 1594 + }, + { + "epoch": 0.22, + "learning_rate": 4.962661426165076e-05, + "loss": 0.1023, + "step": 1596 + }, + { + "epoch": 0.22, + "learning_rate": 4.9626146359723e-05, + "loss": 0.0762, + "step": 1598 + }, + { + "epoch": 0.22, + "learning_rate": 4.962567845779525e-05, + "loss": 0.0939, + "step": 1600 + }, + { + "epoch": 0.22, + "learning_rate": 4.9625210555867494e-05, + "loss": 0.0706, + "step": 1602 + }, + { + "epoch": 0.23, + "learning_rate": 4.962474265393974e-05, + "loss": 0.0997, + "step": 1604 + }, + { + "epoch": 0.23, + "learning_rate": 4.962427475201198e-05, + "loss": 0.0837, + "step": 1606 + }, + { + "epoch": 0.23, + "learning_rate": 4.9623806850084225e-05, + "loss": 0.0888, + "step": 1608 + }, + { + "epoch": 0.23, + "learning_rate": 4.9623338948156464e-05, + "loss": 0.1059, + "step": 1610 + }, + { + "epoch": 0.23, + "learning_rate": 4.9622871046228717e-05, + "loss": 0.0897, + "step": 1612 + }, + { + "epoch": 0.23, + "learning_rate": 4.9622403144300956e-05, + "loss": 0.0891, + "step": 1614 + }, + { + "epoch": 0.23, + "learning_rate": 4.96219352423732e-05, + "loss": 0.0714, + "step": 1616 + }, + { + "epoch": 0.23, + "learning_rate": 4.962146734044544e-05, + "loss": 0.1006, + "step": 1618 + }, + { + "epoch": 0.23, + "learning_rate": 4.9620999438517693e-05, + "loss": 0.0806, + "step": 1620 + }, + { + "epoch": 0.23, + "learning_rate": 4.962053153658993e-05, + "loss": 0.0737, + "step": 1622 + }, + { + "epoch": 0.23, + "learning_rate": 4.962006363466218e-05, + "loss": 0.0902, + "step": 1624 + }, + { + "epoch": 0.23, + "learning_rate": 4.961959573273442e-05, + "loss": 0.0863, + "step": 1626 + }, + { + "epoch": 0.23, + "learning_rate": 4.9619127830806664e-05, + "loss": 0.0983, + "step": 1628 + }, + { + "epoch": 0.23, + "learning_rate": 4.961865992887891e-05, + "loss": 0.083, + "step": 1630 + }, + { + "epoch": 0.23, + "learning_rate": 4.9618192026951155e-05, + "loss": 0.0634, + "step": 1632 + }, + { + "epoch": 0.23, + "learning_rate": 4.9617724125023395e-05, + "loss": 0.0901, + "step": 1634 + }, + { + "epoch": 0.23, + "learning_rate": 4.961725622309564e-05, + "loss": 0.0803, + "step": 1636 + }, + { + "epoch": 0.23, + "learning_rate": 4.9616788321167886e-05, + "loss": 0.083, + "step": 1638 + }, + { + "epoch": 0.23, + "learning_rate": 4.961632041924013e-05, + "loss": 0.0967, + "step": 1640 + }, + { + "epoch": 0.23, + "learning_rate": 4.961585251731237e-05, + "loss": 0.0762, + "step": 1642 + }, + { + "epoch": 0.23, + "learning_rate": 4.961538461538462e-05, + "loss": 0.0673, + "step": 1644 + }, + { + "epoch": 0.23, + "learning_rate": 4.961491671345686e-05, + "loss": 0.0974, + "step": 1646 + }, + { + "epoch": 0.23, + "learning_rate": 4.961444881152911e-05, + "loss": 0.0882, + "step": 1648 + }, + { + "epoch": 0.23, + "learning_rate": 4.961398090960135e-05, + "loss": 0.0851, + "step": 1650 + }, + { + "epoch": 0.23, + "learning_rate": 4.9613513007673594e-05, + "loss": 0.0705, + "step": 1652 + }, + { + "epoch": 0.23, + "learning_rate": 4.961304510574584e-05, + "loss": 0.0681, + "step": 1654 + }, + { + "epoch": 0.23, + "learning_rate": 4.9612577203818086e-05, + "loss": 0.0806, + "step": 1656 + }, + { + "epoch": 0.23, + "learning_rate": 4.9612109301890325e-05, + "loss": 0.085, + "step": 1658 + }, + { + "epoch": 0.23, + "learning_rate": 4.961164139996257e-05, + "loss": 0.0866, + "step": 1660 + }, + { + "epoch": 0.23, + "learning_rate": 4.961117349803481e-05, + "loss": 0.07, + "step": 1662 + }, + { + "epoch": 0.23, + "learning_rate": 4.961070559610706e-05, + "loss": 0.0758, + "step": 1664 + }, + { + "epoch": 0.23, + "learning_rate": 4.96102376941793e-05, + "loss": 0.0642, + "step": 1666 + }, + { + "epoch": 0.23, + "learning_rate": 4.960976979225155e-05, + "loss": 0.0705, + "step": 1668 + }, + { + "epoch": 0.23, + "learning_rate": 4.960930189032379e-05, + "loss": 0.0739, + "step": 1670 + }, + { + "epoch": 0.23, + "learning_rate": 4.960883398839603e-05, + "loss": 0.1064, + "step": 1672 + }, + { + "epoch": 0.23, + "learning_rate": 4.960836608646828e-05, + "loss": 0.0583, + "step": 1674 + }, + { + "epoch": 0.24, + "learning_rate": 4.9607898184540525e-05, + "loss": 0.0852, + "step": 1676 + }, + { + "epoch": 0.24, + "learning_rate": 4.9607430282612764e-05, + "loss": 0.0754, + "step": 1678 + }, + { + "epoch": 0.24, + "learning_rate": 4.960696238068501e-05, + "loss": 0.0618, + "step": 1680 + }, + { + "epoch": 0.24, + "learning_rate": 4.9606494478757256e-05, + "loss": 0.0784, + "step": 1682 + }, + { + "epoch": 0.24, + "learning_rate": 4.96060265768295e-05, + "loss": 0.0766, + "step": 1684 + }, + { + "epoch": 0.24, + "learning_rate": 4.960555867490174e-05, + "loss": 0.0703, + "step": 1686 + }, + { + "epoch": 0.24, + "learning_rate": 4.960509077297399e-05, + "loss": 0.0819, + "step": 1688 + }, + { + "epoch": 0.24, + "learning_rate": 4.960462287104623e-05, + "loss": 0.0892, + "step": 1690 + }, + { + "epoch": 0.24, + "learning_rate": 4.960415496911848e-05, + "loss": 0.0824, + "step": 1692 + }, + { + "epoch": 0.24, + "learning_rate": 4.960368706719072e-05, + "loss": 0.0842, + "step": 1694 + }, + { + "epoch": 0.24, + "learning_rate": 4.9603219165262964e-05, + "loss": 0.0952, + "step": 1696 + }, + { + "epoch": 0.24, + "learning_rate": 4.960275126333521e-05, + "loss": 0.0705, + "step": 1698 + }, + { + "epoch": 0.24, + "learning_rate": 4.9602283361407455e-05, + "loss": 0.0768, + "step": 1700 + }, + { + "epoch": 0.24, + "learning_rate": 4.9601815459479695e-05, + "loss": 0.0775, + "step": 1702 + }, + { + "epoch": 0.24, + "learning_rate": 4.960134755755194e-05, + "loss": 0.0928, + "step": 1704 + }, + { + "epoch": 0.24, + "learning_rate": 4.960087965562418e-05, + "loss": 0.0663, + "step": 1706 + }, + { + "epoch": 0.24, + "learning_rate": 4.960041175369643e-05, + "loss": 0.075, + "step": 1708 + }, + { + "epoch": 0.24, + "learning_rate": 4.959994385176867e-05, + "loss": 0.1009, + "step": 1710 + }, + { + "epoch": 0.24, + "learning_rate": 4.959947594984092e-05, + "loss": 0.0737, + "step": 1712 + }, + { + "epoch": 0.24, + "learning_rate": 4.9599008047913156e-05, + "loss": 0.0872, + "step": 1714 + }, + { + "epoch": 0.24, + "learning_rate": 4.959854014598541e-05, + "loss": 0.0707, + "step": 1716 + }, + { + "epoch": 0.24, + "learning_rate": 4.959807224405765e-05, + "loss": 0.0861, + "step": 1718 + }, + { + "epoch": 0.24, + "learning_rate": 4.9597604342129894e-05, + "loss": 0.0675, + "step": 1720 + }, + { + "epoch": 0.24, + "learning_rate": 4.959713644020213e-05, + "loss": 0.0742, + "step": 1722 + }, + { + "epoch": 0.24, + "learning_rate": 4.959666853827438e-05, + "loss": 0.0651, + "step": 1724 + }, + { + "epoch": 0.24, + "learning_rate": 4.9596200636346625e-05, + "loss": 0.0899, + "step": 1726 + }, + { + "epoch": 0.24, + "learning_rate": 4.959573273441887e-05, + "loss": 0.0718, + "step": 1728 + }, + { + "epoch": 0.24, + "learning_rate": 4.959526483249111e-05, + "loss": 0.0892, + "step": 1730 + }, + { + "epoch": 0.24, + "learning_rate": 4.9594796930563356e-05, + "loss": 0.072, + "step": 1732 + }, + { + "epoch": 0.24, + "learning_rate": 4.95943290286356e-05, + "loss": 0.0897, + "step": 1734 + }, + { + "epoch": 0.24, + "learning_rate": 4.959386112670785e-05, + "loss": 0.0705, + "step": 1736 + }, + { + "epoch": 0.24, + "learning_rate": 4.959339322478009e-05, + "loss": 0.0808, + "step": 1738 + }, + { + "epoch": 0.24, + "learning_rate": 4.959292532285233e-05, + "loss": 0.076, + "step": 1740 + }, + { + "epoch": 0.24, + "learning_rate": 4.959245742092458e-05, + "loss": 0.0787, + "step": 1742 + }, + { + "epoch": 0.24, + "learning_rate": 4.9591989518996825e-05, + "loss": 0.1035, + "step": 1744 + }, + { + "epoch": 0.25, + "learning_rate": 4.9591521617069064e-05, + "loss": 0.0927, + "step": 1746 + }, + { + "epoch": 0.25, + "learning_rate": 4.959105371514131e-05, + "loss": 0.0718, + "step": 1748 + }, + { + "epoch": 0.25, + "learning_rate": 4.9590585813213556e-05, + "loss": 0.0716, + "step": 1750 + }, + { + "epoch": 0.25, + "learning_rate": 4.95901179112858e-05, + "loss": 0.0739, + "step": 1752 + }, + { + "epoch": 0.25, + "learning_rate": 4.958965000935804e-05, + "loss": 0.0586, + "step": 1754 + }, + { + "epoch": 0.25, + "learning_rate": 4.958918210743029e-05, + "loss": 0.0888, + "step": 1756 + }, + { + "epoch": 0.25, + "learning_rate": 4.9588714205502526e-05, + "loss": 0.0948, + "step": 1758 + }, + { + "epoch": 0.25, + "learning_rate": 4.958824630357478e-05, + "loss": 0.0726, + "step": 1760 + }, + { + "epoch": 0.25, + "learning_rate": 4.958777840164702e-05, + "loss": 0.0675, + "step": 1762 + }, + { + "epoch": 0.25, + "learning_rate": 4.9587310499719264e-05, + "loss": 0.0904, + "step": 1764 + }, + { + "epoch": 0.25, + "learning_rate": 4.95868425977915e-05, + "loss": 0.0768, + "step": 1766 + }, + { + "epoch": 0.25, + "learning_rate": 4.9586374695863755e-05, + "loss": 0.09, + "step": 1768 + }, + { + "epoch": 0.25, + "learning_rate": 4.9585906793935995e-05, + "loss": 0.1021, + "step": 1770 + }, + { + "epoch": 0.25, + "learning_rate": 4.958543889200824e-05, + "loss": 0.0889, + "step": 1772 + }, + { + "epoch": 0.25, + "learning_rate": 4.958497099008048e-05, + "loss": 0.0765, + "step": 1774 + }, + { + "epoch": 0.25, + "learning_rate": 4.9584503088152726e-05, + "loss": 0.0895, + "step": 1776 + }, + { + "epoch": 0.25, + "learning_rate": 4.958403518622497e-05, + "loss": 0.0798, + "step": 1778 + }, + { + "epoch": 0.25, + "learning_rate": 4.958356728429721e-05, + "loss": 0.1208, + "step": 1780 + }, + { + "epoch": 0.25, + "learning_rate": 4.9583099382369456e-05, + "loss": 0.0793, + "step": 1782 + }, + { + "epoch": 0.25, + "learning_rate": 4.95826314804417e-05, + "loss": 0.0725, + "step": 1784 + }, + { + "epoch": 0.25, + "learning_rate": 4.958216357851395e-05, + "loss": 0.0829, + "step": 1786 + }, + { + "epoch": 0.25, + "learning_rate": 4.958169567658619e-05, + "loss": 0.069, + "step": 1788 + }, + { + "epoch": 0.25, + "learning_rate": 4.958122777465843e-05, + "loss": 0.069, + "step": 1790 + }, + { + "epoch": 0.25, + "learning_rate": 4.958075987273067e-05, + "loss": 0.0839, + "step": 1792 + }, + { + "epoch": 0.25, + "learning_rate": 4.9580291970802925e-05, + "loss": 0.0852, + "step": 1794 + }, + { + "epoch": 0.25, + "learning_rate": 4.9579824068875164e-05, + "loss": 0.1122, + "step": 1796 + }, + { + "epoch": 0.25, + "learning_rate": 4.957935616694741e-05, + "loss": 0.1048, + "step": 1798 + }, + { + "epoch": 0.25, + "learning_rate": 4.957888826501965e-05, + "loss": 0.0711, + "step": 1800 + }, + { + "epoch": 0.25, + "learning_rate": 4.9578420363091895e-05, + "loss": 0.073, + "step": 1802 + }, + { + "epoch": 0.25, + "learning_rate": 4.957795246116414e-05, + "loss": 0.1139, + "step": 1804 + }, + { + "epoch": 0.25, + "learning_rate": 4.957748455923639e-05, + "loss": 0.0678, + "step": 1806 + }, + { + "epoch": 0.25, + "learning_rate": 4.9577016657308626e-05, + "loss": 0.0936, + "step": 1808 + }, + { + "epoch": 0.25, + "learning_rate": 4.957654875538087e-05, + "loss": 0.0722, + "step": 1810 + }, + { + "epoch": 0.25, + "learning_rate": 4.957608085345312e-05, + "loss": 0.0779, + "step": 1812 + }, + { + "epoch": 0.25, + "learning_rate": 4.9575612951525364e-05, + "loss": 0.0842, + "step": 1814 + }, + { + "epoch": 0.25, + "learning_rate": 4.95751450495976e-05, + "loss": 0.1146, + "step": 1816 + }, + { + "epoch": 0.26, + "learning_rate": 4.957467714766985e-05, + "loss": 0.0801, + "step": 1818 + }, + { + "epoch": 0.26, + "learning_rate": 4.9574209245742095e-05, + "loss": 0.0851, + "step": 1820 + }, + { + "epoch": 0.26, + "learning_rate": 4.957374134381434e-05, + "loss": 0.0865, + "step": 1822 + }, + { + "epoch": 0.26, + "learning_rate": 4.957327344188658e-05, + "loss": 0.0789, + "step": 1824 + }, + { + "epoch": 0.26, + "learning_rate": 4.9572805539958826e-05, + "loss": 0.0866, + "step": 1826 + }, + { + "epoch": 0.26, + "learning_rate": 4.957233763803107e-05, + "loss": 0.0828, + "step": 1828 + }, + { + "epoch": 0.26, + "learning_rate": 4.957186973610332e-05, + "loss": 0.1413, + "step": 1830 + }, + { + "epoch": 0.26, + "learning_rate": 4.957140183417556e-05, + "loss": 0.0734, + "step": 1832 + }, + { + "epoch": 0.26, + "learning_rate": 4.95709339322478e-05, + "loss": 0.0669, + "step": 1834 + }, + { + "epoch": 0.26, + "learning_rate": 4.957046603032004e-05, + "loss": 0.0824, + "step": 1836 + }, + { + "epoch": 0.26, + "learning_rate": 4.9569998128392295e-05, + "loss": 0.0812, + "step": 1838 + }, + { + "epoch": 0.26, + "learning_rate": 4.9569530226464534e-05, + "loss": 0.0805, + "step": 1840 + }, + { + "epoch": 0.26, + "learning_rate": 4.956906232453678e-05, + "loss": 0.1054, + "step": 1842 + }, + { + "epoch": 0.26, + "learning_rate": 4.956859442260902e-05, + "loss": 0.0721, + "step": 1844 + }, + { + "epoch": 0.26, + "learning_rate": 4.956812652068127e-05, + "loss": 0.0824, + "step": 1846 + }, + { + "epoch": 0.26, + "learning_rate": 4.956765861875351e-05, + "loss": 0.0905, + "step": 1848 + }, + { + "epoch": 0.26, + "learning_rate": 4.9567190716825756e-05, + "loss": 0.0787, + "step": 1850 + }, + { + "epoch": 0.26, + "learning_rate": 4.9566722814897996e-05, + "loss": 0.0864, + "step": 1852 + }, + { + "epoch": 0.26, + "learning_rate": 4.956625491297024e-05, + "loss": 0.0656, + "step": 1854 + }, + { + "epoch": 0.26, + "learning_rate": 4.956578701104249e-05, + "loss": 0.1018, + "step": 1856 + }, + { + "epoch": 0.26, + "learning_rate": 4.956531910911473e-05, + "loss": 0.0841, + "step": 1858 + }, + { + "epoch": 0.26, + "learning_rate": 4.956485120718697e-05, + "loss": 0.0772, + "step": 1860 + }, + { + "epoch": 0.26, + "learning_rate": 4.956438330525922e-05, + "loss": 0.0838, + "step": 1862 + }, + { + "epoch": 0.26, + "learning_rate": 4.9563915403331464e-05, + "loss": 0.0848, + "step": 1864 + }, + { + "epoch": 0.26, + "learning_rate": 4.956344750140371e-05, + "loss": 0.0752, + "step": 1866 + }, + { + "epoch": 0.26, + "learning_rate": 4.956297959947595e-05, + "loss": 0.0683, + "step": 1868 + }, + { + "epoch": 0.26, + "learning_rate": 4.9562511697548195e-05, + "loss": 0.0707, + "step": 1870 + }, + { + "epoch": 0.26, + "learning_rate": 4.956204379562044e-05, + "loss": 0.0966, + "step": 1872 + }, + { + "epoch": 0.26, + "learning_rate": 4.956157589369269e-05, + "loss": 0.1103, + "step": 1874 + }, + { + "epoch": 0.26, + "learning_rate": 4.9561107991764926e-05, + "loss": 0.0819, + "step": 1876 + }, + { + "epoch": 0.26, + "learning_rate": 4.956064008983717e-05, + "loss": 0.0815, + "step": 1878 + }, + { + "epoch": 0.26, + "learning_rate": 4.956017218790942e-05, + "loss": 0.087, + "step": 1880 + }, + { + "epoch": 0.26, + "learning_rate": 4.9559704285981664e-05, + "loss": 0.0682, + "step": 1882 + }, + { + "epoch": 0.26, + "learning_rate": 4.95592363840539e-05, + "loss": 0.0894, + "step": 1884 + }, + { + "epoch": 0.26, + "learning_rate": 4.955876848212615e-05, + "loss": 0.0693, + "step": 1886 + }, + { + "epoch": 0.27, + "learning_rate": 4.955830058019839e-05, + "loss": 0.0699, + "step": 1888 + }, + { + "epoch": 0.27, + "learning_rate": 4.955783267827064e-05, + "loss": 0.0955, + "step": 1890 + }, + { + "epoch": 0.27, + "learning_rate": 4.955736477634288e-05, + "loss": 0.0724, + "step": 1892 + }, + { + "epoch": 0.27, + "learning_rate": 4.9556896874415126e-05, + "loss": 0.0868, + "step": 1894 + }, + { + "epoch": 0.27, + "learning_rate": 4.9556428972487365e-05, + "loss": 0.079, + "step": 1896 + }, + { + "epoch": 0.27, + "learning_rate": 4.955596107055962e-05, + "loss": 0.0919, + "step": 1898 + }, + { + "epoch": 0.27, + "learning_rate": 4.955549316863186e-05, + "loss": 0.0741, + "step": 1900 + }, + { + "epoch": 0.27, + "learning_rate": 4.95550252667041e-05, + "loss": 0.0702, + "step": 1902 + }, + { + "epoch": 0.27, + "learning_rate": 4.955455736477634e-05, + "loss": 0.0649, + "step": 1904 + }, + { + "epoch": 0.27, + "learning_rate": 4.955408946284859e-05, + "loss": 0.1234, + "step": 1906 + }, + { + "epoch": 0.27, + "learning_rate": 4.9553621560920834e-05, + "loss": 0.0759, + "step": 1908 + }, + { + "epoch": 0.27, + "learning_rate": 4.955315365899308e-05, + "loss": 0.0899, + "step": 1910 + }, + { + "epoch": 0.27, + "learning_rate": 4.955268575706532e-05, + "loss": 0.0933, + "step": 1912 + }, + { + "epoch": 0.27, + "learning_rate": 4.9552217855137565e-05, + "loss": 0.1276, + "step": 1914 + }, + { + "epoch": 0.27, + "learning_rate": 4.955174995320981e-05, + "loss": 0.0794, + "step": 1916 + }, + { + "epoch": 0.27, + "learning_rate": 4.9551282051282056e-05, + "loss": 0.1122, + "step": 1918 + }, + { + "epoch": 0.27, + "learning_rate": 4.9550814149354296e-05, + "loss": 0.1009, + "step": 1920 + }, + { + "epoch": 0.27, + "learning_rate": 4.955034624742654e-05, + "loss": 0.0805, + "step": 1922 + }, + { + "epoch": 0.27, + "learning_rate": 4.954987834549879e-05, + "loss": 0.0824, + "step": 1924 + }, + { + "epoch": 0.27, + "learning_rate": 4.954941044357103e-05, + "loss": 0.0767, + "step": 1926 + }, + { + "epoch": 0.27, + "learning_rate": 4.954894254164327e-05, + "loss": 0.0992, + "step": 1928 + }, + { + "epoch": 0.27, + "learning_rate": 4.954847463971552e-05, + "loss": 0.0582, + "step": 1930 + }, + { + "epoch": 0.27, + "learning_rate": 4.9548006737787764e-05, + "loss": 0.0696, + "step": 1932 + }, + { + "epoch": 0.27, + "learning_rate": 4.954753883586001e-05, + "loss": 0.1047, + "step": 1934 + }, + { + "epoch": 0.27, + "learning_rate": 4.954707093393225e-05, + "loss": 0.0732, + "step": 1936 + }, + { + "epoch": 0.27, + "learning_rate": 4.9546603032004495e-05, + "loss": 0.0788, + "step": 1938 + }, + { + "epoch": 0.27, + "learning_rate": 4.9546135130076734e-05, + "loss": 0.0817, + "step": 1940 + }, + { + "epoch": 0.27, + "learning_rate": 4.954566722814899e-05, + "loss": 0.0883, + "step": 1942 + }, + { + "epoch": 0.27, + "learning_rate": 4.9545199326221226e-05, + "loss": 0.0814, + "step": 1944 + }, + { + "epoch": 0.27, + "learning_rate": 4.954473142429347e-05, + "loss": 0.0878, + "step": 1946 + }, + { + "epoch": 0.27, + "learning_rate": 4.954426352236571e-05, + "loss": 0.0919, + "step": 1948 + }, + { + "epoch": 0.27, + "learning_rate": 4.954379562043796e-05, + "loss": 0.0728, + "step": 1950 + }, + { + "epoch": 0.27, + "learning_rate": 4.95433277185102e-05, + "loss": 0.0961, + "step": 1952 + }, + { + "epoch": 0.27, + "learning_rate": 4.954285981658245e-05, + "loss": 0.0727, + "step": 1954 + }, + { + "epoch": 0.27, + "learning_rate": 4.954239191465469e-05, + "loss": 0.0969, + "step": 1956 + }, + { + "epoch": 0.27, + "learning_rate": 4.9541924012726934e-05, + "loss": 0.0743, + "step": 1958 + }, + { + "epoch": 0.28, + "learning_rate": 4.954145611079918e-05, + "loss": 0.0761, + "step": 1960 + }, + { + "epoch": 0.28, + "learning_rate": 4.9540988208871426e-05, + "loss": 0.0691, + "step": 1962 + }, + { + "epoch": 0.28, + "learning_rate": 4.9540520306943665e-05, + "loss": 0.0725, + "step": 1964 + }, + { + "epoch": 0.28, + "learning_rate": 4.954005240501591e-05, + "loss": 0.1018, + "step": 1966 + }, + { + "epoch": 0.28, + "learning_rate": 4.953958450308816e-05, + "loss": 0.0669, + "step": 1968 + }, + { + "epoch": 0.28, + "learning_rate": 4.95391166011604e-05, + "loss": 0.0777, + "step": 1970 + }, + { + "epoch": 0.28, + "learning_rate": 4.953864869923264e-05, + "loss": 0.1015, + "step": 1972 + }, + { + "epoch": 0.28, + "learning_rate": 4.953818079730489e-05, + "loss": 0.099, + "step": 1974 + }, + { + "epoch": 0.28, + "learning_rate": 4.9537712895377134e-05, + "loss": 0.0754, + "step": 1976 + }, + { + "epoch": 0.28, + "learning_rate": 4.953724499344938e-05, + "loss": 0.0703, + "step": 1978 + }, + { + "epoch": 0.28, + "learning_rate": 4.953677709152162e-05, + "loss": 0.0935, + "step": 1980 + }, + { + "epoch": 0.28, + "learning_rate": 4.9536309189593865e-05, + "loss": 0.0701, + "step": 1982 + }, + { + "epoch": 0.28, + "learning_rate": 4.9535841287666104e-05, + "loss": 0.1014, + "step": 1984 + }, + { + "epoch": 0.28, + "learning_rate": 4.9535373385738357e-05, + "loss": 0.0649, + "step": 1986 + }, + { + "epoch": 0.28, + "learning_rate": 4.9534905483810596e-05, + "loss": 0.0947, + "step": 1988 + }, + { + "epoch": 0.28, + "learning_rate": 4.953443758188284e-05, + "loss": 0.0857, + "step": 1990 + }, + { + "epoch": 0.28, + "learning_rate": 4.953396967995508e-05, + "loss": 0.0781, + "step": 1992 + }, + { + "epoch": 0.28, + "learning_rate": 4.953350177802733e-05, + "loss": 0.0849, + "step": 1994 + }, + { + "epoch": 0.28, + "learning_rate": 4.953303387609957e-05, + "loss": 0.0767, + "step": 1996 + }, + { + "epoch": 0.28, + "learning_rate": 4.953256597417182e-05, + "loss": 0.1001, + "step": 1998 + }, + { + "epoch": 0.28, + "learning_rate": 4.953209807224406e-05, + "loss": 0.09, + "step": 2000 + }, + { + "epoch": 0.28, + "eval_gen_len": 27.0862, + "eval_loss": 1.035199761390686, + "eval_meteor": 0.0382, + "eval_runtime": 13.218, + "eval_samples_per_second": 4.388, + "eval_steps_per_second": 0.605, + "step": 2000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9531630170316303e-05, + "loss": 0.1195, + "step": 2002 + }, + { + "epoch": 0.28, + "learning_rate": 4.953116226838855e-05, + "loss": 0.0717, + "step": 2004 + }, + { + "epoch": 0.28, + "learning_rate": 4.9530694366460795e-05, + "loss": 0.101, + "step": 2006 + }, + { + "epoch": 0.28, + "learning_rate": 4.9530226464533034e-05, + "loss": 0.0867, + "step": 2008 + }, + { + "epoch": 0.28, + "learning_rate": 4.952975856260528e-05, + "loss": 0.0734, + "step": 2010 + }, + { + "epoch": 0.28, + "learning_rate": 4.9529290660677526e-05, + "loss": 0.0957, + "step": 2012 + }, + { + "epoch": 0.28, + "learning_rate": 4.952882275874977e-05, + "loss": 0.1171, + "step": 2014 + }, + { + "epoch": 0.28, + "learning_rate": 4.952835485682201e-05, + "loss": 0.103, + "step": 2016 + }, + { + "epoch": 0.28, + "learning_rate": 4.952788695489426e-05, + "loss": 0.0913, + "step": 2018 + }, + { + "epoch": 0.28, + "learning_rate": 4.95274190529665e-05, + "loss": 0.0911, + "step": 2020 + }, + { + "epoch": 0.28, + "learning_rate": 4.952695115103875e-05, + "loss": 0.0803, + "step": 2022 + }, + { + "epoch": 0.28, + "learning_rate": 4.952648324911099e-05, + "loss": 0.0831, + "step": 2024 + }, + { + "epoch": 0.28, + "learning_rate": 4.952601534718323e-05, + "loss": 0.0769, + "step": 2026 + }, + { + "epoch": 0.28, + "learning_rate": 4.952554744525548e-05, + "loss": 0.072, + "step": 2028 + }, + { + "epoch": 0.28, + "learning_rate": 4.952507954332772e-05, + "loss": 0.0863, + "step": 2030 + }, + { + "epoch": 0.29, + "learning_rate": 4.9524611641399965e-05, + "loss": 0.0999, + "step": 2032 + }, + { + "epoch": 0.29, + "learning_rate": 4.9524143739472204e-05, + "loss": 0.097, + "step": 2034 + }, + { + "epoch": 0.29, + "learning_rate": 4.952367583754445e-05, + "loss": 0.0917, + "step": 2036 + }, + { + "epoch": 0.29, + "learning_rate": 4.9523207935616696e-05, + "loss": 0.0717, + "step": 2038 + }, + { + "epoch": 0.29, + "learning_rate": 4.952274003368894e-05, + "loss": 0.0909, + "step": 2040 + }, + { + "epoch": 0.29, + "learning_rate": 4.952227213176118e-05, + "loss": 0.0911, + "step": 2042 + }, + { + "epoch": 0.29, + "learning_rate": 4.952180422983343e-05, + "loss": 0.0811, + "step": 2044 + }, + { + "epoch": 0.29, + "learning_rate": 4.952133632790567e-05, + "loss": 0.0892, + "step": 2046 + }, + { + "epoch": 0.29, + "learning_rate": 4.952086842597792e-05, + "loss": 0.0879, + "step": 2048 + }, + { + "epoch": 0.29, + "learning_rate": 4.952040052405016e-05, + "loss": 0.084, + "step": 2050 + }, + { + "epoch": 0.29, + "learning_rate": 4.9519932622122404e-05, + "loss": 0.075, + "step": 2052 + }, + { + "epoch": 0.29, + "learning_rate": 4.951946472019465e-05, + "loss": 0.0923, + "step": 2054 + }, + { + "epoch": 0.29, + "learning_rate": 4.9518996818266896e-05, + "loss": 0.0782, + "step": 2056 + }, + { + "epoch": 0.29, + "learning_rate": 4.9518528916339135e-05, + "loss": 0.1017, + "step": 2058 + }, + { + "epoch": 0.29, + "learning_rate": 4.951806101441138e-05, + "loss": 0.0976, + "step": 2060 + }, + { + "epoch": 0.29, + "learning_rate": 4.951759311248363e-05, + "loss": 0.0843, + "step": 2062 + }, + { + "epoch": 0.29, + "learning_rate": 4.951712521055587e-05, + "loss": 0.1091, + "step": 2064 + }, + { + "epoch": 0.29, + "learning_rate": 4.951665730862811e-05, + "loss": 0.0778, + "step": 2066 + }, + { + "epoch": 0.29, + "learning_rate": 4.951618940670036e-05, + "loss": 0.08, + "step": 2068 + }, + { + "epoch": 0.29, + "learning_rate": 4.95157215047726e-05, + "loss": 0.0813, + "step": 2070 + }, + { + "epoch": 0.29, + "learning_rate": 4.951525360284485e-05, + "loss": 0.1102, + "step": 2072 + }, + { + "epoch": 0.29, + "learning_rate": 4.951478570091709e-05, + "loss": 0.0795, + "step": 2074 + }, + { + "epoch": 0.29, + "learning_rate": 4.9514317798989334e-05, + "loss": 0.0959, + "step": 2076 + }, + { + "epoch": 0.29, + "learning_rate": 4.9513849897061574e-05, + "loss": 0.0886, + "step": 2078 + }, + { + "epoch": 0.29, + "learning_rate": 4.9513381995133826e-05, + "loss": 0.0764, + "step": 2080 + }, + { + "epoch": 0.29, + "learning_rate": 4.9512914093206065e-05, + "loss": 0.0692, + "step": 2082 + }, + { + "epoch": 0.29, + "learning_rate": 4.951244619127831e-05, + "loss": 0.0869, + "step": 2084 + }, + { + "epoch": 0.29, + "learning_rate": 4.951197828935055e-05, + "loss": 0.0853, + "step": 2086 + }, + { + "epoch": 0.29, + "learning_rate": 4.9511510387422796e-05, + "loss": 0.1056, + "step": 2088 + }, + { + "epoch": 0.29, + "learning_rate": 4.951104248549504e-05, + "loss": 0.0794, + "step": 2090 + }, + { + "epoch": 0.29, + "learning_rate": 4.951057458356729e-05, + "loss": 0.0834, + "step": 2092 + }, + { + "epoch": 0.29, + "learning_rate": 4.951010668163953e-05, + "loss": 0.1245, + "step": 2094 + }, + { + "epoch": 0.29, + "learning_rate": 4.950963877971177e-05, + "loss": 0.0811, + "step": 2096 + }, + { + "epoch": 0.29, + "learning_rate": 4.950917087778402e-05, + "loss": 0.0852, + "step": 2098 + }, + { + "epoch": 0.29, + "learning_rate": 4.9508702975856265e-05, + "loss": 0.0776, + "step": 2100 + }, + { + "epoch": 0.3, + "learning_rate": 4.9508235073928504e-05, + "loss": 0.0913, + "step": 2102 + }, + { + "epoch": 0.3, + "learning_rate": 4.950776717200075e-05, + "loss": 0.074, + "step": 2104 + }, + { + "epoch": 0.3, + "learning_rate": 4.9507299270072996e-05, + "loss": 0.0734, + "step": 2106 + }, + { + "epoch": 0.3, + "learning_rate": 4.950683136814524e-05, + "loss": 0.0771, + "step": 2108 + }, + { + "epoch": 0.3, + "learning_rate": 4.950636346621748e-05, + "loss": 0.0767, + "step": 2110 + }, + { + "epoch": 0.3, + "learning_rate": 4.950589556428973e-05, + "loss": 0.0856, + "step": 2112 + }, + { + "epoch": 0.3, + "learning_rate": 4.9505427662361966e-05, + "loss": 0.0851, + "step": 2114 + }, + { + "epoch": 0.3, + "learning_rate": 4.950495976043422e-05, + "loss": 0.0734, + "step": 2116 + }, + { + "epoch": 0.3, + "learning_rate": 4.950449185850646e-05, + "loss": 0.0829, + "step": 2118 + }, + { + "epoch": 0.3, + "learning_rate": 4.9504023956578704e-05, + "loss": 0.0889, + "step": 2120 + }, + { + "epoch": 0.3, + "learning_rate": 4.950355605465094e-05, + "loss": 0.0918, + "step": 2122 + }, + { + "epoch": 0.3, + "learning_rate": 4.9503088152723196e-05, + "loss": 0.1152, + "step": 2124 + }, + { + "epoch": 0.3, + "learning_rate": 4.9502620250795435e-05, + "loss": 0.1038, + "step": 2126 + }, + { + "epoch": 0.3, + "learning_rate": 4.950215234886768e-05, + "loss": 0.0785, + "step": 2128 + }, + { + "epoch": 0.3, + "learning_rate": 4.950168444693992e-05, + "loss": 0.0699, + "step": 2130 + }, + { + "epoch": 0.3, + "learning_rate": 4.9501216545012166e-05, + "loss": 0.0966, + "step": 2132 + }, + { + "epoch": 0.3, + "learning_rate": 4.950074864308441e-05, + "loss": 0.1138, + "step": 2134 + }, + { + "epoch": 0.3, + "learning_rate": 4.950028074115666e-05, + "loss": 0.086, + "step": 2136 + }, + { + "epoch": 0.3, + "learning_rate": 4.94998128392289e-05, + "loss": 0.0641, + "step": 2138 + }, + { + "epoch": 0.3, + "learning_rate": 4.949934493730114e-05, + "loss": 0.0762, + "step": 2140 + }, + { + "epoch": 0.3, + "learning_rate": 4.949887703537339e-05, + "loss": 0.1115, + "step": 2142 + }, + { + "epoch": 0.3, + "learning_rate": 4.9498409133445634e-05, + "loss": 0.1117, + "step": 2144 + }, + { + "epoch": 0.3, + "learning_rate": 4.9497941231517874e-05, + "loss": 0.089, + "step": 2146 + }, + { + "epoch": 0.3, + "learning_rate": 4.949747332959012e-05, + "loss": 0.0802, + "step": 2148 + }, + { + "epoch": 0.3, + "learning_rate": 4.9497005427662365e-05, + "loss": 0.0747, + "step": 2150 + }, + { + "epoch": 0.3, + "learning_rate": 4.949653752573461e-05, + "loss": 0.085, + "step": 2152 + }, + { + "epoch": 0.3, + "learning_rate": 4.949606962380685e-05, + "loss": 0.0879, + "step": 2154 + }, + { + "epoch": 0.3, + "learning_rate": 4.9495601721879096e-05, + "loss": 0.0959, + "step": 2156 + }, + { + "epoch": 0.3, + "learning_rate": 4.949513381995134e-05, + "loss": 0.0959, + "step": 2158 + }, + { + "epoch": 0.3, + "learning_rate": 4.949466591802359e-05, + "loss": 0.0837, + "step": 2160 + }, + { + "epoch": 0.3, + "learning_rate": 4.949419801609583e-05, + "loss": 0.0731, + "step": 2162 + }, + { + "epoch": 0.3, + "learning_rate": 4.949373011416807e-05, + "loss": 0.0748, + "step": 2164 + }, + { + "epoch": 0.3, + "learning_rate": 4.949326221224031e-05, + "loss": 0.0935, + "step": 2166 + }, + { + "epoch": 0.3, + "learning_rate": 4.9492794310312565e-05, + "loss": 0.0672, + "step": 2168 + }, + { + "epoch": 0.3, + "learning_rate": 4.9492326408384804e-05, + "loss": 0.1234, + "step": 2170 + }, + { + "epoch": 0.3, + "learning_rate": 4.949185850645705e-05, + "loss": 0.0684, + "step": 2172 + }, + { + "epoch": 0.31, + "learning_rate": 4.949139060452929e-05, + "loss": 0.0832, + "step": 2174 + }, + { + "epoch": 0.31, + "learning_rate": 4.949092270260154e-05, + "loss": 0.0881, + "step": 2176 + }, + { + "epoch": 0.31, + "learning_rate": 4.949045480067378e-05, + "loss": 0.0823, + "step": 2178 + }, + { + "epoch": 0.31, + "learning_rate": 4.948998689874603e-05, + "loss": 0.1178, + "step": 2180 + }, + { + "epoch": 0.31, + "learning_rate": 4.9489518996818266e-05, + "loss": 0.091, + "step": 2182 + }, + { + "epoch": 0.31, + "learning_rate": 4.948905109489051e-05, + "loss": 0.1102, + "step": 2184 + }, + { + "epoch": 0.31, + "learning_rate": 4.948858319296276e-05, + "loss": 0.0945, + "step": 2186 + }, + { + "epoch": 0.31, + "learning_rate": 4.9488115291035004e-05, + "loss": 0.097, + "step": 2188 + }, + { + "epoch": 0.31, + "learning_rate": 4.948764738910724e-05, + "loss": 0.0815, + "step": 2190 + }, + { + "epoch": 0.31, + "learning_rate": 4.948717948717949e-05, + "loss": 0.0947, + "step": 2192 + }, + { + "epoch": 0.31, + "learning_rate": 4.9486711585251735e-05, + "loss": 0.0601, + "step": 2194 + }, + { + "epoch": 0.31, + "learning_rate": 4.948624368332398e-05, + "loss": 0.079, + "step": 2196 + }, + { + "epoch": 0.31, + "learning_rate": 4.948577578139622e-05, + "loss": 0.1026, + "step": 2198 + }, + { + "epoch": 0.31, + "learning_rate": 4.9485307879468466e-05, + "loss": 0.099, + "step": 2200 + }, + { + "epoch": 0.31, + "learning_rate": 4.948483997754071e-05, + "loss": 0.0662, + "step": 2202 + }, + { + "epoch": 0.31, + "learning_rate": 4.948437207561296e-05, + "loss": 0.0896, + "step": 2204 + }, + { + "epoch": 0.31, + "learning_rate": 4.94839041736852e-05, + "loss": 0.0807, + "step": 2206 + }, + { + "epoch": 0.31, + "learning_rate": 4.948343627175744e-05, + "loss": 0.1046, + "step": 2208 + }, + { + "epoch": 0.31, + "learning_rate": 4.948296836982969e-05, + "loss": 0.0735, + "step": 2210 + }, + { + "epoch": 0.31, + "learning_rate": 4.9482500467901934e-05, + "loss": 0.0695, + "step": 2212 + }, + { + "epoch": 0.31, + "learning_rate": 4.9482032565974174e-05, + "loss": 0.0924, + "step": 2214 + }, + { + "epoch": 0.31, + "learning_rate": 4.948156466404642e-05, + "loss": 0.1034, + "step": 2216 + }, + { + "epoch": 0.31, + "learning_rate": 4.948109676211866e-05, + "loss": 0.0763, + "step": 2218 + }, + { + "epoch": 0.31, + "learning_rate": 4.948062886019091e-05, + "loss": 0.0851, + "step": 2220 + }, + { + "epoch": 0.31, + "learning_rate": 4.948016095826315e-05, + "loss": 0.0719, + "step": 2222 + }, + { + "epoch": 0.31, + "learning_rate": 4.9479693056335396e-05, + "loss": 0.0899, + "step": 2224 + }, + { + "epoch": 0.31, + "learning_rate": 4.9479225154407636e-05, + "loss": 0.0821, + "step": 2226 + }, + { + "epoch": 0.31, + "learning_rate": 4.947875725247988e-05, + "loss": 0.0697, + "step": 2228 + }, + { + "epoch": 0.31, + "learning_rate": 4.947828935055213e-05, + "loss": 0.0901, + "step": 2230 + }, + { + "epoch": 0.31, + "learning_rate": 4.947782144862437e-05, + "loss": 0.0631, + "step": 2232 + }, + { + "epoch": 0.31, + "learning_rate": 4.947735354669661e-05, + "loss": 0.078, + "step": 2234 + }, + { + "epoch": 0.31, + "learning_rate": 4.947688564476886e-05, + "loss": 0.0959, + "step": 2236 + }, + { + "epoch": 0.31, + "learning_rate": 4.9476417742841104e-05, + "loss": 0.0922, + "step": 2238 + }, + { + "epoch": 0.31, + "learning_rate": 4.947594984091335e-05, + "loss": 0.1077, + "step": 2240 + }, + { + "epoch": 0.31, + "learning_rate": 4.947548193898559e-05, + "loss": 0.0851, + "step": 2242 + }, + { + "epoch": 0.31, + "learning_rate": 4.9475014037057835e-05, + "loss": 0.0806, + "step": 2244 + }, + { + "epoch": 0.32, + "learning_rate": 4.947454613513008e-05, + "loss": 0.0849, + "step": 2246 + }, + { + "epoch": 0.32, + "learning_rate": 4.947407823320233e-05, + "loss": 0.093, + "step": 2248 + }, + { + "epoch": 0.32, + "learning_rate": 4.9473610331274566e-05, + "loss": 0.0759, + "step": 2250 + }, + { + "epoch": 0.32, + "learning_rate": 4.947314242934681e-05, + "loss": 0.0918, + "step": 2252 + }, + { + "epoch": 0.32, + "learning_rate": 4.947267452741906e-05, + "loss": 0.0587, + "step": 2254 + }, + { + "epoch": 0.32, + "learning_rate": 4.9472206625491304e-05, + "loss": 0.0857, + "step": 2256 + }, + { + "epoch": 0.32, + "learning_rate": 4.947173872356354e-05, + "loss": 0.0774, + "step": 2258 + }, + { + "epoch": 0.32, + "learning_rate": 4.947127082163579e-05, + "loss": 0.0937, + "step": 2260 + }, + { + "epoch": 0.32, + "learning_rate": 4.947080291970803e-05, + "loss": 0.076, + "step": 2262 + }, + { + "epoch": 0.32, + "learning_rate": 4.947033501778028e-05, + "loss": 0.0994, + "step": 2264 + }, + { + "epoch": 0.32, + "learning_rate": 4.946986711585252e-05, + "loss": 0.0815, + "step": 2266 + }, + { + "epoch": 0.32, + "learning_rate": 4.9469399213924766e-05, + "loss": 0.108, + "step": 2268 + }, + { + "epoch": 0.32, + "learning_rate": 4.9468931311997005e-05, + "loss": 0.1041, + "step": 2270 + }, + { + "epoch": 0.32, + "learning_rate": 4.946846341006926e-05, + "loss": 0.0705, + "step": 2272 + }, + { + "epoch": 0.32, + "learning_rate": 4.94679955081415e-05, + "loss": 0.1079, + "step": 2274 + }, + { + "epoch": 0.32, + "learning_rate": 4.946752760621374e-05, + "loss": 0.0984, + "step": 2276 + }, + { + "epoch": 0.32, + "learning_rate": 4.946705970428598e-05, + "loss": 0.0885, + "step": 2278 + }, + { + "epoch": 0.32, + "learning_rate": 4.946659180235823e-05, + "loss": 0.0901, + "step": 2280 + }, + { + "epoch": 0.32, + "learning_rate": 4.9466123900430474e-05, + "loss": 0.0859, + "step": 2282 + }, + { + "epoch": 0.32, + "learning_rate": 4.946565599850271e-05, + "loss": 0.0949, + "step": 2284 + }, + { + "epoch": 0.32, + "learning_rate": 4.946518809657496e-05, + "loss": 0.0816, + "step": 2286 + }, + { + "epoch": 0.32, + "learning_rate": 4.9464720194647205e-05, + "loss": 0.1089, + "step": 2288 + }, + { + "epoch": 0.32, + "learning_rate": 4.946425229271945e-05, + "loss": 0.1052, + "step": 2290 + }, + { + "epoch": 0.32, + "learning_rate": 4.946378439079169e-05, + "loss": 0.0762, + "step": 2292 + }, + { + "epoch": 0.32, + "learning_rate": 4.9463316488863936e-05, + "loss": 0.0896, + "step": 2294 + }, + { + "epoch": 0.32, + "learning_rate": 4.9462848586936175e-05, + "loss": 0.0887, + "step": 2296 + }, + { + "epoch": 0.32, + "learning_rate": 4.946238068500843e-05, + "loss": 0.0937, + "step": 2298 + }, + { + "epoch": 0.32, + "learning_rate": 4.9461912783080667e-05, + "loss": 0.0998, + "step": 2300 + }, + { + "epoch": 0.32, + "learning_rate": 4.946144488115291e-05, + "loss": 0.0834, + "step": 2302 + }, + { + "epoch": 0.32, + "learning_rate": 4.946097697922515e-05, + "loss": 0.0535, + "step": 2304 + }, + { + "epoch": 0.32, + "learning_rate": 4.9460509077297404e-05, + "loss": 0.0749, + "step": 2306 + }, + { + "epoch": 0.32, + "learning_rate": 4.9460041175369643e-05, + "loss": 0.072, + "step": 2308 + }, + { + "epoch": 0.32, + "learning_rate": 4.945957327344189e-05, + "loss": 0.0742, + "step": 2310 + }, + { + "epoch": 0.32, + "learning_rate": 4.945910537151413e-05, + "loss": 0.092, + "step": 2312 + }, + { + "epoch": 0.32, + "learning_rate": 4.9458637469586374e-05, + "loss": 0.0796, + "step": 2314 + }, + { + "epoch": 0.33, + "learning_rate": 4.945816956765862e-05, + "loss": 0.0694, + "step": 2316 + }, + { + "epoch": 0.33, + "learning_rate": 4.9457701665730866e-05, + "loss": 0.0766, + "step": 2318 + }, + { + "epoch": 0.33, + "learning_rate": 4.9457233763803105e-05, + "loss": 0.0923, + "step": 2320 + }, + { + "epoch": 0.33, + "learning_rate": 4.945676586187535e-05, + "loss": 0.1028, + "step": 2322 + }, + { + "epoch": 0.33, + "learning_rate": 4.94562979599476e-05, + "loss": 0.0795, + "step": 2324 + }, + { + "epoch": 0.33, + "learning_rate": 4.945583005801984e-05, + "loss": 0.078, + "step": 2326 + }, + { + "epoch": 0.33, + "learning_rate": 4.945536215609208e-05, + "loss": 0.0799, + "step": 2328 + }, + { + "epoch": 0.33, + "learning_rate": 4.945489425416433e-05, + "loss": 0.1079, + "step": 2330 + }, + { + "epoch": 0.33, + "learning_rate": 4.9454426352236574e-05, + "loss": 0.0796, + "step": 2332 + }, + { + "epoch": 0.33, + "learning_rate": 4.945395845030882e-05, + "loss": 0.098, + "step": 2334 + }, + { + "epoch": 0.33, + "learning_rate": 4.945349054838106e-05, + "loss": 0.0919, + "step": 2336 + }, + { + "epoch": 0.33, + "learning_rate": 4.9453022646453305e-05, + "loss": 0.0801, + "step": 2338 + }, + { + "epoch": 0.33, + "learning_rate": 4.945255474452555e-05, + "loss": 0.0834, + "step": 2340 + }, + { + "epoch": 0.33, + "learning_rate": 4.94520868425978e-05, + "loss": 0.0893, + "step": 2342 + }, + { + "epoch": 0.33, + "learning_rate": 4.9451618940670036e-05, + "loss": 0.1059, + "step": 2344 + }, + { + "epoch": 0.33, + "learning_rate": 4.945115103874228e-05, + "loss": 0.0724, + "step": 2346 + }, + { + "epoch": 0.33, + "learning_rate": 4.945068313681452e-05, + "loss": 0.0923, + "step": 2348 + }, + { + "epoch": 0.33, + "learning_rate": 4.9450215234886774e-05, + "loss": 0.075, + "step": 2350 + }, + { + "epoch": 0.33, + "learning_rate": 4.944974733295901e-05, + "loss": 0.0787, + "step": 2352 + }, + { + "epoch": 0.33, + "learning_rate": 4.944927943103126e-05, + "loss": 0.0868, + "step": 2354 + }, + { + "epoch": 0.33, + "learning_rate": 4.94488115291035e-05, + "loss": 0.0791, + "step": 2356 + }, + { + "epoch": 0.33, + "learning_rate": 4.944834362717575e-05, + "loss": 0.0778, + "step": 2358 + }, + { + "epoch": 0.33, + "learning_rate": 4.944787572524799e-05, + "loss": 0.0834, + "step": 2360 + }, + { + "epoch": 0.33, + "learning_rate": 4.9447407823320236e-05, + "loss": 0.077, + "step": 2362 + }, + { + "epoch": 0.33, + "learning_rate": 4.9446939921392475e-05, + "loss": 0.0619, + "step": 2364 + }, + { + "epoch": 0.33, + "learning_rate": 4.944647201946472e-05, + "loss": 0.0895, + "step": 2366 + }, + { + "epoch": 0.33, + "learning_rate": 4.9446004117536967e-05, + "loss": 0.0713, + "step": 2368 + }, + { + "epoch": 0.33, + "learning_rate": 4.944553621560921e-05, + "loss": 0.0743, + "step": 2370 + }, + { + "epoch": 0.33, + "learning_rate": 4.944506831368145e-05, + "loss": 0.075, + "step": 2372 + }, + { + "epoch": 0.33, + "learning_rate": 4.94446004117537e-05, + "loss": 0.0811, + "step": 2374 + }, + { + "epoch": 0.33, + "learning_rate": 4.9444132509825943e-05, + "loss": 0.0738, + "step": 2376 + }, + { + "epoch": 0.33, + "learning_rate": 4.944366460789819e-05, + "loss": 0.0831, + "step": 2378 + }, + { + "epoch": 0.33, + "learning_rate": 4.944319670597043e-05, + "loss": 0.0905, + "step": 2380 + }, + { + "epoch": 0.33, + "learning_rate": 4.9442728804042674e-05, + "loss": 0.1058, + "step": 2382 + }, + { + "epoch": 0.33, + "learning_rate": 4.944226090211492e-05, + "loss": 0.0913, + "step": 2384 + }, + { + "epoch": 0.33, + "learning_rate": 4.9441793000187166e-05, + "loss": 0.0977, + "step": 2386 + }, + { + "epoch": 0.34, + "learning_rate": 4.9441325098259405e-05, + "loss": 0.0655, + "step": 2388 + }, + { + "epoch": 0.34, + "learning_rate": 4.944085719633165e-05, + "loss": 0.0907, + "step": 2390 + }, + { + "epoch": 0.34, + "learning_rate": 4.94403892944039e-05, + "loss": 0.1052, + "step": 2392 + }, + { + "epoch": 0.34, + "learning_rate": 4.943992139247614e-05, + "loss": 0.0933, + "step": 2394 + }, + { + "epoch": 0.34, + "learning_rate": 4.943945349054838e-05, + "loss": 0.075, + "step": 2396 + }, + { + "epoch": 0.34, + "learning_rate": 4.943898558862063e-05, + "loss": 0.0644, + "step": 2398 + }, + { + "epoch": 0.34, + "learning_rate": 4.943851768669287e-05, + "loss": 0.0824, + "step": 2400 + }, + { + "epoch": 0.34, + "learning_rate": 4.943804978476512e-05, + "loss": 0.09, + "step": 2402 + }, + { + "epoch": 0.34, + "learning_rate": 4.943758188283736e-05, + "loss": 0.0828, + "step": 2404 + }, + { + "epoch": 0.34, + "learning_rate": 4.9437113980909605e-05, + "loss": 0.0981, + "step": 2406 + }, + { + "epoch": 0.34, + "learning_rate": 4.9436646078981844e-05, + "loss": 0.0962, + "step": 2408 + }, + { + "epoch": 0.34, + "learning_rate": 4.943617817705409e-05, + "loss": 0.0749, + "step": 2410 + }, + { + "epoch": 0.34, + "learning_rate": 4.9435710275126336e-05, + "loss": 0.1149, + "step": 2412 + }, + { + "epoch": 0.34, + "learning_rate": 4.943524237319858e-05, + "loss": 0.1069, + "step": 2414 + }, + { + "epoch": 0.34, + "learning_rate": 4.943477447127082e-05, + "loss": 0.0873, + "step": 2416 + }, + { + "epoch": 0.34, + "learning_rate": 4.943430656934307e-05, + "loss": 0.0975, + "step": 2418 + }, + { + "epoch": 0.34, + "learning_rate": 4.943383866741531e-05, + "loss": 0.0846, + "step": 2420 + }, + { + "epoch": 0.34, + "learning_rate": 4.943337076548756e-05, + "loss": 0.0992, + "step": 2422 + }, + { + "epoch": 0.34, + "learning_rate": 4.94329028635598e-05, + "loss": 0.0794, + "step": 2424 + }, + { + "epoch": 0.34, + "learning_rate": 4.9432434961632044e-05, + "loss": 0.0782, + "step": 2426 + }, + { + "epoch": 0.34, + "learning_rate": 4.943196705970429e-05, + "loss": 0.0959, + "step": 2428 + }, + { + "epoch": 0.34, + "learning_rate": 4.9431499157776536e-05, + "loss": 0.0882, + "step": 2430 + }, + { + "epoch": 0.34, + "learning_rate": 4.9431031255848775e-05, + "loss": 0.0715, + "step": 2432 + }, + { + "epoch": 0.34, + "learning_rate": 4.943056335392102e-05, + "loss": 0.074, + "step": 2434 + }, + { + "epoch": 0.34, + "learning_rate": 4.9430095451993267e-05, + "loss": 0.089, + "step": 2436 + }, + { + "epoch": 0.34, + "learning_rate": 4.942962755006551e-05, + "loss": 0.0881, + "step": 2438 + }, + { + "epoch": 0.34, + "learning_rate": 4.942915964813775e-05, + "loss": 0.1152, + "step": 2440 + }, + { + "epoch": 0.34, + "learning_rate": 4.942869174621e-05, + "loss": 0.0627, + "step": 2442 + }, + { + "epoch": 0.34, + "learning_rate": 4.942822384428224e-05, + "loss": 0.0979, + "step": 2444 + }, + { + "epoch": 0.34, + "learning_rate": 4.942775594235449e-05, + "loss": 0.0961, + "step": 2446 + }, + { + "epoch": 0.34, + "learning_rate": 4.942728804042673e-05, + "loss": 0.1038, + "step": 2448 + }, + { + "epoch": 0.34, + "learning_rate": 4.9426820138498974e-05, + "loss": 0.0805, + "step": 2450 + }, + { + "epoch": 0.34, + "learning_rate": 4.9426352236571214e-05, + "loss": 0.0731, + "step": 2452 + }, + { + "epoch": 0.34, + "learning_rate": 4.9425884334643466e-05, + "loss": 0.0756, + "step": 2454 + }, + { + "epoch": 0.34, + "learning_rate": 4.9425416432715705e-05, + "loss": 0.0756, + "step": 2456 + }, + { + "epoch": 0.35, + "learning_rate": 4.942494853078795e-05, + "loss": 0.109, + "step": 2458 + }, + { + "epoch": 0.35, + "learning_rate": 4.942448062886019e-05, + "loss": 0.0913, + "step": 2460 + }, + { + "epoch": 0.35, + "learning_rate": 4.9424012726932436e-05, + "loss": 0.0695, + "step": 2462 + }, + { + "epoch": 0.35, + "learning_rate": 4.942354482500468e-05, + "loss": 0.0919, + "step": 2464 + }, + { + "epoch": 0.35, + "learning_rate": 4.942307692307693e-05, + "loss": 0.0844, + "step": 2466 + }, + { + "epoch": 0.35, + "learning_rate": 4.942260902114917e-05, + "loss": 0.0902, + "step": 2468 + }, + { + "epoch": 0.35, + "learning_rate": 4.942214111922141e-05, + "loss": 0.0867, + "step": 2470 + }, + { + "epoch": 0.35, + "learning_rate": 4.942167321729366e-05, + "loss": 0.1038, + "step": 2472 + }, + { + "epoch": 0.35, + "learning_rate": 4.9421205315365905e-05, + "loss": 0.0879, + "step": 2474 + }, + { + "epoch": 0.35, + "learning_rate": 4.9420737413438144e-05, + "loss": 0.0834, + "step": 2476 + }, + { + "epoch": 0.35, + "learning_rate": 4.942026951151039e-05, + "loss": 0.0751, + "step": 2478 + }, + { + "epoch": 0.35, + "learning_rate": 4.9419801609582636e-05, + "loss": 0.0794, + "step": 2480 + }, + { + "epoch": 0.35, + "learning_rate": 4.941933370765488e-05, + "loss": 0.0741, + "step": 2482 + }, + { + "epoch": 0.35, + "learning_rate": 4.941886580572712e-05, + "loss": 0.0955, + "step": 2484 + }, + { + "epoch": 0.35, + "learning_rate": 4.941839790379937e-05, + "loss": 0.0644, + "step": 2486 + }, + { + "epoch": 0.35, + "learning_rate": 4.941793000187161e-05, + "loss": 0.0998, + "step": 2488 + }, + { + "epoch": 0.35, + "learning_rate": 4.941746209994386e-05, + "loss": 0.0783, + "step": 2490 + }, + { + "epoch": 0.35, + "learning_rate": 4.94169941980161e-05, + "loss": 0.0843, + "step": 2492 + }, + { + "epoch": 0.35, + "learning_rate": 4.9416526296088344e-05, + "loss": 0.0812, + "step": 2494 + }, + { + "epoch": 0.35, + "learning_rate": 4.941605839416058e-05, + "loss": 0.0885, + "step": 2496 + }, + { + "epoch": 0.35, + "learning_rate": 4.9415590492232836e-05, + "loss": 0.0798, + "step": 2498 + }, + { + "epoch": 0.35, + "learning_rate": 4.9415122590305075e-05, + "loss": 0.0867, + "step": 2500 + }, + { + "epoch": 0.35, + "learning_rate": 4.941465468837732e-05, + "loss": 0.0652, + "step": 2502 + }, + { + "epoch": 0.35, + "learning_rate": 4.941418678644956e-05, + "loss": 0.074, + "step": 2504 + }, + { + "epoch": 0.35, + "learning_rate": 4.9413718884521806e-05, + "loss": 0.0926, + "step": 2506 + }, + { + "epoch": 0.35, + "learning_rate": 4.941325098259405e-05, + "loss": 0.0821, + "step": 2508 + }, + { + "epoch": 0.35, + "learning_rate": 4.94127830806663e-05, + "loss": 0.0757, + "step": 2510 + }, + { + "epoch": 0.35, + "learning_rate": 4.941231517873854e-05, + "loss": 0.0862, + "step": 2512 + }, + { + "epoch": 0.35, + "learning_rate": 4.941184727681078e-05, + "loss": 0.0833, + "step": 2514 + }, + { + "epoch": 0.35, + "learning_rate": 4.941137937488303e-05, + "loss": 0.086, + "step": 2516 + }, + { + "epoch": 0.35, + "learning_rate": 4.9410911472955274e-05, + "loss": 0.082, + "step": 2518 + }, + { + "epoch": 0.35, + "learning_rate": 4.9410443571027514e-05, + "loss": 0.0901, + "step": 2520 + }, + { + "epoch": 0.35, + "learning_rate": 4.940997566909976e-05, + "loss": 0.0925, + "step": 2522 + }, + { + "epoch": 0.35, + "learning_rate": 4.9409507767172005e-05, + "loss": 0.0891, + "step": 2524 + }, + { + "epoch": 0.35, + "learning_rate": 4.940903986524425e-05, + "loss": 0.0839, + "step": 2526 + }, + { + "epoch": 0.35, + "learning_rate": 4.940857196331649e-05, + "loss": 0.0825, + "step": 2528 + }, + { + "epoch": 0.36, + "learning_rate": 4.9408104061388736e-05, + "loss": 0.1103, + "step": 2530 + }, + { + "epoch": 0.36, + "learning_rate": 4.940763615946098e-05, + "loss": 0.0753, + "step": 2532 + }, + { + "epoch": 0.36, + "learning_rate": 4.940716825753322e-05, + "loss": 0.1045, + "step": 2534 + }, + { + "epoch": 0.36, + "learning_rate": 4.940670035560547e-05, + "loss": 0.0909, + "step": 2536 + }, + { + "epoch": 0.36, + "learning_rate": 4.9406232453677706e-05, + "loss": 0.0801, + "step": 2538 + }, + { + "epoch": 0.36, + "learning_rate": 4.940576455174995e-05, + "loss": 0.0956, + "step": 2540 + }, + { + "epoch": 0.36, + "learning_rate": 4.94052966498222e-05, + "loss": 0.0888, + "step": 2542 + }, + { + "epoch": 0.36, + "learning_rate": 4.9404828747894444e-05, + "loss": 0.0784, + "step": 2544 + }, + { + "epoch": 0.36, + "learning_rate": 4.940436084596668e-05, + "loss": 0.0807, + "step": 2546 + }, + { + "epoch": 0.36, + "learning_rate": 4.940389294403893e-05, + "loss": 0.0854, + "step": 2548 + }, + { + "epoch": 0.36, + "learning_rate": 4.9403425042111175e-05, + "loss": 0.0882, + "step": 2550 + }, + { + "epoch": 0.36, + "learning_rate": 4.940295714018342e-05, + "loss": 0.0825, + "step": 2552 + }, + { + "epoch": 0.36, + "learning_rate": 4.940248923825566e-05, + "loss": 0.0677, + "step": 2554 + }, + { + "epoch": 0.36, + "learning_rate": 4.9402021336327906e-05, + "loss": 0.0805, + "step": 2556 + }, + { + "epoch": 0.36, + "learning_rate": 4.940155343440015e-05, + "loss": 0.063, + "step": 2558 + }, + { + "epoch": 0.36, + "learning_rate": 4.94010855324724e-05, + "loss": 0.0733, + "step": 2560 + }, + { + "epoch": 0.36, + "learning_rate": 4.940061763054464e-05, + "loss": 0.0732, + "step": 2562 + }, + { + "epoch": 0.36, + "learning_rate": 4.940014972861688e-05, + "loss": 0.1077, + "step": 2564 + }, + { + "epoch": 0.36, + "learning_rate": 4.939968182668913e-05, + "loss": 0.0888, + "step": 2566 + }, + { + "epoch": 0.36, + "learning_rate": 4.9399213924761375e-05, + "loss": 0.0857, + "step": 2568 + }, + { + "epoch": 0.36, + "learning_rate": 4.9398746022833614e-05, + "loss": 0.0794, + "step": 2570 + }, + { + "epoch": 0.36, + "learning_rate": 4.939827812090586e-05, + "loss": 0.0967, + "step": 2572 + }, + { + "epoch": 0.36, + "learning_rate": 4.93978102189781e-05, + "loss": 0.1005, + "step": 2574 + }, + { + "epoch": 0.36, + "learning_rate": 4.939734231705035e-05, + "loss": 0.0941, + "step": 2576 + }, + { + "epoch": 0.36, + "learning_rate": 4.939687441512259e-05, + "loss": 0.1091, + "step": 2578 + }, + { + "epoch": 0.36, + "learning_rate": 4.939640651319484e-05, + "loss": 0.0826, + "step": 2580 + }, + { + "epoch": 0.36, + "learning_rate": 4.9395938611267076e-05, + "loss": 0.0791, + "step": 2582 + }, + { + "epoch": 0.36, + "learning_rate": 4.939547070933933e-05, + "loss": 0.0869, + "step": 2584 + }, + { + "epoch": 0.36, + "learning_rate": 4.939500280741157e-05, + "loss": 0.0631, + "step": 2586 + }, + { + "epoch": 0.36, + "learning_rate": 4.9394534905483814e-05, + "loss": 0.0909, + "step": 2588 + }, + { + "epoch": 0.36, + "learning_rate": 4.939406700355605e-05, + "loss": 0.0927, + "step": 2590 + }, + { + "epoch": 0.36, + "learning_rate": 4.93935991016283e-05, + "loss": 0.0659, + "step": 2592 + }, + { + "epoch": 0.36, + "learning_rate": 4.9393131199700545e-05, + "loss": 0.0723, + "step": 2594 + }, + { + "epoch": 0.36, + "learning_rate": 4.939266329777279e-05, + "loss": 0.1038, + "step": 2596 + }, + { + "epoch": 0.36, + "learning_rate": 4.939219539584503e-05, + "loss": 0.0827, + "step": 2598 + }, + { + "epoch": 0.36, + "learning_rate": 4.9391727493917275e-05, + "loss": 0.0997, + "step": 2600 + }, + { + "epoch": 0.37, + "learning_rate": 4.939125959198952e-05, + "loss": 0.0814, + "step": 2602 + }, + { + "epoch": 0.37, + "learning_rate": 4.939079169006177e-05, + "loss": 0.0988, + "step": 2604 + }, + { + "epoch": 0.37, + "learning_rate": 4.9390323788134006e-05, + "loss": 0.094, + "step": 2606 + }, + { + "epoch": 0.37, + "learning_rate": 4.938985588620625e-05, + "loss": 0.0926, + "step": 2608 + }, + { + "epoch": 0.37, + "learning_rate": 4.93893879842785e-05, + "loss": 0.0775, + "step": 2610 + }, + { + "epoch": 0.37, + "learning_rate": 4.9388920082350744e-05, + "loss": 0.0849, + "step": 2612 + }, + { + "epoch": 0.37, + "learning_rate": 4.938845218042298e-05, + "loss": 0.0768, + "step": 2614 + }, + { + "epoch": 0.37, + "learning_rate": 4.938798427849523e-05, + "loss": 0.1012, + "step": 2616 + }, + { + "epoch": 0.37, + "learning_rate": 4.9387516376567475e-05, + "loss": 0.1016, + "step": 2618 + }, + { + "epoch": 0.37, + "learning_rate": 4.938704847463972e-05, + "loss": 0.0953, + "step": 2620 + }, + { + "epoch": 0.37, + "learning_rate": 4.938658057271196e-05, + "loss": 0.1024, + "step": 2622 + }, + { + "epoch": 0.37, + "learning_rate": 4.9386112670784206e-05, + "loss": 0.1112, + "step": 2624 + }, + { + "epoch": 0.37, + "learning_rate": 4.9385644768856445e-05, + "loss": 0.0912, + "step": 2626 + }, + { + "epoch": 0.37, + "learning_rate": 4.93851768669287e-05, + "loss": 0.0913, + "step": 2628 + }, + { + "epoch": 0.37, + "learning_rate": 4.938470896500094e-05, + "loss": 0.081, + "step": 2630 + }, + { + "epoch": 0.37, + "learning_rate": 4.938424106307318e-05, + "loss": 0.0804, + "step": 2632 + }, + { + "epoch": 0.37, + "learning_rate": 4.938377316114542e-05, + "loss": 0.0689, + "step": 2634 + }, + { + "epoch": 0.37, + "learning_rate": 4.9383305259217675e-05, + "loss": 0.0885, + "step": 2636 + }, + { + "epoch": 0.37, + "learning_rate": 4.9382837357289914e-05, + "loss": 0.0695, + "step": 2638 + }, + { + "epoch": 0.37, + "learning_rate": 4.938236945536216e-05, + "loss": 0.0922, + "step": 2640 + }, + { + "epoch": 0.37, + "learning_rate": 4.93819015534344e-05, + "loss": 0.0983, + "step": 2642 + }, + { + "epoch": 0.37, + "learning_rate": 4.9381433651506645e-05, + "loss": 0.0997, + "step": 2644 + }, + { + "epoch": 0.37, + "learning_rate": 4.938096574957889e-05, + "loss": 0.0841, + "step": 2646 + }, + { + "epoch": 0.37, + "learning_rate": 4.938049784765114e-05, + "loss": 0.0863, + "step": 2648 + }, + { + "epoch": 0.37, + "learning_rate": 4.9380029945723376e-05, + "loss": 0.0944, + "step": 2650 + }, + { + "epoch": 0.37, + "learning_rate": 4.937956204379562e-05, + "loss": 0.1024, + "step": 2652 + }, + { + "epoch": 0.37, + "learning_rate": 4.937909414186787e-05, + "loss": 0.0689, + "step": 2654 + }, + { + "epoch": 0.37, + "learning_rate": 4.9378626239940114e-05, + "loss": 0.0946, + "step": 2656 + }, + { + "epoch": 0.37, + "learning_rate": 4.937815833801235e-05, + "loss": 0.0825, + "step": 2658 + }, + { + "epoch": 0.37, + "learning_rate": 4.93776904360846e-05, + "loss": 0.0975, + "step": 2660 + }, + { + "epoch": 0.37, + "learning_rate": 4.9377222534156845e-05, + "loss": 0.0915, + "step": 2662 + }, + { + "epoch": 0.37, + "learning_rate": 4.937675463222909e-05, + "loss": 0.0676, + "step": 2664 + }, + { + "epoch": 0.37, + "learning_rate": 4.937628673030133e-05, + "loss": 0.102, + "step": 2666 + }, + { + "epoch": 0.37, + "learning_rate": 4.9375818828373575e-05, + "loss": 0.1083, + "step": 2668 + }, + { + "epoch": 0.37, + "learning_rate": 4.937535092644582e-05, + "loss": 0.0844, + "step": 2670 + }, + { + "epoch": 0.38, + "learning_rate": 4.937488302451807e-05, + "loss": 0.0943, + "step": 2672 + }, + { + "epoch": 0.38, + "learning_rate": 4.9374415122590306e-05, + "loss": 0.0872, + "step": 2674 + }, + { + "epoch": 0.38, + "learning_rate": 4.937394722066255e-05, + "loss": 0.0766, + "step": 2676 + }, + { + "epoch": 0.38, + "learning_rate": 4.937347931873479e-05, + "loss": 0.069, + "step": 2678 + }, + { + "epoch": 0.38, + "learning_rate": 4.9373011416807044e-05, + "loss": 0.0887, + "step": 2680 + }, + { + "epoch": 0.38, + "learning_rate": 4.937254351487928e-05, + "loss": 0.0843, + "step": 2682 + }, + { + "epoch": 0.38, + "learning_rate": 4.937207561295153e-05, + "loss": 0.062, + "step": 2684 + }, + { + "epoch": 0.38, + "learning_rate": 4.937160771102377e-05, + "loss": 0.1122, + "step": 2686 + }, + { + "epoch": 0.38, + "learning_rate": 4.9371139809096014e-05, + "loss": 0.0852, + "step": 2688 + }, + { + "epoch": 0.38, + "learning_rate": 4.937067190716826e-05, + "loss": 0.0827, + "step": 2690 + }, + { + "epoch": 0.38, + "learning_rate": 4.9370204005240506e-05, + "loss": 0.0627, + "step": 2692 + }, + { + "epoch": 0.38, + "learning_rate": 4.9369736103312745e-05, + "loss": 0.1137, + "step": 2694 + }, + { + "epoch": 0.38, + "learning_rate": 4.936926820138499e-05, + "loss": 0.1036, + "step": 2696 + }, + { + "epoch": 0.38, + "learning_rate": 4.936880029945724e-05, + "loss": 0.0767, + "step": 2698 + }, + { + "epoch": 0.38, + "learning_rate": 4.936833239752948e-05, + "loss": 0.0915, + "step": 2700 + }, + { + "epoch": 0.38, + "learning_rate": 4.936786449560172e-05, + "loss": 0.1017, + "step": 2702 + }, + { + "epoch": 0.38, + "learning_rate": 4.936739659367397e-05, + "loss": 0.0586, + "step": 2704 + }, + { + "epoch": 0.38, + "learning_rate": 4.9366928691746214e-05, + "loss": 0.1029, + "step": 2706 + }, + { + "epoch": 0.38, + "learning_rate": 4.936646078981846e-05, + "loss": 0.0846, + "step": 2708 + }, + { + "epoch": 0.38, + "learning_rate": 4.93659928878907e-05, + "loss": 0.0791, + "step": 2710 + }, + { + "epoch": 0.38, + "learning_rate": 4.9365524985962945e-05, + "loss": 0.0909, + "step": 2712 + }, + { + "epoch": 0.38, + "learning_rate": 4.936505708403519e-05, + "loss": 0.102, + "step": 2714 + }, + { + "epoch": 0.38, + "learning_rate": 4.936458918210744e-05, + "loss": 0.0842, + "step": 2716 + }, + { + "epoch": 0.38, + "learning_rate": 4.9364121280179676e-05, + "loss": 0.0893, + "step": 2718 + }, + { + "epoch": 0.38, + "learning_rate": 4.936365337825192e-05, + "loss": 0.1152, + "step": 2720 + }, + { + "epoch": 0.38, + "learning_rate": 4.936318547632416e-05, + "loss": 0.0785, + "step": 2722 + }, + { + "epoch": 0.38, + "learning_rate": 4.9362717574396414e-05, + "loss": 0.0631, + "step": 2724 + }, + { + "epoch": 0.38, + "learning_rate": 4.936224967246865e-05, + "loss": 0.0979, + "step": 2726 + }, + { + "epoch": 0.38, + "learning_rate": 4.93617817705409e-05, + "loss": 0.0591, + "step": 2728 + }, + { + "epoch": 0.38, + "learning_rate": 4.936131386861314e-05, + "loss": 0.1029, + "step": 2730 + }, + { + "epoch": 0.38, + "learning_rate": 4.936084596668539e-05, + "loss": 0.1041, + "step": 2732 + }, + { + "epoch": 0.38, + "learning_rate": 4.936037806475763e-05, + "loss": 0.0838, + "step": 2734 + }, + { + "epoch": 0.38, + "learning_rate": 4.9359910162829876e-05, + "loss": 0.1018, + "step": 2736 + }, + { + "epoch": 0.38, + "learning_rate": 4.9359442260902115e-05, + "loss": 0.1033, + "step": 2738 + }, + { + "epoch": 0.38, + "learning_rate": 4.935897435897436e-05, + "loss": 0.0843, + "step": 2740 + }, + { + "epoch": 0.38, + "learning_rate": 4.9358506457046606e-05, + "loss": 0.0835, + "step": 2742 + }, + { + "epoch": 0.39, + "learning_rate": 4.935803855511885e-05, + "loss": 0.1003, + "step": 2744 + }, + { + "epoch": 0.39, + "learning_rate": 4.935757065319109e-05, + "loss": 0.0987, + "step": 2746 + }, + { + "epoch": 0.39, + "learning_rate": 4.935710275126334e-05, + "loss": 0.0769, + "step": 2748 + }, + { + "epoch": 0.39, + "learning_rate": 4.935663484933558e-05, + "loss": 0.0842, + "step": 2750 + }, + { + "epoch": 0.39, + "learning_rate": 4.935616694740783e-05, + "loss": 0.0742, + "step": 2752 + }, + { + "epoch": 0.39, + "learning_rate": 4.935569904548007e-05, + "loss": 0.0921, + "step": 2754 + }, + { + "epoch": 0.39, + "learning_rate": 4.9355231143552314e-05, + "loss": 0.0929, + "step": 2756 + }, + { + "epoch": 0.39, + "learning_rate": 4.935476324162456e-05, + "loss": 0.0645, + "step": 2758 + }, + { + "epoch": 0.39, + "learning_rate": 4.9354295339696806e-05, + "loss": 0.0891, + "step": 2760 + }, + { + "epoch": 0.39, + "learning_rate": 4.9353827437769045e-05, + "loss": 0.0806, + "step": 2762 + }, + { + "epoch": 0.39, + "learning_rate": 4.935335953584129e-05, + "loss": 0.1043, + "step": 2764 + }, + { + "epoch": 0.39, + "learning_rate": 4.935289163391354e-05, + "loss": 0.1009, + "step": 2766 + }, + { + "epoch": 0.39, + "learning_rate": 4.935242373198578e-05, + "loss": 0.0715, + "step": 2768 + }, + { + "epoch": 0.39, + "learning_rate": 4.935195583005802e-05, + "loss": 0.0918, + "step": 2770 + }, + { + "epoch": 0.39, + "learning_rate": 4.935148792813027e-05, + "loss": 0.0957, + "step": 2772 + }, + { + "epoch": 0.39, + "learning_rate": 4.935102002620251e-05, + "loss": 0.0927, + "step": 2774 + }, + { + "epoch": 0.39, + "learning_rate": 4.935055212427476e-05, + "loss": 0.1075, + "step": 2776 + }, + { + "epoch": 0.39, + "learning_rate": 4.9350084222347e-05, + "loss": 0.0845, + "step": 2778 + }, + { + "epoch": 0.39, + "learning_rate": 4.9349616320419245e-05, + "loss": 0.0982, + "step": 2780 + }, + { + "epoch": 0.39, + "learning_rate": 4.9349148418491484e-05, + "loss": 0.1105, + "step": 2782 + }, + { + "epoch": 0.39, + "learning_rate": 4.934868051656374e-05, + "loss": 0.1015, + "step": 2784 + }, + { + "epoch": 0.39, + "learning_rate": 4.9348212614635976e-05, + "loss": 0.0815, + "step": 2786 + }, + { + "epoch": 0.39, + "learning_rate": 4.9347744712708215e-05, + "loss": 0.1058, + "step": 2788 + }, + { + "epoch": 0.39, + "learning_rate": 4.934727681078046e-05, + "loss": 0.0769, + "step": 2790 + }, + { + "epoch": 0.39, + "learning_rate": 4.934680890885271e-05, + "loss": 0.0965, + "step": 2792 + }, + { + "epoch": 0.39, + "learning_rate": 4.934634100692495e-05, + "loss": 0.0706, + "step": 2794 + }, + { + "epoch": 0.39, + "learning_rate": 4.934587310499719e-05, + "loss": 0.077, + "step": 2796 + }, + { + "epoch": 0.39, + "learning_rate": 4.934540520306944e-05, + "loss": 0.0854, + "step": 2798 + }, + { + "epoch": 0.39, + "learning_rate": 4.9344937301141684e-05, + "loss": 0.0888, + "step": 2800 + }, + { + "epoch": 0.39, + "learning_rate": 4.934446939921393e-05, + "loss": 0.07, + "step": 2802 + }, + { + "epoch": 0.39, + "learning_rate": 4.934400149728617e-05, + "loss": 0.0731, + "step": 2804 + }, + { + "epoch": 0.39, + "learning_rate": 4.9343533595358415e-05, + "loss": 0.0812, + "step": 2806 + }, + { + "epoch": 0.39, + "learning_rate": 4.9343065693430654e-05, + "loss": 0.0853, + "step": 2808 + }, + { + "epoch": 0.39, + "learning_rate": 4.9342597791502906e-05, + "loss": 0.084, + "step": 2810 + }, + { + "epoch": 0.39, + "learning_rate": 4.9342129889575146e-05, + "loss": 0.093, + "step": 2812 + }, + { + "epoch": 0.4, + "learning_rate": 4.934166198764739e-05, + "loss": 0.0735, + "step": 2814 + }, + { + "epoch": 0.4, + "learning_rate": 4.934119408571963e-05, + "loss": 0.07, + "step": 2816 + }, + { + "epoch": 0.4, + "learning_rate": 4.9340726183791877e-05, + "loss": 0.1021, + "step": 2818 + }, + { + "epoch": 0.4, + "learning_rate": 4.934025828186412e-05, + "loss": 0.0915, + "step": 2820 + }, + { + "epoch": 0.4, + "learning_rate": 4.933979037993637e-05, + "loss": 0.0718, + "step": 2822 + }, + { + "epoch": 0.4, + "learning_rate": 4.933932247800861e-05, + "loss": 0.0619, + "step": 2824 + }, + { + "epoch": 0.4, + "learning_rate": 4.9338854576080853e-05, + "loss": 0.1078, + "step": 2826 + }, + { + "epoch": 0.4, + "learning_rate": 4.93383866741531e-05, + "loss": 0.1037, + "step": 2828 + }, + { + "epoch": 0.4, + "learning_rate": 4.9337918772225345e-05, + "loss": 0.104, + "step": 2830 + }, + { + "epoch": 0.4, + "learning_rate": 4.9337450870297584e-05, + "loss": 0.0801, + "step": 2832 + }, + { + "epoch": 0.4, + "learning_rate": 4.933698296836983e-05, + "loss": 0.079, + "step": 2834 + }, + { + "epoch": 0.4, + "learning_rate": 4.9336515066442076e-05, + "loss": 0.0832, + "step": 2836 + }, + { + "epoch": 0.4, + "learning_rate": 4.933604716451432e-05, + "loss": 0.1078, + "step": 2838 + }, + { + "epoch": 0.4, + "learning_rate": 4.933557926258656e-05, + "loss": 0.1121, + "step": 2840 + }, + { + "epoch": 0.4, + "learning_rate": 4.933511136065881e-05, + "loss": 0.0727, + "step": 2842 + }, + { + "epoch": 0.4, + "learning_rate": 4.933464345873105e-05, + "loss": 0.0748, + "step": 2844 + }, + { + "epoch": 0.4, + "learning_rate": 4.93341755568033e-05, + "loss": 0.1016, + "step": 2846 + }, + { + "epoch": 0.4, + "learning_rate": 4.933370765487554e-05, + "loss": 0.0796, + "step": 2848 + }, + { + "epoch": 0.4, + "learning_rate": 4.9333239752947784e-05, + "loss": 0.0688, + "step": 2850 + }, + { + "epoch": 0.4, + "learning_rate": 4.933277185102002e-05, + "loss": 0.0842, + "step": 2852 + }, + { + "epoch": 0.4, + "learning_rate": 4.9332303949092276e-05, + "loss": 0.079, + "step": 2854 + }, + { + "epoch": 0.4, + "learning_rate": 4.9331836047164515e-05, + "loss": 0.0742, + "step": 2856 + }, + { + "epoch": 0.4, + "learning_rate": 4.933136814523676e-05, + "loss": 0.1007, + "step": 2858 + }, + { + "epoch": 0.4, + "learning_rate": 4.9330900243309e-05, + "loss": 0.1008, + "step": 2860 + }, + { + "epoch": 0.4, + "learning_rate": 4.933043234138125e-05, + "loss": 0.1142, + "step": 2862 + }, + { + "epoch": 0.4, + "learning_rate": 4.932996443945349e-05, + "loss": 0.0813, + "step": 2864 + }, + { + "epoch": 0.4, + "learning_rate": 4.932949653752574e-05, + "loss": 0.1144, + "step": 2866 + }, + { + "epoch": 0.4, + "learning_rate": 4.932902863559798e-05, + "loss": 0.0992, + "step": 2868 + }, + { + "epoch": 0.4, + "learning_rate": 4.932856073367022e-05, + "loss": 0.0968, + "step": 2870 + }, + { + "epoch": 0.4, + "learning_rate": 4.932809283174247e-05, + "loss": 0.1014, + "step": 2872 + }, + { + "epoch": 0.4, + "learning_rate": 4.9327624929814715e-05, + "loss": 0.1101, + "step": 2874 + }, + { + "epoch": 0.4, + "learning_rate": 4.9327157027886954e-05, + "loss": 0.1064, + "step": 2876 + }, + { + "epoch": 0.4, + "learning_rate": 4.93266891259592e-05, + "loss": 0.0975, + "step": 2878 + }, + { + "epoch": 0.4, + "learning_rate": 4.9326221224031446e-05, + "loss": 0.0771, + "step": 2880 + }, + { + "epoch": 0.4, + "learning_rate": 4.932575332210369e-05, + "loss": 0.0935, + "step": 2882 + }, + { + "epoch": 0.4, + "learning_rate": 4.932528542017593e-05, + "loss": 0.0904, + "step": 2884 + }, + { + "epoch": 0.41, + "learning_rate": 4.9324817518248177e-05, + "loss": 0.089, + "step": 2886 + }, + { + "epoch": 0.41, + "learning_rate": 4.932434961632042e-05, + "loss": 0.0849, + "step": 2888 + }, + { + "epoch": 0.41, + "learning_rate": 4.932388171439267e-05, + "loss": 0.0978, + "step": 2890 + }, + { + "epoch": 0.41, + "learning_rate": 4.932341381246491e-05, + "loss": 0.0862, + "step": 2892 + }, + { + "epoch": 0.41, + "learning_rate": 4.9322945910537153e-05, + "loss": 0.1218, + "step": 2894 + }, + { + "epoch": 0.41, + "learning_rate": 4.93224780086094e-05, + "loss": 0.0879, + "step": 2896 + }, + { + "epoch": 0.41, + "learning_rate": 4.9322010106681645e-05, + "loss": 0.0632, + "step": 2898 + }, + { + "epoch": 0.41, + "learning_rate": 4.9321542204753884e-05, + "loss": 0.0839, + "step": 2900 + }, + { + "epoch": 0.41, + "learning_rate": 4.932107430282613e-05, + "loss": 0.0785, + "step": 2902 + }, + { + "epoch": 0.41, + "learning_rate": 4.932060640089837e-05, + "loss": 0.081, + "step": 2904 + }, + { + "epoch": 0.41, + "learning_rate": 4.932013849897062e-05, + "loss": 0.0749, + "step": 2906 + }, + { + "epoch": 0.41, + "learning_rate": 4.931967059704286e-05, + "loss": 0.0961, + "step": 2908 + }, + { + "epoch": 0.41, + "learning_rate": 4.931920269511511e-05, + "loss": 0.0717, + "step": 2910 + }, + { + "epoch": 0.41, + "learning_rate": 4.9318734793187346e-05, + "loss": 0.0949, + "step": 2912 + }, + { + "epoch": 0.41, + "learning_rate": 4.93182668912596e-05, + "loss": 0.1055, + "step": 2914 + }, + { + "epoch": 0.41, + "learning_rate": 4.931779898933184e-05, + "loss": 0.1064, + "step": 2916 + }, + { + "epoch": 0.41, + "learning_rate": 4.9317331087404084e-05, + "loss": 0.0878, + "step": 2918 + }, + { + "epoch": 0.41, + "learning_rate": 4.931686318547632e-05, + "loss": 0.1077, + "step": 2920 + }, + { + "epoch": 0.41, + "learning_rate": 4.931639528354857e-05, + "loss": 0.097, + "step": 2922 + }, + { + "epoch": 0.41, + "learning_rate": 4.9315927381620815e-05, + "loss": 0.0931, + "step": 2924 + }, + { + "epoch": 0.41, + "learning_rate": 4.931545947969306e-05, + "loss": 0.0806, + "step": 2926 + }, + { + "epoch": 0.41, + "learning_rate": 4.93149915777653e-05, + "loss": 0.1012, + "step": 2928 + }, + { + "epoch": 0.41, + "learning_rate": 4.9314523675837546e-05, + "loss": 0.0795, + "step": 2930 + }, + { + "epoch": 0.41, + "learning_rate": 4.931405577390979e-05, + "loss": 0.0863, + "step": 2932 + }, + { + "epoch": 0.41, + "learning_rate": 4.931358787198204e-05, + "loss": 0.1046, + "step": 2934 + }, + { + "epoch": 0.41, + "learning_rate": 4.931311997005428e-05, + "loss": 0.0997, + "step": 2936 + }, + { + "epoch": 0.41, + "learning_rate": 4.931265206812652e-05, + "loss": 0.0833, + "step": 2938 + }, + { + "epoch": 0.41, + "learning_rate": 4.931218416619877e-05, + "loss": 0.0915, + "step": 2940 + }, + { + "epoch": 0.41, + "learning_rate": 4.9311716264271015e-05, + "loss": 0.0701, + "step": 2942 + }, + { + "epoch": 0.41, + "learning_rate": 4.9311248362343254e-05, + "loss": 0.0793, + "step": 2944 + }, + { + "epoch": 0.41, + "learning_rate": 4.93107804604155e-05, + "loss": 0.0771, + "step": 2946 + }, + { + "epoch": 0.41, + "learning_rate": 4.9310312558487746e-05, + "loss": 0.0764, + "step": 2948 + }, + { + "epoch": 0.41, + "learning_rate": 4.930984465655999e-05, + "loss": 0.1202, + "step": 2950 + }, + { + "epoch": 0.41, + "learning_rate": 4.930937675463223e-05, + "loss": 0.0874, + "step": 2952 + }, + { + "epoch": 0.41, + "learning_rate": 4.930890885270448e-05, + "loss": 0.0884, + "step": 2954 + }, + { + "epoch": 0.41, + "learning_rate": 4.9308440950776716e-05, + "loss": 0.0876, + "step": 2956 + }, + { + "epoch": 0.42, + "learning_rate": 4.930797304884897e-05, + "loss": 0.0696, + "step": 2958 + }, + { + "epoch": 0.42, + "learning_rate": 4.930750514692121e-05, + "loss": 0.0949, + "step": 2960 + }, + { + "epoch": 0.42, + "learning_rate": 4.9307037244993453e-05, + "loss": 0.0941, + "step": 2962 + }, + { + "epoch": 0.42, + "learning_rate": 4.930656934306569e-05, + "loss": 0.0809, + "step": 2964 + }, + { + "epoch": 0.42, + "learning_rate": 4.930610144113794e-05, + "loss": 0.0756, + "step": 2966 + }, + { + "epoch": 0.42, + "learning_rate": 4.9305633539210184e-05, + "loss": 0.0949, + "step": 2968 + }, + { + "epoch": 0.42, + "learning_rate": 4.930516563728243e-05, + "loss": 0.0901, + "step": 2970 + }, + { + "epoch": 0.42, + "learning_rate": 4.930469773535467e-05, + "loss": 0.0894, + "step": 2972 + }, + { + "epoch": 0.42, + "learning_rate": 4.9304229833426915e-05, + "loss": 0.0945, + "step": 2974 + }, + { + "epoch": 0.42, + "learning_rate": 4.930376193149916e-05, + "loss": 0.089, + "step": 2976 + }, + { + "epoch": 0.42, + "learning_rate": 4.930329402957141e-05, + "loss": 0.1, + "step": 2978 + }, + { + "epoch": 0.42, + "learning_rate": 4.9302826127643646e-05, + "loss": 0.0658, + "step": 2980 + }, + { + "epoch": 0.42, + "learning_rate": 4.930235822571589e-05, + "loss": 0.1, + "step": 2982 + }, + { + "epoch": 0.42, + "learning_rate": 4.930189032378814e-05, + "loss": 0.0922, + "step": 2984 + }, + { + "epoch": 0.42, + "learning_rate": 4.9301422421860384e-05, + "loss": 0.0654, + "step": 2986 + }, + { + "epoch": 0.42, + "learning_rate": 4.930095451993262e-05, + "loss": 0.1063, + "step": 2988 + }, + { + "epoch": 0.42, + "learning_rate": 4.930048661800487e-05, + "loss": 0.0785, + "step": 2990 + }, + { + "epoch": 0.42, + "learning_rate": 4.9300018716077115e-05, + "loss": 0.0839, + "step": 2992 + }, + { + "epoch": 0.42, + "learning_rate": 4.929955081414936e-05, + "loss": 0.0981, + "step": 2994 + }, + { + "epoch": 0.42, + "learning_rate": 4.92990829122216e-05, + "loss": 0.0758, + "step": 2996 + }, + { + "epoch": 0.42, + "learning_rate": 4.9298615010293846e-05, + "loss": 0.08, + "step": 2998 + }, + { + "epoch": 0.42, + "learning_rate": 4.9298147108366085e-05, + "loss": 0.104, + "step": 3000 + }, + { + "epoch": 0.42, + "eval_gen_len": 31.5172, + "eval_loss": 1.0198298692703247, + "eval_meteor": 0.0411, + "eval_runtime": 15.656, + "eval_samples_per_second": 3.705, + "eval_steps_per_second": 0.511, + "step": 3000 + }, + { + "epoch": 0.42, + "learning_rate": 4.929767920643834e-05, + "loss": 0.1136, + "step": 3002 + }, + { + "epoch": 0.42, + "learning_rate": 4.929721130451058e-05, + "loss": 0.0673, + "step": 3004 + }, + { + "epoch": 0.42, + "learning_rate": 4.929674340258282e-05, + "loss": 0.1076, + "step": 3006 + }, + { + "epoch": 0.42, + "learning_rate": 4.929627550065506e-05, + "loss": 0.0948, + "step": 3008 + }, + { + "epoch": 0.42, + "learning_rate": 4.9295807598727315e-05, + "loss": 0.0816, + "step": 3010 + }, + { + "epoch": 0.42, + "learning_rate": 4.9295339696799554e-05, + "loss": 0.0813, + "step": 3012 + }, + { + "epoch": 0.42, + "learning_rate": 4.92948717948718e-05, + "loss": 0.087, + "step": 3014 + }, + { + "epoch": 0.42, + "learning_rate": 4.929440389294404e-05, + "loss": 0.0923, + "step": 3016 + }, + { + "epoch": 0.42, + "learning_rate": 4.9293935991016285e-05, + "loss": 0.0832, + "step": 3018 + }, + { + "epoch": 0.42, + "learning_rate": 4.929346808908853e-05, + "loss": 0.121, + "step": 3020 + }, + { + "epoch": 0.42, + "learning_rate": 4.929300018716078e-05, + "loss": 0.0928, + "step": 3022 + }, + { + "epoch": 0.42, + "learning_rate": 4.9292532285233016e-05, + "loss": 0.0719, + "step": 3024 + }, + { + "epoch": 0.42, + "learning_rate": 4.929206438330526e-05, + "loss": 0.0995, + "step": 3026 + }, + { + "epoch": 0.43, + "learning_rate": 4.929159648137751e-05, + "loss": 0.0781, + "step": 3028 + }, + { + "epoch": 0.43, + "learning_rate": 4.9291128579449754e-05, + "loss": 0.1279, + "step": 3030 + }, + { + "epoch": 0.43, + "learning_rate": 4.929066067752199e-05, + "loss": 0.0931, + "step": 3032 + }, + { + "epoch": 0.43, + "learning_rate": 4.929019277559424e-05, + "loss": 0.0818, + "step": 3034 + }, + { + "epoch": 0.43, + "learning_rate": 4.9289724873666484e-05, + "loss": 0.0922, + "step": 3036 + }, + { + "epoch": 0.43, + "learning_rate": 4.928925697173873e-05, + "loss": 0.0798, + "step": 3038 + }, + { + "epoch": 0.43, + "learning_rate": 4.928878906981097e-05, + "loss": 0.0919, + "step": 3040 + }, + { + "epoch": 0.43, + "learning_rate": 4.928832116788321e-05, + "loss": 0.0774, + "step": 3042 + }, + { + "epoch": 0.43, + "learning_rate": 4.928785326595546e-05, + "loss": 0.0971, + "step": 3044 + }, + { + "epoch": 0.43, + "learning_rate": 4.92873853640277e-05, + "loss": 0.0831, + "step": 3046 + }, + { + "epoch": 0.43, + "learning_rate": 4.9286917462099946e-05, + "loss": 0.1036, + "step": 3048 + }, + { + "epoch": 0.43, + "learning_rate": 4.9286449560172186e-05, + "loss": 0.0944, + "step": 3050 + }, + { + "epoch": 0.43, + "learning_rate": 4.928598165824443e-05, + "loss": 0.0763, + "step": 3052 + }, + { + "epoch": 0.43, + "learning_rate": 4.928551375631668e-05, + "loss": 0.0883, + "step": 3054 + }, + { + "epoch": 0.43, + "learning_rate": 4.928504585438892e-05, + "loss": 0.0872, + "step": 3056 + }, + { + "epoch": 0.43, + "learning_rate": 4.928457795246116e-05, + "loss": 0.1013, + "step": 3058 + }, + { + "epoch": 0.43, + "learning_rate": 4.928411005053341e-05, + "loss": 0.0789, + "step": 3060 + }, + { + "epoch": 0.43, + "learning_rate": 4.9283642148605654e-05, + "loss": 0.0916, + "step": 3062 + }, + { + "epoch": 0.43, + "learning_rate": 4.92831742466779e-05, + "loss": 0.0894, + "step": 3064 + }, + { + "epoch": 0.43, + "learning_rate": 4.928270634475014e-05, + "loss": 0.0807, + "step": 3066 + }, + { + "epoch": 0.43, + "learning_rate": 4.9282238442822385e-05, + "loss": 0.0952, + "step": 3068 + }, + { + "epoch": 0.43, + "learning_rate": 4.928177054089463e-05, + "loss": 0.1163, + "step": 3070 + }, + { + "epoch": 0.43, + "learning_rate": 4.928130263896688e-05, + "loss": 0.0867, + "step": 3072 + }, + { + "epoch": 0.43, + "learning_rate": 4.9280834737039116e-05, + "loss": 0.0805, + "step": 3074 + }, + { + "epoch": 0.43, + "learning_rate": 4.928036683511136e-05, + "loss": 0.0857, + "step": 3076 + }, + { + "epoch": 0.43, + "learning_rate": 4.927989893318361e-05, + "loss": 0.103, + "step": 3078 + }, + { + "epoch": 0.43, + "learning_rate": 4.9279431031255854e-05, + "loss": 0.0869, + "step": 3080 + }, + { + "epoch": 0.43, + "learning_rate": 4.927896312932809e-05, + "loss": 0.0897, + "step": 3082 + }, + { + "epoch": 0.43, + "learning_rate": 4.927849522740034e-05, + "loss": 0.0912, + "step": 3084 + }, + { + "epoch": 0.43, + "learning_rate": 4.927802732547258e-05, + "loss": 0.1038, + "step": 3086 + }, + { + "epoch": 0.43, + "learning_rate": 4.927755942354483e-05, + "loss": 0.0832, + "step": 3088 + }, + { + "epoch": 0.43, + "learning_rate": 4.927709152161707e-05, + "loss": 0.1027, + "step": 3090 + }, + { + "epoch": 0.43, + "learning_rate": 4.9276623619689316e-05, + "loss": 0.0765, + "step": 3092 + }, + { + "epoch": 0.43, + "learning_rate": 4.9276155717761555e-05, + "loss": 0.0644, + "step": 3094 + }, + { + "epoch": 0.43, + "learning_rate": 4.927568781583381e-05, + "loss": 0.0852, + "step": 3096 + }, + { + "epoch": 0.43, + "learning_rate": 4.927521991390605e-05, + "loss": 0.1084, + "step": 3098 + }, + { + "epoch": 0.44, + "learning_rate": 4.927475201197829e-05, + "loss": 0.1356, + "step": 3100 + }, + { + "epoch": 0.44, + "learning_rate": 4.927428411005053e-05, + "loss": 0.1009, + "step": 3102 + }, + { + "epoch": 0.44, + "learning_rate": 4.927381620812278e-05, + "loss": 0.0932, + "step": 3104 + }, + { + "epoch": 0.44, + "learning_rate": 4.9273348306195024e-05, + "loss": 0.0939, + "step": 3106 + }, + { + "epoch": 0.44, + "learning_rate": 4.927288040426727e-05, + "loss": 0.1043, + "step": 3108 + }, + { + "epoch": 0.44, + "learning_rate": 4.927241250233951e-05, + "loss": 0.0928, + "step": 3110 + }, + { + "epoch": 0.44, + "learning_rate": 4.9271944600411755e-05, + "loss": 0.0829, + "step": 3112 + }, + { + "epoch": 0.44, + "learning_rate": 4.9271476698484e-05, + "loss": 0.0711, + "step": 3114 + }, + { + "epoch": 0.44, + "learning_rate": 4.9271008796556246e-05, + "loss": 0.0959, + "step": 3116 + }, + { + "epoch": 0.44, + "learning_rate": 4.9270540894628486e-05, + "loss": 0.0769, + "step": 3118 + }, + { + "epoch": 0.44, + "learning_rate": 4.927007299270073e-05, + "loss": 0.087, + "step": 3120 + }, + { + "epoch": 0.44, + "learning_rate": 4.926960509077298e-05, + "loss": 0.0955, + "step": 3122 + }, + { + "epoch": 0.44, + "learning_rate": 4.926913718884522e-05, + "loss": 0.0665, + "step": 3124 + }, + { + "epoch": 0.44, + "learning_rate": 4.926866928691746e-05, + "loss": 0.108, + "step": 3126 + }, + { + "epoch": 0.44, + "learning_rate": 4.926820138498971e-05, + "loss": 0.0868, + "step": 3128 + }, + { + "epoch": 0.44, + "learning_rate": 4.926773348306195e-05, + "loss": 0.0698, + "step": 3130 + }, + { + "epoch": 0.44, + "learning_rate": 4.92672655811342e-05, + "loss": 0.1027, + "step": 3132 + }, + { + "epoch": 0.44, + "learning_rate": 4.926679767920644e-05, + "loss": 0.0982, + "step": 3134 + }, + { + "epoch": 0.44, + "learning_rate": 4.9266329777278685e-05, + "loss": 0.1106, + "step": 3136 + }, + { + "epoch": 0.44, + "learning_rate": 4.9265861875350924e-05, + "loss": 0.106, + "step": 3138 + }, + { + "epoch": 0.44, + "learning_rate": 4.926539397342318e-05, + "loss": 0.0866, + "step": 3140 + }, + { + "epoch": 0.44, + "learning_rate": 4.9264926071495416e-05, + "loss": 0.0842, + "step": 3142 + }, + { + "epoch": 0.44, + "learning_rate": 4.926445816956766e-05, + "loss": 0.0922, + "step": 3144 + }, + { + "epoch": 0.44, + "learning_rate": 4.92639902676399e-05, + "loss": 0.0924, + "step": 3146 + }, + { + "epoch": 0.44, + "learning_rate": 4.926352236571215e-05, + "loss": 0.0974, + "step": 3148 + }, + { + "epoch": 0.44, + "learning_rate": 4.926305446378439e-05, + "loss": 0.0888, + "step": 3150 + }, + { + "epoch": 0.44, + "learning_rate": 4.926258656185664e-05, + "loss": 0.0867, + "step": 3152 + }, + { + "epoch": 0.44, + "learning_rate": 4.926211865992888e-05, + "loss": 0.084, + "step": 3154 + }, + { + "epoch": 0.44, + "learning_rate": 4.9261650758001124e-05, + "loss": 0.0833, + "step": 3156 + }, + { + "epoch": 0.44, + "learning_rate": 4.926118285607337e-05, + "loss": 0.0991, + "step": 3158 + }, + { + "epoch": 0.44, + "learning_rate": 4.9260714954145616e-05, + "loss": 0.0926, + "step": 3160 + }, + { + "epoch": 0.44, + "learning_rate": 4.9260247052217855e-05, + "loss": 0.0762, + "step": 3162 + }, + { + "epoch": 0.44, + "learning_rate": 4.92597791502901e-05, + "loss": 0.0903, + "step": 3164 + }, + { + "epoch": 0.44, + "learning_rate": 4.925931124836235e-05, + "loss": 0.0944, + "step": 3166 + }, + { + "epoch": 0.44, + "learning_rate": 4.925884334643459e-05, + "loss": 0.0783, + "step": 3168 + }, + { + "epoch": 0.44, + "learning_rate": 4.925837544450683e-05, + "loss": 0.0828, + "step": 3170 + }, + { + "epoch": 0.45, + "learning_rate": 4.925790754257908e-05, + "loss": 0.0821, + "step": 3172 + }, + { + "epoch": 0.45, + "learning_rate": 4.9257439640651324e-05, + "loss": 0.0774, + "step": 3174 + }, + { + "epoch": 0.45, + "learning_rate": 4.925697173872357e-05, + "loss": 0.0988, + "step": 3176 + }, + { + "epoch": 0.45, + "learning_rate": 4.925650383679581e-05, + "loss": 0.0883, + "step": 3178 + }, + { + "epoch": 0.45, + "learning_rate": 4.9256035934868055e-05, + "loss": 0.0986, + "step": 3180 + }, + { + "epoch": 0.45, + "learning_rate": 4.9255568032940294e-05, + "loss": 0.0735, + "step": 3182 + }, + { + "epoch": 0.45, + "learning_rate": 4.9255100131012546e-05, + "loss": 0.085, + "step": 3184 + }, + { + "epoch": 0.45, + "learning_rate": 4.9254632229084786e-05, + "loss": 0.0614, + "step": 3186 + }, + { + "epoch": 0.45, + "learning_rate": 4.925416432715703e-05, + "loss": 0.1033, + "step": 3188 + }, + { + "epoch": 0.45, + "learning_rate": 4.925369642522927e-05, + "loss": 0.0694, + "step": 3190 + }, + { + "epoch": 0.45, + "learning_rate": 4.925322852330152e-05, + "loss": 0.1439, + "step": 3192 + }, + { + "epoch": 0.45, + "learning_rate": 4.925276062137376e-05, + "loss": 0.0791, + "step": 3194 + }, + { + "epoch": 0.45, + "learning_rate": 4.925229271944601e-05, + "loss": 0.0969, + "step": 3196 + }, + { + "epoch": 0.45, + "learning_rate": 4.925182481751825e-05, + "loss": 0.1305, + "step": 3198 + }, + { + "epoch": 0.45, + "learning_rate": 4.925135691559049e-05, + "loss": 0.1025, + "step": 3200 + }, + { + "epoch": 0.45, + "learning_rate": 4.925088901366274e-05, + "loss": 0.0891, + "step": 3202 + }, + { + "epoch": 0.45, + "learning_rate": 4.9250421111734985e-05, + "loss": 0.0853, + "step": 3204 + }, + { + "epoch": 0.45, + "learning_rate": 4.9249953209807224e-05, + "loss": 0.0804, + "step": 3206 + }, + { + "epoch": 0.45, + "learning_rate": 4.924948530787947e-05, + "loss": 0.1006, + "step": 3208 + }, + { + "epoch": 0.45, + "learning_rate": 4.9249017405951716e-05, + "loss": 0.0594, + "step": 3210 + }, + { + "epoch": 0.45, + "learning_rate": 4.924854950402396e-05, + "loss": 0.0936, + "step": 3212 + }, + { + "epoch": 0.45, + "learning_rate": 4.92480816020962e-05, + "loss": 0.0892, + "step": 3214 + }, + { + "epoch": 0.45, + "learning_rate": 4.924761370016845e-05, + "loss": 0.0965, + "step": 3216 + }, + { + "epoch": 0.45, + "learning_rate": 4.924714579824069e-05, + "loss": 0.0896, + "step": 3218 + }, + { + "epoch": 0.45, + "learning_rate": 4.924667789631294e-05, + "loss": 0.0858, + "step": 3220 + }, + { + "epoch": 0.45, + "learning_rate": 4.924620999438518e-05, + "loss": 0.1017, + "step": 3222 + }, + { + "epoch": 0.45, + "learning_rate": 4.9245742092457424e-05, + "loss": 0.1014, + "step": 3224 + }, + { + "epoch": 0.45, + "learning_rate": 4.924527419052967e-05, + "loss": 0.1362, + "step": 3226 + }, + { + "epoch": 0.45, + "learning_rate": 4.9244806288601916e-05, + "loss": 0.0861, + "step": 3228 + }, + { + "epoch": 0.45, + "learning_rate": 4.9244338386674155e-05, + "loss": 0.0786, + "step": 3230 + }, + { + "epoch": 0.45, + "learning_rate": 4.92438704847464e-05, + "loss": 0.0862, + "step": 3232 + }, + { + "epoch": 0.45, + "learning_rate": 4.924340258281864e-05, + "loss": 0.0964, + "step": 3234 + }, + { + "epoch": 0.45, + "learning_rate": 4.924293468089089e-05, + "loss": 0.0864, + "step": 3236 + }, + { + "epoch": 0.45, + "learning_rate": 4.924246677896313e-05, + "loss": 0.0824, + "step": 3238 + }, + { + "epoch": 0.45, + "learning_rate": 4.924199887703538e-05, + "loss": 0.0787, + "step": 3240 + }, + { + "epoch": 0.46, + "learning_rate": 4.924153097510762e-05, + "loss": 0.0682, + "step": 3242 + }, + { + "epoch": 0.46, + "learning_rate": 4.924106307317986e-05, + "loss": 0.0717, + "step": 3244 + }, + { + "epoch": 0.46, + "learning_rate": 4.924059517125211e-05, + "loss": 0.0737, + "step": 3246 + }, + { + "epoch": 0.46, + "learning_rate": 4.9240127269324355e-05, + "loss": 0.0897, + "step": 3248 + }, + { + "epoch": 0.46, + "learning_rate": 4.9239659367396594e-05, + "loss": 0.0791, + "step": 3250 + }, + { + "epoch": 0.46, + "learning_rate": 4.923919146546884e-05, + "loss": 0.0948, + "step": 3252 + }, + { + "epoch": 0.46, + "learning_rate": 4.9238723563541086e-05, + "loss": 0.0983, + "step": 3254 + }, + { + "epoch": 0.46, + "learning_rate": 4.923825566161333e-05, + "loss": 0.09, + "step": 3256 + }, + { + "epoch": 0.46, + "learning_rate": 4.923778775968557e-05, + "loss": 0.0888, + "step": 3258 + }, + { + "epoch": 0.46, + "learning_rate": 4.9237319857757817e-05, + "loss": 0.0903, + "step": 3260 + }, + { + "epoch": 0.46, + "learning_rate": 4.923685195583006e-05, + "loss": 0.0757, + "step": 3262 + }, + { + "epoch": 0.46, + "learning_rate": 4.923638405390231e-05, + "loss": 0.1054, + "step": 3264 + }, + { + "epoch": 0.46, + "learning_rate": 4.923591615197455e-05, + "loss": 0.0773, + "step": 3266 + }, + { + "epoch": 0.46, + "learning_rate": 4.9235448250046793e-05, + "loss": 0.1131, + "step": 3268 + }, + { + "epoch": 0.46, + "learning_rate": 4.923498034811904e-05, + "loss": 0.0962, + "step": 3270 + }, + { + "epoch": 0.46, + "learning_rate": 4.9234512446191285e-05, + "loss": 0.1024, + "step": 3272 + }, + { + "epoch": 0.46, + "learning_rate": 4.9234044544263524e-05, + "loss": 0.0872, + "step": 3274 + }, + { + "epoch": 0.46, + "learning_rate": 4.923357664233577e-05, + "loss": 0.0985, + "step": 3276 + }, + { + "epoch": 0.46, + "learning_rate": 4.923310874040801e-05, + "loss": 0.0879, + "step": 3278 + }, + { + "epoch": 0.46, + "learning_rate": 4.923264083848026e-05, + "loss": 0.1048, + "step": 3280 + }, + { + "epoch": 0.46, + "learning_rate": 4.92321729365525e-05, + "loss": 0.0863, + "step": 3282 + }, + { + "epoch": 0.46, + "learning_rate": 4.923170503462475e-05, + "loss": 0.0783, + "step": 3284 + }, + { + "epoch": 0.46, + "learning_rate": 4.9231237132696986e-05, + "loss": 0.0929, + "step": 3286 + }, + { + "epoch": 0.46, + "learning_rate": 4.923076923076924e-05, + "loss": 0.0823, + "step": 3288 + }, + { + "epoch": 0.46, + "learning_rate": 4.923030132884148e-05, + "loss": 0.0793, + "step": 3290 + }, + { + "epoch": 0.46, + "learning_rate": 4.922983342691372e-05, + "loss": 0.1179, + "step": 3292 + }, + { + "epoch": 0.46, + "learning_rate": 4.922936552498596e-05, + "loss": 0.0903, + "step": 3294 + }, + { + "epoch": 0.46, + "learning_rate": 4.922889762305821e-05, + "loss": 0.0917, + "step": 3296 + }, + { + "epoch": 0.46, + "learning_rate": 4.9228429721130455e-05, + "loss": 0.0788, + "step": 3298 + }, + { + "epoch": 0.46, + "learning_rate": 4.9227961819202694e-05, + "loss": 0.076, + "step": 3300 + }, + { + "epoch": 0.46, + "learning_rate": 4.922749391727494e-05, + "loss": 0.0905, + "step": 3302 + }, + { + "epoch": 0.46, + "learning_rate": 4.9227026015347186e-05, + "loss": 0.0986, + "step": 3304 + }, + { + "epoch": 0.46, + "learning_rate": 4.922655811341943e-05, + "loss": 0.0629, + "step": 3306 + }, + { + "epoch": 0.46, + "learning_rate": 4.922609021149167e-05, + "loss": 0.0851, + "step": 3308 + }, + { + "epoch": 0.46, + "learning_rate": 4.922562230956392e-05, + "loss": 0.0812, + "step": 3310 + }, + { + "epoch": 0.46, + "learning_rate": 4.9225154407636156e-05, + "loss": 0.0563, + "step": 3312 + }, + { + "epoch": 0.47, + "learning_rate": 4.922468650570841e-05, + "loss": 0.1, + "step": 3314 + }, + { + "epoch": 0.47, + "learning_rate": 4.922421860378065e-05, + "loss": 0.0857, + "step": 3316 + }, + { + "epoch": 0.47, + "learning_rate": 4.9223750701852894e-05, + "loss": 0.079, + "step": 3318 + }, + { + "epoch": 0.47, + "learning_rate": 4.922328279992513e-05, + "loss": 0.0901, + "step": 3320 + }, + { + "epoch": 0.47, + "learning_rate": 4.9222814897997386e-05, + "loss": 0.082, + "step": 3322 + }, + { + "epoch": 0.47, + "learning_rate": 4.9222346996069625e-05, + "loss": 0.0875, + "step": 3324 + }, + { + "epoch": 0.47, + "learning_rate": 4.922187909414187e-05, + "loss": 0.0915, + "step": 3326 + }, + { + "epoch": 0.47, + "learning_rate": 4.922141119221411e-05, + "loss": 0.0988, + "step": 3328 + }, + { + "epoch": 0.47, + "learning_rate": 4.9220943290286356e-05, + "loss": 0.0775, + "step": 3330 + }, + { + "epoch": 0.47, + "learning_rate": 4.92204753883586e-05, + "loss": 0.077, + "step": 3332 + }, + { + "epoch": 0.47, + "learning_rate": 4.922000748643085e-05, + "loss": 0.0846, + "step": 3334 + }, + { + "epoch": 0.47, + "learning_rate": 4.921953958450309e-05, + "loss": 0.0951, + "step": 3336 + }, + { + "epoch": 0.47, + "learning_rate": 4.921907168257533e-05, + "loss": 0.0938, + "step": 3338 + }, + { + "epoch": 0.47, + "learning_rate": 4.921860378064758e-05, + "loss": 0.0741, + "step": 3340 + }, + { + "epoch": 0.47, + "learning_rate": 4.9218135878719824e-05, + "loss": 0.092, + "step": 3342 + }, + { + "epoch": 0.47, + "learning_rate": 4.9217667976792064e-05, + "loss": 0.0932, + "step": 3344 + }, + { + "epoch": 0.47, + "learning_rate": 4.921720007486431e-05, + "loss": 0.0955, + "step": 3346 + }, + { + "epoch": 0.47, + "learning_rate": 4.9216732172936555e-05, + "loss": 0.1082, + "step": 3348 + }, + { + "epoch": 0.47, + "learning_rate": 4.92162642710088e-05, + "loss": 0.0878, + "step": 3350 + }, + { + "epoch": 0.47, + "learning_rate": 4.921579636908104e-05, + "loss": 0.0945, + "step": 3352 + }, + { + "epoch": 0.47, + "learning_rate": 4.9215328467153286e-05, + "loss": 0.0896, + "step": 3354 + }, + { + "epoch": 0.47, + "learning_rate": 4.921486056522553e-05, + "loss": 0.0855, + "step": 3356 + }, + { + "epoch": 0.47, + "learning_rate": 4.921439266329778e-05, + "loss": 0.1167, + "step": 3358 + }, + { + "epoch": 0.47, + "learning_rate": 4.921392476137002e-05, + "loss": 0.104, + "step": 3360 + }, + { + "epoch": 0.47, + "learning_rate": 4.921345685944226e-05, + "loss": 0.0772, + "step": 3362 + }, + { + "epoch": 0.47, + "learning_rate": 4.92129889575145e-05, + "loss": 0.0921, + "step": 3364 + }, + { + "epoch": 0.47, + "learning_rate": 4.9212521055586755e-05, + "loss": 0.0733, + "step": 3366 + }, + { + "epoch": 0.47, + "learning_rate": 4.9212053153658994e-05, + "loss": 0.105, + "step": 3368 + }, + { + "epoch": 0.47, + "learning_rate": 4.921158525173124e-05, + "loss": 0.0738, + "step": 3370 + }, + { + "epoch": 0.47, + "learning_rate": 4.921111734980348e-05, + "loss": 0.1045, + "step": 3372 + }, + { + "epoch": 0.47, + "learning_rate": 4.921064944787573e-05, + "loss": 0.0756, + "step": 3374 + }, + { + "epoch": 0.47, + "learning_rate": 4.921018154594797e-05, + "loss": 0.0824, + "step": 3376 + }, + { + "epoch": 0.47, + "learning_rate": 4.920971364402022e-05, + "loss": 0.0694, + "step": 3378 + }, + { + "epoch": 0.47, + "learning_rate": 4.9209245742092456e-05, + "loss": 0.0884, + "step": 3380 + }, + { + "epoch": 0.47, + "learning_rate": 4.92087778401647e-05, + "loss": 0.0801, + "step": 3382 + }, + { + "epoch": 0.48, + "learning_rate": 4.920830993823695e-05, + "loss": 0.0818, + "step": 3384 + }, + { + "epoch": 0.48, + "learning_rate": 4.9207842036309194e-05, + "loss": 0.102, + "step": 3386 + }, + { + "epoch": 0.48, + "learning_rate": 4.920737413438143e-05, + "loss": 0.0931, + "step": 3388 + }, + { + "epoch": 0.48, + "learning_rate": 4.920690623245368e-05, + "loss": 0.0782, + "step": 3390 + }, + { + "epoch": 0.48, + "learning_rate": 4.9206438330525925e-05, + "loss": 0.0823, + "step": 3392 + }, + { + "epoch": 0.48, + "learning_rate": 4.920597042859817e-05, + "loss": 0.0899, + "step": 3394 + }, + { + "epoch": 0.48, + "learning_rate": 4.920550252667041e-05, + "loss": 0.0896, + "step": 3396 + }, + { + "epoch": 0.48, + "learning_rate": 4.9205034624742656e-05, + "loss": 0.0832, + "step": 3398 + }, + { + "epoch": 0.48, + "learning_rate": 4.92045667228149e-05, + "loss": 0.0772, + "step": 3400 + }, + { + "epoch": 0.48, + "learning_rate": 4.920409882088715e-05, + "loss": 0.0912, + "step": 3402 + }, + { + "epoch": 0.48, + "learning_rate": 4.920363091895939e-05, + "loss": 0.0745, + "step": 3404 + }, + { + "epoch": 0.48, + "learning_rate": 4.920316301703163e-05, + "loss": 0.0826, + "step": 3406 + }, + { + "epoch": 0.48, + "learning_rate": 4.920269511510387e-05, + "loss": 0.0785, + "step": 3408 + }, + { + "epoch": 0.48, + "learning_rate": 4.9202227213176124e-05, + "loss": 0.0764, + "step": 3410 + }, + { + "epoch": 0.48, + "learning_rate": 4.9201759311248364e-05, + "loss": 0.0993, + "step": 3412 + }, + { + "epoch": 0.48, + "learning_rate": 4.920129140932061e-05, + "loss": 0.0614, + "step": 3414 + }, + { + "epoch": 0.48, + "learning_rate": 4.920082350739285e-05, + "loss": 0.0791, + "step": 3416 + }, + { + "epoch": 0.48, + "learning_rate": 4.92003556054651e-05, + "loss": 0.0739, + "step": 3418 + }, + { + "epoch": 0.48, + "learning_rate": 4.919988770353734e-05, + "loss": 0.0815, + "step": 3420 + }, + { + "epoch": 0.48, + "learning_rate": 4.9199419801609586e-05, + "loss": 0.0853, + "step": 3422 + }, + { + "epoch": 0.48, + "learning_rate": 4.9198951899681825e-05, + "loss": 0.0914, + "step": 3424 + }, + { + "epoch": 0.48, + "learning_rate": 4.919848399775407e-05, + "loss": 0.099, + "step": 3426 + }, + { + "epoch": 0.48, + "learning_rate": 4.919801609582632e-05, + "loss": 0.0893, + "step": 3428 + }, + { + "epoch": 0.48, + "learning_rate": 4.919754819389856e-05, + "loss": 0.0865, + "step": 3430 + }, + { + "epoch": 0.48, + "learning_rate": 4.91970802919708e-05, + "loss": 0.0925, + "step": 3432 + }, + { + "epoch": 0.48, + "learning_rate": 4.919661239004305e-05, + "loss": 0.088, + "step": 3434 + }, + { + "epoch": 0.48, + "learning_rate": 4.9196144488115294e-05, + "loss": 0.1066, + "step": 3436 + }, + { + "epoch": 0.48, + "learning_rate": 4.919567658618754e-05, + "loss": 0.0967, + "step": 3438 + }, + { + "epoch": 0.48, + "learning_rate": 4.919520868425978e-05, + "loss": 0.1086, + "step": 3440 + }, + { + "epoch": 0.48, + "learning_rate": 4.9194740782332025e-05, + "loss": 0.078, + "step": 3442 + }, + { + "epoch": 0.48, + "learning_rate": 4.919427288040427e-05, + "loss": 0.0949, + "step": 3444 + }, + { + "epoch": 0.48, + "learning_rate": 4.919380497847652e-05, + "loss": 0.0922, + "step": 3446 + }, + { + "epoch": 0.48, + "learning_rate": 4.9193337076548756e-05, + "loss": 0.0815, + "step": 3448 + }, + { + "epoch": 0.48, + "learning_rate": 4.9192869174621e-05, + "loss": 0.0727, + "step": 3450 + }, + { + "epoch": 0.48, + "learning_rate": 4.919240127269325e-05, + "loss": 0.0812, + "step": 3452 + }, + { + "epoch": 0.48, + "learning_rate": 4.9191933370765494e-05, + "loss": 0.0907, + "step": 3454 + }, + { + "epoch": 0.49, + "learning_rate": 4.919146546883773e-05, + "loss": 0.0951, + "step": 3456 + }, + { + "epoch": 0.49, + "learning_rate": 4.919099756690998e-05, + "loss": 0.0956, + "step": 3458 + }, + { + "epoch": 0.49, + "learning_rate": 4.919052966498222e-05, + "loss": 0.0864, + "step": 3460 + }, + { + "epoch": 0.49, + "learning_rate": 4.919006176305447e-05, + "loss": 0.0829, + "step": 3462 + }, + { + "epoch": 0.49, + "learning_rate": 4.918959386112671e-05, + "loss": 0.0738, + "step": 3464 + }, + { + "epoch": 0.49, + "learning_rate": 4.9189125959198956e-05, + "loss": 0.0959, + "step": 3466 + }, + { + "epoch": 0.49, + "learning_rate": 4.9188658057271195e-05, + "loss": 0.0988, + "step": 3468 + }, + { + "epoch": 0.49, + "learning_rate": 4.918819015534345e-05, + "loss": 0.087, + "step": 3470 + }, + { + "epoch": 0.49, + "learning_rate": 4.918772225341569e-05, + "loss": 0.0897, + "step": 3472 + }, + { + "epoch": 0.49, + "learning_rate": 4.918725435148793e-05, + "loss": 0.1108, + "step": 3474 + }, + { + "epoch": 0.49, + "learning_rate": 4.918678644956017e-05, + "loss": 0.0892, + "step": 3476 + }, + { + "epoch": 0.49, + "learning_rate": 4.918631854763242e-05, + "loss": 0.0837, + "step": 3478 + }, + { + "epoch": 0.49, + "learning_rate": 4.9185850645704664e-05, + "loss": 0.0924, + "step": 3480 + }, + { + "epoch": 0.49, + "learning_rate": 4.918538274377691e-05, + "loss": 0.0895, + "step": 3482 + }, + { + "epoch": 0.49, + "learning_rate": 4.918491484184915e-05, + "loss": 0.0828, + "step": 3484 + }, + { + "epoch": 0.49, + "learning_rate": 4.9184446939921395e-05, + "loss": 0.1, + "step": 3486 + }, + { + "epoch": 0.49, + "learning_rate": 4.918397903799364e-05, + "loss": 0.0853, + "step": 3488 + }, + { + "epoch": 0.49, + "learning_rate": 4.9183511136065886e-05, + "loss": 0.1041, + "step": 3490 + }, + { + "epoch": 0.49, + "learning_rate": 4.9183043234138125e-05, + "loss": 0.0838, + "step": 3492 + }, + { + "epoch": 0.49, + "learning_rate": 4.918257533221037e-05, + "loss": 0.084, + "step": 3494 + }, + { + "epoch": 0.49, + "learning_rate": 4.918210743028262e-05, + "loss": 0.0974, + "step": 3496 + }, + { + "epoch": 0.49, + "learning_rate": 4.918163952835486e-05, + "loss": 0.0789, + "step": 3498 + }, + { + "epoch": 0.49, + "learning_rate": 4.91811716264271e-05, + "loss": 0.088, + "step": 3500 + }, + { + "epoch": 0.49, + "learning_rate": 4.918070372449935e-05, + "loss": 0.0601, + "step": 3502 + }, + { + "epoch": 0.49, + "learning_rate": 4.9180235822571594e-05, + "loss": 0.1086, + "step": 3504 + }, + { + "epoch": 0.49, + "learning_rate": 4.917976792064384e-05, + "loss": 0.0887, + "step": 3506 + }, + { + "epoch": 0.49, + "learning_rate": 4.917930001871608e-05, + "loss": 0.0916, + "step": 3508 + }, + { + "epoch": 0.49, + "learning_rate": 4.9178832116788325e-05, + "loss": 0.0867, + "step": 3510 + }, + { + "epoch": 0.49, + "learning_rate": 4.9178364214860564e-05, + "loss": 0.0938, + "step": 3512 + }, + { + "epoch": 0.49, + "learning_rate": 4.917789631293282e-05, + "loss": 0.0803, + "step": 3514 + }, + { + "epoch": 0.49, + "learning_rate": 4.9177428411005056e-05, + "loss": 0.0915, + "step": 3516 + }, + { + "epoch": 0.49, + "learning_rate": 4.91769605090773e-05, + "loss": 0.0574, + "step": 3518 + }, + { + "epoch": 0.49, + "learning_rate": 4.917649260714954e-05, + "loss": 0.0889, + "step": 3520 + }, + { + "epoch": 0.49, + "learning_rate": 4.917602470522179e-05, + "loss": 0.0987, + "step": 3522 + }, + { + "epoch": 0.49, + "learning_rate": 4.917555680329403e-05, + "loss": 0.0592, + "step": 3524 + }, + { + "epoch": 0.49, + "learning_rate": 4.917508890136628e-05, + "loss": 0.1026, + "step": 3526 + }, + { + "epoch": 0.5, + "learning_rate": 4.917462099943852e-05, + "loss": 0.1007, + "step": 3528 + }, + { + "epoch": 0.5, + "learning_rate": 4.9174153097510764e-05, + "loss": 0.0932, + "step": 3530 + }, + { + "epoch": 0.5, + "learning_rate": 4.917368519558301e-05, + "loss": 0.1059, + "step": 3532 + }, + { + "epoch": 0.5, + "learning_rate": 4.9173217293655256e-05, + "loss": 0.0822, + "step": 3534 + }, + { + "epoch": 0.5, + "learning_rate": 4.9172749391727495e-05, + "loss": 0.0979, + "step": 3536 + }, + { + "epoch": 0.5, + "learning_rate": 4.917228148979974e-05, + "loss": 0.1007, + "step": 3538 + }, + { + "epoch": 0.5, + "learning_rate": 4.917181358787199e-05, + "loss": 0.0944, + "step": 3540 + }, + { + "epoch": 0.5, + "learning_rate": 4.917134568594423e-05, + "loss": 0.102, + "step": 3542 + }, + { + "epoch": 0.5, + "learning_rate": 4.917087778401647e-05, + "loss": 0.0743, + "step": 3544 + }, + { + "epoch": 0.5, + "learning_rate": 4.917040988208871e-05, + "loss": 0.0897, + "step": 3546 + }, + { + "epoch": 0.5, + "learning_rate": 4.9169941980160964e-05, + "loss": 0.1136, + "step": 3548 + }, + { + "epoch": 0.5, + "learning_rate": 4.91694740782332e-05, + "loss": 0.0874, + "step": 3550 + }, + { + "epoch": 0.5, + "learning_rate": 4.916900617630545e-05, + "loss": 0.079, + "step": 3552 + }, + { + "epoch": 0.5, + "learning_rate": 4.916853827437769e-05, + "loss": 0.0844, + "step": 3554 + }, + { + "epoch": 0.5, + "learning_rate": 4.9168070372449934e-05, + "loss": 0.1033, + "step": 3556 + }, + { + "epoch": 0.5, + "learning_rate": 4.916760247052218e-05, + "loss": 0.1093, + "step": 3558 + }, + { + "epoch": 0.5, + "learning_rate": 4.9167134568594425e-05, + "loss": 0.0963, + "step": 3560 + }, + { + "epoch": 0.5, + "learning_rate": 4.9166666666666665e-05, + "loss": 0.101, + "step": 3562 + }, + { + "epoch": 0.5, + "learning_rate": 4.916619876473891e-05, + "loss": 0.0802, + "step": 3564 + }, + { + "epoch": 0.5, + "learning_rate": 4.9165730862811156e-05, + "loss": 0.0968, + "step": 3566 + }, + { + "epoch": 0.5, + "learning_rate": 4.91652629608834e-05, + "loss": 0.0982, + "step": 3568 + }, + { + "epoch": 0.5, + "learning_rate": 4.916479505895564e-05, + "loss": 0.0885, + "step": 3570 + }, + { + "epoch": 0.5, + "learning_rate": 4.916432715702789e-05, + "loss": 0.0921, + "step": 3572 + }, + { + "epoch": 0.5, + "learning_rate": 4.916385925510013e-05, + "loss": 0.0744, + "step": 3574 + }, + { + "epoch": 0.5, + "learning_rate": 4.916339135317238e-05, + "loss": 0.0824, + "step": 3576 + }, + { + "epoch": 0.5, + "learning_rate": 4.916292345124462e-05, + "loss": 0.1129, + "step": 3578 + }, + { + "epoch": 0.5, + "learning_rate": 4.9162455549316864e-05, + "loss": 0.0937, + "step": 3580 + }, + { + "epoch": 0.5, + "learning_rate": 4.916198764738911e-05, + "loss": 0.0924, + "step": 3582 + }, + { + "epoch": 0.5, + "learning_rate": 4.9161519745461356e-05, + "loss": 0.0828, + "step": 3584 + }, + { + "epoch": 0.5, + "learning_rate": 4.9161051843533595e-05, + "loss": 0.0917, + "step": 3586 + }, + { + "epoch": 0.5, + "learning_rate": 4.916058394160584e-05, + "loss": 0.0766, + "step": 3588 + }, + { + "epoch": 0.5, + "learning_rate": 4.916011603967808e-05, + "loss": 0.0963, + "step": 3590 + }, + { + "epoch": 0.5, + "learning_rate": 4.915964813775033e-05, + "loss": 0.0912, + "step": 3592 + }, + { + "epoch": 0.5, + "learning_rate": 4.915918023582257e-05, + "loss": 0.0903, + "step": 3594 + }, + { + "epoch": 0.5, + "learning_rate": 4.915871233389482e-05, + "loss": 0.0843, + "step": 3596 + }, + { + "epoch": 0.51, + "learning_rate": 4.915824443196706e-05, + "loss": 0.0897, + "step": 3598 + }, + { + "epoch": 0.51, + "learning_rate": 4.915777653003931e-05, + "loss": 0.0859, + "step": 3600 + }, + { + "epoch": 0.51, + "learning_rate": 4.915730862811155e-05, + "loss": 0.0991, + "step": 3602 + }, + { + "epoch": 0.51, + "learning_rate": 4.9156840726183795e-05, + "loss": 0.1032, + "step": 3604 + }, + { + "epoch": 0.51, + "learning_rate": 4.9156372824256034e-05, + "loss": 0.0866, + "step": 3606 + }, + { + "epoch": 0.51, + "learning_rate": 4.915590492232828e-05, + "loss": 0.0994, + "step": 3608 + }, + { + "epoch": 0.51, + "learning_rate": 4.9155437020400526e-05, + "loss": 0.0831, + "step": 3610 + }, + { + "epoch": 0.51, + "learning_rate": 4.915496911847277e-05, + "loss": 0.1023, + "step": 3612 + }, + { + "epoch": 0.51, + "learning_rate": 4.915450121654501e-05, + "loss": 0.1046, + "step": 3614 + }, + { + "epoch": 0.51, + "learning_rate": 4.915403331461726e-05, + "loss": 0.0863, + "step": 3616 + }, + { + "epoch": 0.51, + "learning_rate": 4.91535654126895e-05, + "loss": 0.0761, + "step": 3618 + }, + { + "epoch": 0.51, + "learning_rate": 4.915309751076175e-05, + "loss": 0.1039, + "step": 3620 + }, + { + "epoch": 0.51, + "learning_rate": 4.915262960883399e-05, + "loss": 0.0741, + "step": 3622 + }, + { + "epoch": 0.51, + "learning_rate": 4.9152161706906234e-05, + "loss": 0.0682, + "step": 3624 + }, + { + "epoch": 0.51, + "learning_rate": 4.915169380497848e-05, + "loss": 0.0898, + "step": 3626 + }, + { + "epoch": 0.51, + "learning_rate": 4.9151225903050725e-05, + "loss": 0.1086, + "step": 3628 + }, + { + "epoch": 0.51, + "learning_rate": 4.9150758001122965e-05, + "loss": 0.1067, + "step": 3630 + }, + { + "epoch": 0.51, + "learning_rate": 4.915029009919521e-05, + "loss": 0.0946, + "step": 3632 + }, + { + "epoch": 0.51, + "learning_rate": 4.9149822197267456e-05, + "loss": 0.0644, + "step": 3634 + }, + { + "epoch": 0.51, + "learning_rate": 4.91493542953397e-05, + "loss": 0.0866, + "step": 3636 + }, + { + "epoch": 0.51, + "learning_rate": 4.914888639341194e-05, + "loss": 0.1014, + "step": 3638 + }, + { + "epoch": 0.51, + "learning_rate": 4.914841849148419e-05, + "loss": 0.0865, + "step": 3640 + }, + { + "epoch": 0.51, + "learning_rate": 4.9147950589556427e-05, + "loss": 0.0764, + "step": 3642 + }, + { + "epoch": 0.51, + "learning_rate": 4.914748268762868e-05, + "loss": 0.0872, + "step": 3644 + }, + { + "epoch": 0.51, + "learning_rate": 4.914701478570092e-05, + "loss": 0.1115, + "step": 3646 + }, + { + "epoch": 0.51, + "learning_rate": 4.9146546883773164e-05, + "loss": 0.0901, + "step": 3648 + }, + { + "epoch": 0.51, + "learning_rate": 4.9146078981845403e-05, + "loss": 0.0947, + "step": 3650 + }, + { + "epoch": 0.51, + "learning_rate": 4.9145611079917656e-05, + "loss": 0.0965, + "step": 3652 + }, + { + "epoch": 0.51, + "learning_rate": 4.9145143177989895e-05, + "loss": 0.0938, + "step": 3654 + }, + { + "epoch": 0.51, + "learning_rate": 4.914467527606214e-05, + "loss": 0.1238, + "step": 3656 + }, + { + "epoch": 0.51, + "learning_rate": 4.914420737413438e-05, + "loss": 0.097, + "step": 3658 + }, + { + "epoch": 0.51, + "learning_rate": 4.9143739472206626e-05, + "loss": 0.098, + "step": 3660 + }, + { + "epoch": 0.51, + "learning_rate": 4.914327157027887e-05, + "loss": 0.0833, + "step": 3662 + }, + { + "epoch": 0.51, + "learning_rate": 4.914280366835112e-05, + "loss": 0.0798, + "step": 3664 + }, + { + "epoch": 0.51, + "learning_rate": 4.914233576642336e-05, + "loss": 0.0802, + "step": 3666 + }, + { + "epoch": 0.51, + "learning_rate": 4.91418678644956e-05, + "loss": 0.0726, + "step": 3668 + }, + { + "epoch": 0.52, + "learning_rate": 4.914139996256785e-05, + "loss": 0.0939, + "step": 3670 + }, + { + "epoch": 0.52, + "learning_rate": 4.9140932060640095e-05, + "loss": 0.0983, + "step": 3672 + }, + { + "epoch": 0.52, + "learning_rate": 4.9140464158712334e-05, + "loss": 0.0997, + "step": 3674 + }, + { + "epoch": 0.52, + "learning_rate": 4.913999625678458e-05, + "loss": 0.0726, + "step": 3676 + }, + { + "epoch": 0.52, + "learning_rate": 4.9139528354856826e-05, + "loss": 0.0967, + "step": 3678 + }, + { + "epoch": 0.52, + "learning_rate": 4.913906045292907e-05, + "loss": 0.0933, + "step": 3680 + }, + { + "epoch": 0.52, + "learning_rate": 4.913859255100131e-05, + "loss": 0.0727, + "step": 3682 + }, + { + "epoch": 0.52, + "learning_rate": 4.913812464907356e-05, + "loss": 0.1104, + "step": 3684 + }, + { + "epoch": 0.52, + "learning_rate": 4.91376567471458e-05, + "loss": 0.0989, + "step": 3686 + }, + { + "epoch": 0.52, + "learning_rate": 4.913718884521805e-05, + "loss": 0.0867, + "step": 3688 + }, + { + "epoch": 0.52, + "learning_rate": 4.913672094329029e-05, + "loss": 0.0684, + "step": 3690 + }, + { + "epoch": 0.52, + "learning_rate": 4.9136253041362534e-05, + "loss": 0.0805, + "step": 3692 + }, + { + "epoch": 0.52, + "learning_rate": 4.913578513943477e-05, + "loss": 0.1268, + "step": 3694 + }, + { + "epoch": 0.52, + "learning_rate": 4.9135317237507026e-05, + "loss": 0.1143, + "step": 3696 + }, + { + "epoch": 0.52, + "learning_rate": 4.9134849335579265e-05, + "loss": 0.119, + "step": 3698 + }, + { + "epoch": 0.52, + "learning_rate": 4.913438143365151e-05, + "loss": 0.0887, + "step": 3700 + }, + { + "epoch": 0.52, + "learning_rate": 4.913391353172375e-05, + "loss": 0.0864, + "step": 3702 + }, + { + "epoch": 0.52, + "learning_rate": 4.9133445629795996e-05, + "loss": 0.088, + "step": 3704 + }, + { + "epoch": 0.52, + "learning_rate": 4.913297772786824e-05, + "loss": 0.1035, + "step": 3706 + }, + { + "epoch": 0.52, + "learning_rate": 4.913250982594049e-05, + "loss": 0.1182, + "step": 3708 + }, + { + "epoch": 0.52, + "learning_rate": 4.9132041924012727e-05, + "loss": 0.0731, + "step": 3710 + }, + { + "epoch": 0.52, + "learning_rate": 4.913157402208497e-05, + "loss": 0.0962, + "step": 3712 + }, + { + "epoch": 0.52, + "learning_rate": 4.913110612015722e-05, + "loss": 0.1324, + "step": 3714 + }, + { + "epoch": 0.52, + "learning_rate": 4.9130638218229464e-05, + "loss": 0.0934, + "step": 3716 + }, + { + "epoch": 0.52, + "learning_rate": 4.9130170316301703e-05, + "loss": 0.0941, + "step": 3718 + }, + { + "epoch": 0.52, + "learning_rate": 4.912970241437395e-05, + "loss": 0.0777, + "step": 3720 + }, + { + "epoch": 0.52, + "learning_rate": 4.9129234512446195e-05, + "loss": 0.0773, + "step": 3722 + }, + { + "epoch": 0.52, + "learning_rate": 4.912876661051844e-05, + "loss": 0.0969, + "step": 3724 + }, + { + "epoch": 0.52, + "learning_rate": 4.912829870859068e-05, + "loss": 0.0824, + "step": 3726 + }, + { + "epoch": 0.52, + "learning_rate": 4.9127830806662926e-05, + "loss": 0.0849, + "step": 3728 + }, + { + "epoch": 0.52, + "learning_rate": 4.912736290473517e-05, + "loss": 0.0925, + "step": 3730 + }, + { + "epoch": 0.52, + "learning_rate": 4.912689500280742e-05, + "loss": 0.1111, + "step": 3732 + }, + { + "epoch": 0.52, + "learning_rate": 4.912642710087966e-05, + "loss": 0.0961, + "step": 3734 + }, + { + "epoch": 0.52, + "learning_rate": 4.91259591989519e-05, + "loss": 0.0834, + "step": 3736 + }, + { + "epoch": 0.52, + "learning_rate": 4.912549129702414e-05, + "loss": 0.0897, + "step": 3738 + }, + { + "epoch": 0.52, + "learning_rate": 4.9125023395096395e-05, + "loss": 0.0979, + "step": 3740 + }, + { + "epoch": 0.53, + "learning_rate": 4.9124555493168634e-05, + "loss": 0.1118, + "step": 3742 + }, + { + "epoch": 0.53, + "learning_rate": 4.912408759124088e-05, + "loss": 0.0778, + "step": 3744 + }, + { + "epoch": 0.53, + "learning_rate": 4.912361968931312e-05, + "loss": 0.074, + "step": 3746 + }, + { + "epoch": 0.53, + "learning_rate": 4.912315178738537e-05, + "loss": 0.0897, + "step": 3748 + }, + { + "epoch": 0.53, + "learning_rate": 4.912268388545761e-05, + "loss": 0.0941, + "step": 3750 + }, + { + "epoch": 0.53, + "learning_rate": 4.912221598352986e-05, + "loss": 0.0834, + "step": 3752 + }, + { + "epoch": 0.53, + "learning_rate": 4.9121748081602096e-05, + "loss": 0.1081, + "step": 3754 + }, + { + "epoch": 0.53, + "learning_rate": 4.912128017967434e-05, + "loss": 0.0938, + "step": 3756 + }, + { + "epoch": 0.53, + "learning_rate": 4.912081227774659e-05, + "loss": 0.0984, + "step": 3758 + }, + { + "epoch": 0.53, + "learning_rate": 4.9120344375818834e-05, + "loss": 0.099, + "step": 3760 + }, + { + "epoch": 0.53, + "learning_rate": 4.911987647389107e-05, + "loss": 0.0916, + "step": 3762 + }, + { + "epoch": 0.53, + "learning_rate": 4.911940857196332e-05, + "loss": 0.0924, + "step": 3764 + }, + { + "epoch": 0.53, + "learning_rate": 4.9118940670035565e-05, + "loss": 0.0822, + "step": 3766 + }, + { + "epoch": 0.53, + "learning_rate": 4.911847276810781e-05, + "loss": 0.0792, + "step": 3768 + }, + { + "epoch": 0.53, + "learning_rate": 4.911800486618005e-05, + "loss": 0.1024, + "step": 3770 + }, + { + "epoch": 0.53, + "learning_rate": 4.9117536964252296e-05, + "loss": 0.1246, + "step": 3772 + }, + { + "epoch": 0.53, + "learning_rate": 4.911706906232454e-05, + "loss": 0.0895, + "step": 3774 + }, + { + "epoch": 0.53, + "learning_rate": 4.911660116039679e-05, + "loss": 0.0821, + "step": 3776 + }, + { + "epoch": 0.53, + "learning_rate": 4.9116133258469027e-05, + "loss": 0.0959, + "step": 3778 + }, + { + "epoch": 0.53, + "learning_rate": 4.911566535654127e-05, + "loss": 0.0906, + "step": 3780 + }, + { + "epoch": 0.53, + "learning_rate": 4.911519745461352e-05, + "loss": 0.0815, + "step": 3782 + }, + { + "epoch": 0.53, + "learning_rate": 4.9114729552685764e-05, + "loss": 0.1142, + "step": 3784 + }, + { + "epoch": 0.53, + "learning_rate": 4.9114261650758003e-05, + "loss": 0.0801, + "step": 3786 + }, + { + "epoch": 0.53, + "learning_rate": 4.911379374883025e-05, + "loss": 0.07, + "step": 3788 + }, + { + "epoch": 0.53, + "learning_rate": 4.911332584690249e-05, + "loss": 0.0859, + "step": 3790 + }, + { + "epoch": 0.53, + "learning_rate": 4.911285794497474e-05, + "loss": 0.084, + "step": 3792 + }, + { + "epoch": 0.53, + "learning_rate": 4.911239004304698e-05, + "loss": 0.1333, + "step": 3794 + }, + { + "epoch": 0.53, + "learning_rate": 4.9111922141119226e-05, + "loss": 0.0816, + "step": 3796 + }, + { + "epoch": 0.53, + "learning_rate": 4.9111454239191465e-05, + "loss": 0.0963, + "step": 3798 + }, + { + "epoch": 0.53, + "learning_rate": 4.911098633726371e-05, + "loss": 0.1001, + "step": 3800 + }, + { + "epoch": 0.53, + "learning_rate": 4.911051843533596e-05, + "loss": 0.0969, + "step": 3802 + }, + { + "epoch": 0.53, + "learning_rate": 4.9110050533408196e-05, + "loss": 0.0817, + "step": 3804 + }, + { + "epoch": 0.53, + "learning_rate": 4.910958263148044e-05, + "loss": 0.1269, + "step": 3806 + }, + { + "epoch": 0.53, + "learning_rate": 4.910911472955269e-05, + "loss": 0.1191, + "step": 3808 + }, + { + "epoch": 0.53, + "learning_rate": 4.9108646827624934e-05, + "loss": 0.1094, + "step": 3810 + }, + { + "epoch": 0.54, + "learning_rate": 4.910817892569717e-05, + "loss": 0.0941, + "step": 3812 + }, + { + "epoch": 0.54, + "learning_rate": 4.910771102376942e-05, + "loss": 0.1328, + "step": 3814 + }, + { + "epoch": 0.54, + "learning_rate": 4.9107243121841665e-05, + "loss": 0.0866, + "step": 3816 + }, + { + "epoch": 0.54, + "learning_rate": 4.910677521991391e-05, + "loss": 0.1208, + "step": 3818 + }, + { + "epoch": 0.54, + "learning_rate": 4.910630731798615e-05, + "loss": 0.0855, + "step": 3820 + }, + { + "epoch": 0.54, + "learning_rate": 4.9105839416058396e-05, + "loss": 0.0963, + "step": 3822 + }, + { + "epoch": 0.54, + "learning_rate": 4.9105371514130635e-05, + "loss": 0.0908, + "step": 3824 + }, + { + "epoch": 0.54, + "learning_rate": 4.910490361220289e-05, + "loss": 0.0924, + "step": 3826 + }, + { + "epoch": 0.54, + "learning_rate": 4.910443571027513e-05, + "loss": 0.0927, + "step": 3828 + }, + { + "epoch": 0.54, + "learning_rate": 4.910396780834737e-05, + "loss": 0.1203, + "step": 3830 + }, + { + "epoch": 0.54, + "learning_rate": 4.910349990641961e-05, + "loss": 0.0944, + "step": 3832 + }, + { + "epoch": 0.54, + "learning_rate": 4.910303200449186e-05, + "loss": 0.0923, + "step": 3834 + }, + { + "epoch": 0.54, + "learning_rate": 4.9102564102564104e-05, + "loss": 0.09, + "step": 3836 + }, + { + "epoch": 0.54, + "learning_rate": 4.910209620063635e-05, + "loss": 0.0838, + "step": 3838 + }, + { + "epoch": 0.54, + "learning_rate": 4.910162829870859e-05, + "loss": 0.0979, + "step": 3840 + }, + { + "epoch": 0.54, + "learning_rate": 4.9101160396780835e-05, + "loss": 0.0925, + "step": 3842 + }, + { + "epoch": 0.54, + "learning_rate": 4.910069249485308e-05, + "loss": 0.0911, + "step": 3844 + }, + { + "epoch": 0.54, + "learning_rate": 4.9100224592925327e-05, + "loss": 0.0723, + "step": 3846 + }, + { + "epoch": 0.54, + "learning_rate": 4.9099756690997566e-05, + "loss": 0.0795, + "step": 3848 + }, + { + "epoch": 0.54, + "learning_rate": 4.909928878906981e-05, + "loss": 0.0956, + "step": 3850 + }, + { + "epoch": 0.54, + "learning_rate": 4.909882088714206e-05, + "loss": 0.0885, + "step": 3852 + }, + { + "epoch": 0.54, + "learning_rate": 4.9098352985214303e-05, + "loss": 0.1086, + "step": 3854 + }, + { + "epoch": 0.54, + "learning_rate": 4.909788508328654e-05, + "loss": 0.0614, + "step": 3856 + }, + { + "epoch": 0.54, + "learning_rate": 4.909741718135879e-05, + "loss": 0.1023, + "step": 3858 + }, + { + "epoch": 0.54, + "learning_rate": 4.9096949279431034e-05, + "loss": 0.1041, + "step": 3860 + }, + { + "epoch": 0.54, + "learning_rate": 4.909648137750328e-05, + "loss": 0.0934, + "step": 3862 + }, + { + "epoch": 0.54, + "learning_rate": 4.909601347557552e-05, + "loss": 0.0782, + "step": 3864 + }, + { + "epoch": 0.54, + "learning_rate": 4.9095545573647765e-05, + "loss": 0.1001, + "step": 3866 + }, + { + "epoch": 0.54, + "learning_rate": 4.9095077671720005e-05, + "loss": 0.082, + "step": 3868 + }, + { + "epoch": 0.54, + "learning_rate": 4.909460976979226e-05, + "loss": 0.0836, + "step": 3870 + }, + { + "epoch": 0.54, + "learning_rate": 4.9094141867864496e-05, + "loss": 0.0753, + "step": 3872 + }, + { + "epoch": 0.54, + "learning_rate": 4.909367396593674e-05, + "loss": 0.0935, + "step": 3874 + }, + { + "epoch": 0.54, + "learning_rate": 4.909320606400898e-05, + "loss": 0.093, + "step": 3876 + }, + { + "epoch": 0.54, + "learning_rate": 4.9092738162081234e-05, + "loss": 0.0815, + "step": 3878 + }, + { + "epoch": 0.54, + "learning_rate": 4.909227026015347e-05, + "loss": 0.0994, + "step": 3880 + }, + { + "epoch": 0.54, + "learning_rate": 4.909180235822572e-05, + "loss": 0.0939, + "step": 3882 + }, + { + "epoch": 0.55, + "learning_rate": 4.909133445629796e-05, + "loss": 0.078, + "step": 3884 + }, + { + "epoch": 0.55, + "learning_rate": 4.9090866554370204e-05, + "loss": 0.0785, + "step": 3886 + }, + { + "epoch": 0.55, + "learning_rate": 4.909039865244245e-05, + "loss": 0.1125, + "step": 3888 + }, + { + "epoch": 0.55, + "learning_rate": 4.9089930750514696e-05, + "loss": 0.0668, + "step": 3890 + }, + { + "epoch": 0.55, + "learning_rate": 4.9089462848586935e-05, + "loss": 0.1003, + "step": 3892 + }, + { + "epoch": 0.55, + "learning_rate": 4.908899494665918e-05, + "loss": 0.0932, + "step": 3894 + }, + { + "epoch": 0.55, + "learning_rate": 4.908852704473143e-05, + "loss": 0.095, + "step": 3896 + }, + { + "epoch": 0.55, + "learning_rate": 4.908805914280367e-05, + "loss": 0.1071, + "step": 3898 + }, + { + "epoch": 0.55, + "learning_rate": 4.908759124087591e-05, + "loss": 0.0721, + "step": 3900 + }, + { + "epoch": 0.55, + "learning_rate": 4.908712333894816e-05, + "loss": 0.0861, + "step": 3902 + }, + { + "epoch": 0.55, + "learning_rate": 4.9086655437020404e-05, + "loss": 0.0828, + "step": 3904 + }, + { + "epoch": 0.55, + "learning_rate": 4.908618753509265e-05, + "loss": 0.0956, + "step": 3906 + }, + { + "epoch": 0.55, + "learning_rate": 4.908571963316489e-05, + "loss": 0.0569, + "step": 3908 + }, + { + "epoch": 0.55, + "learning_rate": 4.9085251731237135e-05, + "loss": 0.1102, + "step": 3910 + }, + { + "epoch": 0.55, + "learning_rate": 4.908478382930938e-05, + "loss": 0.0839, + "step": 3912 + }, + { + "epoch": 0.55, + "learning_rate": 4.908431592738163e-05, + "loss": 0.093, + "step": 3914 + }, + { + "epoch": 0.55, + "learning_rate": 4.9083848025453866e-05, + "loss": 0.0756, + "step": 3916 + }, + { + "epoch": 0.55, + "learning_rate": 4.908338012352611e-05, + "loss": 0.0863, + "step": 3918 + }, + { + "epoch": 0.55, + "learning_rate": 4.908291222159835e-05, + "loss": 0.0755, + "step": 3920 + }, + { + "epoch": 0.55, + "learning_rate": 4.9082444319670603e-05, + "loss": 0.0991, + "step": 3922 + }, + { + "epoch": 0.55, + "learning_rate": 4.908197641774284e-05, + "loss": 0.0932, + "step": 3924 + }, + { + "epoch": 0.55, + "learning_rate": 4.908150851581509e-05, + "loss": 0.1011, + "step": 3926 + }, + { + "epoch": 0.55, + "learning_rate": 4.908104061388733e-05, + "loss": 0.1346, + "step": 3928 + }, + { + "epoch": 0.55, + "learning_rate": 4.908057271195958e-05, + "loss": 0.1027, + "step": 3930 + }, + { + "epoch": 0.55, + "learning_rate": 4.908010481003182e-05, + "loss": 0.062, + "step": 3932 + }, + { + "epoch": 0.55, + "learning_rate": 4.9079636908104065e-05, + "loss": 0.11, + "step": 3934 + }, + { + "epoch": 0.55, + "learning_rate": 4.9079169006176305e-05, + "loss": 0.0779, + "step": 3936 + }, + { + "epoch": 0.55, + "learning_rate": 4.907870110424855e-05, + "loss": 0.0715, + "step": 3938 + }, + { + "epoch": 0.55, + "learning_rate": 4.9078233202320796e-05, + "loss": 0.0872, + "step": 3940 + }, + { + "epoch": 0.55, + "learning_rate": 4.907776530039304e-05, + "loss": 0.0855, + "step": 3942 + }, + { + "epoch": 0.55, + "learning_rate": 4.907729739846528e-05, + "loss": 0.0987, + "step": 3944 + }, + { + "epoch": 0.55, + "learning_rate": 4.907682949653753e-05, + "loss": 0.0932, + "step": 3946 + }, + { + "epoch": 0.55, + "learning_rate": 4.907636159460977e-05, + "loss": 0.0953, + "step": 3948 + }, + { + "epoch": 0.55, + "learning_rate": 4.907589369268202e-05, + "loss": 0.0986, + "step": 3950 + }, + { + "epoch": 0.55, + "learning_rate": 4.907542579075426e-05, + "loss": 0.0859, + "step": 3952 + }, + { + "epoch": 0.56, + "learning_rate": 4.9074957888826504e-05, + "loss": 0.0915, + "step": 3954 + }, + { + "epoch": 0.56, + "learning_rate": 4.907448998689875e-05, + "loss": 0.0753, + "step": 3956 + }, + { + "epoch": 0.56, + "learning_rate": 4.9074022084970996e-05, + "loss": 0.0887, + "step": 3958 + }, + { + "epoch": 0.56, + "learning_rate": 4.9073554183043235e-05, + "loss": 0.1035, + "step": 3960 + }, + { + "epoch": 0.56, + "learning_rate": 4.907308628111548e-05, + "loss": 0.1388, + "step": 3962 + }, + { + "epoch": 0.56, + "learning_rate": 4.907261837918773e-05, + "loss": 0.0965, + "step": 3964 + }, + { + "epoch": 0.56, + "learning_rate": 4.907215047725997e-05, + "loss": 0.1309, + "step": 3966 + }, + { + "epoch": 0.56, + "learning_rate": 4.907168257533221e-05, + "loss": 0.1106, + "step": 3968 + }, + { + "epoch": 0.56, + "learning_rate": 4.907121467340446e-05, + "loss": 0.0858, + "step": 3970 + }, + { + "epoch": 0.56, + "learning_rate": 4.90707467714767e-05, + "loss": 0.0735, + "step": 3972 + }, + { + "epoch": 0.56, + "learning_rate": 4.907027886954895e-05, + "loss": 0.093, + "step": 3974 + }, + { + "epoch": 0.56, + "learning_rate": 4.906981096762119e-05, + "loss": 0.1556, + "step": 3976 + }, + { + "epoch": 0.56, + "learning_rate": 4.9069343065693435e-05, + "loss": 0.0656, + "step": 3978 + }, + { + "epoch": 0.56, + "learning_rate": 4.9068875163765674e-05, + "loss": 0.0825, + "step": 3980 + }, + { + "epoch": 0.56, + "learning_rate": 4.906840726183792e-05, + "loss": 0.0945, + "step": 3982 + }, + { + "epoch": 0.56, + "learning_rate": 4.9067939359910166e-05, + "loss": 0.0892, + "step": 3984 + }, + { + "epoch": 0.56, + "learning_rate": 4.906747145798241e-05, + "loss": 0.089, + "step": 3986 + }, + { + "epoch": 0.56, + "learning_rate": 4.906700355605465e-05, + "loss": 0.1058, + "step": 3988 + }, + { + "epoch": 0.56, + "learning_rate": 4.90665356541269e-05, + "loss": 0.0915, + "step": 3990 + }, + { + "epoch": 0.56, + "learning_rate": 4.906606775219914e-05, + "loss": 0.0894, + "step": 3992 + }, + { + "epoch": 0.56, + "learning_rate": 4.906559985027139e-05, + "loss": 0.0821, + "step": 3994 + }, + { + "epoch": 0.56, + "learning_rate": 4.906513194834363e-05, + "loss": 0.081, + "step": 3996 + }, + { + "epoch": 0.56, + "learning_rate": 4.9064664046415874e-05, + "loss": 0.096, + "step": 3998 + }, + { + "epoch": 0.56, + "learning_rate": 4.906419614448812e-05, + "loss": 0.0845, + "step": 4000 + }, + { + "epoch": 0.56, + "eval_gen_len": 33.3103, + "eval_loss": 1.031044363975525, + "eval_meteor": 0.0499, + "eval_runtime": 17.245, + "eval_samples_per_second": 3.363, + "eval_steps_per_second": 0.464, + "step": 4000 + }, + { + "epoch": 0.56, + "learning_rate": 4.9063728242560365e-05, + "loss": 0.0821, + "step": 4002 + }, + { + "epoch": 0.56, + "learning_rate": 4.9063260340632605e-05, + "loss": 0.0703, + "step": 4004 + }, + { + "epoch": 0.56, + "learning_rate": 4.906279243870485e-05, + "loss": 0.0763, + "step": 4006 + }, + { + "epoch": 0.56, + "learning_rate": 4.9062324536777096e-05, + "loss": 0.083, + "step": 4008 + }, + { + "epoch": 0.56, + "learning_rate": 4.906185663484934e-05, + "loss": 0.0703, + "step": 4010 + }, + { + "epoch": 0.56, + "learning_rate": 4.906138873292158e-05, + "loss": 0.104, + "step": 4012 + }, + { + "epoch": 0.56, + "learning_rate": 4.906092083099383e-05, + "loss": 0.0943, + "step": 4014 + }, + { + "epoch": 0.56, + "learning_rate": 4.9060452929066066e-05, + "loss": 0.086, + "step": 4016 + }, + { + "epoch": 0.56, + "learning_rate": 4.905998502713832e-05, + "loss": 0.0977, + "step": 4018 + }, + { + "epoch": 0.56, + "learning_rate": 4.905951712521056e-05, + "loss": 0.1004, + "step": 4020 + }, + { + "epoch": 0.56, + "learning_rate": 4.9059049223282804e-05, + "loss": 0.1224, + "step": 4022 + }, + { + "epoch": 0.56, + "learning_rate": 4.905858132135504e-05, + "loss": 0.094, + "step": 4024 + }, + { + "epoch": 0.57, + "learning_rate": 4.9058113419427296e-05, + "loss": 0.0824, + "step": 4026 + }, + { + "epoch": 0.57, + "learning_rate": 4.9057645517499535e-05, + "loss": 0.1008, + "step": 4028 + }, + { + "epoch": 0.57, + "learning_rate": 4.905717761557178e-05, + "loss": 0.1034, + "step": 4030 + }, + { + "epoch": 0.57, + "learning_rate": 4.905670971364402e-05, + "loss": 0.1208, + "step": 4032 + }, + { + "epoch": 0.57, + "learning_rate": 4.9056241811716266e-05, + "loss": 0.0767, + "step": 4034 + }, + { + "epoch": 0.57, + "learning_rate": 4.905577390978851e-05, + "loss": 0.1051, + "step": 4036 + }, + { + "epoch": 0.57, + "learning_rate": 4.905530600786076e-05, + "loss": 0.1172, + "step": 4038 + }, + { + "epoch": 0.57, + "learning_rate": 4.9054838105933e-05, + "loss": 0.1016, + "step": 4040 + }, + { + "epoch": 0.57, + "learning_rate": 4.905437020400524e-05, + "loss": 0.09, + "step": 4042 + }, + { + "epoch": 0.57, + "learning_rate": 4.905390230207749e-05, + "loss": 0.0693, + "step": 4044 + }, + { + "epoch": 0.57, + "learning_rate": 4.9053434400149735e-05, + "loss": 0.0866, + "step": 4046 + }, + { + "epoch": 0.57, + "learning_rate": 4.9052966498221974e-05, + "loss": 0.1154, + "step": 4048 + }, + { + "epoch": 0.57, + "learning_rate": 4.905249859629422e-05, + "loss": 0.1125, + "step": 4050 + }, + { + "epoch": 0.57, + "learning_rate": 4.9052030694366466e-05, + "loss": 0.0926, + "step": 4052 + }, + { + "epoch": 0.57, + "learning_rate": 4.9051562792438705e-05, + "loss": 0.0813, + "step": 4054 + }, + { + "epoch": 0.57, + "learning_rate": 4.905109489051095e-05, + "loss": 0.104, + "step": 4056 + }, + { + "epoch": 0.57, + "learning_rate": 4.905062698858319e-05, + "loss": 0.0821, + "step": 4058 + }, + { + "epoch": 0.57, + "learning_rate": 4.905015908665544e-05, + "loss": 0.0744, + "step": 4060 + }, + { + "epoch": 0.57, + "learning_rate": 4.904969118472768e-05, + "loss": 0.1002, + "step": 4062 + }, + { + "epoch": 0.57, + "learning_rate": 4.904922328279993e-05, + "loss": 0.0927, + "step": 4064 + }, + { + "epoch": 0.57, + "learning_rate": 4.904875538087217e-05, + "loss": 0.0597, + "step": 4066 + }, + { + "epoch": 0.57, + "learning_rate": 4.904828747894441e-05, + "loss": 0.0818, + "step": 4068 + }, + { + "epoch": 0.57, + "learning_rate": 4.904781957701666e-05, + "loss": 0.0856, + "step": 4070 + }, + { + "epoch": 0.57, + "learning_rate": 4.9047351675088905e-05, + "loss": 0.0839, + "step": 4072 + }, + { + "epoch": 0.57, + "learning_rate": 4.9046883773161144e-05, + "loss": 0.0904, + "step": 4074 + }, + { + "epoch": 0.57, + "learning_rate": 4.904641587123339e-05, + "loss": 0.1198, + "step": 4076 + }, + { + "epoch": 0.57, + "learning_rate": 4.9045947969305636e-05, + "loss": 0.0997, + "step": 4078 + }, + { + "epoch": 0.57, + "learning_rate": 4.904548006737788e-05, + "loss": 0.091, + "step": 4080 + }, + { + "epoch": 0.57, + "learning_rate": 4.904501216545012e-05, + "loss": 0.0827, + "step": 4082 + }, + { + "epoch": 0.57, + "learning_rate": 4.9044544263522367e-05, + "loss": 0.0801, + "step": 4084 + }, + { + "epoch": 0.57, + "learning_rate": 4.904407636159461e-05, + "loss": 0.099, + "step": 4086 + }, + { + "epoch": 0.57, + "learning_rate": 4.904360845966686e-05, + "loss": 0.1048, + "step": 4088 + }, + { + "epoch": 0.57, + "learning_rate": 4.90431405577391e-05, + "loss": 0.089, + "step": 4090 + }, + { + "epoch": 0.57, + "learning_rate": 4.904267265581134e-05, + "loss": 0.0893, + "step": 4092 + }, + { + "epoch": 0.57, + "learning_rate": 4.904220475388359e-05, + "loss": 0.0834, + "step": 4094 + }, + { + "epoch": 0.57, + "learning_rate": 4.9041736851955835e-05, + "loss": 0.0814, + "step": 4096 + }, + { + "epoch": 0.58, + "learning_rate": 4.9041268950028074e-05, + "loss": 0.0911, + "step": 4098 + }, + { + "epoch": 0.58, + "learning_rate": 4.904080104810032e-05, + "loss": 0.1082, + "step": 4100 + }, + { + "epoch": 0.58, + "learning_rate": 4.904033314617256e-05, + "loss": 0.1039, + "step": 4102 + }, + { + "epoch": 0.58, + "learning_rate": 4.903986524424481e-05, + "loss": 0.0719, + "step": 4104 + }, + { + "epoch": 0.58, + "learning_rate": 4.903939734231705e-05, + "loss": 0.086, + "step": 4106 + }, + { + "epoch": 0.58, + "learning_rate": 4.90389294403893e-05, + "loss": 0.0852, + "step": 4108 + }, + { + "epoch": 0.58, + "learning_rate": 4.9038461538461536e-05, + "loss": 0.1041, + "step": 4110 + }, + { + "epoch": 0.58, + "learning_rate": 4.903799363653378e-05, + "loss": 0.1025, + "step": 4112 + }, + { + "epoch": 0.58, + "learning_rate": 4.903752573460603e-05, + "loss": 0.1016, + "step": 4114 + }, + { + "epoch": 0.58, + "learning_rate": 4.9037057832678274e-05, + "loss": 0.1134, + "step": 4116 + }, + { + "epoch": 0.58, + "learning_rate": 4.903658993075051e-05, + "loss": 0.1048, + "step": 4118 + }, + { + "epoch": 0.58, + "learning_rate": 4.903612202882276e-05, + "loss": 0.0856, + "step": 4120 + }, + { + "epoch": 0.58, + "learning_rate": 4.9035654126895005e-05, + "loss": 0.0737, + "step": 4122 + }, + { + "epoch": 0.58, + "learning_rate": 4.903518622496725e-05, + "loss": 0.0931, + "step": 4124 + }, + { + "epoch": 0.58, + "learning_rate": 4.903471832303949e-05, + "loss": 0.0875, + "step": 4126 + }, + { + "epoch": 0.58, + "learning_rate": 4.9034250421111736e-05, + "loss": 0.0841, + "step": 4128 + }, + { + "epoch": 0.58, + "learning_rate": 4.903378251918398e-05, + "loss": 0.1084, + "step": 4130 + }, + { + "epoch": 0.58, + "learning_rate": 4.903331461725623e-05, + "loss": 0.0762, + "step": 4132 + }, + { + "epoch": 0.58, + "learning_rate": 4.903284671532847e-05, + "loss": 0.1076, + "step": 4134 + }, + { + "epoch": 0.58, + "learning_rate": 4.903237881340071e-05, + "loss": 0.0912, + "step": 4136 + }, + { + "epoch": 0.58, + "learning_rate": 4.903191091147296e-05, + "loss": 0.0866, + "step": 4138 + }, + { + "epoch": 0.58, + "learning_rate": 4.9031443009545205e-05, + "loss": 0.092, + "step": 4140 + }, + { + "epoch": 0.58, + "learning_rate": 4.9030975107617444e-05, + "loss": 0.1047, + "step": 4142 + }, + { + "epoch": 0.58, + "learning_rate": 4.903050720568969e-05, + "loss": 0.0932, + "step": 4144 + }, + { + "epoch": 0.58, + "learning_rate": 4.903003930376193e-05, + "loss": 0.1197, + "step": 4146 + }, + { + "epoch": 0.58, + "learning_rate": 4.902957140183418e-05, + "loss": 0.1017, + "step": 4148 + }, + { + "epoch": 0.58, + "learning_rate": 4.902910349990642e-05, + "loss": 0.0966, + "step": 4150 + }, + { + "epoch": 0.58, + "learning_rate": 4.9028635597978667e-05, + "loss": 0.1375, + "step": 4152 + }, + { + "epoch": 0.58, + "learning_rate": 4.9028167696050906e-05, + "loss": 0.096, + "step": 4154 + }, + { + "epoch": 0.58, + "learning_rate": 4.902769979412316e-05, + "loss": 0.0814, + "step": 4156 + }, + { + "epoch": 0.58, + "learning_rate": 4.90272318921954e-05, + "loss": 0.0854, + "step": 4158 + }, + { + "epoch": 0.58, + "learning_rate": 4.9026763990267643e-05, + "loss": 0.0935, + "step": 4160 + }, + { + "epoch": 0.58, + "learning_rate": 4.902629608833988e-05, + "loss": 0.0972, + "step": 4162 + }, + { + "epoch": 0.58, + "learning_rate": 4.902582818641213e-05, + "loss": 0.1046, + "step": 4164 + }, + { + "epoch": 0.58, + "learning_rate": 4.9025360284484374e-05, + "loss": 0.1404, + "step": 4166 + }, + { + "epoch": 0.59, + "learning_rate": 4.902489238255662e-05, + "loss": 0.0898, + "step": 4168 + }, + { + "epoch": 0.59, + "learning_rate": 4.902442448062886e-05, + "loss": 0.082, + "step": 4170 + }, + { + "epoch": 0.59, + "learning_rate": 4.9023956578701105e-05, + "loss": 0.1063, + "step": 4172 + }, + { + "epoch": 0.59, + "learning_rate": 4.902348867677335e-05, + "loss": 0.0762, + "step": 4174 + }, + { + "epoch": 0.59, + "learning_rate": 4.90230207748456e-05, + "loss": 0.1002, + "step": 4176 + }, + { + "epoch": 0.59, + "learning_rate": 4.9022552872917836e-05, + "loss": 0.0935, + "step": 4178 + }, + { + "epoch": 0.59, + "learning_rate": 4.902208497099008e-05, + "loss": 0.1117, + "step": 4180 + }, + { + "epoch": 0.59, + "learning_rate": 4.902161706906233e-05, + "loss": 0.1026, + "step": 4182 + }, + { + "epoch": 0.59, + "learning_rate": 4.9021149167134574e-05, + "loss": 0.0976, + "step": 4184 + }, + { + "epoch": 0.59, + "learning_rate": 4.902068126520681e-05, + "loss": 0.1024, + "step": 4186 + }, + { + "epoch": 0.59, + "learning_rate": 4.902021336327906e-05, + "loss": 0.0984, + "step": 4188 + }, + { + "epoch": 0.59, + "learning_rate": 4.9019745461351305e-05, + "loss": 0.0916, + "step": 4190 + }, + { + "epoch": 0.59, + "learning_rate": 4.901927755942355e-05, + "loss": 0.1315, + "step": 4192 + }, + { + "epoch": 0.59, + "learning_rate": 4.901880965749579e-05, + "loss": 0.0902, + "step": 4194 + }, + { + "epoch": 0.59, + "learning_rate": 4.9018341755568036e-05, + "loss": 0.1066, + "step": 4196 + }, + { + "epoch": 0.59, + "learning_rate": 4.9017873853640275e-05, + "loss": 0.0916, + "step": 4198 + }, + { + "epoch": 0.59, + "learning_rate": 4.901740595171253e-05, + "loss": 0.1009, + "step": 4200 + }, + { + "epoch": 0.59, + "learning_rate": 4.901693804978477e-05, + "loss": 0.099, + "step": 4202 + }, + { + "epoch": 0.59, + "learning_rate": 4.901647014785701e-05, + "loss": 0.1022, + "step": 4204 + }, + { + "epoch": 0.59, + "learning_rate": 4.901600224592925e-05, + "loss": 0.1264, + "step": 4206 + }, + { + "epoch": 0.59, + "learning_rate": 4.9015534344001505e-05, + "loss": 0.0855, + "step": 4208 + }, + { + "epoch": 0.59, + "learning_rate": 4.9015066442073744e-05, + "loss": 0.0807, + "step": 4210 + }, + { + "epoch": 0.59, + "learning_rate": 4.901459854014599e-05, + "loss": 0.098, + "step": 4212 + }, + { + "epoch": 0.59, + "learning_rate": 4.901413063821823e-05, + "loss": 0.0941, + "step": 4214 + }, + { + "epoch": 0.59, + "learning_rate": 4.9013662736290475e-05, + "loss": 0.0892, + "step": 4216 + }, + { + "epoch": 0.59, + "learning_rate": 4.901319483436272e-05, + "loss": 0.086, + "step": 4218 + }, + { + "epoch": 0.59, + "learning_rate": 4.9012726932434967e-05, + "loss": 0.0854, + "step": 4220 + }, + { + "epoch": 0.59, + "learning_rate": 4.9012259030507206e-05, + "loss": 0.0884, + "step": 4222 + }, + { + "epoch": 0.59, + "learning_rate": 4.901179112857945e-05, + "loss": 0.14, + "step": 4224 + }, + { + "epoch": 0.59, + "learning_rate": 4.90113232266517e-05, + "loss": 0.1044, + "step": 4226 + }, + { + "epoch": 0.59, + "learning_rate": 4.9010855324723943e-05, + "loss": 0.1168, + "step": 4228 + }, + { + "epoch": 0.59, + "learning_rate": 4.901038742279618e-05, + "loss": 0.099, + "step": 4230 + }, + { + "epoch": 0.59, + "learning_rate": 4.900991952086843e-05, + "loss": 0.0921, + "step": 4232 + }, + { + "epoch": 0.59, + "learning_rate": 4.9009451618940674e-05, + "loss": 0.1101, + "step": 4234 + }, + { + "epoch": 0.59, + "learning_rate": 4.900898371701292e-05, + "loss": 0.0668, + "step": 4236 + }, + { + "epoch": 0.59, + "learning_rate": 4.900851581508516e-05, + "loss": 0.1024, + "step": 4238 + }, + { + "epoch": 0.6, + "learning_rate": 4.9008047913157405e-05, + "loss": 0.1071, + "step": 4240 + }, + { + "epoch": 0.6, + "learning_rate": 4.900758001122965e-05, + "loss": 0.0846, + "step": 4242 + }, + { + "epoch": 0.6, + "learning_rate": 4.90071121093019e-05, + "loss": 0.0732, + "step": 4244 + }, + { + "epoch": 0.6, + "learning_rate": 4.9006644207374136e-05, + "loss": 0.0998, + "step": 4246 + }, + { + "epoch": 0.6, + "learning_rate": 4.900617630544638e-05, + "loss": 0.1033, + "step": 4248 + }, + { + "epoch": 0.6, + "learning_rate": 4.900570840351862e-05, + "loss": 0.0899, + "step": 4250 + }, + { + "epoch": 0.6, + "learning_rate": 4.9005240501590874e-05, + "loss": 0.09, + "step": 4252 + }, + { + "epoch": 0.6, + "learning_rate": 4.900477259966311e-05, + "loss": 0.1022, + "step": 4254 + }, + { + "epoch": 0.6, + "learning_rate": 4.900430469773536e-05, + "loss": 0.0846, + "step": 4256 + }, + { + "epoch": 0.6, + "learning_rate": 4.90038367958076e-05, + "loss": 0.1168, + "step": 4258 + }, + { + "epoch": 0.6, + "learning_rate": 4.9003368893879844e-05, + "loss": 0.094, + "step": 4260 + }, + { + "epoch": 0.6, + "learning_rate": 4.900290099195209e-05, + "loss": 0.0767, + "step": 4262 + }, + { + "epoch": 0.6, + "learning_rate": 4.9002433090024336e-05, + "loss": 0.0952, + "step": 4264 + }, + { + "epoch": 0.6, + "learning_rate": 4.9001965188096575e-05, + "loss": 0.0958, + "step": 4266 + }, + { + "epoch": 0.6, + "learning_rate": 4.900149728616882e-05, + "loss": 0.0928, + "step": 4268 + }, + { + "epoch": 0.6, + "learning_rate": 4.900102938424107e-05, + "loss": 0.0999, + "step": 4270 + }, + { + "epoch": 0.6, + "learning_rate": 4.900056148231331e-05, + "loss": 0.1167, + "step": 4272 + }, + { + "epoch": 0.6, + "learning_rate": 4.900009358038555e-05, + "loss": 0.0872, + "step": 4274 + }, + { + "epoch": 0.6, + "learning_rate": 4.89996256784578e-05, + "loss": 0.0778, + "step": 4276 + }, + { + "epoch": 0.6, + "learning_rate": 4.8999157776530044e-05, + "loss": 0.097, + "step": 4278 + }, + { + "epoch": 0.6, + "learning_rate": 4.899868987460229e-05, + "loss": 0.0768, + "step": 4280 + }, + { + "epoch": 0.6, + "learning_rate": 4.899822197267453e-05, + "loss": 0.0844, + "step": 4282 + }, + { + "epoch": 0.6, + "learning_rate": 4.8997754070746775e-05, + "loss": 0.1015, + "step": 4284 + }, + { + "epoch": 0.6, + "learning_rate": 4.899728616881902e-05, + "loss": 0.0848, + "step": 4286 + }, + { + "epoch": 0.6, + "learning_rate": 4.8996818266891267e-05, + "loss": 0.0916, + "step": 4288 + }, + { + "epoch": 0.6, + "learning_rate": 4.8996350364963506e-05, + "loss": 0.091, + "step": 4290 + }, + { + "epoch": 0.6, + "learning_rate": 4.899588246303575e-05, + "loss": 0.085, + "step": 4292 + }, + { + "epoch": 0.6, + "learning_rate": 4.899541456110799e-05, + "loss": 0.0828, + "step": 4294 + }, + { + "epoch": 0.6, + "learning_rate": 4.8994946659180243e-05, + "loss": 0.0947, + "step": 4296 + }, + { + "epoch": 0.6, + "learning_rate": 4.899447875725248e-05, + "loss": 0.1177, + "step": 4298 + }, + { + "epoch": 0.6, + "learning_rate": 4.899401085532473e-05, + "loss": 0.0947, + "step": 4300 + }, + { + "epoch": 0.6, + "learning_rate": 4.899354295339697e-05, + "loss": 0.0855, + "step": 4302 + }, + { + "epoch": 0.6, + "learning_rate": 4.899307505146922e-05, + "loss": 0.0957, + "step": 4304 + }, + { + "epoch": 0.6, + "learning_rate": 4.899260714954146e-05, + "loss": 0.0863, + "step": 4306 + }, + { + "epoch": 0.6, + "learning_rate": 4.89921392476137e-05, + "loss": 0.1037, + "step": 4308 + }, + { + "epoch": 0.6, + "learning_rate": 4.8991671345685944e-05, + "loss": 0.1227, + "step": 4310 + }, + { + "epoch": 0.61, + "learning_rate": 4.899120344375819e-05, + "loss": 0.0743, + "step": 4312 + }, + { + "epoch": 0.61, + "learning_rate": 4.8990735541830436e-05, + "loss": 0.1011, + "step": 4314 + }, + { + "epoch": 0.61, + "learning_rate": 4.8990267639902675e-05, + "loss": 0.0948, + "step": 4316 + }, + { + "epoch": 0.61, + "learning_rate": 4.898979973797492e-05, + "loss": 0.0914, + "step": 4318 + }, + { + "epoch": 0.61, + "learning_rate": 4.898933183604717e-05, + "loss": 0.0866, + "step": 4320 + }, + { + "epoch": 0.61, + "learning_rate": 4.898886393411941e-05, + "loss": 0.0906, + "step": 4322 + }, + { + "epoch": 0.61, + "learning_rate": 4.898839603219165e-05, + "loss": 0.0916, + "step": 4324 + }, + { + "epoch": 0.61, + "learning_rate": 4.89879281302639e-05, + "loss": 0.0887, + "step": 4326 + }, + { + "epoch": 0.61, + "learning_rate": 4.898746022833614e-05, + "loss": 0.0896, + "step": 4328 + }, + { + "epoch": 0.61, + "learning_rate": 4.898699232640839e-05, + "loss": 0.1005, + "step": 4330 + }, + { + "epoch": 0.61, + "learning_rate": 4.898652442448063e-05, + "loss": 0.0842, + "step": 4332 + }, + { + "epoch": 0.61, + "learning_rate": 4.8986056522552875e-05, + "loss": 0.0873, + "step": 4334 + }, + { + "epoch": 0.61, + "learning_rate": 4.8985588620625114e-05, + "loss": 0.1558, + "step": 4336 + }, + { + "epoch": 0.61, + "learning_rate": 4.898512071869737e-05, + "loss": 0.0988, + "step": 4338 + }, + { + "epoch": 0.61, + "learning_rate": 4.8984652816769606e-05, + "loss": 0.0885, + "step": 4340 + }, + { + "epoch": 0.61, + "learning_rate": 4.898418491484185e-05, + "loss": 0.0964, + "step": 4342 + }, + { + "epoch": 0.61, + "learning_rate": 4.898371701291409e-05, + "loss": 0.0886, + "step": 4344 + }, + { + "epoch": 0.61, + "learning_rate": 4.898324911098634e-05, + "loss": 0.0924, + "step": 4346 + }, + { + "epoch": 0.61, + "learning_rate": 4.898278120905858e-05, + "loss": 0.1097, + "step": 4348 + }, + { + "epoch": 0.61, + "learning_rate": 4.898231330713083e-05, + "loss": 0.0865, + "step": 4350 + }, + { + "epoch": 0.61, + "learning_rate": 4.898184540520307e-05, + "loss": 0.1014, + "step": 4352 + }, + { + "epoch": 0.61, + "learning_rate": 4.8981377503275314e-05, + "loss": 0.1189, + "step": 4354 + }, + { + "epoch": 0.61, + "learning_rate": 4.898090960134756e-05, + "loss": 0.0735, + "step": 4356 + }, + { + "epoch": 0.61, + "learning_rate": 4.8980441699419806e-05, + "loss": 0.0957, + "step": 4358 + }, + { + "epoch": 0.61, + "learning_rate": 4.8979973797492045e-05, + "loss": 0.1077, + "step": 4360 + }, + { + "epoch": 0.61, + "learning_rate": 4.897950589556429e-05, + "loss": 0.0805, + "step": 4362 + }, + { + "epoch": 0.61, + "learning_rate": 4.897903799363654e-05, + "loss": 0.0992, + "step": 4364 + }, + { + "epoch": 0.61, + "learning_rate": 4.897857009170878e-05, + "loss": 0.1078, + "step": 4366 + }, + { + "epoch": 0.61, + "learning_rate": 4.897810218978102e-05, + "loss": 0.0871, + "step": 4368 + }, + { + "epoch": 0.61, + "learning_rate": 4.897763428785327e-05, + "loss": 0.0985, + "step": 4370 + }, + { + "epoch": 0.61, + "learning_rate": 4.8977166385925514e-05, + "loss": 0.0938, + "step": 4372 + }, + { + "epoch": 0.61, + "learning_rate": 4.897669848399776e-05, + "loss": 0.1007, + "step": 4374 + }, + { + "epoch": 0.61, + "learning_rate": 4.897623058207e-05, + "loss": 0.0793, + "step": 4376 + }, + { + "epoch": 0.61, + "learning_rate": 4.8975762680142244e-05, + "loss": 0.0948, + "step": 4378 + }, + { + "epoch": 0.61, + "learning_rate": 4.8975294778214484e-05, + "loss": 0.0967, + "step": 4380 + }, + { + "epoch": 0.62, + "learning_rate": 4.8974826876286736e-05, + "loss": 0.0931, + "step": 4382 + }, + { + "epoch": 0.62, + "learning_rate": 4.8974358974358975e-05, + "loss": 0.0887, + "step": 4384 + }, + { + "epoch": 0.62, + "learning_rate": 4.897389107243122e-05, + "loss": 0.0804, + "step": 4386 + }, + { + "epoch": 0.62, + "learning_rate": 4.897342317050346e-05, + "loss": 0.0771, + "step": 4388 + }, + { + "epoch": 0.62, + "learning_rate": 4.897295526857571e-05, + "loss": 0.0911, + "step": 4390 + }, + { + "epoch": 0.62, + "learning_rate": 4.897248736664795e-05, + "loss": 0.0988, + "step": 4392 + }, + { + "epoch": 0.62, + "learning_rate": 4.89720194647202e-05, + "loss": 0.0744, + "step": 4394 + }, + { + "epoch": 0.62, + "learning_rate": 4.897155156279244e-05, + "loss": 0.0785, + "step": 4396 + }, + { + "epoch": 0.62, + "learning_rate": 4.897108366086468e-05, + "loss": 0.0776, + "step": 4398 + }, + { + "epoch": 0.62, + "learning_rate": 4.897061575893693e-05, + "loss": 0.0814, + "step": 4400 + }, + { + "epoch": 0.62, + "learning_rate": 4.8970147857009175e-05, + "loss": 0.097, + "step": 4402 + }, + { + "epoch": 0.62, + "learning_rate": 4.8969679955081414e-05, + "loss": 0.0763, + "step": 4404 + }, + { + "epoch": 0.62, + "learning_rate": 4.896921205315366e-05, + "loss": 0.1057, + "step": 4406 + }, + { + "epoch": 0.62, + "learning_rate": 4.8968744151225906e-05, + "loss": 0.0981, + "step": 4408 + }, + { + "epoch": 0.62, + "learning_rate": 4.896827624929815e-05, + "loss": 0.0961, + "step": 4410 + }, + { + "epoch": 0.62, + "learning_rate": 4.896780834737039e-05, + "loss": 0.0917, + "step": 4412 + }, + { + "epoch": 0.62, + "learning_rate": 4.896734044544264e-05, + "loss": 0.0941, + "step": 4414 + }, + { + "epoch": 0.62, + "learning_rate": 4.896687254351488e-05, + "loss": 0.0752, + "step": 4416 + }, + { + "epoch": 0.62, + "learning_rate": 4.896640464158713e-05, + "loss": 0.1232, + "step": 4418 + }, + { + "epoch": 0.62, + "learning_rate": 4.896593673965937e-05, + "loss": 0.0823, + "step": 4420 + }, + { + "epoch": 0.62, + "learning_rate": 4.8965468837731614e-05, + "loss": 0.0992, + "step": 4422 + }, + { + "epoch": 0.62, + "learning_rate": 4.896500093580385e-05, + "loss": 0.0677, + "step": 4424 + }, + { + "epoch": 0.62, + "learning_rate": 4.8964533033876106e-05, + "loss": 0.0863, + "step": 4426 + }, + { + "epoch": 0.62, + "learning_rate": 4.8964065131948345e-05, + "loss": 0.1048, + "step": 4428 + }, + { + "epoch": 0.62, + "learning_rate": 4.896359723002059e-05, + "loss": 0.0841, + "step": 4430 + }, + { + "epoch": 0.62, + "learning_rate": 4.896312932809283e-05, + "loss": 0.1079, + "step": 4432 + }, + { + "epoch": 0.62, + "learning_rate": 4.896266142616508e-05, + "loss": 0.0667, + "step": 4434 + }, + { + "epoch": 0.62, + "learning_rate": 4.896219352423732e-05, + "loss": 0.0898, + "step": 4436 + }, + { + "epoch": 0.62, + "learning_rate": 4.896172562230957e-05, + "loss": 0.1009, + "step": 4438 + }, + { + "epoch": 0.62, + "learning_rate": 4.896125772038181e-05, + "loss": 0.0881, + "step": 4440 + }, + { + "epoch": 0.62, + "learning_rate": 4.896078981845405e-05, + "loss": 0.0873, + "step": 4442 + }, + { + "epoch": 0.62, + "learning_rate": 4.89603219165263e-05, + "loss": 0.1066, + "step": 4444 + }, + { + "epoch": 0.62, + "learning_rate": 4.8959854014598545e-05, + "loss": 0.1005, + "step": 4446 + }, + { + "epoch": 0.62, + "learning_rate": 4.8959386112670784e-05, + "loss": 0.0918, + "step": 4448 + }, + { + "epoch": 0.62, + "learning_rate": 4.895891821074303e-05, + "loss": 0.0812, + "step": 4450 + }, + { + "epoch": 0.62, + "learning_rate": 4.8958450308815275e-05, + "loss": 0.0732, + "step": 4452 + }, + { + "epoch": 0.63, + "learning_rate": 4.895798240688752e-05, + "loss": 0.1159, + "step": 4454 + }, + { + "epoch": 0.63, + "learning_rate": 4.895751450495976e-05, + "loss": 0.1127, + "step": 4456 + }, + { + "epoch": 0.63, + "learning_rate": 4.8957046603032006e-05, + "loss": 0.0782, + "step": 4458 + }, + { + "epoch": 0.63, + "learning_rate": 4.895657870110425e-05, + "loss": 0.1067, + "step": 4460 + }, + { + "epoch": 0.63, + "learning_rate": 4.89561107991765e-05, + "loss": 0.078, + "step": 4462 + }, + { + "epoch": 0.63, + "learning_rate": 4.895564289724874e-05, + "loss": 0.1023, + "step": 4464 + }, + { + "epoch": 0.63, + "learning_rate": 4.895517499532098e-05, + "loss": 0.0884, + "step": 4466 + }, + { + "epoch": 0.63, + "learning_rate": 4.895470709339323e-05, + "loss": 0.0833, + "step": 4468 + }, + { + "epoch": 0.63, + "learning_rate": 4.8954239191465475e-05, + "loss": 0.0966, + "step": 4470 + }, + { + "epoch": 0.63, + "learning_rate": 4.8953771289537714e-05, + "loss": 0.0709, + "step": 4472 + }, + { + "epoch": 0.63, + "learning_rate": 4.895330338760996e-05, + "loss": 0.0861, + "step": 4474 + }, + { + "epoch": 0.63, + "learning_rate": 4.89528354856822e-05, + "loss": 0.1094, + "step": 4476 + }, + { + "epoch": 0.63, + "learning_rate": 4.895236758375445e-05, + "loss": 0.1049, + "step": 4478 + }, + { + "epoch": 0.63, + "learning_rate": 4.895189968182669e-05, + "loss": 0.0909, + "step": 4480 + }, + { + "epoch": 0.63, + "learning_rate": 4.895143177989894e-05, + "loss": 0.0886, + "step": 4482 + }, + { + "epoch": 0.63, + "learning_rate": 4.8950963877971176e-05, + "loss": 0.112, + "step": 4484 + }, + { + "epoch": 0.63, + "learning_rate": 4.895049597604343e-05, + "loss": 0.0861, + "step": 4486 + }, + { + "epoch": 0.63, + "learning_rate": 4.895002807411567e-05, + "loss": 0.086, + "step": 4488 + }, + { + "epoch": 0.63, + "learning_rate": 4.8949560172187914e-05, + "loss": 0.0954, + "step": 4490 + }, + { + "epoch": 0.63, + "learning_rate": 4.894909227026015e-05, + "loss": 0.1179, + "step": 4492 + }, + { + "epoch": 0.63, + "learning_rate": 4.89486243683324e-05, + "loss": 0.0842, + "step": 4494 + }, + { + "epoch": 0.63, + "learning_rate": 4.8948156466404645e-05, + "loss": 0.1064, + "step": 4496 + }, + { + "epoch": 0.63, + "learning_rate": 4.894768856447689e-05, + "loss": 0.1053, + "step": 4498 + }, + { + "epoch": 0.63, + "learning_rate": 4.894722066254913e-05, + "loss": 0.0951, + "step": 4500 + }, + { + "epoch": 0.63, + "learning_rate": 4.8946752760621376e-05, + "loss": 0.1047, + "step": 4502 + }, + { + "epoch": 0.63, + "learning_rate": 4.894628485869362e-05, + "loss": 0.0911, + "step": 4504 + }, + { + "epoch": 0.63, + "learning_rate": 4.894581695676587e-05, + "loss": 0.0806, + "step": 4506 + }, + { + "epoch": 0.63, + "learning_rate": 4.894534905483811e-05, + "loss": 0.0861, + "step": 4508 + }, + { + "epoch": 0.63, + "learning_rate": 4.894488115291035e-05, + "loss": 0.09, + "step": 4510 + }, + { + "epoch": 0.63, + "learning_rate": 4.89444132509826e-05, + "loss": 0.0681, + "step": 4512 + }, + { + "epoch": 0.63, + "learning_rate": 4.8943945349054845e-05, + "loss": 0.1114, + "step": 4514 + }, + { + "epoch": 0.63, + "learning_rate": 4.8943477447127084e-05, + "loss": 0.1009, + "step": 4516 + }, + { + "epoch": 0.63, + "learning_rate": 4.894300954519933e-05, + "loss": 0.0783, + "step": 4518 + }, + { + "epoch": 0.63, + "learning_rate": 4.8942541643271575e-05, + "loss": 0.0956, + "step": 4520 + }, + { + "epoch": 0.63, + "learning_rate": 4.894207374134382e-05, + "loss": 0.0938, + "step": 4522 + }, + { + "epoch": 0.64, + "learning_rate": 4.894160583941606e-05, + "loss": 0.0783, + "step": 4524 + }, + { + "epoch": 0.64, + "learning_rate": 4.8941137937488306e-05, + "loss": 0.095, + "step": 4526 + }, + { + "epoch": 0.64, + "learning_rate": 4.8940670035560546e-05, + "loss": 0.1184, + "step": 4528 + }, + { + "epoch": 0.64, + "learning_rate": 4.89402021336328e-05, + "loss": 0.0978, + "step": 4530 + }, + { + "epoch": 0.64, + "learning_rate": 4.893973423170504e-05, + "loss": 0.1171, + "step": 4532 + }, + { + "epoch": 0.64, + "learning_rate": 4.893926632977728e-05, + "loss": 0.0834, + "step": 4534 + }, + { + "epoch": 0.64, + "learning_rate": 4.893879842784952e-05, + "loss": 0.1069, + "step": 4536 + }, + { + "epoch": 0.64, + "learning_rate": 4.893833052592177e-05, + "loss": 0.1139, + "step": 4538 + }, + { + "epoch": 0.64, + "learning_rate": 4.8937862623994014e-05, + "loss": 0.1091, + "step": 4540 + }, + { + "epoch": 0.64, + "learning_rate": 4.893739472206626e-05, + "loss": 0.1117, + "step": 4542 + }, + { + "epoch": 0.64, + "learning_rate": 4.89369268201385e-05, + "loss": 0.1039, + "step": 4544 + }, + { + "epoch": 0.64, + "learning_rate": 4.8936458918210745e-05, + "loss": 0.0875, + "step": 4546 + }, + { + "epoch": 0.64, + "learning_rate": 4.893599101628299e-05, + "loss": 0.0898, + "step": 4548 + }, + { + "epoch": 0.64, + "learning_rate": 4.893552311435524e-05, + "loss": 0.0719, + "step": 4550 + }, + { + "epoch": 0.64, + "learning_rate": 4.8935055212427476e-05, + "loss": 0.1235, + "step": 4552 + }, + { + "epoch": 0.64, + "learning_rate": 4.893458731049972e-05, + "loss": 0.0776, + "step": 4554 + }, + { + "epoch": 0.64, + "learning_rate": 4.893411940857197e-05, + "loss": 0.0873, + "step": 4556 + }, + { + "epoch": 0.64, + "learning_rate": 4.8933651506644214e-05, + "loss": 0.0984, + "step": 4558 + }, + { + "epoch": 0.64, + "learning_rate": 4.893318360471645e-05, + "loss": 0.0908, + "step": 4560 + }, + { + "epoch": 0.64, + "learning_rate": 4.893271570278869e-05, + "loss": 0.0766, + "step": 4562 + }, + { + "epoch": 0.64, + "learning_rate": 4.8932247800860945e-05, + "loss": 0.0991, + "step": 4564 + }, + { + "epoch": 0.64, + "learning_rate": 4.8931779898933184e-05, + "loss": 0.0929, + "step": 4566 + }, + { + "epoch": 0.64, + "learning_rate": 4.893131199700543e-05, + "loss": 0.0673, + "step": 4568 + }, + { + "epoch": 0.64, + "learning_rate": 4.893084409507767e-05, + "loss": 0.0776, + "step": 4570 + }, + { + "epoch": 0.64, + "learning_rate": 4.8930376193149915e-05, + "loss": 0.0835, + "step": 4572 + }, + { + "epoch": 0.64, + "learning_rate": 4.892990829122216e-05, + "loss": 0.1216, + "step": 4574 + }, + { + "epoch": 0.64, + "learning_rate": 4.892944038929441e-05, + "loss": 0.0918, + "step": 4576 + }, + { + "epoch": 0.64, + "learning_rate": 4.8928972487366646e-05, + "loss": 0.1165, + "step": 4578 + }, + { + "epoch": 0.64, + "learning_rate": 4.892850458543889e-05, + "loss": 0.0949, + "step": 4580 + }, + { + "epoch": 0.64, + "learning_rate": 4.892803668351114e-05, + "loss": 0.106, + "step": 4582 + }, + { + "epoch": 0.64, + "learning_rate": 4.8927568781583384e-05, + "loss": 0.0911, + "step": 4584 + }, + { + "epoch": 0.64, + "learning_rate": 4.892710087965562e-05, + "loss": 0.0676, + "step": 4586 + }, + { + "epoch": 0.64, + "learning_rate": 4.892663297772787e-05, + "loss": 0.0986, + "step": 4588 + }, + { + "epoch": 0.64, + "learning_rate": 4.8926165075800115e-05, + "loss": 0.1082, + "step": 4590 + }, + { + "epoch": 0.64, + "learning_rate": 4.892569717387236e-05, + "loss": 0.104, + "step": 4592 + }, + { + "epoch": 0.64, + "learning_rate": 4.89252292719446e-05, + "loss": 0.0883, + "step": 4594 + }, + { + "epoch": 0.65, + "learning_rate": 4.8924761370016846e-05, + "loss": 0.1203, + "step": 4596 + }, + { + "epoch": 0.65, + "learning_rate": 4.892429346808909e-05, + "loss": 0.1259, + "step": 4598 + }, + { + "epoch": 0.65, + "learning_rate": 4.892382556616134e-05, + "loss": 0.088, + "step": 4600 + }, + { + "epoch": 0.65, + "learning_rate": 4.8923357664233577e-05, + "loss": 0.0992, + "step": 4602 + }, + { + "epoch": 0.65, + "learning_rate": 4.892288976230582e-05, + "loss": 0.0931, + "step": 4604 + }, + { + "epoch": 0.65, + "learning_rate": 4.892242186037806e-05, + "loss": 0.112, + "step": 4606 + }, + { + "epoch": 0.65, + "learning_rate": 4.8921953958450314e-05, + "loss": 0.1198, + "step": 4608 + }, + { + "epoch": 0.65, + "learning_rate": 4.8921486056522553e-05, + "loss": 0.0877, + "step": 4610 + }, + { + "epoch": 0.65, + "learning_rate": 4.89210181545948e-05, + "loss": 0.0897, + "step": 4612 + }, + { + "epoch": 0.65, + "learning_rate": 4.892055025266704e-05, + "loss": 0.093, + "step": 4614 + }, + { + "epoch": 0.65, + "learning_rate": 4.892008235073929e-05, + "loss": 0.1275, + "step": 4616 + }, + { + "epoch": 0.65, + "learning_rate": 4.891961444881153e-05, + "loss": 0.1018, + "step": 4618 + }, + { + "epoch": 0.65, + "learning_rate": 4.8919146546883776e-05, + "loss": 0.1001, + "step": 4620 + }, + { + "epoch": 0.65, + "learning_rate": 4.8918678644956015e-05, + "loss": 0.0741, + "step": 4622 + }, + { + "epoch": 0.65, + "learning_rate": 4.891821074302826e-05, + "loss": 0.0864, + "step": 4624 + }, + { + "epoch": 0.65, + "learning_rate": 4.891774284110051e-05, + "loss": 0.098, + "step": 4626 + }, + { + "epoch": 0.65, + "learning_rate": 4.891727493917275e-05, + "loss": 0.0803, + "step": 4628 + }, + { + "epoch": 0.65, + "learning_rate": 4.891680703724499e-05, + "loss": 0.0959, + "step": 4630 + }, + { + "epoch": 0.65, + "learning_rate": 4.891633913531724e-05, + "loss": 0.0874, + "step": 4632 + }, + { + "epoch": 0.65, + "learning_rate": 4.8915871233389484e-05, + "loss": 0.0949, + "step": 4634 + }, + { + "epoch": 0.65, + "learning_rate": 4.891540333146173e-05, + "loss": 0.0782, + "step": 4636 + }, + { + "epoch": 0.65, + "learning_rate": 4.891493542953397e-05, + "loss": 0.0949, + "step": 4638 + }, + { + "epoch": 0.65, + "learning_rate": 4.8914467527606215e-05, + "loss": 0.1179, + "step": 4640 + }, + { + "epoch": 0.65, + "learning_rate": 4.891399962567846e-05, + "loss": 0.0777, + "step": 4642 + }, + { + "epoch": 0.65, + "learning_rate": 4.891353172375071e-05, + "loss": 0.0822, + "step": 4644 + }, + { + "epoch": 0.65, + "learning_rate": 4.8913063821822946e-05, + "loss": 0.1081, + "step": 4646 + }, + { + "epoch": 0.65, + "learning_rate": 4.891259591989519e-05, + "loss": 0.139, + "step": 4648 + }, + { + "epoch": 0.65, + "learning_rate": 4.891212801796744e-05, + "loss": 0.0901, + "step": 4650 + }, + { + "epoch": 0.65, + "learning_rate": 4.8911660116039684e-05, + "loss": 0.0992, + "step": 4652 + }, + { + "epoch": 0.65, + "learning_rate": 4.891119221411192e-05, + "loss": 0.1104, + "step": 4654 + }, + { + "epoch": 0.65, + "learning_rate": 4.891072431218417e-05, + "loss": 0.1043, + "step": 4656 + }, + { + "epoch": 0.65, + "learning_rate": 4.891025641025641e-05, + "loss": 0.0848, + "step": 4658 + }, + { + "epoch": 0.65, + "learning_rate": 4.890978850832866e-05, + "loss": 0.1208, + "step": 4660 + }, + { + "epoch": 0.65, + "learning_rate": 4.89093206064009e-05, + "loss": 0.0921, + "step": 4662 + }, + { + "epoch": 0.65, + "learning_rate": 4.8908852704473146e-05, + "loss": 0.1036, + "step": 4664 + }, + { + "epoch": 0.65, + "learning_rate": 4.8908384802545385e-05, + "loss": 0.1074, + "step": 4666 + }, + { + "epoch": 0.66, + "learning_rate": 4.890791690061764e-05, + "loss": 0.0866, + "step": 4668 + }, + { + "epoch": 0.66, + "learning_rate": 4.8907448998689877e-05, + "loss": 0.077, + "step": 4670 + }, + { + "epoch": 0.66, + "learning_rate": 4.890698109676212e-05, + "loss": 0.1032, + "step": 4672 + }, + { + "epoch": 0.66, + "learning_rate": 4.890651319483436e-05, + "loss": 0.0909, + "step": 4674 + }, + { + "epoch": 0.66, + "learning_rate": 4.890604529290661e-05, + "loss": 0.0973, + "step": 4676 + }, + { + "epoch": 0.66, + "learning_rate": 4.8905577390978853e-05, + "loss": 0.1134, + "step": 4678 + }, + { + "epoch": 0.66, + "learning_rate": 4.89051094890511e-05, + "loss": 0.0955, + "step": 4680 + }, + { + "epoch": 0.66, + "learning_rate": 4.890464158712334e-05, + "loss": 0.1156, + "step": 4682 + }, + { + "epoch": 0.66, + "learning_rate": 4.8904173685195584e-05, + "loss": 0.1031, + "step": 4684 + }, + { + "epoch": 0.66, + "learning_rate": 4.890370578326783e-05, + "loss": 0.1189, + "step": 4686 + }, + { + "epoch": 0.66, + "learning_rate": 4.8903237881340076e-05, + "loss": 0.0778, + "step": 4688 + }, + { + "epoch": 0.66, + "learning_rate": 4.8902769979412315e-05, + "loss": 0.0846, + "step": 4690 + }, + { + "epoch": 0.66, + "learning_rate": 4.890230207748456e-05, + "loss": 0.1086, + "step": 4692 + }, + { + "epoch": 0.66, + "learning_rate": 4.890183417555681e-05, + "loss": 0.1226, + "step": 4694 + }, + { + "epoch": 0.66, + "learning_rate": 4.890136627362905e-05, + "loss": 0.1065, + "step": 4696 + }, + { + "epoch": 0.66, + "learning_rate": 4.890089837170129e-05, + "loss": 0.0925, + "step": 4698 + }, + { + "epoch": 0.66, + "learning_rate": 4.890043046977354e-05, + "loss": 0.0848, + "step": 4700 + }, + { + "epoch": 0.66, + "learning_rate": 4.889996256784578e-05, + "loss": 0.1009, + "step": 4702 + }, + { + "epoch": 0.66, + "learning_rate": 4.889949466591803e-05, + "loss": 0.0826, + "step": 4704 + }, + { + "epoch": 0.66, + "learning_rate": 4.889902676399027e-05, + "loss": 0.0922, + "step": 4706 + }, + { + "epoch": 0.66, + "learning_rate": 4.8898558862062515e-05, + "loss": 0.1119, + "step": 4708 + }, + { + "epoch": 0.66, + "learning_rate": 4.8898090960134754e-05, + "loss": 0.0901, + "step": 4710 + }, + { + "epoch": 0.66, + "learning_rate": 4.889762305820701e-05, + "loss": 0.1133, + "step": 4712 + }, + { + "epoch": 0.66, + "learning_rate": 4.8897155156279246e-05, + "loss": 0.085, + "step": 4714 + }, + { + "epoch": 0.66, + "learning_rate": 4.889668725435149e-05, + "loss": 0.1013, + "step": 4716 + }, + { + "epoch": 0.66, + "learning_rate": 4.889621935242373e-05, + "loss": 0.0785, + "step": 4718 + }, + { + "epoch": 0.66, + "learning_rate": 4.889575145049598e-05, + "loss": 0.0972, + "step": 4720 + }, + { + "epoch": 0.66, + "learning_rate": 4.889528354856822e-05, + "loss": 0.0839, + "step": 4722 + }, + { + "epoch": 0.66, + "learning_rate": 4.889481564664047e-05, + "loss": 0.0845, + "step": 4724 + }, + { + "epoch": 0.66, + "learning_rate": 4.889434774471271e-05, + "loss": 0.112, + "step": 4726 + }, + { + "epoch": 0.66, + "learning_rate": 4.8893879842784954e-05, + "loss": 0.0875, + "step": 4728 + }, + { + "epoch": 0.66, + "learning_rate": 4.88934119408572e-05, + "loss": 0.0809, + "step": 4730 + }, + { + "epoch": 0.66, + "learning_rate": 4.8892944038929446e-05, + "loss": 0.1014, + "step": 4732 + }, + { + "epoch": 0.66, + "learning_rate": 4.8892476137001685e-05, + "loss": 0.1008, + "step": 4734 + }, + { + "epoch": 0.66, + "learning_rate": 4.889200823507393e-05, + "loss": 0.0968, + "step": 4736 + }, + { + "epoch": 0.67, + "learning_rate": 4.8891540333146177e-05, + "loss": 0.104, + "step": 4738 + }, + { + "epoch": 0.67, + "learning_rate": 4.889107243121842e-05, + "loss": 0.0895, + "step": 4740 + }, + { + "epoch": 0.67, + "learning_rate": 4.889060452929066e-05, + "loss": 0.1038, + "step": 4742 + }, + { + "epoch": 0.67, + "learning_rate": 4.889013662736291e-05, + "loss": 0.0948, + "step": 4744 + }, + { + "epoch": 0.67, + "learning_rate": 4.8889668725435153e-05, + "loss": 0.0859, + "step": 4746 + }, + { + "epoch": 0.67, + "learning_rate": 4.88892008235074e-05, + "loss": 0.0925, + "step": 4748 + }, + { + "epoch": 0.67, + "learning_rate": 4.888873292157964e-05, + "loss": 0.0894, + "step": 4750 + }, + { + "epoch": 0.67, + "learning_rate": 4.8888265019651884e-05, + "loss": 0.1084, + "step": 4752 + }, + { + "epoch": 0.67, + "learning_rate": 4.8887797117724124e-05, + "loss": 0.1075, + "step": 4754 + }, + { + "epoch": 0.67, + "learning_rate": 4.8887329215796376e-05, + "loss": 0.0874, + "step": 4756 + }, + { + "epoch": 0.67, + "learning_rate": 4.8886861313868615e-05, + "loss": 0.1107, + "step": 4758 + }, + { + "epoch": 0.67, + "learning_rate": 4.888639341194086e-05, + "loss": 0.1032, + "step": 4760 + }, + { + "epoch": 0.67, + "learning_rate": 4.88859255100131e-05, + "loss": 0.1123, + "step": 4762 + }, + { + "epoch": 0.67, + "learning_rate": 4.888545760808535e-05, + "loss": 0.075, + "step": 4764 + }, + { + "epoch": 0.67, + "learning_rate": 4.888498970615759e-05, + "loss": 0.0872, + "step": 4766 + }, + { + "epoch": 0.67, + "learning_rate": 4.888452180422984e-05, + "loss": 0.1139, + "step": 4768 + }, + { + "epoch": 0.67, + "learning_rate": 4.888405390230208e-05, + "loss": 0.077, + "step": 4770 + }, + { + "epoch": 0.67, + "learning_rate": 4.888358600037432e-05, + "loss": 0.1046, + "step": 4772 + }, + { + "epoch": 0.67, + "learning_rate": 4.888311809844657e-05, + "loss": 0.0873, + "step": 4774 + }, + { + "epoch": 0.67, + "learning_rate": 4.8882650196518815e-05, + "loss": 0.0995, + "step": 4776 + }, + { + "epoch": 0.67, + "learning_rate": 4.8882182294591054e-05, + "loss": 0.0791, + "step": 4778 + }, + { + "epoch": 0.67, + "learning_rate": 4.88817143926633e-05, + "loss": 0.0981, + "step": 4780 + }, + { + "epoch": 0.67, + "learning_rate": 4.8881246490735546e-05, + "loss": 0.0895, + "step": 4782 + }, + { + "epoch": 0.67, + "learning_rate": 4.888077858880779e-05, + "loss": 0.0972, + "step": 4784 + }, + { + "epoch": 0.67, + "learning_rate": 4.888031068688003e-05, + "loss": 0.1236, + "step": 4786 + }, + { + "epoch": 0.67, + "learning_rate": 4.887984278495228e-05, + "loss": 0.097, + "step": 4788 + }, + { + "epoch": 0.67, + "learning_rate": 4.887937488302452e-05, + "loss": 0.1093, + "step": 4790 + }, + { + "epoch": 0.67, + "learning_rate": 4.887890698109677e-05, + "loss": 0.0803, + "step": 4792 + }, + { + "epoch": 0.67, + "learning_rate": 4.887843907916901e-05, + "loss": 0.0826, + "step": 4794 + }, + { + "epoch": 0.67, + "learning_rate": 4.8877971177241254e-05, + "loss": 0.1125, + "step": 4796 + }, + { + "epoch": 0.67, + "learning_rate": 4.88775032753135e-05, + "loss": 0.105, + "step": 4798 + }, + { + "epoch": 0.67, + "learning_rate": 4.8877035373385746e-05, + "loss": 0.0959, + "step": 4800 + }, + { + "epoch": 0.67, + "learning_rate": 4.8876567471457985e-05, + "loss": 0.0936, + "step": 4802 + }, + { + "epoch": 0.67, + "learning_rate": 4.887609956953023e-05, + "loss": 0.1086, + "step": 4804 + }, + { + "epoch": 0.67, + "learning_rate": 4.887563166760247e-05, + "loss": 0.0829, + "step": 4806 + }, + { + "epoch": 0.67, + "learning_rate": 4.887516376567472e-05, + "loss": 0.1008, + "step": 4808 + }, + { + "epoch": 0.68, + "learning_rate": 4.887469586374696e-05, + "loss": 0.0936, + "step": 4810 + }, + { + "epoch": 0.68, + "learning_rate": 4.88742279618192e-05, + "loss": 0.0756, + "step": 4812 + }, + { + "epoch": 0.68, + "learning_rate": 4.887376005989145e-05, + "loss": 0.0609, + "step": 4814 + }, + { + "epoch": 0.68, + "learning_rate": 4.887329215796369e-05, + "loss": 0.1086, + "step": 4816 + }, + { + "epoch": 0.68, + "learning_rate": 4.887282425603594e-05, + "loss": 0.102, + "step": 4818 + }, + { + "epoch": 0.68, + "learning_rate": 4.887235635410818e-05, + "loss": 0.0932, + "step": 4820 + }, + { + "epoch": 0.68, + "learning_rate": 4.8871888452180424e-05, + "loss": 0.0812, + "step": 4822 + }, + { + "epoch": 0.68, + "learning_rate": 4.887142055025267e-05, + "loss": 0.0932, + "step": 4824 + }, + { + "epoch": 0.68, + "learning_rate": 4.8870952648324915e-05, + "loss": 0.0684, + "step": 4826 + }, + { + "epoch": 0.68, + "learning_rate": 4.8870484746397155e-05, + "loss": 0.0867, + "step": 4828 + }, + { + "epoch": 0.68, + "learning_rate": 4.88700168444694e-05, + "loss": 0.1134, + "step": 4830 + }, + { + "epoch": 0.68, + "learning_rate": 4.8869548942541646e-05, + "loss": 0.0709, + "step": 4832 + }, + { + "epoch": 0.68, + "learning_rate": 4.886908104061389e-05, + "loss": 0.0993, + "step": 4834 + }, + { + "epoch": 0.68, + "learning_rate": 4.886861313868613e-05, + "loss": 0.0925, + "step": 4836 + }, + { + "epoch": 0.68, + "learning_rate": 4.886814523675838e-05, + "loss": 0.074, + "step": 4838 + }, + { + "epoch": 0.68, + "learning_rate": 4.8867677334830616e-05, + "loss": 0.0871, + "step": 4840 + }, + { + "epoch": 0.68, + "learning_rate": 4.886720943290287e-05, + "loss": 0.1135, + "step": 4842 + }, + { + "epoch": 0.68, + "learning_rate": 4.886674153097511e-05, + "loss": 0.0792, + "step": 4844 + }, + { + "epoch": 0.68, + "learning_rate": 4.8866273629047354e-05, + "loss": 0.098, + "step": 4846 + }, + { + "epoch": 0.68, + "learning_rate": 4.886580572711959e-05, + "loss": 0.0822, + "step": 4848 + }, + { + "epoch": 0.68, + "learning_rate": 4.886533782519184e-05, + "loss": 0.1011, + "step": 4850 + }, + { + "epoch": 0.68, + "learning_rate": 4.8864869923264085e-05, + "loss": 0.0857, + "step": 4852 + }, + { + "epoch": 0.68, + "learning_rate": 4.886440202133633e-05, + "loss": 0.1509, + "step": 4854 + }, + { + "epoch": 0.68, + "learning_rate": 4.886393411940857e-05, + "loss": 0.098, + "step": 4856 + }, + { + "epoch": 0.68, + "learning_rate": 4.8863466217480816e-05, + "loss": 0.0951, + "step": 4858 + }, + { + "epoch": 0.68, + "learning_rate": 4.886299831555306e-05, + "loss": 0.0924, + "step": 4860 + }, + { + "epoch": 0.68, + "learning_rate": 4.886253041362531e-05, + "loss": 0.0884, + "step": 4862 + }, + { + "epoch": 0.68, + "learning_rate": 4.886206251169755e-05, + "loss": 0.1034, + "step": 4864 + }, + { + "epoch": 0.68, + "learning_rate": 4.886159460976979e-05, + "loss": 0.0821, + "step": 4866 + }, + { + "epoch": 0.68, + "learning_rate": 4.886112670784204e-05, + "loss": 0.1115, + "step": 4868 + }, + { + "epoch": 0.68, + "learning_rate": 4.8860658805914285e-05, + "loss": 0.0928, + "step": 4870 + }, + { + "epoch": 0.68, + "learning_rate": 4.8860190903986524e-05, + "loss": 0.116, + "step": 4872 + }, + { + "epoch": 0.68, + "learning_rate": 4.885972300205877e-05, + "loss": 0.1248, + "step": 4874 + }, + { + "epoch": 0.68, + "learning_rate": 4.8859255100131016e-05, + "loss": 0.0932, + "step": 4876 + }, + { + "epoch": 0.68, + "learning_rate": 4.885878719820326e-05, + "loss": 0.0847, + "step": 4878 + }, + { + "epoch": 0.69, + "learning_rate": 4.88583192962755e-05, + "loss": 0.1006, + "step": 4880 + }, + { + "epoch": 0.69, + "learning_rate": 4.885785139434775e-05, + "loss": 0.0924, + "step": 4882 + }, + { + "epoch": 0.69, + "learning_rate": 4.8857383492419986e-05, + "loss": 0.1081, + "step": 4884 + }, + { + "epoch": 0.69, + "learning_rate": 4.885691559049224e-05, + "loss": 0.1089, + "step": 4886 + }, + { + "epoch": 0.69, + "learning_rate": 4.885644768856448e-05, + "loss": 0.0778, + "step": 4888 + }, + { + "epoch": 0.69, + "learning_rate": 4.8855979786636724e-05, + "loss": 0.0876, + "step": 4890 + }, + { + "epoch": 0.69, + "learning_rate": 4.885551188470896e-05, + "loss": 0.0956, + "step": 4892 + }, + { + "epoch": 0.69, + "learning_rate": 4.8855043982781215e-05, + "loss": 0.0968, + "step": 4894 + }, + { + "epoch": 0.69, + "learning_rate": 4.8854576080853455e-05, + "loss": 0.1055, + "step": 4896 + }, + { + "epoch": 0.69, + "learning_rate": 4.88541081789257e-05, + "loss": 0.0718, + "step": 4898 + }, + { + "epoch": 0.69, + "learning_rate": 4.885364027699794e-05, + "loss": 0.1171, + "step": 4900 + }, + { + "epoch": 0.69, + "learning_rate": 4.8853172375070186e-05, + "loss": 0.0821, + "step": 4902 + }, + { + "epoch": 0.69, + "learning_rate": 4.885270447314243e-05, + "loss": 0.083, + "step": 4904 + }, + { + "epoch": 0.69, + "learning_rate": 4.885223657121468e-05, + "loss": 0.1034, + "step": 4906 + }, + { + "epoch": 0.69, + "learning_rate": 4.8851768669286916e-05, + "loss": 0.1082, + "step": 4908 + }, + { + "epoch": 0.69, + "learning_rate": 4.885130076735916e-05, + "loss": 0.0854, + "step": 4910 + }, + { + "epoch": 0.69, + "learning_rate": 4.885083286543141e-05, + "loss": 0.0951, + "step": 4912 + }, + { + "epoch": 0.69, + "learning_rate": 4.8850364963503654e-05, + "loss": 0.1106, + "step": 4914 + }, + { + "epoch": 0.69, + "learning_rate": 4.884989706157589e-05, + "loss": 0.1112, + "step": 4916 + }, + { + "epoch": 0.69, + "learning_rate": 4.884942915964814e-05, + "loss": 0.0852, + "step": 4918 + }, + { + "epoch": 0.69, + "learning_rate": 4.8848961257720385e-05, + "loss": 0.0912, + "step": 4920 + }, + { + "epoch": 0.69, + "learning_rate": 4.884849335579263e-05, + "loss": 0.0921, + "step": 4922 + }, + { + "epoch": 0.69, + "learning_rate": 4.884802545386487e-05, + "loss": 0.0897, + "step": 4924 + }, + { + "epoch": 0.69, + "learning_rate": 4.8847557551937116e-05, + "loss": 0.1344, + "step": 4926 + }, + { + "epoch": 0.69, + "learning_rate": 4.884708965000936e-05, + "loss": 0.0962, + "step": 4928 + }, + { + "epoch": 0.69, + "learning_rate": 4.884662174808161e-05, + "loss": 0.1081, + "step": 4930 + }, + { + "epoch": 0.69, + "learning_rate": 4.884615384615385e-05, + "loss": 0.0939, + "step": 4932 + }, + { + "epoch": 0.69, + "learning_rate": 4.884568594422609e-05, + "loss": 0.097, + "step": 4934 + }, + { + "epoch": 0.69, + "learning_rate": 4.884521804229833e-05, + "loss": 0.0804, + "step": 4936 + }, + { + "epoch": 0.69, + "learning_rate": 4.8844750140370585e-05, + "loss": 0.1433, + "step": 4938 + }, + { + "epoch": 0.69, + "learning_rate": 4.8844282238442824e-05, + "loss": 0.0986, + "step": 4940 + }, + { + "epoch": 0.69, + "learning_rate": 4.884381433651507e-05, + "loss": 0.0981, + "step": 4942 + }, + { + "epoch": 0.69, + "learning_rate": 4.884334643458731e-05, + "loss": 0.0859, + "step": 4944 + }, + { + "epoch": 0.69, + "learning_rate": 4.884287853265956e-05, + "loss": 0.1135, + "step": 4946 + }, + { + "epoch": 0.69, + "learning_rate": 4.88424106307318e-05, + "loss": 0.1019, + "step": 4948 + }, + { + "epoch": 0.69, + "learning_rate": 4.884194272880405e-05, + "loss": 0.1109, + "step": 4950 + }, + { + "epoch": 0.7, + "learning_rate": 4.8841474826876286e-05, + "loss": 0.1208, + "step": 4952 + }, + { + "epoch": 0.7, + "learning_rate": 4.884100692494853e-05, + "loss": 0.0752, + "step": 4954 + }, + { + "epoch": 0.7, + "learning_rate": 4.884053902302078e-05, + "loss": 0.1325, + "step": 4956 + }, + { + "epoch": 0.7, + "learning_rate": 4.8840071121093024e-05, + "loss": 0.111, + "step": 4958 + }, + { + "epoch": 0.7, + "learning_rate": 4.883960321916526e-05, + "loss": 0.087, + "step": 4960 + }, + { + "epoch": 0.7, + "learning_rate": 4.883913531723751e-05, + "loss": 0.0767, + "step": 4962 + }, + { + "epoch": 0.7, + "learning_rate": 4.8838667415309755e-05, + "loss": 0.0867, + "step": 4964 + }, + { + "epoch": 0.7, + "learning_rate": 4.8838199513382e-05, + "loss": 0.0836, + "step": 4966 + }, + { + "epoch": 0.7, + "learning_rate": 4.883773161145424e-05, + "loss": 0.084, + "step": 4968 + }, + { + "epoch": 0.7, + "learning_rate": 4.8837263709526486e-05, + "loss": 0.1198, + "step": 4970 + }, + { + "epoch": 0.7, + "learning_rate": 4.883679580759873e-05, + "loss": 0.0794, + "step": 4972 + }, + { + "epoch": 0.7, + "learning_rate": 4.883632790567098e-05, + "loss": 0.0856, + "step": 4974 + }, + { + "epoch": 0.7, + "learning_rate": 4.8835860003743216e-05, + "loss": 0.0822, + "step": 4976 + }, + { + "epoch": 0.7, + "learning_rate": 4.883539210181546e-05, + "loss": 0.1034, + "step": 4978 + }, + { + "epoch": 0.7, + "learning_rate": 4.883492419988771e-05, + "loss": 0.1047, + "step": 4980 + }, + { + "epoch": 0.7, + "learning_rate": 4.8834456297959954e-05, + "loss": 0.1017, + "step": 4982 + }, + { + "epoch": 0.7, + "learning_rate": 4.883398839603219e-05, + "loss": 0.1045, + "step": 4984 + }, + { + "epoch": 0.7, + "learning_rate": 4.883352049410444e-05, + "loss": 0.0984, + "step": 4986 + }, + { + "epoch": 0.7, + "learning_rate": 4.883305259217668e-05, + "loss": 0.0999, + "step": 4988 + }, + { + "epoch": 0.7, + "learning_rate": 4.883258469024893e-05, + "loss": 0.0997, + "step": 4990 + }, + { + "epoch": 0.7, + "learning_rate": 4.883211678832117e-05, + "loss": 0.0962, + "step": 4992 + }, + { + "epoch": 0.7, + "learning_rate": 4.8831648886393416e-05, + "loss": 0.0899, + "step": 4994 + }, + { + "epoch": 0.7, + "learning_rate": 4.8831180984465655e-05, + "loss": 0.0797, + "step": 4996 + }, + { + "epoch": 0.7, + "learning_rate": 4.88307130825379e-05, + "loss": 0.0936, + "step": 4998 + }, + { + "epoch": 0.7, + "learning_rate": 4.883024518061015e-05, + "loss": 0.0877, + "step": 5000 + }, + { + "epoch": 0.7, + "eval_gen_len": 30.7586, + "eval_loss": 1.0278133153915405, + "eval_meteor": 0.0429, + "eval_runtime": 14.877, + "eval_samples_per_second": 3.899, + "eval_steps_per_second": 0.538, + "step": 5000 + }, + { + "epoch": 0.7, + "learning_rate": 4.882977727868239e-05, + "loss": 0.1066, + "step": 5002 + }, + { + "epoch": 0.7, + "learning_rate": 4.882930937675463e-05, + "loss": 0.0858, + "step": 5004 + }, + { + "epoch": 0.7, + "learning_rate": 4.882884147482688e-05, + "loss": 0.092, + "step": 5006 + }, + { + "epoch": 0.7, + "learning_rate": 4.8828373572899124e-05, + "loss": 0.09, + "step": 5008 + }, + { + "epoch": 0.7, + "learning_rate": 4.882790567097137e-05, + "loss": 0.1196, + "step": 5010 + }, + { + "epoch": 0.7, + "learning_rate": 4.882743776904361e-05, + "loss": 0.1112, + "step": 5012 + }, + { + "epoch": 0.7, + "learning_rate": 4.8826969867115855e-05, + "loss": 0.102, + "step": 5014 + }, + { + "epoch": 0.7, + "learning_rate": 4.88265019651881e-05, + "loss": 0.0905, + "step": 5016 + }, + { + "epoch": 0.7, + "learning_rate": 4.882603406326035e-05, + "loss": 0.1077, + "step": 5018 + }, + { + "epoch": 0.7, + "learning_rate": 4.8825566161332586e-05, + "loss": 0.1116, + "step": 5020 + }, + { + "epoch": 0.7, + "learning_rate": 4.882509825940483e-05, + "loss": 0.1122, + "step": 5022 + }, + { + "epoch": 0.71, + "learning_rate": 4.882463035747708e-05, + "loss": 0.0991, + "step": 5024 + }, + { + "epoch": 0.71, + "learning_rate": 4.8824162455549324e-05, + "loss": 0.0988, + "step": 5026 + }, + { + "epoch": 0.71, + "learning_rate": 4.882369455362156e-05, + "loss": 0.0812, + "step": 5028 + }, + { + "epoch": 0.71, + "learning_rate": 4.882322665169381e-05, + "loss": 0.0862, + "step": 5030 + }, + { + "epoch": 0.71, + "learning_rate": 4.882275874976605e-05, + "loss": 0.09, + "step": 5032 + }, + { + "epoch": 0.71, + "learning_rate": 4.88222908478383e-05, + "loss": 0.0951, + "step": 5034 + }, + { + "epoch": 0.71, + "learning_rate": 4.882182294591054e-05, + "loss": 0.0941, + "step": 5036 + }, + { + "epoch": 0.71, + "learning_rate": 4.8821355043982786e-05, + "loss": 0.1444, + "step": 5038 + }, + { + "epoch": 0.71, + "learning_rate": 4.8820887142055025e-05, + "loss": 0.1041, + "step": 5040 + }, + { + "epoch": 0.71, + "learning_rate": 4.882041924012728e-05, + "loss": 0.1071, + "step": 5042 + }, + { + "epoch": 0.71, + "learning_rate": 4.8819951338199517e-05, + "loss": 0.0689, + "step": 5044 + }, + { + "epoch": 0.71, + "learning_rate": 4.881948343627176e-05, + "loss": 0.0874, + "step": 5046 + }, + { + "epoch": 0.71, + "learning_rate": 4.8819015534344e-05, + "loss": 0.1425, + "step": 5048 + }, + { + "epoch": 0.71, + "learning_rate": 4.881854763241625e-05, + "loss": 0.0879, + "step": 5050 + }, + { + "epoch": 0.71, + "learning_rate": 4.881807973048849e-05, + "loss": 0.0951, + "step": 5052 + }, + { + "epoch": 0.71, + "learning_rate": 4.881761182856074e-05, + "loss": 0.0918, + "step": 5054 + }, + { + "epoch": 0.71, + "learning_rate": 4.881714392663298e-05, + "loss": 0.0966, + "step": 5056 + }, + { + "epoch": 0.71, + "learning_rate": 4.8816676024705224e-05, + "loss": 0.0977, + "step": 5058 + }, + { + "epoch": 0.71, + "learning_rate": 4.881620812277747e-05, + "loss": 0.0908, + "step": 5060 + }, + { + "epoch": 0.71, + "learning_rate": 4.8815740220849716e-05, + "loss": 0.0909, + "step": 5062 + }, + { + "epoch": 0.71, + "learning_rate": 4.8815272318921955e-05, + "loss": 0.0929, + "step": 5064 + }, + { + "epoch": 0.71, + "learning_rate": 4.8814804416994194e-05, + "loss": 0.0875, + "step": 5066 + }, + { + "epoch": 0.71, + "learning_rate": 4.881433651506645e-05, + "loss": 0.0916, + "step": 5068 + }, + { + "epoch": 0.71, + "learning_rate": 4.8813868613138686e-05, + "loss": 0.1227, + "step": 5070 + }, + { + "epoch": 0.71, + "learning_rate": 4.881340071121093e-05, + "loss": 0.1054, + "step": 5072 + }, + { + "epoch": 0.71, + "learning_rate": 4.881293280928317e-05, + "loss": 0.0916, + "step": 5074 + }, + { + "epoch": 0.71, + "learning_rate": 4.8812464907355424e-05, + "loss": 0.0883, + "step": 5076 + }, + { + "epoch": 0.71, + "learning_rate": 4.881199700542766e-05, + "loss": 0.098, + "step": 5078 + }, + { + "epoch": 0.71, + "learning_rate": 4.881152910349991e-05, + "loss": 0.0925, + "step": 5080 + }, + { + "epoch": 0.71, + "learning_rate": 4.881106120157215e-05, + "loss": 0.0824, + "step": 5082 + }, + { + "epoch": 0.71, + "learning_rate": 4.8810593299644394e-05, + "loss": 0.0913, + "step": 5084 + }, + { + "epoch": 0.71, + "learning_rate": 4.881012539771664e-05, + "loss": 0.078, + "step": 5086 + }, + { + "epoch": 0.71, + "learning_rate": 4.8809657495788886e-05, + "loss": 0.0972, + "step": 5088 + }, + { + "epoch": 0.71, + "learning_rate": 4.8809189593861125e-05, + "loss": 0.0802, + "step": 5090 + }, + { + "epoch": 0.71, + "learning_rate": 4.880872169193337e-05, + "loss": 0.0854, + "step": 5092 + }, + { + "epoch": 0.72, + "learning_rate": 4.880825379000562e-05, + "loss": 0.0963, + "step": 5094 + }, + { + "epoch": 0.72, + "learning_rate": 4.880778588807786e-05, + "loss": 0.1212, + "step": 5096 + }, + { + "epoch": 0.72, + "learning_rate": 4.88073179861501e-05, + "loss": 0.0881, + "step": 5098 + }, + { + "epoch": 0.72, + "learning_rate": 4.880685008422235e-05, + "loss": 0.1133, + "step": 5100 + }, + { + "epoch": 0.72, + "learning_rate": 4.8806382182294594e-05, + "loss": 0.0829, + "step": 5102 + }, + { + "epoch": 0.72, + "learning_rate": 4.880591428036684e-05, + "loss": 0.0888, + "step": 5104 + }, + { + "epoch": 0.72, + "learning_rate": 4.880544637843908e-05, + "loss": 0.1003, + "step": 5106 + }, + { + "epoch": 0.72, + "learning_rate": 4.8804978476511325e-05, + "loss": 0.0716, + "step": 5108 + }, + { + "epoch": 0.72, + "learning_rate": 4.880451057458357e-05, + "loss": 0.1152, + "step": 5110 + }, + { + "epoch": 0.72, + "learning_rate": 4.8804042672655817e-05, + "loss": 0.082, + "step": 5112 + }, + { + "epoch": 0.72, + "learning_rate": 4.8803574770728056e-05, + "loss": 0.0875, + "step": 5114 + }, + { + "epoch": 0.72, + "learning_rate": 4.88031068688003e-05, + "loss": 0.1069, + "step": 5116 + }, + { + "epoch": 0.72, + "learning_rate": 4.880263896687254e-05, + "loss": 0.0879, + "step": 5118 + }, + { + "epoch": 0.72, + "learning_rate": 4.8802171064944793e-05, + "loss": 0.0848, + "step": 5120 + }, + { + "epoch": 0.72, + "learning_rate": 4.880170316301703e-05, + "loss": 0.0749, + "step": 5122 + }, + { + "epoch": 0.72, + "learning_rate": 4.880123526108928e-05, + "loss": 0.0772, + "step": 5124 + }, + { + "epoch": 0.72, + "learning_rate": 4.880076735916152e-05, + "loss": 0.0929, + "step": 5126 + }, + { + "epoch": 0.72, + "learning_rate": 4.8800299457233763e-05, + "loss": 0.1225, + "step": 5128 + }, + { + "epoch": 0.72, + "learning_rate": 4.879983155530601e-05, + "loss": 0.0864, + "step": 5130 + }, + { + "epoch": 0.72, + "learning_rate": 4.8799363653378255e-05, + "loss": 0.099, + "step": 5132 + }, + { + "epoch": 0.72, + "learning_rate": 4.8798895751450494e-05, + "loss": 0.0949, + "step": 5134 + }, + { + "epoch": 0.72, + "learning_rate": 4.879842784952274e-05, + "loss": 0.0723, + "step": 5136 + }, + { + "epoch": 0.72, + "learning_rate": 4.8797959947594986e-05, + "loss": 0.1157, + "step": 5138 + }, + { + "epoch": 0.72, + "learning_rate": 4.879749204566723e-05, + "loss": 0.0795, + "step": 5140 + }, + { + "epoch": 0.72, + "learning_rate": 4.879702414373947e-05, + "loss": 0.111, + "step": 5142 + }, + { + "epoch": 0.72, + "learning_rate": 4.879655624181172e-05, + "loss": 0.1169, + "step": 5144 + }, + { + "epoch": 0.72, + "learning_rate": 4.879608833988396e-05, + "loss": 0.0893, + "step": 5146 + }, + { + "epoch": 0.72, + "learning_rate": 4.879562043795621e-05, + "loss": 0.1175, + "step": 5148 + }, + { + "epoch": 0.72, + "learning_rate": 4.879515253602845e-05, + "loss": 0.1116, + "step": 5150 + }, + { + "epoch": 0.72, + "learning_rate": 4.8794684634100694e-05, + "loss": 0.1105, + "step": 5152 + }, + { + "epoch": 0.72, + "learning_rate": 4.879421673217294e-05, + "loss": 0.1006, + "step": 5154 + }, + { + "epoch": 0.72, + "learning_rate": 4.8793748830245186e-05, + "loss": 0.0852, + "step": 5156 + }, + { + "epoch": 0.72, + "learning_rate": 4.8793280928317425e-05, + "loss": 0.0972, + "step": 5158 + }, + { + "epoch": 0.72, + "learning_rate": 4.879281302638967e-05, + "loss": 0.1039, + "step": 5160 + }, + { + "epoch": 0.72, + "learning_rate": 4.879234512446191e-05, + "loss": 0.1021, + "step": 5162 + }, + { + "epoch": 0.72, + "learning_rate": 4.879187722253416e-05, + "loss": 0.1008, + "step": 5164 + }, + { + "epoch": 0.73, + "learning_rate": 4.87914093206064e-05, + "loss": 0.0823, + "step": 5166 + }, + { + "epoch": 0.73, + "learning_rate": 4.879094141867865e-05, + "loss": 0.0925, + "step": 5168 + }, + { + "epoch": 0.73, + "learning_rate": 4.879047351675089e-05, + "loss": 0.1156, + "step": 5170 + }, + { + "epoch": 0.73, + "learning_rate": 4.879000561482314e-05, + "loss": 0.1213, + "step": 5172 + }, + { + "epoch": 0.73, + "learning_rate": 4.878953771289538e-05, + "loss": 0.1397, + "step": 5174 + }, + { + "epoch": 0.73, + "learning_rate": 4.8789069810967625e-05, + "loss": 0.0847, + "step": 5176 + }, + { + "epoch": 0.73, + "learning_rate": 4.8788601909039864e-05, + "loss": 0.0956, + "step": 5178 + }, + { + "epoch": 0.73, + "learning_rate": 4.878813400711211e-05, + "loss": 0.1024, + "step": 5180 + }, + { + "epoch": 0.73, + "learning_rate": 4.8787666105184356e-05, + "loss": 0.0907, + "step": 5182 + }, + { + "epoch": 0.73, + "learning_rate": 4.87871982032566e-05, + "loss": 0.0927, + "step": 5184 + }, + { + "epoch": 0.73, + "learning_rate": 4.878673030132884e-05, + "loss": 0.0882, + "step": 5186 + }, + { + "epoch": 0.73, + "learning_rate": 4.878626239940109e-05, + "loss": 0.0587, + "step": 5188 + }, + { + "epoch": 0.73, + "learning_rate": 4.878579449747333e-05, + "loss": 0.0883, + "step": 5190 + }, + { + "epoch": 0.73, + "learning_rate": 4.878532659554558e-05, + "loss": 0.0874, + "step": 5192 + }, + { + "epoch": 0.73, + "learning_rate": 4.878485869361782e-05, + "loss": 0.0828, + "step": 5194 + }, + { + "epoch": 0.73, + "learning_rate": 4.8784390791690064e-05, + "loss": 0.064, + "step": 5196 + }, + { + "epoch": 0.73, + "learning_rate": 4.878392288976231e-05, + "loss": 0.0796, + "step": 5198 + }, + { + "epoch": 0.73, + "learning_rate": 4.8783454987834555e-05, + "loss": 0.0894, + "step": 5200 + }, + { + "epoch": 0.73, + "learning_rate": 4.8782987085906794e-05, + "loss": 0.0972, + "step": 5202 + }, + { + "epoch": 0.73, + "learning_rate": 4.878251918397904e-05, + "loss": 0.1182, + "step": 5204 + }, + { + "epoch": 0.73, + "learning_rate": 4.8782051282051286e-05, + "loss": 0.0831, + "step": 5206 + }, + { + "epoch": 0.73, + "learning_rate": 4.878158338012353e-05, + "loss": 0.096, + "step": 5208 + }, + { + "epoch": 0.73, + "learning_rate": 4.878111547819577e-05, + "loss": 0.0795, + "step": 5210 + }, + { + "epoch": 0.73, + "learning_rate": 4.878064757626802e-05, + "loss": 0.1084, + "step": 5212 + }, + { + "epoch": 0.73, + "learning_rate": 4.8780179674340256e-05, + "loss": 0.0898, + "step": 5214 + }, + { + "epoch": 0.73, + "learning_rate": 4.877971177241251e-05, + "loss": 0.095, + "step": 5216 + }, + { + "epoch": 0.73, + "learning_rate": 4.877924387048475e-05, + "loss": 0.0821, + "step": 5218 + }, + { + "epoch": 0.73, + "learning_rate": 4.8778775968556994e-05, + "loss": 0.1048, + "step": 5220 + }, + { + "epoch": 0.73, + "learning_rate": 4.877830806662923e-05, + "loss": 0.0875, + "step": 5222 + }, + { + "epoch": 0.73, + "learning_rate": 4.8777840164701486e-05, + "loss": 0.0855, + "step": 5224 + }, + { + "epoch": 0.73, + "learning_rate": 4.8777372262773725e-05, + "loss": 0.0792, + "step": 5226 + }, + { + "epoch": 0.73, + "learning_rate": 4.877690436084597e-05, + "loss": 0.1225, + "step": 5228 + }, + { + "epoch": 0.73, + "learning_rate": 4.877643645891821e-05, + "loss": 0.1503, + "step": 5230 + }, + { + "epoch": 0.73, + "learning_rate": 4.8775968556990456e-05, + "loss": 0.0773, + "step": 5232 + }, + { + "epoch": 0.73, + "learning_rate": 4.87755006550627e-05, + "loss": 0.1163, + "step": 5234 + }, + { + "epoch": 0.73, + "learning_rate": 4.877503275313495e-05, + "loss": 0.0889, + "step": 5236 + }, + { + "epoch": 0.74, + "learning_rate": 4.877456485120719e-05, + "loss": 0.1115, + "step": 5238 + }, + { + "epoch": 0.74, + "learning_rate": 4.877409694927943e-05, + "loss": 0.1228, + "step": 5240 + }, + { + "epoch": 0.74, + "learning_rate": 4.877362904735168e-05, + "loss": 0.0851, + "step": 5242 + }, + { + "epoch": 0.74, + "learning_rate": 4.8773161145423925e-05, + "loss": 0.105, + "step": 5244 + }, + { + "epoch": 0.74, + "learning_rate": 4.8772693243496164e-05, + "loss": 0.0942, + "step": 5246 + }, + { + "epoch": 0.74, + "learning_rate": 4.877222534156841e-05, + "loss": 0.0972, + "step": 5248 + }, + { + "epoch": 0.74, + "learning_rate": 4.8771757439640656e-05, + "loss": 0.1146, + "step": 5250 + }, + { + "epoch": 0.74, + "learning_rate": 4.87712895377129e-05, + "loss": 0.1119, + "step": 5252 + }, + { + "epoch": 0.74, + "learning_rate": 4.877082163578514e-05, + "loss": 0.1169, + "step": 5254 + }, + { + "epoch": 0.74, + "learning_rate": 4.877035373385739e-05, + "loss": 0.1061, + "step": 5256 + }, + { + "epoch": 0.74, + "learning_rate": 4.876988583192963e-05, + "loss": 0.0853, + "step": 5258 + }, + { + "epoch": 0.74, + "learning_rate": 4.876941793000188e-05, + "loss": 0.1577, + "step": 5260 + }, + { + "epoch": 0.74, + "learning_rate": 4.876895002807412e-05, + "loss": 0.0861, + "step": 5262 + }, + { + "epoch": 0.74, + "learning_rate": 4.8768482126146364e-05, + "loss": 0.0989, + "step": 5264 + }, + { + "epoch": 0.74, + "learning_rate": 4.87680142242186e-05, + "loss": 0.1048, + "step": 5266 + }, + { + "epoch": 0.74, + "learning_rate": 4.8767546322290855e-05, + "loss": 0.088, + "step": 5268 + }, + { + "epoch": 0.74, + "learning_rate": 4.8767078420363094e-05, + "loss": 0.1005, + "step": 5270 + }, + { + "epoch": 0.74, + "learning_rate": 4.876661051843534e-05, + "loss": 0.0915, + "step": 5272 + }, + { + "epoch": 0.74, + "learning_rate": 4.876614261650758e-05, + "loss": 0.0972, + "step": 5274 + }, + { + "epoch": 0.74, + "learning_rate": 4.8765674714579825e-05, + "loss": 0.0944, + "step": 5276 + }, + { + "epoch": 0.74, + "learning_rate": 4.876520681265207e-05, + "loss": 0.0973, + "step": 5278 + }, + { + "epoch": 0.74, + "learning_rate": 4.876473891072432e-05, + "loss": 0.0953, + "step": 5280 + }, + { + "epoch": 0.74, + "learning_rate": 4.8764271008796556e-05, + "loss": 0.0862, + "step": 5282 + }, + { + "epoch": 0.74, + "learning_rate": 4.87638031068688e-05, + "loss": 0.098, + "step": 5284 + }, + { + "epoch": 0.74, + "learning_rate": 4.876333520494105e-05, + "loss": 0.1076, + "step": 5286 + }, + { + "epoch": 0.74, + "learning_rate": 4.8762867303013294e-05, + "loss": 0.0939, + "step": 5288 + }, + { + "epoch": 0.74, + "learning_rate": 4.876239940108553e-05, + "loss": 0.0821, + "step": 5290 + }, + { + "epoch": 0.74, + "learning_rate": 4.876193149915778e-05, + "loss": 0.0927, + "step": 5292 + }, + { + "epoch": 0.74, + "learning_rate": 4.8761463597230025e-05, + "loss": 0.0995, + "step": 5294 + }, + { + "epoch": 0.74, + "learning_rate": 4.876099569530227e-05, + "loss": 0.0986, + "step": 5296 + }, + { + "epoch": 0.74, + "learning_rate": 4.876052779337451e-05, + "loss": 0.0788, + "step": 5298 + }, + { + "epoch": 0.74, + "learning_rate": 4.8760059891446756e-05, + "loss": 0.0971, + "step": 5300 + }, + { + "epoch": 0.74, + "learning_rate": 4.8759591989519e-05, + "loss": 0.0936, + "step": 5302 + }, + { + "epoch": 0.74, + "learning_rate": 4.875912408759125e-05, + "loss": 0.1128, + "step": 5304 + }, + { + "epoch": 0.74, + "learning_rate": 4.875865618566349e-05, + "loss": 0.1094, + "step": 5306 + }, + { + "epoch": 0.75, + "learning_rate": 4.875818828373573e-05, + "loss": 0.1193, + "step": 5308 + }, + { + "epoch": 0.75, + "learning_rate": 4.875772038180797e-05, + "loss": 0.0843, + "step": 5310 + }, + { + "epoch": 0.75, + "learning_rate": 4.8757252479880225e-05, + "loss": 0.0942, + "step": 5312 + }, + { + "epoch": 0.75, + "learning_rate": 4.8756784577952464e-05, + "loss": 0.1054, + "step": 5314 + }, + { + "epoch": 0.75, + "learning_rate": 4.875631667602471e-05, + "loss": 0.1116, + "step": 5316 + }, + { + "epoch": 0.75, + "learning_rate": 4.875584877409695e-05, + "loss": 0.0969, + "step": 5318 + }, + { + "epoch": 0.75, + "learning_rate": 4.8755380872169195e-05, + "loss": 0.099, + "step": 5320 + }, + { + "epoch": 0.75, + "learning_rate": 4.875491297024144e-05, + "loss": 0.1027, + "step": 5322 + }, + { + "epoch": 0.75, + "learning_rate": 4.875444506831368e-05, + "loss": 0.0683, + "step": 5324 + }, + { + "epoch": 0.75, + "learning_rate": 4.8753977166385926e-05, + "loss": 0.1208, + "step": 5326 + }, + { + "epoch": 0.75, + "learning_rate": 4.875350926445817e-05, + "loss": 0.0801, + "step": 5328 + }, + { + "epoch": 0.75, + "learning_rate": 4.875304136253042e-05, + "loss": 0.0926, + "step": 5330 + }, + { + "epoch": 0.75, + "learning_rate": 4.875257346060266e-05, + "loss": 0.0755, + "step": 5332 + }, + { + "epoch": 0.75, + "learning_rate": 4.87521055586749e-05, + "loss": 0.0812, + "step": 5334 + }, + { + "epoch": 0.75, + "learning_rate": 4.875163765674715e-05, + "loss": 0.1073, + "step": 5336 + }, + { + "epoch": 0.75, + "learning_rate": 4.8751169754819395e-05, + "loss": 0.1058, + "step": 5338 + }, + { + "epoch": 0.75, + "learning_rate": 4.8750701852891634e-05, + "loss": 0.0831, + "step": 5340 + }, + { + "epoch": 0.75, + "learning_rate": 4.875023395096388e-05, + "loss": 0.1217, + "step": 5342 + }, + { + "epoch": 0.75, + "learning_rate": 4.874976604903612e-05, + "loss": 0.0912, + "step": 5344 + }, + { + "epoch": 0.75, + "learning_rate": 4.874929814710837e-05, + "loss": 0.0806, + "step": 5346 + }, + { + "epoch": 0.75, + "learning_rate": 4.874883024518061e-05, + "loss": 0.0736, + "step": 5348 + }, + { + "epoch": 0.75, + "learning_rate": 4.8748362343252856e-05, + "loss": 0.1044, + "step": 5350 + }, + { + "epoch": 0.75, + "learning_rate": 4.8747894441325096e-05, + "loss": 0.0607, + "step": 5352 + }, + { + "epoch": 0.75, + "learning_rate": 4.874742653939735e-05, + "loss": 0.0997, + "step": 5354 + }, + { + "epoch": 0.75, + "learning_rate": 4.874695863746959e-05, + "loss": 0.1129, + "step": 5356 + }, + { + "epoch": 0.75, + "learning_rate": 4.874649073554183e-05, + "loss": 0.1079, + "step": 5358 + }, + { + "epoch": 0.75, + "learning_rate": 4.874602283361407e-05, + "loss": 0.1107, + "step": 5360 + }, + { + "epoch": 0.75, + "learning_rate": 4.874555493168632e-05, + "loss": 0.1014, + "step": 5362 + }, + { + "epoch": 0.75, + "learning_rate": 4.8745087029758564e-05, + "loss": 0.0788, + "step": 5364 + }, + { + "epoch": 0.75, + "learning_rate": 4.874461912783081e-05, + "loss": 0.0861, + "step": 5366 + }, + { + "epoch": 0.75, + "learning_rate": 4.874415122590305e-05, + "loss": 0.0941, + "step": 5368 + }, + { + "epoch": 0.75, + "learning_rate": 4.8743683323975295e-05, + "loss": 0.1025, + "step": 5370 + }, + { + "epoch": 0.75, + "learning_rate": 4.874321542204754e-05, + "loss": 0.0818, + "step": 5372 + }, + { + "epoch": 0.75, + "learning_rate": 4.874274752011979e-05, + "loss": 0.0861, + "step": 5374 + }, + { + "epoch": 0.75, + "learning_rate": 4.8742279618192026e-05, + "loss": 0.0949, + "step": 5376 + }, + { + "epoch": 0.75, + "learning_rate": 4.874181171626427e-05, + "loss": 0.0917, + "step": 5378 + }, + { + "epoch": 0.76, + "learning_rate": 4.874134381433652e-05, + "loss": 0.0911, + "step": 5380 + }, + { + "epoch": 0.76, + "learning_rate": 4.8740875912408764e-05, + "loss": 0.1092, + "step": 5382 + }, + { + "epoch": 0.76, + "learning_rate": 4.8740408010481e-05, + "loss": 0.0843, + "step": 5384 + }, + { + "epoch": 0.76, + "learning_rate": 4.873994010855325e-05, + "loss": 0.1067, + "step": 5386 + }, + { + "epoch": 0.76, + "learning_rate": 4.8739472206625495e-05, + "loss": 0.1009, + "step": 5388 + }, + { + "epoch": 0.76, + "learning_rate": 4.873900430469774e-05, + "loss": 0.0771, + "step": 5390 + }, + { + "epoch": 0.76, + "learning_rate": 4.873853640276998e-05, + "loss": 0.0855, + "step": 5392 + }, + { + "epoch": 0.76, + "learning_rate": 4.8738068500842226e-05, + "loss": 0.0999, + "step": 5394 + }, + { + "epoch": 0.76, + "learning_rate": 4.8737600598914465e-05, + "loss": 0.0723, + "step": 5396 + }, + { + "epoch": 0.76, + "learning_rate": 4.873713269698672e-05, + "loss": 0.0921, + "step": 5398 + }, + { + "epoch": 0.76, + "learning_rate": 4.873666479505896e-05, + "loss": 0.1074, + "step": 5400 + }, + { + "epoch": 0.76, + "learning_rate": 4.87361968931312e-05, + "loss": 0.1037, + "step": 5402 + }, + { + "epoch": 0.76, + "learning_rate": 4.873572899120344e-05, + "loss": 0.1177, + "step": 5404 + }, + { + "epoch": 0.76, + "learning_rate": 4.873526108927569e-05, + "loss": 0.0802, + "step": 5406 + }, + { + "epoch": 0.76, + "learning_rate": 4.8734793187347934e-05, + "loss": 0.1014, + "step": 5408 + }, + { + "epoch": 0.76, + "learning_rate": 4.873432528542018e-05, + "loss": 0.1004, + "step": 5410 + }, + { + "epoch": 0.76, + "learning_rate": 4.873385738349242e-05, + "loss": 0.0965, + "step": 5412 + }, + { + "epoch": 0.76, + "learning_rate": 4.8733389481564665e-05, + "loss": 0.1153, + "step": 5414 + }, + { + "epoch": 0.76, + "learning_rate": 4.873292157963691e-05, + "loss": 0.0954, + "step": 5416 + }, + { + "epoch": 0.76, + "learning_rate": 4.8732453677709156e-05, + "loss": 0.1108, + "step": 5418 + }, + { + "epoch": 0.76, + "learning_rate": 4.8731985775781396e-05, + "loss": 0.0833, + "step": 5420 + }, + { + "epoch": 0.76, + "learning_rate": 4.873151787385364e-05, + "loss": 0.092, + "step": 5422 + }, + { + "epoch": 0.76, + "learning_rate": 4.873104997192589e-05, + "loss": 0.0987, + "step": 5424 + }, + { + "epoch": 0.76, + "learning_rate": 4.873058206999813e-05, + "loss": 0.1232, + "step": 5426 + }, + { + "epoch": 0.76, + "learning_rate": 4.873011416807037e-05, + "loss": 0.0961, + "step": 5428 + }, + { + "epoch": 0.76, + "learning_rate": 4.872964626614262e-05, + "loss": 0.0977, + "step": 5430 + }, + { + "epoch": 0.76, + "learning_rate": 4.8729178364214864e-05, + "loss": 0.1228, + "step": 5432 + }, + { + "epoch": 0.76, + "learning_rate": 4.872871046228711e-05, + "loss": 0.0832, + "step": 5434 + }, + { + "epoch": 0.76, + "learning_rate": 4.872824256035935e-05, + "loss": 0.0934, + "step": 5436 + }, + { + "epoch": 0.76, + "learning_rate": 4.8727774658431595e-05, + "loss": 0.1066, + "step": 5438 + }, + { + "epoch": 0.76, + "learning_rate": 4.8727306756503834e-05, + "loss": 0.0877, + "step": 5440 + }, + { + "epoch": 0.76, + "learning_rate": 4.872683885457609e-05, + "loss": 0.0959, + "step": 5442 + }, + { + "epoch": 0.76, + "learning_rate": 4.8726370952648326e-05, + "loss": 0.1042, + "step": 5444 + }, + { + "epoch": 0.76, + "learning_rate": 4.872590305072057e-05, + "loss": 0.1263, + "step": 5446 + }, + { + "epoch": 0.76, + "learning_rate": 4.872543514879281e-05, + "loss": 0.1156, + "step": 5448 + }, + { + "epoch": 0.77, + "learning_rate": 4.8724967246865064e-05, + "loss": 0.102, + "step": 5450 + }, + { + "epoch": 0.77, + "learning_rate": 4.87244993449373e-05, + "loss": 0.0916, + "step": 5452 + }, + { + "epoch": 0.77, + "learning_rate": 4.872403144300955e-05, + "loss": 0.1195, + "step": 5454 + }, + { + "epoch": 0.77, + "learning_rate": 4.872356354108179e-05, + "loss": 0.0908, + "step": 5456 + }, + { + "epoch": 0.77, + "learning_rate": 4.8723095639154034e-05, + "loss": 0.0879, + "step": 5458 + }, + { + "epoch": 0.77, + "learning_rate": 4.872262773722628e-05, + "loss": 0.0976, + "step": 5460 + }, + { + "epoch": 0.77, + "learning_rate": 4.8722159835298526e-05, + "loss": 0.0783, + "step": 5462 + }, + { + "epoch": 0.77, + "learning_rate": 4.8721691933370765e-05, + "loss": 0.1068, + "step": 5464 + }, + { + "epoch": 0.77, + "learning_rate": 4.872122403144301e-05, + "loss": 0.0795, + "step": 5466 + }, + { + "epoch": 0.77, + "learning_rate": 4.872075612951526e-05, + "loss": 0.0846, + "step": 5468 + }, + { + "epoch": 0.77, + "learning_rate": 4.87202882275875e-05, + "loss": 0.0909, + "step": 5470 + }, + { + "epoch": 0.77, + "learning_rate": 4.871982032565974e-05, + "loss": 0.0914, + "step": 5472 + }, + { + "epoch": 0.77, + "learning_rate": 4.871935242373199e-05, + "loss": 0.144, + "step": 5474 + }, + { + "epoch": 0.77, + "learning_rate": 4.8718884521804234e-05, + "loss": 0.0967, + "step": 5476 + }, + { + "epoch": 0.77, + "learning_rate": 4.871841661987648e-05, + "loss": 0.077, + "step": 5478 + }, + { + "epoch": 0.77, + "learning_rate": 4.871794871794872e-05, + "loss": 0.0871, + "step": 5480 + }, + { + "epoch": 0.77, + "learning_rate": 4.8717480816020965e-05, + "loss": 0.1024, + "step": 5482 + }, + { + "epoch": 0.77, + "learning_rate": 4.871701291409321e-05, + "loss": 0.1155, + "step": 5484 + }, + { + "epoch": 0.77, + "learning_rate": 4.8716545012165456e-05, + "loss": 0.1163, + "step": 5486 + }, + { + "epoch": 0.77, + "learning_rate": 4.8716077110237696e-05, + "loss": 0.1136, + "step": 5488 + }, + { + "epoch": 0.77, + "learning_rate": 4.871560920830994e-05, + "loss": 0.1093, + "step": 5490 + }, + { + "epoch": 0.77, + "learning_rate": 4.871514130638218e-05, + "loss": 0.0954, + "step": 5492 + }, + { + "epoch": 0.77, + "learning_rate": 4.871467340445443e-05, + "loss": 0.0912, + "step": 5494 + }, + { + "epoch": 0.77, + "learning_rate": 4.871420550252667e-05, + "loss": 0.1163, + "step": 5496 + }, + { + "epoch": 0.77, + "learning_rate": 4.871373760059892e-05, + "loss": 0.1029, + "step": 5498 + }, + { + "epoch": 0.77, + "learning_rate": 4.871326969867116e-05, + "loss": 0.0773, + "step": 5500 + }, + { + "epoch": 0.77, + "learning_rate": 4.871280179674341e-05, + "loss": 0.0908, + "step": 5502 + }, + { + "epoch": 0.77, + "learning_rate": 4.871233389481565e-05, + "loss": 0.0809, + "step": 5504 + }, + { + "epoch": 0.77, + "learning_rate": 4.8711865992887895e-05, + "loss": 0.1013, + "step": 5506 + }, + { + "epoch": 0.77, + "learning_rate": 4.8711398090960134e-05, + "loss": 0.0955, + "step": 5508 + }, + { + "epoch": 0.77, + "learning_rate": 4.871093018903238e-05, + "loss": 0.0844, + "step": 5510 + }, + { + "epoch": 0.77, + "learning_rate": 4.8710462287104626e-05, + "loss": 0.1063, + "step": 5512 + }, + { + "epoch": 0.77, + "learning_rate": 4.870999438517687e-05, + "loss": 0.0918, + "step": 5514 + }, + { + "epoch": 0.77, + "learning_rate": 4.870952648324911e-05, + "loss": 0.0901, + "step": 5516 + }, + { + "epoch": 0.77, + "learning_rate": 4.870905858132136e-05, + "loss": 0.0831, + "step": 5518 + }, + { + "epoch": 0.77, + "learning_rate": 4.87085906793936e-05, + "loss": 0.0877, + "step": 5520 + }, + { + "epoch": 0.78, + "learning_rate": 4.870812277746585e-05, + "loss": 0.1052, + "step": 5522 + }, + { + "epoch": 0.78, + "learning_rate": 4.870765487553809e-05, + "loss": 0.0827, + "step": 5524 + }, + { + "epoch": 0.78, + "learning_rate": 4.8707186973610334e-05, + "loss": 0.1171, + "step": 5526 + }, + { + "epoch": 0.78, + "learning_rate": 4.870671907168258e-05, + "loss": 0.0911, + "step": 5528 + }, + { + "epoch": 0.78, + "learning_rate": 4.8706251169754826e-05, + "loss": 0.129, + "step": 5530 + }, + { + "epoch": 0.78, + "learning_rate": 4.8705783267827065e-05, + "loss": 0.0777, + "step": 5532 + }, + { + "epoch": 0.78, + "learning_rate": 4.870531536589931e-05, + "loss": 0.0703, + "step": 5534 + }, + { + "epoch": 0.78, + "learning_rate": 4.870484746397156e-05, + "loss": 0.1071, + "step": 5536 + }, + { + "epoch": 0.78, + "learning_rate": 4.87043795620438e-05, + "loss": 0.0731, + "step": 5538 + }, + { + "epoch": 0.78, + "learning_rate": 4.870391166011604e-05, + "loss": 0.0921, + "step": 5540 + }, + { + "epoch": 0.78, + "learning_rate": 4.870344375818829e-05, + "loss": 0.0842, + "step": 5542 + }, + { + "epoch": 0.78, + "learning_rate": 4.870297585626053e-05, + "loss": 0.1026, + "step": 5544 + }, + { + "epoch": 0.78, + "learning_rate": 4.870250795433278e-05, + "loss": 0.0941, + "step": 5546 + }, + { + "epoch": 0.78, + "learning_rate": 4.870204005240502e-05, + "loss": 0.0942, + "step": 5548 + }, + { + "epoch": 0.78, + "learning_rate": 4.8701572150477265e-05, + "loss": 0.0914, + "step": 5550 + }, + { + "epoch": 0.78, + "learning_rate": 4.8701104248549504e-05, + "loss": 0.1367, + "step": 5552 + }, + { + "epoch": 0.78, + "learning_rate": 4.870063634662175e-05, + "loss": 0.0841, + "step": 5554 + }, + { + "epoch": 0.78, + "learning_rate": 4.8700168444693996e-05, + "loss": 0.1086, + "step": 5556 + }, + { + "epoch": 0.78, + "learning_rate": 4.869970054276624e-05, + "loss": 0.086, + "step": 5558 + }, + { + "epoch": 0.78, + "learning_rate": 4.869923264083848e-05, + "loss": 0.1065, + "step": 5560 + }, + { + "epoch": 0.78, + "learning_rate": 4.8698764738910727e-05, + "loss": 0.0951, + "step": 5562 + }, + { + "epoch": 0.78, + "learning_rate": 4.869829683698297e-05, + "loss": 0.0806, + "step": 5564 + }, + { + "epoch": 0.78, + "learning_rate": 4.869782893505522e-05, + "loss": 0.1128, + "step": 5566 + }, + { + "epoch": 0.78, + "learning_rate": 4.869736103312746e-05, + "loss": 0.1105, + "step": 5568 + }, + { + "epoch": 0.78, + "learning_rate": 4.8696893131199703e-05, + "loss": 0.0936, + "step": 5570 + }, + { + "epoch": 0.78, + "learning_rate": 4.869642522927195e-05, + "loss": 0.0906, + "step": 5572 + }, + { + "epoch": 0.78, + "learning_rate": 4.869595732734419e-05, + "loss": 0.0979, + "step": 5574 + }, + { + "epoch": 0.78, + "learning_rate": 4.8695489425416434e-05, + "loss": 0.1293, + "step": 5576 + }, + { + "epoch": 0.78, + "learning_rate": 4.8695021523488674e-05, + "loss": 0.0945, + "step": 5578 + }, + { + "epoch": 0.78, + "learning_rate": 4.8694553621560926e-05, + "loss": 0.1071, + "step": 5580 + }, + { + "epoch": 0.78, + "learning_rate": 4.8694085719633165e-05, + "loss": 0.0819, + "step": 5582 + }, + { + "epoch": 0.78, + "learning_rate": 4.869361781770541e-05, + "loss": 0.09, + "step": 5584 + }, + { + "epoch": 0.78, + "learning_rate": 4.869314991577765e-05, + "loss": 0.094, + "step": 5586 + }, + { + "epoch": 0.78, + "learning_rate": 4.8692682013849896e-05, + "loss": 0.0905, + "step": 5588 + }, + { + "epoch": 0.78, + "learning_rate": 4.869221411192214e-05, + "loss": 0.1133, + "step": 5590 + }, + { + "epoch": 0.78, + "learning_rate": 4.869174620999439e-05, + "loss": 0.1095, + "step": 5592 + }, + { + "epoch": 0.79, + "learning_rate": 4.869127830806663e-05, + "loss": 0.1052, + "step": 5594 + }, + { + "epoch": 0.79, + "learning_rate": 4.869081040613887e-05, + "loss": 0.0891, + "step": 5596 + }, + { + "epoch": 0.79, + "learning_rate": 4.869034250421112e-05, + "loss": 0.1034, + "step": 5598 + }, + { + "epoch": 0.79, + "learning_rate": 4.8689874602283365e-05, + "loss": 0.0836, + "step": 5600 + }, + { + "epoch": 0.79, + "learning_rate": 4.8689406700355604e-05, + "loss": 0.0989, + "step": 5602 + }, + { + "epoch": 0.79, + "learning_rate": 4.868893879842785e-05, + "loss": 0.0892, + "step": 5604 + }, + { + "epoch": 0.79, + "learning_rate": 4.8688470896500096e-05, + "loss": 0.0902, + "step": 5606 + }, + { + "epoch": 0.79, + "learning_rate": 4.868800299457234e-05, + "loss": 0.1037, + "step": 5608 + }, + { + "epoch": 0.79, + "learning_rate": 4.868753509264458e-05, + "loss": 0.1106, + "step": 5610 + }, + { + "epoch": 0.79, + "learning_rate": 4.868706719071683e-05, + "loss": 0.1128, + "step": 5612 + }, + { + "epoch": 0.79, + "learning_rate": 4.868659928878907e-05, + "loss": 0.1263, + "step": 5614 + }, + { + "epoch": 0.79, + "learning_rate": 4.868613138686132e-05, + "loss": 0.1297, + "step": 5616 + }, + { + "epoch": 0.79, + "learning_rate": 4.868566348493356e-05, + "loss": 0.116, + "step": 5618 + }, + { + "epoch": 0.79, + "learning_rate": 4.8685195583005804e-05, + "loss": 0.1371, + "step": 5620 + }, + { + "epoch": 0.79, + "learning_rate": 4.868472768107804e-05, + "loss": 0.1183, + "step": 5622 + }, + { + "epoch": 0.79, + "learning_rate": 4.8684259779150296e-05, + "loss": 0.0773, + "step": 5624 + }, + { + "epoch": 0.79, + "learning_rate": 4.8683791877222535e-05, + "loss": 0.1158, + "step": 5626 + }, + { + "epoch": 0.79, + "learning_rate": 4.868332397529478e-05, + "loss": 0.0743, + "step": 5628 + }, + { + "epoch": 0.79, + "learning_rate": 4.868285607336702e-05, + "loss": 0.0896, + "step": 5630 + }, + { + "epoch": 0.79, + "learning_rate": 4.868238817143927e-05, + "loss": 0.1097, + "step": 5632 + }, + { + "epoch": 0.79, + "learning_rate": 4.868192026951151e-05, + "loss": 0.0834, + "step": 5634 + }, + { + "epoch": 0.79, + "learning_rate": 4.868145236758376e-05, + "loss": 0.0808, + "step": 5636 + }, + { + "epoch": 0.79, + "learning_rate": 4.8680984465656e-05, + "loss": 0.0934, + "step": 5638 + }, + { + "epoch": 0.79, + "learning_rate": 4.868051656372824e-05, + "loss": 0.1005, + "step": 5640 + }, + { + "epoch": 0.79, + "learning_rate": 4.868004866180049e-05, + "loss": 0.1217, + "step": 5642 + }, + { + "epoch": 0.79, + "learning_rate": 4.8679580759872734e-05, + "loss": 0.0884, + "step": 5644 + }, + { + "epoch": 0.79, + "learning_rate": 4.8679112857944974e-05, + "loss": 0.1019, + "step": 5646 + }, + { + "epoch": 0.79, + "learning_rate": 4.867864495601722e-05, + "loss": 0.0945, + "step": 5648 + }, + { + "epoch": 0.79, + "learning_rate": 4.8678177054089465e-05, + "loss": 0.093, + "step": 5650 + }, + { + "epoch": 0.79, + "learning_rate": 4.867770915216171e-05, + "loss": 0.1392, + "step": 5652 + }, + { + "epoch": 0.79, + "learning_rate": 4.867724125023395e-05, + "loss": 0.0792, + "step": 5654 + }, + { + "epoch": 0.79, + "learning_rate": 4.8676773348306196e-05, + "loss": 0.1005, + "step": 5656 + }, + { + "epoch": 0.79, + "learning_rate": 4.867630544637844e-05, + "loss": 0.1035, + "step": 5658 + }, + { + "epoch": 0.79, + "learning_rate": 4.867583754445069e-05, + "loss": 0.085, + "step": 5660 + }, + { + "epoch": 0.79, + "learning_rate": 4.867536964252293e-05, + "loss": 0.1051, + "step": 5662 + }, + { + "epoch": 0.8, + "learning_rate": 4.867490174059517e-05, + "loss": 0.0851, + "step": 5664 + }, + { + "epoch": 0.8, + "learning_rate": 4.867443383866742e-05, + "loss": 0.0912, + "step": 5666 + }, + { + "epoch": 0.8, + "learning_rate": 4.8673965936739665e-05, + "loss": 0.0964, + "step": 5668 + }, + { + "epoch": 0.8, + "learning_rate": 4.8673498034811904e-05, + "loss": 0.0948, + "step": 5670 + }, + { + "epoch": 0.8, + "learning_rate": 4.867303013288415e-05, + "loss": 0.1255, + "step": 5672 + }, + { + "epoch": 0.8, + "learning_rate": 4.867256223095639e-05, + "loss": 0.0945, + "step": 5674 + }, + { + "epoch": 0.8, + "learning_rate": 4.867209432902864e-05, + "loss": 0.1198, + "step": 5676 + }, + { + "epoch": 0.8, + "learning_rate": 4.867162642710088e-05, + "loss": 0.0898, + "step": 5678 + }, + { + "epoch": 0.8, + "learning_rate": 4.867115852517313e-05, + "loss": 0.0981, + "step": 5680 + }, + { + "epoch": 0.8, + "learning_rate": 4.8670690623245366e-05, + "loss": 0.0937, + "step": 5682 + }, + { + "epoch": 0.8, + "learning_rate": 4.867022272131762e-05, + "loss": 0.0942, + "step": 5684 + }, + { + "epoch": 0.8, + "learning_rate": 4.866975481938986e-05, + "loss": 0.0784, + "step": 5686 + }, + { + "epoch": 0.8, + "learning_rate": 4.8669286917462104e-05, + "loss": 0.132, + "step": 5688 + }, + { + "epoch": 0.8, + "learning_rate": 4.866881901553434e-05, + "loss": 0.1304, + "step": 5690 + }, + { + "epoch": 0.8, + "learning_rate": 4.866835111360659e-05, + "loss": 0.0966, + "step": 5692 + }, + { + "epoch": 0.8, + "learning_rate": 4.8667883211678835e-05, + "loss": 0.1298, + "step": 5694 + }, + { + "epoch": 0.8, + "learning_rate": 4.866741530975108e-05, + "loss": 0.0615, + "step": 5696 + }, + { + "epoch": 0.8, + "learning_rate": 4.866694740782332e-05, + "loss": 0.075, + "step": 5698 + }, + { + "epoch": 0.8, + "learning_rate": 4.8666479505895566e-05, + "loss": 0.11, + "step": 5700 + }, + { + "epoch": 0.8, + "learning_rate": 4.866601160396781e-05, + "loss": 0.0993, + "step": 5702 + }, + { + "epoch": 0.8, + "learning_rate": 4.866554370204006e-05, + "loss": 0.0881, + "step": 5704 + }, + { + "epoch": 0.8, + "learning_rate": 4.86650758001123e-05, + "loss": 0.0967, + "step": 5706 + }, + { + "epoch": 0.8, + "learning_rate": 4.866460789818454e-05, + "loss": 0.1145, + "step": 5708 + }, + { + "epoch": 0.8, + "learning_rate": 4.866413999625679e-05, + "loss": 0.1109, + "step": 5710 + }, + { + "epoch": 0.8, + "learning_rate": 4.8663672094329034e-05, + "loss": 0.1026, + "step": 5712 + }, + { + "epoch": 0.8, + "learning_rate": 4.8663204192401274e-05, + "loss": 0.1205, + "step": 5714 + }, + { + "epoch": 0.8, + "learning_rate": 4.866273629047352e-05, + "loss": 0.0781, + "step": 5716 + }, + { + "epoch": 0.8, + "learning_rate": 4.866226838854576e-05, + "loss": 0.1263, + "step": 5718 + }, + { + "epoch": 0.8, + "learning_rate": 4.866180048661801e-05, + "loss": 0.0782, + "step": 5720 + }, + { + "epoch": 0.8, + "learning_rate": 4.866133258469025e-05, + "loss": 0.092, + "step": 5722 + }, + { + "epoch": 0.8, + "learning_rate": 4.8660864682762496e-05, + "loss": 0.084, + "step": 5724 + }, + { + "epoch": 0.8, + "learning_rate": 4.8660396780834735e-05, + "loss": 0.0845, + "step": 5726 + }, + { + "epoch": 0.8, + "learning_rate": 4.865992887890699e-05, + "loss": 0.1016, + "step": 5728 + }, + { + "epoch": 0.8, + "learning_rate": 4.865946097697923e-05, + "loss": 0.1108, + "step": 5730 + }, + { + "epoch": 0.8, + "learning_rate": 4.865899307505147e-05, + "loss": 0.1022, + "step": 5732 + }, + { + "epoch": 0.8, + "learning_rate": 4.865852517312371e-05, + "loss": 0.0797, + "step": 5734 + }, + { + "epoch": 0.81, + "learning_rate": 4.865805727119596e-05, + "loss": 0.1102, + "step": 5736 + }, + { + "epoch": 0.81, + "learning_rate": 4.8657589369268204e-05, + "loss": 0.1081, + "step": 5738 + }, + { + "epoch": 0.81, + "learning_rate": 4.865712146734045e-05, + "loss": 0.1162, + "step": 5740 + }, + { + "epoch": 0.81, + "learning_rate": 4.865665356541269e-05, + "loss": 0.1079, + "step": 5742 + }, + { + "epoch": 0.81, + "learning_rate": 4.8656185663484935e-05, + "loss": 0.0622, + "step": 5744 + }, + { + "epoch": 0.81, + "learning_rate": 4.865571776155718e-05, + "loss": 0.0808, + "step": 5746 + }, + { + "epoch": 0.81, + "learning_rate": 4.865524985962943e-05, + "loss": 0.0969, + "step": 5748 + }, + { + "epoch": 0.81, + "learning_rate": 4.8654781957701666e-05, + "loss": 0.1134, + "step": 5750 + }, + { + "epoch": 0.81, + "learning_rate": 4.865431405577391e-05, + "loss": 0.0911, + "step": 5752 + }, + { + "epoch": 0.81, + "learning_rate": 4.865384615384616e-05, + "loss": 0.1119, + "step": 5754 + }, + { + "epoch": 0.81, + "learning_rate": 4.8653378251918404e-05, + "loss": 0.1267, + "step": 5756 + }, + { + "epoch": 0.81, + "learning_rate": 4.865291034999064e-05, + "loss": 0.1022, + "step": 5758 + }, + { + "epoch": 0.81, + "learning_rate": 4.865244244806289e-05, + "loss": 0.0947, + "step": 5760 + }, + { + "epoch": 0.81, + "learning_rate": 4.8651974546135135e-05, + "loss": 0.0856, + "step": 5762 + }, + { + "epoch": 0.81, + "learning_rate": 4.865150664420738e-05, + "loss": 0.1083, + "step": 5764 + }, + { + "epoch": 0.81, + "learning_rate": 4.865103874227962e-05, + "loss": 0.1219, + "step": 5766 + }, + { + "epoch": 0.81, + "learning_rate": 4.8650570840351866e-05, + "loss": 0.1151, + "step": 5768 + }, + { + "epoch": 0.81, + "learning_rate": 4.8650102938424105e-05, + "loss": 0.1197, + "step": 5770 + }, + { + "epoch": 0.81, + "learning_rate": 4.864963503649636e-05, + "loss": 0.0811, + "step": 5772 + }, + { + "epoch": 0.81, + "learning_rate": 4.86491671345686e-05, + "loss": 0.093, + "step": 5774 + }, + { + "epoch": 0.81, + "learning_rate": 4.864869923264084e-05, + "loss": 0.1262, + "step": 5776 + }, + { + "epoch": 0.81, + "learning_rate": 4.864823133071308e-05, + "loss": 0.1196, + "step": 5778 + }, + { + "epoch": 0.81, + "learning_rate": 4.8647763428785334e-05, + "loss": 0.0947, + "step": 5780 + }, + { + "epoch": 0.81, + "learning_rate": 4.8647295526857574e-05, + "loss": 0.1022, + "step": 5782 + }, + { + "epoch": 0.81, + "learning_rate": 4.864682762492982e-05, + "loss": 0.1471, + "step": 5784 + }, + { + "epoch": 0.81, + "learning_rate": 4.864635972300206e-05, + "loss": 0.0992, + "step": 5786 + }, + { + "epoch": 0.81, + "learning_rate": 4.8645891821074305e-05, + "loss": 0.0914, + "step": 5788 + }, + { + "epoch": 0.81, + "learning_rate": 4.864542391914655e-05, + "loss": 0.1023, + "step": 5790 + }, + { + "epoch": 0.81, + "learning_rate": 4.8644956017218796e-05, + "loss": 0.1051, + "step": 5792 + }, + { + "epoch": 0.81, + "learning_rate": 4.8644488115291036e-05, + "loss": 0.1129, + "step": 5794 + }, + { + "epoch": 0.81, + "learning_rate": 4.864402021336328e-05, + "loss": 0.0977, + "step": 5796 + }, + { + "epoch": 0.81, + "learning_rate": 4.864355231143553e-05, + "loss": 0.0916, + "step": 5798 + }, + { + "epoch": 0.81, + "learning_rate": 4.864308440950777e-05, + "loss": 0.0867, + "step": 5800 + }, + { + "epoch": 0.81, + "learning_rate": 4.864261650758001e-05, + "loss": 0.0936, + "step": 5802 + }, + { + "epoch": 0.81, + "learning_rate": 4.864214860565226e-05, + "loss": 0.1163, + "step": 5804 + }, + { + "epoch": 0.81, + "learning_rate": 4.8641680703724504e-05, + "loss": 0.104, + "step": 5806 + }, + { + "epoch": 0.82, + "learning_rate": 4.864121280179675e-05, + "loss": 0.1155, + "step": 5808 + }, + { + "epoch": 0.82, + "learning_rate": 4.864074489986899e-05, + "loss": 0.098, + "step": 5810 + }, + { + "epoch": 0.82, + "learning_rate": 4.8640276997941235e-05, + "loss": 0.0928, + "step": 5812 + }, + { + "epoch": 0.82, + "learning_rate": 4.863980909601348e-05, + "loss": 0.0958, + "step": 5814 + }, + { + "epoch": 0.82, + "learning_rate": 4.863934119408573e-05, + "loss": 0.0983, + "step": 5816 + }, + { + "epoch": 0.82, + "learning_rate": 4.8638873292157966e-05, + "loss": 0.0974, + "step": 5818 + }, + { + "epoch": 0.82, + "learning_rate": 4.863840539023021e-05, + "loss": 0.1004, + "step": 5820 + }, + { + "epoch": 0.82, + "learning_rate": 4.863793748830245e-05, + "loss": 0.14, + "step": 5822 + }, + { + "epoch": 0.82, + "learning_rate": 4.8637469586374704e-05, + "loss": 0.0913, + "step": 5824 + }, + { + "epoch": 0.82, + "learning_rate": 4.863700168444694e-05, + "loss": 0.0883, + "step": 5826 + }, + { + "epoch": 0.82, + "learning_rate": 4.863653378251918e-05, + "loss": 0.1027, + "step": 5828 + }, + { + "epoch": 0.82, + "learning_rate": 4.863606588059143e-05, + "loss": 0.0953, + "step": 5830 + }, + { + "epoch": 0.82, + "learning_rate": 4.8635597978663674e-05, + "loss": 0.0811, + "step": 5832 + }, + { + "epoch": 0.82, + "learning_rate": 4.863513007673592e-05, + "loss": 0.1174, + "step": 5834 + }, + { + "epoch": 0.82, + "learning_rate": 4.863466217480816e-05, + "loss": 0.1065, + "step": 5836 + }, + { + "epoch": 0.82, + "learning_rate": 4.8634194272880405e-05, + "loss": 0.1064, + "step": 5838 + }, + { + "epoch": 0.82, + "learning_rate": 4.863372637095265e-05, + "loss": 0.0954, + "step": 5840 + }, + { + "epoch": 0.82, + "learning_rate": 4.86332584690249e-05, + "loss": 0.1126, + "step": 5842 + }, + { + "epoch": 0.82, + "learning_rate": 4.8632790567097136e-05, + "loss": 0.0907, + "step": 5844 + }, + { + "epoch": 0.82, + "learning_rate": 4.863232266516938e-05, + "loss": 0.111, + "step": 5846 + }, + { + "epoch": 0.82, + "learning_rate": 4.863185476324163e-05, + "loss": 0.092, + "step": 5848 + }, + { + "epoch": 0.82, + "learning_rate": 4.8631386861313874e-05, + "loss": 0.0969, + "step": 5850 + }, + { + "epoch": 0.82, + "learning_rate": 4.863091895938611e-05, + "loss": 0.1048, + "step": 5852 + }, + { + "epoch": 0.82, + "learning_rate": 4.863045105745836e-05, + "loss": 0.098, + "step": 5854 + }, + { + "epoch": 0.82, + "learning_rate": 4.86299831555306e-05, + "loss": 0.1183, + "step": 5856 + }, + { + "epoch": 0.82, + "learning_rate": 4.862951525360285e-05, + "loss": 0.0794, + "step": 5858 + }, + { + "epoch": 0.82, + "learning_rate": 4.862904735167509e-05, + "loss": 0.1104, + "step": 5860 + }, + { + "epoch": 0.82, + "learning_rate": 4.8628579449747336e-05, + "loss": 0.1069, + "step": 5862 + }, + { + "epoch": 0.82, + "learning_rate": 4.8628111547819575e-05, + "loss": 0.1049, + "step": 5864 + }, + { + "epoch": 0.82, + "learning_rate": 4.862764364589182e-05, + "loss": 0.0684, + "step": 5866 + }, + { + "epoch": 0.82, + "learning_rate": 4.8627175743964066e-05, + "loss": 0.1266, + "step": 5868 + }, + { + "epoch": 0.82, + "learning_rate": 4.862670784203631e-05, + "loss": 0.11, + "step": 5870 + }, + { + "epoch": 0.82, + "learning_rate": 4.862623994010855e-05, + "loss": 0.1415, + "step": 5872 + }, + { + "epoch": 0.82, + "learning_rate": 4.86257720381808e-05, + "loss": 0.0985, + "step": 5874 + }, + { + "epoch": 0.82, + "learning_rate": 4.862530413625304e-05, + "loss": 0.0979, + "step": 5876 + }, + { + "epoch": 0.83, + "learning_rate": 4.862483623432529e-05, + "loss": 0.0997, + "step": 5878 + }, + { + "epoch": 0.83, + "learning_rate": 4.862436833239753e-05, + "loss": 0.0902, + "step": 5880 + }, + { + "epoch": 0.83, + "learning_rate": 4.8623900430469774e-05, + "loss": 0.0743, + "step": 5882 + }, + { + "epoch": 0.83, + "learning_rate": 4.862343252854202e-05, + "loss": 0.1087, + "step": 5884 + }, + { + "epoch": 0.83, + "learning_rate": 4.8622964626614266e-05, + "loss": 0.1133, + "step": 5886 + }, + { + "epoch": 0.83, + "learning_rate": 4.8622496724686505e-05, + "loss": 0.089, + "step": 5888 + }, + { + "epoch": 0.83, + "learning_rate": 4.862202882275875e-05, + "loss": 0.0954, + "step": 5890 + }, + { + "epoch": 0.83, + "learning_rate": 4.8621560920831e-05, + "loss": 0.097, + "step": 5892 + }, + { + "epoch": 0.83, + "learning_rate": 4.862109301890324e-05, + "loss": 0.1184, + "step": 5894 + }, + { + "epoch": 0.83, + "learning_rate": 4.862062511697548e-05, + "loss": 0.0975, + "step": 5896 + }, + { + "epoch": 0.83, + "learning_rate": 4.862015721504773e-05, + "loss": 0.1071, + "step": 5898 + }, + { + "epoch": 0.83, + "learning_rate": 4.861968931311997e-05, + "loss": 0.069, + "step": 5900 + }, + { + "epoch": 0.83, + "learning_rate": 4.861922141119222e-05, + "loss": 0.1376, + "step": 5902 + }, + { + "epoch": 0.83, + "learning_rate": 4.861875350926446e-05, + "loss": 0.1246, + "step": 5904 + }, + { + "epoch": 0.83, + "learning_rate": 4.8618285607336705e-05, + "loss": 0.1001, + "step": 5906 + }, + { + "epoch": 0.83, + "learning_rate": 4.8617817705408944e-05, + "loss": 0.1003, + "step": 5908 + }, + { + "epoch": 0.83, + "learning_rate": 4.86173498034812e-05, + "loss": 0.1234, + "step": 5910 + }, + { + "epoch": 0.83, + "learning_rate": 4.8616881901553436e-05, + "loss": 0.0929, + "step": 5912 + }, + { + "epoch": 0.83, + "learning_rate": 4.861641399962568e-05, + "loss": 0.0917, + "step": 5914 + }, + { + "epoch": 0.83, + "learning_rate": 4.861594609769792e-05, + "loss": 0.0919, + "step": 5916 + }, + { + "epoch": 0.83, + "learning_rate": 4.861547819577017e-05, + "loss": 0.0958, + "step": 5918 + }, + { + "epoch": 0.83, + "learning_rate": 4.861501029384241e-05, + "loss": 0.1047, + "step": 5920 + }, + { + "epoch": 0.83, + "learning_rate": 4.861454239191466e-05, + "loss": 0.0816, + "step": 5922 + }, + { + "epoch": 0.83, + "learning_rate": 4.86140744899869e-05, + "loss": 0.1162, + "step": 5924 + }, + { + "epoch": 0.83, + "learning_rate": 4.8613606588059144e-05, + "loss": 0.0917, + "step": 5926 + }, + { + "epoch": 0.83, + "learning_rate": 4.861313868613139e-05, + "loss": 0.1072, + "step": 5928 + }, + { + "epoch": 0.83, + "learning_rate": 4.8612670784203636e-05, + "loss": 0.1359, + "step": 5930 + }, + { + "epoch": 0.83, + "learning_rate": 4.8612202882275875e-05, + "loss": 0.1258, + "step": 5932 + }, + { + "epoch": 0.83, + "learning_rate": 4.861173498034812e-05, + "loss": 0.1104, + "step": 5934 + }, + { + "epoch": 0.83, + "learning_rate": 4.8611267078420366e-05, + "loss": 0.0921, + "step": 5936 + }, + { + "epoch": 0.83, + "learning_rate": 4.861079917649261e-05, + "loss": 0.0894, + "step": 5938 + }, + { + "epoch": 0.83, + "learning_rate": 4.861033127456485e-05, + "loss": 0.086, + "step": 5940 + }, + { + "epoch": 0.83, + "learning_rate": 4.86098633726371e-05, + "loss": 0.1098, + "step": 5942 + }, + { + "epoch": 0.83, + "learning_rate": 4.860939547070934e-05, + "loss": 0.0945, + "step": 5944 + }, + { + "epoch": 0.83, + "learning_rate": 4.860892756878159e-05, + "loss": 0.083, + "step": 5946 + }, + { + "epoch": 0.83, + "learning_rate": 4.860845966685383e-05, + "loss": 0.0899, + "step": 5948 + }, + { + "epoch": 0.84, + "learning_rate": 4.8607991764926074e-05, + "loss": 0.1089, + "step": 5950 + }, + { + "epoch": 0.84, + "learning_rate": 4.8607523862998313e-05, + "loss": 0.0862, + "step": 5952 + }, + { + "epoch": 0.84, + "learning_rate": 4.8607055961070566e-05, + "loss": 0.1048, + "step": 5954 + }, + { + "epoch": 0.84, + "learning_rate": 4.8606588059142805e-05, + "loss": 0.1208, + "step": 5956 + }, + { + "epoch": 0.84, + "learning_rate": 4.860612015721505e-05, + "loss": 0.1038, + "step": 5958 + }, + { + "epoch": 0.84, + "learning_rate": 4.860565225528729e-05, + "loss": 0.0831, + "step": 5960 + }, + { + "epoch": 0.84, + "learning_rate": 4.860518435335954e-05, + "loss": 0.0993, + "step": 5962 + }, + { + "epoch": 0.84, + "learning_rate": 4.860471645143178e-05, + "loss": 0.1206, + "step": 5964 + }, + { + "epoch": 0.84, + "learning_rate": 4.860424854950403e-05, + "loss": 0.0931, + "step": 5966 + }, + { + "epoch": 0.84, + "learning_rate": 4.860378064757627e-05, + "loss": 0.0725, + "step": 5968 + }, + { + "epoch": 0.84, + "learning_rate": 4.860331274564851e-05, + "loss": 0.0996, + "step": 5970 + }, + { + "epoch": 0.84, + "learning_rate": 4.860284484372076e-05, + "loss": 0.0704, + "step": 5972 + }, + { + "epoch": 0.84, + "learning_rate": 4.8602376941793005e-05, + "loss": 0.0899, + "step": 5974 + }, + { + "epoch": 0.84, + "learning_rate": 4.8601909039865244e-05, + "loss": 0.0883, + "step": 5976 + }, + { + "epoch": 0.84, + "learning_rate": 4.860144113793749e-05, + "loss": 0.0819, + "step": 5978 + }, + { + "epoch": 0.84, + "learning_rate": 4.8600973236009736e-05, + "loss": 0.0999, + "step": 5980 + }, + { + "epoch": 0.84, + "learning_rate": 4.860050533408198e-05, + "loss": 0.0846, + "step": 5982 + }, + { + "epoch": 0.84, + "learning_rate": 4.860003743215422e-05, + "loss": 0.0805, + "step": 5984 + }, + { + "epoch": 0.84, + "learning_rate": 4.859956953022647e-05, + "loss": 0.1264, + "step": 5986 + }, + { + "epoch": 0.84, + "learning_rate": 4.859910162829871e-05, + "loss": 0.1262, + "step": 5988 + }, + { + "epoch": 0.84, + "learning_rate": 4.859863372637096e-05, + "loss": 0.1123, + "step": 5990 + }, + { + "epoch": 0.84, + "learning_rate": 4.85981658244432e-05, + "loss": 0.106, + "step": 5992 + }, + { + "epoch": 0.84, + "learning_rate": 4.8597697922515444e-05, + "loss": 0.0991, + "step": 5994 + }, + { + "epoch": 0.84, + "learning_rate": 4.859723002058769e-05, + "loss": 0.0839, + "step": 5996 + }, + { + "epoch": 0.84, + "learning_rate": 4.8596762118659936e-05, + "loss": 0.0766, + "step": 5998 + }, + { + "epoch": 0.84, + "learning_rate": 4.8596294216732175e-05, + "loss": 0.1062, + "step": 6000 + }, + { + "epoch": 0.84, + "eval_gen_len": 29.3793, + "eval_loss": 1.022807002067566, + "eval_meteor": 0.0513, + "eval_runtime": 14.225, + "eval_samples_per_second": 4.077, + "eval_steps_per_second": 0.562, + "step": 6000 + }, + { + "epoch": 0.84, + "learning_rate": 4.859582631480442e-05, + "loss": 0.0607, + "step": 6002 + }, + { + "epoch": 0.84, + "learning_rate": 4.859535841287666e-05, + "loss": 0.0952, + "step": 6004 + }, + { + "epoch": 0.84, + "learning_rate": 4.859489051094891e-05, + "loss": 0.1087, + "step": 6006 + }, + { + "epoch": 0.84, + "learning_rate": 4.859442260902115e-05, + "loss": 0.1181, + "step": 6008 + }, + { + "epoch": 0.84, + "learning_rate": 4.85939547070934e-05, + "loss": 0.0786, + "step": 6010 + }, + { + "epoch": 0.84, + "learning_rate": 4.8593486805165637e-05, + "loss": 0.0841, + "step": 6012 + }, + { + "epoch": 0.84, + "learning_rate": 4.859301890323788e-05, + "loss": 0.1031, + "step": 6014 + }, + { + "epoch": 0.84, + "learning_rate": 4.859255100131013e-05, + "loss": 0.092, + "step": 6016 + }, + { + "epoch": 0.84, + "learning_rate": 4.8592083099382374e-05, + "loss": 0.1029, + "step": 6018 + }, + { + "epoch": 0.85, + "learning_rate": 4.8591615197454613e-05, + "loss": 0.0929, + "step": 6020 + }, + { + "epoch": 0.85, + "learning_rate": 4.859114729552686e-05, + "loss": 0.0853, + "step": 6022 + }, + { + "epoch": 0.85, + "learning_rate": 4.8590679393599105e-05, + "loss": 0.0855, + "step": 6024 + }, + { + "epoch": 0.85, + "learning_rate": 4.859021149167135e-05, + "loss": 0.0849, + "step": 6026 + }, + { + "epoch": 0.85, + "learning_rate": 4.858974358974359e-05, + "loss": 0.0936, + "step": 6028 + }, + { + "epoch": 0.85, + "learning_rate": 4.8589275687815836e-05, + "loss": 0.1235, + "step": 6030 + }, + { + "epoch": 0.85, + "learning_rate": 4.858880778588808e-05, + "loss": 0.0925, + "step": 6032 + }, + { + "epoch": 0.85, + "learning_rate": 4.858833988396033e-05, + "loss": 0.1205, + "step": 6034 + }, + { + "epoch": 0.85, + "learning_rate": 4.858787198203257e-05, + "loss": 0.1217, + "step": 6036 + }, + { + "epoch": 0.85, + "learning_rate": 4.858740408010481e-05, + "loss": 0.0957, + "step": 6038 + }, + { + "epoch": 0.85, + "learning_rate": 4.858693617817706e-05, + "loss": 0.0844, + "step": 6040 + }, + { + "epoch": 0.85, + "learning_rate": 4.8586468276249305e-05, + "loss": 0.0872, + "step": 6042 + }, + { + "epoch": 0.85, + "learning_rate": 4.8586000374321544e-05, + "loss": 0.1021, + "step": 6044 + }, + { + "epoch": 0.85, + "learning_rate": 4.858553247239379e-05, + "loss": 0.1088, + "step": 6046 + }, + { + "epoch": 0.85, + "learning_rate": 4.858506457046603e-05, + "loss": 0.0879, + "step": 6048 + }, + { + "epoch": 0.85, + "learning_rate": 4.858459666853828e-05, + "loss": 0.1001, + "step": 6050 + }, + { + "epoch": 0.85, + "learning_rate": 4.858412876661052e-05, + "loss": 0.1154, + "step": 6052 + }, + { + "epoch": 0.85, + "learning_rate": 4.858366086468277e-05, + "loss": 0.0824, + "step": 6054 + }, + { + "epoch": 0.85, + "learning_rate": 4.8583192962755006e-05, + "loss": 0.0981, + "step": 6056 + }, + { + "epoch": 0.85, + "learning_rate": 4.858272506082726e-05, + "loss": 0.0853, + "step": 6058 + }, + { + "epoch": 0.85, + "learning_rate": 4.85822571588995e-05, + "loss": 0.0906, + "step": 6060 + }, + { + "epoch": 0.85, + "learning_rate": 4.8581789256971744e-05, + "loss": 0.1102, + "step": 6062 + }, + { + "epoch": 0.85, + "learning_rate": 4.858132135504398e-05, + "loss": 0.0784, + "step": 6064 + }, + { + "epoch": 0.85, + "learning_rate": 4.858085345311623e-05, + "loss": 0.0725, + "step": 6066 + }, + { + "epoch": 0.85, + "learning_rate": 4.8580385551188475e-05, + "loss": 0.1081, + "step": 6068 + }, + { + "epoch": 0.85, + "learning_rate": 4.857991764926072e-05, + "loss": 0.1199, + "step": 6070 + }, + { + "epoch": 0.85, + "learning_rate": 4.857944974733296e-05, + "loss": 0.1477, + "step": 6072 + }, + { + "epoch": 0.85, + "learning_rate": 4.8578981845405206e-05, + "loss": 0.1329, + "step": 6074 + }, + { + "epoch": 0.85, + "learning_rate": 4.857851394347745e-05, + "loss": 0.0915, + "step": 6076 + }, + { + "epoch": 0.85, + "learning_rate": 4.857804604154969e-05, + "loss": 0.0836, + "step": 6078 + }, + { + "epoch": 0.85, + "learning_rate": 4.857757813962194e-05, + "loss": 0.1161, + "step": 6080 + }, + { + "epoch": 0.85, + "learning_rate": 4.8577110237694176e-05, + "loss": 0.0971, + "step": 6082 + }, + { + "epoch": 0.85, + "learning_rate": 4.857664233576643e-05, + "loss": 0.1114, + "step": 6084 + }, + { + "epoch": 0.85, + "learning_rate": 4.857617443383867e-05, + "loss": 0.11, + "step": 6086 + }, + { + "epoch": 0.85, + "learning_rate": 4.8575706531910913e-05, + "loss": 0.1008, + "step": 6088 + }, + { + "epoch": 0.85, + "learning_rate": 4.857523862998315e-05, + "loss": 0.1019, + "step": 6090 + }, + { + "epoch": 0.86, + "learning_rate": 4.8574770728055405e-05, + "loss": 0.1183, + "step": 6092 + }, + { + "epoch": 0.86, + "learning_rate": 4.8574302826127644e-05, + "loss": 0.1048, + "step": 6094 + }, + { + "epoch": 0.86, + "learning_rate": 4.857383492419989e-05, + "loss": 0.0978, + "step": 6096 + }, + { + "epoch": 0.86, + "learning_rate": 4.857336702227213e-05, + "loss": 0.1271, + "step": 6098 + }, + { + "epoch": 0.86, + "learning_rate": 4.8572899120344375e-05, + "loss": 0.1231, + "step": 6100 + }, + { + "epoch": 0.86, + "learning_rate": 4.857243121841662e-05, + "loss": 0.075, + "step": 6102 + }, + { + "epoch": 0.86, + "learning_rate": 4.857196331648887e-05, + "loss": 0.0971, + "step": 6104 + }, + { + "epoch": 0.86, + "learning_rate": 4.8571495414561106e-05, + "loss": 0.0998, + "step": 6106 + }, + { + "epoch": 0.86, + "learning_rate": 4.857102751263335e-05, + "loss": 0.1032, + "step": 6108 + }, + { + "epoch": 0.86, + "learning_rate": 4.85705596107056e-05, + "loss": 0.0914, + "step": 6110 + }, + { + "epoch": 0.86, + "learning_rate": 4.8570091708777844e-05, + "loss": 0.1036, + "step": 6112 + }, + { + "epoch": 0.86, + "learning_rate": 4.856962380685008e-05, + "loss": 0.1222, + "step": 6114 + }, + { + "epoch": 0.86, + "learning_rate": 4.856915590492233e-05, + "loss": 0.1505, + "step": 6116 + }, + { + "epoch": 0.86, + "learning_rate": 4.8568688002994575e-05, + "loss": 0.1288, + "step": 6118 + }, + { + "epoch": 0.86, + "learning_rate": 4.856822010106682e-05, + "loss": 0.0904, + "step": 6120 + }, + { + "epoch": 0.86, + "learning_rate": 4.856775219913906e-05, + "loss": 0.0823, + "step": 6122 + }, + { + "epoch": 0.86, + "learning_rate": 4.8567284297211306e-05, + "loss": 0.1094, + "step": 6124 + }, + { + "epoch": 0.86, + "learning_rate": 4.856681639528355e-05, + "loss": 0.0855, + "step": 6126 + }, + { + "epoch": 0.86, + "learning_rate": 4.85663484933558e-05, + "loss": 0.1334, + "step": 6128 + }, + { + "epoch": 0.86, + "learning_rate": 4.856588059142804e-05, + "loss": 0.1046, + "step": 6130 + }, + { + "epoch": 0.86, + "learning_rate": 4.856541268950028e-05, + "loss": 0.0929, + "step": 6132 + }, + { + "epoch": 0.86, + "learning_rate": 4.856494478757252e-05, + "loss": 0.1016, + "step": 6134 + }, + { + "epoch": 0.86, + "learning_rate": 4.8564476885644775e-05, + "loss": 0.0838, + "step": 6136 + }, + { + "epoch": 0.86, + "learning_rate": 4.8564008983717014e-05, + "loss": 0.0816, + "step": 6138 + }, + { + "epoch": 0.86, + "learning_rate": 4.856354108178926e-05, + "loss": 0.11, + "step": 6140 + }, + { + "epoch": 0.86, + "learning_rate": 4.85630731798615e-05, + "loss": 0.1227, + "step": 6142 + }, + { + "epoch": 0.86, + "learning_rate": 4.8562605277933745e-05, + "loss": 0.0896, + "step": 6144 + }, + { + "epoch": 0.86, + "learning_rate": 4.856213737600599e-05, + "loss": 0.1126, + "step": 6146 + }, + { + "epoch": 0.86, + "learning_rate": 4.856166947407824e-05, + "loss": 0.0829, + "step": 6148 + }, + { + "epoch": 0.86, + "learning_rate": 4.8561201572150476e-05, + "loss": 0.1106, + "step": 6150 + }, + { + "epoch": 0.86, + "learning_rate": 4.856073367022272e-05, + "loss": 0.1107, + "step": 6152 + }, + { + "epoch": 0.86, + "learning_rate": 4.856026576829497e-05, + "loss": 0.1127, + "step": 6154 + }, + { + "epoch": 0.86, + "learning_rate": 4.8559797866367214e-05, + "loss": 0.0964, + "step": 6156 + }, + { + "epoch": 0.86, + "learning_rate": 4.855932996443945e-05, + "loss": 0.1157, + "step": 6158 + }, + { + "epoch": 0.86, + "learning_rate": 4.85588620625117e-05, + "loss": 0.0899, + "step": 6160 + }, + { + "epoch": 0.86, + "learning_rate": 4.8558394160583944e-05, + "loss": 0.119, + "step": 6162 + }, + { + "epoch": 0.87, + "learning_rate": 4.855792625865619e-05, + "loss": 0.0888, + "step": 6164 + }, + { + "epoch": 0.87, + "learning_rate": 4.855745835672843e-05, + "loss": 0.1033, + "step": 6166 + }, + { + "epoch": 0.87, + "learning_rate": 4.8556990454800675e-05, + "loss": 0.1036, + "step": 6168 + }, + { + "epoch": 0.87, + "learning_rate": 4.855652255287292e-05, + "loss": 0.1108, + "step": 6170 + }, + { + "epoch": 0.87, + "learning_rate": 4.855605465094517e-05, + "loss": 0.0931, + "step": 6172 + }, + { + "epoch": 0.87, + "learning_rate": 4.8555586749017406e-05, + "loss": 0.119, + "step": 6174 + }, + { + "epoch": 0.87, + "learning_rate": 4.855511884708965e-05, + "loss": 0.096, + "step": 6176 + }, + { + "epoch": 0.87, + "learning_rate": 4.855465094516189e-05, + "loss": 0.1296, + "step": 6178 + }, + { + "epoch": 0.87, + "learning_rate": 4.8554183043234144e-05, + "loss": 0.1038, + "step": 6180 + }, + { + "epoch": 0.87, + "learning_rate": 4.855371514130638e-05, + "loss": 0.1, + "step": 6182 + }, + { + "epoch": 0.87, + "learning_rate": 4.855324723937863e-05, + "loss": 0.0973, + "step": 6184 + }, + { + "epoch": 0.87, + "learning_rate": 4.855277933745087e-05, + "loss": 0.0748, + "step": 6186 + }, + { + "epoch": 0.87, + "learning_rate": 4.855231143552312e-05, + "loss": 0.0792, + "step": 6188 + }, + { + "epoch": 0.87, + "learning_rate": 4.855184353359536e-05, + "loss": 0.0941, + "step": 6190 + }, + { + "epoch": 0.87, + "learning_rate": 4.8551375631667606e-05, + "loss": 0.0827, + "step": 6192 + }, + { + "epoch": 0.87, + "learning_rate": 4.8550907729739845e-05, + "loss": 0.1037, + "step": 6194 + }, + { + "epoch": 0.87, + "learning_rate": 4.855043982781209e-05, + "loss": 0.105, + "step": 6196 + }, + { + "epoch": 0.87, + "learning_rate": 4.854997192588434e-05, + "loss": 0.1088, + "step": 6198 + }, + { + "epoch": 0.87, + "learning_rate": 4.854950402395658e-05, + "loss": 0.1072, + "step": 6200 + }, + { + "epoch": 0.87, + "learning_rate": 4.854903612202882e-05, + "loss": 0.1163, + "step": 6202 + }, + { + "epoch": 0.87, + "learning_rate": 4.854856822010107e-05, + "loss": 0.0877, + "step": 6204 + }, + { + "epoch": 0.87, + "learning_rate": 4.8548100318173314e-05, + "loss": 0.114, + "step": 6206 + }, + { + "epoch": 0.87, + "learning_rate": 4.854763241624556e-05, + "loss": 0.1165, + "step": 6208 + }, + { + "epoch": 0.87, + "learning_rate": 4.85471645143178e-05, + "loss": 0.1088, + "step": 6210 + }, + { + "epoch": 0.87, + "learning_rate": 4.8546696612390045e-05, + "loss": 0.0901, + "step": 6212 + }, + { + "epoch": 0.87, + "learning_rate": 4.854622871046229e-05, + "loss": 0.0794, + "step": 6214 + }, + { + "epoch": 0.87, + "learning_rate": 4.854576080853454e-05, + "loss": 0.1076, + "step": 6216 + }, + { + "epoch": 0.87, + "learning_rate": 4.8545292906606776e-05, + "loss": 0.1255, + "step": 6218 + }, + { + "epoch": 0.87, + "learning_rate": 4.854482500467902e-05, + "loss": 0.0943, + "step": 6220 + }, + { + "epoch": 0.87, + "learning_rate": 4.854435710275127e-05, + "loss": 0.1077, + "step": 6222 + }, + { + "epoch": 0.87, + "learning_rate": 4.8543889200823514e-05, + "loss": 0.1009, + "step": 6224 + }, + { + "epoch": 0.87, + "learning_rate": 4.854342129889575e-05, + "loss": 0.1059, + "step": 6226 + }, + { + "epoch": 0.87, + "learning_rate": 4.8542953396968e-05, + "loss": 0.0911, + "step": 6228 + }, + { + "epoch": 0.87, + "learning_rate": 4.854248549504024e-05, + "loss": 0.183, + "step": 6230 + }, + { + "epoch": 0.87, + "learning_rate": 4.854201759311249e-05, + "loss": 0.1078, + "step": 6232 + }, + { + "epoch": 0.88, + "learning_rate": 4.854154969118473e-05, + "loss": 0.0954, + "step": 6234 + }, + { + "epoch": 0.88, + "learning_rate": 4.8541081789256975e-05, + "loss": 0.1076, + "step": 6236 + }, + { + "epoch": 0.88, + "learning_rate": 4.8540613887329215e-05, + "loss": 0.0982, + "step": 6238 + }, + { + "epoch": 0.88, + "learning_rate": 4.854014598540147e-05, + "loss": 0.1291, + "step": 6240 + }, + { + "epoch": 0.88, + "learning_rate": 4.8539678083473706e-05, + "loss": 0.0829, + "step": 6242 + }, + { + "epoch": 0.88, + "learning_rate": 4.853921018154595e-05, + "loss": 0.0662, + "step": 6244 + }, + { + "epoch": 0.88, + "learning_rate": 4.853874227961819e-05, + "loss": 0.0739, + "step": 6246 + }, + { + "epoch": 0.88, + "learning_rate": 4.853827437769044e-05, + "loss": 0.0794, + "step": 6248 + }, + { + "epoch": 0.88, + "learning_rate": 4.853780647576268e-05, + "loss": 0.1045, + "step": 6250 + }, + { + "epoch": 0.88, + "learning_rate": 4.853733857383493e-05, + "loss": 0.0904, + "step": 6252 + }, + { + "epoch": 0.88, + "learning_rate": 4.853687067190717e-05, + "loss": 0.077, + "step": 6254 + }, + { + "epoch": 0.88, + "learning_rate": 4.8536402769979414e-05, + "loss": 0.0991, + "step": 6256 + }, + { + "epoch": 0.88, + "learning_rate": 4.853593486805166e-05, + "loss": 0.1022, + "step": 6258 + }, + { + "epoch": 0.88, + "learning_rate": 4.8535466966123906e-05, + "loss": 0.106, + "step": 6260 + }, + { + "epoch": 0.88, + "learning_rate": 4.8534999064196145e-05, + "loss": 0.1008, + "step": 6262 + }, + { + "epoch": 0.88, + "learning_rate": 4.853453116226839e-05, + "loss": 0.0948, + "step": 6264 + }, + { + "epoch": 0.88, + "learning_rate": 4.853406326034064e-05, + "loss": 0.0963, + "step": 6266 + }, + { + "epoch": 0.88, + "learning_rate": 4.853359535841288e-05, + "loss": 0.0881, + "step": 6268 + }, + { + "epoch": 0.88, + "learning_rate": 4.853312745648512e-05, + "loss": 0.0933, + "step": 6270 + }, + { + "epoch": 0.88, + "learning_rate": 4.853265955455737e-05, + "loss": 0.0912, + "step": 6272 + }, + { + "epoch": 0.88, + "learning_rate": 4.8532191652629614e-05, + "loss": 0.1104, + "step": 6274 + }, + { + "epoch": 0.88, + "learning_rate": 4.853172375070186e-05, + "loss": 0.11, + "step": 6276 + }, + { + "epoch": 0.88, + "learning_rate": 4.85312558487741e-05, + "loss": 0.0975, + "step": 6278 + }, + { + "epoch": 0.88, + "learning_rate": 4.8530787946846345e-05, + "loss": 0.1034, + "step": 6280 + }, + { + "epoch": 0.88, + "learning_rate": 4.8530320044918584e-05, + "loss": 0.0773, + "step": 6282 + }, + { + "epoch": 0.88, + "learning_rate": 4.852985214299084e-05, + "loss": 0.1249, + "step": 6284 + }, + { + "epoch": 0.88, + "learning_rate": 4.8529384241063076e-05, + "loss": 0.1355, + "step": 6286 + }, + { + "epoch": 0.88, + "learning_rate": 4.852891633913532e-05, + "loss": 0.0809, + "step": 6288 + }, + { + "epoch": 0.88, + "learning_rate": 4.852844843720756e-05, + "loss": 0.1087, + "step": 6290 + }, + { + "epoch": 0.88, + "learning_rate": 4.852798053527981e-05, + "loss": 0.0923, + "step": 6292 + }, + { + "epoch": 0.88, + "learning_rate": 4.852751263335205e-05, + "loss": 0.1128, + "step": 6294 + }, + { + "epoch": 0.88, + "learning_rate": 4.85270447314243e-05, + "loss": 0.0799, + "step": 6296 + }, + { + "epoch": 0.88, + "learning_rate": 4.852657682949654e-05, + "loss": 0.0949, + "step": 6298 + }, + { + "epoch": 0.88, + "learning_rate": 4.8526108927568784e-05, + "loss": 0.1028, + "step": 6300 + }, + { + "epoch": 0.88, + "learning_rate": 4.852564102564103e-05, + "loss": 0.1062, + "step": 6302 + }, + { + "epoch": 0.88, + "learning_rate": 4.8525173123713275e-05, + "loss": 0.0819, + "step": 6304 + }, + { + "epoch": 0.89, + "learning_rate": 4.8524705221785515e-05, + "loss": 0.0895, + "step": 6306 + }, + { + "epoch": 0.89, + "learning_rate": 4.852423731985776e-05, + "loss": 0.0985, + "step": 6308 + }, + { + "epoch": 0.89, + "learning_rate": 4.8523769417930006e-05, + "loss": 0.0971, + "step": 6310 + }, + { + "epoch": 0.89, + "learning_rate": 4.852330151600225e-05, + "loss": 0.1381, + "step": 6312 + }, + { + "epoch": 0.89, + "learning_rate": 4.852283361407449e-05, + "loss": 0.0967, + "step": 6314 + }, + { + "epoch": 0.89, + "learning_rate": 4.852236571214674e-05, + "loss": 0.1021, + "step": 6316 + }, + { + "epoch": 0.89, + "learning_rate": 4.852189781021898e-05, + "loss": 0.0963, + "step": 6318 + }, + { + "epoch": 0.89, + "learning_rate": 4.852142990829123e-05, + "loss": 0.1066, + "step": 6320 + }, + { + "epoch": 0.89, + "learning_rate": 4.852096200636347e-05, + "loss": 0.0817, + "step": 6322 + }, + { + "epoch": 0.89, + "learning_rate": 4.8520494104435714e-05, + "loss": 0.1203, + "step": 6324 + }, + { + "epoch": 0.89, + "learning_rate": 4.8520026202507953e-05, + "loss": 0.0768, + "step": 6326 + }, + { + "epoch": 0.89, + "learning_rate": 4.8519558300580206e-05, + "loss": 0.0735, + "step": 6328 + }, + { + "epoch": 0.89, + "learning_rate": 4.8519090398652445e-05, + "loss": 0.1131, + "step": 6330 + }, + { + "epoch": 0.89, + "learning_rate": 4.8518622496724684e-05, + "loss": 0.0946, + "step": 6332 + }, + { + "epoch": 0.89, + "learning_rate": 4.851815459479693e-05, + "loss": 0.1166, + "step": 6334 + }, + { + "epoch": 0.89, + "learning_rate": 4.8517686692869176e-05, + "loss": 0.0827, + "step": 6336 + }, + { + "epoch": 0.89, + "learning_rate": 4.851721879094142e-05, + "loss": 0.0983, + "step": 6338 + }, + { + "epoch": 0.89, + "learning_rate": 4.851675088901366e-05, + "loss": 0.0972, + "step": 6340 + }, + { + "epoch": 0.89, + "learning_rate": 4.851628298708591e-05, + "loss": 0.0934, + "step": 6342 + }, + { + "epoch": 0.89, + "learning_rate": 4.851581508515815e-05, + "loss": 0.1255, + "step": 6344 + }, + { + "epoch": 0.89, + "learning_rate": 4.85153471832304e-05, + "loss": 0.0948, + "step": 6346 + }, + { + "epoch": 0.89, + "learning_rate": 4.851487928130264e-05, + "loss": 0.0938, + "step": 6348 + }, + { + "epoch": 0.89, + "learning_rate": 4.8514411379374884e-05, + "loss": 0.0958, + "step": 6350 + }, + { + "epoch": 0.89, + "learning_rate": 4.851394347744713e-05, + "loss": 0.1154, + "step": 6352 + }, + { + "epoch": 0.89, + "learning_rate": 4.8513475575519376e-05, + "loss": 0.1278, + "step": 6354 + }, + { + "epoch": 0.89, + "learning_rate": 4.8513007673591615e-05, + "loss": 0.1073, + "step": 6356 + }, + { + "epoch": 0.89, + "learning_rate": 4.851253977166386e-05, + "loss": 0.109, + "step": 6358 + }, + { + "epoch": 0.89, + "learning_rate": 4.85120718697361e-05, + "loss": 0.102, + "step": 6360 + }, + { + "epoch": 0.89, + "learning_rate": 4.851160396780835e-05, + "loss": 0.0921, + "step": 6362 + }, + { + "epoch": 0.89, + "learning_rate": 4.851113606588059e-05, + "loss": 0.0813, + "step": 6364 + }, + { + "epoch": 0.89, + "learning_rate": 4.851066816395284e-05, + "loss": 0.1047, + "step": 6366 + }, + { + "epoch": 0.89, + "learning_rate": 4.851020026202508e-05, + "loss": 0.1126, + "step": 6368 + }, + { + "epoch": 0.89, + "learning_rate": 4.850973236009733e-05, + "loss": 0.0892, + "step": 6370 + }, + { + "epoch": 0.89, + "learning_rate": 4.850926445816957e-05, + "loss": 0.1046, + "step": 6372 + }, + { + "epoch": 0.89, + "learning_rate": 4.8508796556241815e-05, + "loss": 0.1093, + "step": 6374 + }, + { + "epoch": 0.9, + "learning_rate": 4.8508328654314054e-05, + "loss": 0.1002, + "step": 6376 + }, + { + "epoch": 0.9, + "learning_rate": 4.85078607523863e-05, + "loss": 0.1065, + "step": 6378 + }, + { + "epoch": 0.9, + "learning_rate": 4.8507392850458546e-05, + "loss": 0.1215, + "step": 6380 + }, + { + "epoch": 0.9, + "learning_rate": 4.850692494853079e-05, + "loss": 0.0916, + "step": 6382 + }, + { + "epoch": 0.9, + "learning_rate": 4.850645704660303e-05, + "loss": 0.0933, + "step": 6384 + }, + { + "epoch": 0.9, + "learning_rate": 4.8505989144675277e-05, + "loss": 0.1038, + "step": 6386 + }, + { + "epoch": 0.9, + "learning_rate": 4.850552124274752e-05, + "loss": 0.0875, + "step": 6388 + }, + { + "epoch": 0.9, + "learning_rate": 4.850505334081977e-05, + "loss": 0.1225, + "step": 6390 + }, + { + "epoch": 0.9, + "learning_rate": 4.850458543889201e-05, + "loss": 0.0956, + "step": 6392 + }, + { + "epoch": 0.9, + "learning_rate": 4.8504117536964253e-05, + "loss": 0.1178, + "step": 6394 + }, + { + "epoch": 0.9, + "learning_rate": 4.85036496350365e-05, + "loss": 0.1011, + "step": 6396 + }, + { + "epoch": 0.9, + "learning_rate": 4.8503181733108745e-05, + "loss": 0.0821, + "step": 6398 + }, + { + "epoch": 0.9, + "learning_rate": 4.8502713831180984e-05, + "loss": 0.1073, + "step": 6400 + }, + { + "epoch": 0.9, + "learning_rate": 4.850224592925323e-05, + "loss": 0.0952, + "step": 6402 + }, + { + "epoch": 0.9, + "learning_rate": 4.8501778027325476e-05, + "loss": 0.1331, + "step": 6404 + }, + { + "epoch": 0.9, + "learning_rate": 4.850131012539772e-05, + "loss": 0.0767, + "step": 6406 + }, + { + "epoch": 0.9, + "learning_rate": 4.850084222346996e-05, + "loss": 0.1026, + "step": 6408 + }, + { + "epoch": 0.9, + "learning_rate": 4.850037432154221e-05, + "loss": 0.0974, + "step": 6410 + }, + { + "epoch": 0.9, + "learning_rate": 4.8499906419614446e-05, + "loss": 0.1189, + "step": 6412 + }, + { + "epoch": 0.9, + "learning_rate": 4.84994385176867e-05, + "loss": 0.1029, + "step": 6414 + }, + { + "epoch": 0.9, + "learning_rate": 4.849897061575894e-05, + "loss": 0.0899, + "step": 6416 + }, + { + "epoch": 0.9, + "learning_rate": 4.8498502713831184e-05, + "loss": 0.0949, + "step": 6418 + }, + { + "epoch": 0.9, + "learning_rate": 4.849803481190342e-05, + "loss": 0.0998, + "step": 6420 + }, + { + "epoch": 0.9, + "learning_rate": 4.849756690997567e-05, + "loss": 0.0758, + "step": 6422 + }, + { + "epoch": 0.9, + "learning_rate": 4.8497099008047915e-05, + "loss": 0.1262, + "step": 6424 + }, + { + "epoch": 0.9, + "learning_rate": 4.849663110612016e-05, + "loss": 0.0781, + "step": 6426 + }, + { + "epoch": 0.9, + "learning_rate": 4.84961632041924e-05, + "loss": 0.0878, + "step": 6428 + }, + { + "epoch": 0.9, + "learning_rate": 4.8495695302264646e-05, + "loss": 0.0719, + "step": 6430 + }, + { + "epoch": 0.9, + "learning_rate": 4.849522740033689e-05, + "loss": 0.0843, + "step": 6432 + }, + { + "epoch": 0.9, + "learning_rate": 4.849475949840914e-05, + "loss": 0.0928, + "step": 6434 + }, + { + "epoch": 0.9, + "learning_rate": 4.849429159648138e-05, + "loss": 0.1238, + "step": 6436 + }, + { + "epoch": 0.9, + "learning_rate": 4.849382369455362e-05, + "loss": 0.113, + "step": 6438 + }, + { + "epoch": 0.9, + "learning_rate": 4.849335579262587e-05, + "loss": 0.0953, + "step": 6440 + }, + { + "epoch": 0.9, + "learning_rate": 4.8492887890698115e-05, + "loss": 0.1073, + "step": 6442 + }, + { + "epoch": 0.9, + "learning_rate": 4.8492419988770354e-05, + "loss": 0.1078, + "step": 6444 + }, + { + "epoch": 0.9, + "learning_rate": 4.84919520868426e-05, + "loss": 0.0983, + "step": 6446 + }, + { + "epoch": 0.91, + "learning_rate": 4.8491484184914846e-05, + "loss": 0.087, + "step": 6448 + }, + { + "epoch": 0.91, + "learning_rate": 4.849101628298709e-05, + "loss": 0.1162, + "step": 6450 + }, + { + "epoch": 0.91, + "learning_rate": 4.849054838105933e-05, + "loss": 0.1125, + "step": 6452 + }, + { + "epoch": 0.91, + "learning_rate": 4.8490080479131577e-05, + "loss": 0.1088, + "step": 6454 + }, + { + "epoch": 0.91, + "learning_rate": 4.8489612577203816e-05, + "loss": 0.1248, + "step": 6456 + }, + { + "epoch": 0.91, + "learning_rate": 4.848914467527607e-05, + "loss": 0.0978, + "step": 6458 + }, + { + "epoch": 0.91, + "learning_rate": 4.848867677334831e-05, + "loss": 0.0851, + "step": 6460 + }, + { + "epoch": 0.91, + "learning_rate": 4.8488208871420553e-05, + "loss": 0.1121, + "step": 6462 + }, + { + "epoch": 0.91, + "learning_rate": 4.848774096949279e-05, + "loss": 0.0838, + "step": 6464 + }, + { + "epoch": 0.91, + "learning_rate": 4.8487273067565045e-05, + "loss": 0.1132, + "step": 6466 + }, + { + "epoch": 0.91, + "learning_rate": 4.8486805165637284e-05, + "loss": 0.1201, + "step": 6468 + }, + { + "epoch": 0.91, + "learning_rate": 4.848633726370953e-05, + "loss": 0.0927, + "step": 6470 + }, + { + "epoch": 0.91, + "learning_rate": 4.848586936178177e-05, + "loss": 0.1031, + "step": 6472 + }, + { + "epoch": 0.91, + "learning_rate": 4.8485401459854015e-05, + "loss": 0.1185, + "step": 6474 + }, + { + "epoch": 0.91, + "learning_rate": 4.848493355792626e-05, + "loss": 0.1226, + "step": 6476 + }, + { + "epoch": 0.91, + "learning_rate": 4.848446565599851e-05, + "loss": 0.1114, + "step": 6478 + }, + { + "epoch": 0.91, + "learning_rate": 4.8483997754070746e-05, + "loss": 0.1083, + "step": 6480 + }, + { + "epoch": 0.91, + "learning_rate": 4.848352985214299e-05, + "loss": 0.1019, + "step": 6482 + }, + { + "epoch": 0.91, + "learning_rate": 4.848306195021524e-05, + "loss": 0.077, + "step": 6484 + }, + { + "epoch": 0.91, + "learning_rate": 4.8482594048287484e-05, + "loss": 0.1112, + "step": 6486 + }, + { + "epoch": 0.91, + "learning_rate": 4.848212614635972e-05, + "loss": 0.0828, + "step": 6488 + }, + { + "epoch": 0.91, + "learning_rate": 4.848165824443197e-05, + "loss": 0.1204, + "step": 6490 + }, + { + "epoch": 0.91, + "learning_rate": 4.8481190342504215e-05, + "loss": 0.1105, + "step": 6492 + }, + { + "epoch": 0.91, + "learning_rate": 4.848072244057646e-05, + "loss": 0.0913, + "step": 6494 + }, + { + "epoch": 0.91, + "learning_rate": 4.84802545386487e-05, + "loss": 0.1125, + "step": 6496 + }, + { + "epoch": 0.91, + "learning_rate": 4.8479786636720946e-05, + "loss": 0.1119, + "step": 6498 + }, + { + "epoch": 0.91, + "learning_rate": 4.847931873479319e-05, + "loss": 0.106, + "step": 6500 + }, + { + "epoch": 0.91, + "learning_rate": 4.847885083286544e-05, + "loss": 0.0962, + "step": 6502 + }, + { + "epoch": 0.91, + "learning_rate": 4.847838293093768e-05, + "loss": 0.1283, + "step": 6504 + }, + { + "epoch": 0.91, + "learning_rate": 4.847791502900992e-05, + "loss": 0.0884, + "step": 6506 + }, + { + "epoch": 0.91, + "learning_rate": 4.847744712708216e-05, + "loss": 0.0893, + "step": 6508 + }, + { + "epoch": 0.91, + "learning_rate": 4.8476979225154415e-05, + "loss": 0.1104, + "step": 6510 + }, + { + "epoch": 0.91, + "learning_rate": 4.8476511323226654e-05, + "loss": 0.0902, + "step": 6512 + }, + { + "epoch": 0.91, + "learning_rate": 4.84760434212989e-05, + "loss": 0.1052, + "step": 6514 + }, + { + "epoch": 0.91, + "learning_rate": 4.847557551937114e-05, + "loss": 0.0942, + "step": 6516 + }, + { + "epoch": 0.91, + "learning_rate": 4.847510761744339e-05, + "loss": 0.1106, + "step": 6518 + }, + { + "epoch": 0.92, + "learning_rate": 4.847463971551563e-05, + "loss": 0.0741, + "step": 6520 + }, + { + "epoch": 0.92, + "learning_rate": 4.8474171813587877e-05, + "loss": 0.0991, + "step": 6522 + }, + { + "epoch": 0.92, + "learning_rate": 4.8473703911660116e-05, + "loss": 0.1039, + "step": 6524 + }, + { + "epoch": 0.92, + "learning_rate": 4.847323600973236e-05, + "loss": 0.1115, + "step": 6526 + }, + { + "epoch": 0.92, + "learning_rate": 4.847276810780461e-05, + "loss": 0.1064, + "step": 6528 + }, + { + "epoch": 0.92, + "learning_rate": 4.8472300205876853e-05, + "loss": 0.1291, + "step": 6530 + }, + { + "epoch": 0.92, + "learning_rate": 4.847183230394909e-05, + "loss": 0.1032, + "step": 6532 + }, + { + "epoch": 0.92, + "learning_rate": 4.847136440202134e-05, + "loss": 0.1156, + "step": 6534 + }, + { + "epoch": 0.92, + "learning_rate": 4.8470896500093584e-05, + "loss": 0.1356, + "step": 6536 + }, + { + "epoch": 0.92, + "learning_rate": 4.847042859816583e-05, + "loss": 0.1375, + "step": 6538 + }, + { + "epoch": 0.92, + "learning_rate": 4.846996069623807e-05, + "loss": 0.0808, + "step": 6540 + }, + { + "epoch": 0.92, + "learning_rate": 4.8469492794310315e-05, + "loss": 0.1261, + "step": 6542 + }, + { + "epoch": 0.92, + "learning_rate": 4.846902489238256e-05, + "loss": 0.0996, + "step": 6544 + }, + { + "epoch": 0.92, + "learning_rate": 4.846855699045481e-05, + "loss": 0.1103, + "step": 6546 + }, + { + "epoch": 0.92, + "learning_rate": 4.8468089088527046e-05, + "loss": 0.1061, + "step": 6548 + }, + { + "epoch": 0.92, + "learning_rate": 4.846762118659929e-05, + "loss": 0.0861, + "step": 6550 + }, + { + "epoch": 0.92, + "learning_rate": 4.846715328467154e-05, + "loss": 0.0728, + "step": 6552 + }, + { + "epoch": 0.92, + "learning_rate": 4.8466685382743784e-05, + "loss": 0.1171, + "step": 6554 + }, + { + "epoch": 0.92, + "learning_rate": 4.846621748081602e-05, + "loss": 0.092, + "step": 6556 + }, + { + "epoch": 0.92, + "learning_rate": 4.846574957888827e-05, + "loss": 0.1062, + "step": 6558 + }, + { + "epoch": 0.92, + "learning_rate": 4.846528167696051e-05, + "loss": 0.0762, + "step": 6560 + }, + { + "epoch": 0.92, + "learning_rate": 4.846481377503276e-05, + "loss": 0.0735, + "step": 6562 + }, + { + "epoch": 0.92, + "learning_rate": 4.8464345873105e-05, + "loss": 0.0945, + "step": 6564 + }, + { + "epoch": 0.92, + "learning_rate": 4.8463877971177246e-05, + "loss": 0.1233, + "step": 6566 + }, + { + "epoch": 0.92, + "learning_rate": 4.8463410069249485e-05, + "loss": 0.1, + "step": 6568 + }, + { + "epoch": 0.92, + "learning_rate": 4.846294216732173e-05, + "loss": 0.0977, + "step": 6570 + }, + { + "epoch": 0.92, + "learning_rate": 4.846247426539398e-05, + "loss": 0.0987, + "step": 6572 + }, + { + "epoch": 0.92, + "learning_rate": 4.846200636346622e-05, + "loss": 0.1063, + "step": 6574 + }, + { + "epoch": 0.92, + "learning_rate": 4.846153846153846e-05, + "loss": 0.0971, + "step": 6576 + }, + { + "epoch": 0.92, + "learning_rate": 4.846107055961071e-05, + "loss": 0.1042, + "step": 6578 + }, + { + "epoch": 0.92, + "learning_rate": 4.8460602657682954e-05, + "loss": 0.1063, + "step": 6580 + }, + { + "epoch": 0.92, + "learning_rate": 4.84601347557552e-05, + "loss": 0.1105, + "step": 6582 + }, + { + "epoch": 0.92, + "learning_rate": 4.845966685382744e-05, + "loss": 0.1092, + "step": 6584 + }, + { + "epoch": 0.92, + "learning_rate": 4.8459198951899685e-05, + "loss": 0.0794, + "step": 6586 + }, + { + "epoch": 0.92, + "learning_rate": 4.845873104997193e-05, + "loss": 0.1062, + "step": 6588 + }, + { + "epoch": 0.93, + "learning_rate": 4.845826314804417e-05, + "loss": 0.1065, + "step": 6590 + }, + { + "epoch": 0.93, + "learning_rate": 4.8457795246116416e-05, + "loss": 0.1123, + "step": 6592 + }, + { + "epoch": 0.93, + "learning_rate": 4.8457327344188655e-05, + "loss": 0.1104, + "step": 6594 + }, + { + "epoch": 0.93, + "learning_rate": 4.845685944226091e-05, + "loss": 0.0636, + "step": 6596 + }, + { + "epoch": 0.93, + "learning_rate": 4.845639154033315e-05, + "loss": 0.0963, + "step": 6598 + }, + { + "epoch": 0.93, + "learning_rate": 4.845592363840539e-05, + "loss": 0.1153, + "step": 6600 + }, + { + "epoch": 0.93, + "learning_rate": 4.845545573647763e-05, + "loss": 0.1124, + "step": 6602 + }, + { + "epoch": 0.93, + "learning_rate": 4.845498783454988e-05, + "loss": 0.167, + "step": 6604 + }, + { + "epoch": 0.93, + "learning_rate": 4.8454519932622124e-05, + "loss": 0.0995, + "step": 6606 + }, + { + "epoch": 0.93, + "learning_rate": 4.845405203069437e-05, + "loss": 0.1159, + "step": 6608 + }, + { + "epoch": 0.93, + "learning_rate": 4.845358412876661e-05, + "loss": 0.1221, + "step": 6610 + }, + { + "epoch": 0.93, + "learning_rate": 4.8453116226838855e-05, + "loss": 0.0812, + "step": 6612 + }, + { + "epoch": 0.93, + "learning_rate": 4.84526483249111e-05, + "loss": 0.0912, + "step": 6614 + }, + { + "epoch": 0.93, + "learning_rate": 4.8452180422983346e-05, + "loss": 0.125, + "step": 6616 + }, + { + "epoch": 0.93, + "learning_rate": 4.8451712521055585e-05, + "loss": 0.1155, + "step": 6618 + }, + { + "epoch": 0.93, + "learning_rate": 4.845124461912783e-05, + "loss": 0.0926, + "step": 6620 + }, + { + "epoch": 0.93, + "learning_rate": 4.845077671720008e-05, + "loss": 0.1366, + "step": 6622 + }, + { + "epoch": 0.93, + "learning_rate": 4.845030881527232e-05, + "loss": 0.1135, + "step": 6624 + }, + { + "epoch": 0.93, + "learning_rate": 4.844984091334456e-05, + "loss": 0.1239, + "step": 6626 + }, + { + "epoch": 0.93, + "learning_rate": 4.844937301141681e-05, + "loss": 0.1278, + "step": 6628 + }, + { + "epoch": 0.93, + "learning_rate": 4.8448905109489054e-05, + "loss": 0.0896, + "step": 6630 + }, + { + "epoch": 0.93, + "learning_rate": 4.84484372075613e-05, + "loss": 0.0938, + "step": 6632 + }, + { + "epoch": 0.93, + "learning_rate": 4.844796930563354e-05, + "loss": 0.0911, + "step": 6634 + }, + { + "epoch": 0.93, + "learning_rate": 4.8447501403705785e-05, + "loss": 0.1133, + "step": 6636 + }, + { + "epoch": 0.93, + "learning_rate": 4.8447033501778024e-05, + "loss": 0.1028, + "step": 6638 + }, + { + "epoch": 0.93, + "learning_rate": 4.844656559985028e-05, + "loss": 0.1258, + "step": 6640 + }, + { + "epoch": 0.93, + "learning_rate": 4.8446097697922516e-05, + "loss": 0.1117, + "step": 6642 + }, + { + "epoch": 0.93, + "learning_rate": 4.844562979599476e-05, + "loss": 0.0975, + "step": 6644 + }, + { + "epoch": 0.93, + "learning_rate": 4.8445161894067e-05, + "loss": 0.1121, + "step": 6646 + }, + { + "epoch": 0.93, + "learning_rate": 4.8444693992139254e-05, + "loss": 0.1198, + "step": 6648 + }, + { + "epoch": 0.93, + "learning_rate": 4.844422609021149e-05, + "loss": 0.1173, + "step": 6650 + }, + { + "epoch": 0.93, + "learning_rate": 4.844375818828374e-05, + "loss": 0.1062, + "step": 6652 + }, + { + "epoch": 0.93, + "learning_rate": 4.844329028635598e-05, + "loss": 0.0994, + "step": 6654 + }, + { + "epoch": 0.93, + "learning_rate": 4.8442822384428224e-05, + "loss": 0.1192, + "step": 6656 + }, + { + "epoch": 0.93, + "learning_rate": 4.844235448250047e-05, + "loss": 0.0775, + "step": 6658 + }, + { + "epoch": 0.93, + "learning_rate": 4.8441886580572716e-05, + "loss": 0.1088, + "step": 6660 + }, + { + "epoch": 0.94, + "learning_rate": 4.8441418678644955e-05, + "loss": 0.1271, + "step": 6662 + }, + { + "epoch": 0.94, + "learning_rate": 4.84409507767172e-05, + "loss": 0.0869, + "step": 6664 + }, + { + "epoch": 0.94, + "learning_rate": 4.844048287478945e-05, + "loss": 0.1133, + "step": 6666 + }, + { + "epoch": 0.94, + "learning_rate": 4.844001497286169e-05, + "loss": 0.0873, + "step": 6668 + }, + { + "epoch": 0.94, + "learning_rate": 4.843954707093393e-05, + "loss": 0.0897, + "step": 6670 + }, + { + "epoch": 0.94, + "learning_rate": 4.843907916900618e-05, + "loss": 0.0942, + "step": 6672 + }, + { + "epoch": 0.94, + "learning_rate": 4.8438611267078424e-05, + "loss": 0.1154, + "step": 6674 + }, + { + "epoch": 0.94, + "learning_rate": 4.843814336515067e-05, + "loss": 0.106, + "step": 6676 + }, + { + "epoch": 0.94, + "learning_rate": 4.843767546322291e-05, + "loss": 0.1179, + "step": 6678 + }, + { + "epoch": 0.94, + "learning_rate": 4.8437207561295155e-05, + "loss": 0.1076, + "step": 6680 + }, + { + "epoch": 0.94, + "learning_rate": 4.84367396593674e-05, + "loss": 0.1158, + "step": 6682 + }, + { + "epoch": 0.94, + "learning_rate": 4.8436271757439646e-05, + "loss": 0.1141, + "step": 6684 + }, + { + "epoch": 0.94, + "learning_rate": 4.8435803855511885e-05, + "loss": 0.1287, + "step": 6686 + }, + { + "epoch": 0.94, + "learning_rate": 4.843533595358413e-05, + "loss": 0.0818, + "step": 6688 + }, + { + "epoch": 0.94, + "learning_rate": 4.843486805165637e-05, + "loss": 0.1097, + "step": 6690 + }, + { + "epoch": 0.94, + "learning_rate": 4.843440014972862e-05, + "loss": 0.0919, + "step": 6692 + }, + { + "epoch": 0.94, + "learning_rate": 4.843393224780086e-05, + "loss": 0.0963, + "step": 6694 + }, + { + "epoch": 0.94, + "learning_rate": 4.843346434587311e-05, + "loss": 0.1151, + "step": 6696 + }, + { + "epoch": 0.94, + "learning_rate": 4.843299644394535e-05, + "loss": 0.1007, + "step": 6698 + }, + { + "epoch": 0.94, + "learning_rate": 4.843252854201759e-05, + "loss": 0.1192, + "step": 6700 + }, + { + "epoch": 0.94, + "learning_rate": 4.843206064008984e-05, + "loss": 0.0704, + "step": 6702 + }, + { + "epoch": 0.94, + "learning_rate": 4.8431592738162085e-05, + "loss": 0.1191, + "step": 6704 + }, + { + "epoch": 0.94, + "learning_rate": 4.8431124836234324e-05, + "loss": 0.0758, + "step": 6706 + }, + { + "epoch": 0.94, + "learning_rate": 4.843065693430657e-05, + "loss": 0.12, + "step": 6708 + }, + { + "epoch": 0.94, + "learning_rate": 4.8430189032378816e-05, + "loss": 0.1071, + "step": 6710 + }, + { + "epoch": 0.94, + "learning_rate": 4.842972113045106e-05, + "loss": 0.1074, + "step": 6712 + }, + { + "epoch": 0.94, + "learning_rate": 4.84292532285233e-05, + "loss": 0.0955, + "step": 6714 + }, + { + "epoch": 0.94, + "learning_rate": 4.842878532659555e-05, + "loss": 0.0992, + "step": 6716 + }, + { + "epoch": 0.94, + "learning_rate": 4.842831742466779e-05, + "loss": 0.1008, + "step": 6718 + }, + { + "epoch": 0.94, + "learning_rate": 4.842784952274004e-05, + "loss": 0.1133, + "step": 6720 + }, + { + "epoch": 0.94, + "learning_rate": 4.842738162081228e-05, + "loss": 0.1024, + "step": 6722 + }, + { + "epoch": 0.94, + "learning_rate": 4.8426913718884524e-05, + "loss": 0.0731, + "step": 6724 + }, + { + "epoch": 0.94, + "learning_rate": 4.842644581695677e-05, + "loss": 0.0925, + "step": 6726 + }, + { + "epoch": 0.94, + "learning_rate": 4.8425977915029016e-05, + "loss": 0.0857, + "step": 6728 + }, + { + "epoch": 0.94, + "learning_rate": 4.8425510013101255e-05, + "loss": 0.0843, + "step": 6730 + }, + { + "epoch": 0.94, + "learning_rate": 4.84250421111735e-05, + "loss": 0.106, + "step": 6732 + }, + { + "epoch": 0.95, + "learning_rate": 4.842457420924574e-05, + "loss": 0.1088, + "step": 6734 + }, + { + "epoch": 0.95, + "learning_rate": 4.842410630731799e-05, + "loss": 0.097, + "step": 6736 + }, + { + "epoch": 0.95, + "learning_rate": 4.842363840539023e-05, + "loss": 0.1064, + "step": 6738 + }, + { + "epoch": 0.95, + "learning_rate": 4.842317050346248e-05, + "loss": 0.1006, + "step": 6740 + }, + { + "epoch": 0.95, + "learning_rate": 4.842270260153472e-05, + "loss": 0.1257, + "step": 6742 + }, + { + "epoch": 0.95, + "learning_rate": 4.842223469960697e-05, + "loss": 0.1, + "step": 6744 + }, + { + "epoch": 0.95, + "learning_rate": 4.842176679767921e-05, + "loss": 0.1103, + "step": 6746 + }, + { + "epoch": 0.95, + "learning_rate": 4.8421298895751455e-05, + "loss": 0.108, + "step": 6748 + }, + { + "epoch": 0.95, + "learning_rate": 4.8420830993823694e-05, + "loss": 0.0737, + "step": 6750 + }, + { + "epoch": 0.95, + "learning_rate": 4.842036309189594e-05, + "loss": 0.1007, + "step": 6752 + }, + { + "epoch": 0.95, + "learning_rate": 4.8419895189968186e-05, + "loss": 0.0944, + "step": 6754 + }, + { + "epoch": 0.95, + "learning_rate": 4.841942728804043e-05, + "loss": 0.1202, + "step": 6756 + }, + { + "epoch": 0.95, + "learning_rate": 4.841895938611267e-05, + "loss": 0.0799, + "step": 6758 + }, + { + "epoch": 0.95, + "learning_rate": 4.8418491484184916e-05, + "loss": 0.0949, + "step": 6760 + }, + { + "epoch": 0.95, + "learning_rate": 4.841802358225716e-05, + "loss": 0.1105, + "step": 6762 + }, + { + "epoch": 0.95, + "learning_rate": 4.841755568032941e-05, + "loss": 0.0744, + "step": 6764 + }, + { + "epoch": 0.95, + "learning_rate": 4.841708777840165e-05, + "loss": 0.1234, + "step": 6766 + }, + { + "epoch": 0.95, + "learning_rate": 4.841661987647389e-05, + "loss": 0.092, + "step": 6768 + }, + { + "epoch": 0.95, + "learning_rate": 4.841615197454614e-05, + "loss": 0.0982, + "step": 6770 + }, + { + "epoch": 0.95, + "learning_rate": 4.8415684072618385e-05, + "loss": 0.1087, + "step": 6772 + }, + { + "epoch": 0.95, + "learning_rate": 4.8415216170690624e-05, + "loss": 0.0769, + "step": 6774 + }, + { + "epoch": 0.95, + "learning_rate": 4.841474826876287e-05, + "loss": 0.0998, + "step": 6776 + }, + { + "epoch": 0.95, + "learning_rate": 4.8414280366835116e-05, + "loss": 0.097, + "step": 6778 + }, + { + "epoch": 0.95, + "learning_rate": 4.841381246490736e-05, + "loss": 0.1062, + "step": 6780 + }, + { + "epoch": 0.95, + "learning_rate": 4.84133445629796e-05, + "loss": 0.1102, + "step": 6782 + }, + { + "epoch": 0.95, + "learning_rate": 4.841287666105185e-05, + "loss": 0.0784, + "step": 6784 + }, + { + "epoch": 0.95, + "learning_rate": 4.8412408759124086e-05, + "loss": 0.0953, + "step": 6786 + }, + { + "epoch": 0.95, + "learning_rate": 4.841194085719634e-05, + "loss": 0.0952, + "step": 6788 + }, + { + "epoch": 0.95, + "learning_rate": 4.841147295526858e-05, + "loss": 0.1155, + "step": 6790 + }, + { + "epoch": 0.95, + "learning_rate": 4.8411005053340824e-05, + "loss": 0.0963, + "step": 6792 + }, + { + "epoch": 0.95, + "learning_rate": 4.841053715141306e-05, + "loss": 0.1182, + "step": 6794 + }, + { + "epoch": 0.95, + "learning_rate": 4.8410069249485316e-05, + "loss": 0.0928, + "step": 6796 + }, + { + "epoch": 0.95, + "learning_rate": 4.8409601347557555e-05, + "loss": 0.097, + "step": 6798 + }, + { + "epoch": 0.95, + "learning_rate": 4.84091334456298e-05, + "loss": 0.0953, + "step": 6800 + }, + { + "epoch": 0.95, + "learning_rate": 4.840866554370204e-05, + "loss": 0.0926, + "step": 6802 + }, + { + "epoch": 0.96, + "learning_rate": 4.8408197641774286e-05, + "loss": 0.074, + "step": 6804 + }, + { + "epoch": 0.96, + "learning_rate": 4.840772973984653e-05, + "loss": 0.0794, + "step": 6806 + }, + { + "epoch": 0.96, + "learning_rate": 4.840726183791878e-05, + "loss": 0.1061, + "step": 6808 + }, + { + "epoch": 0.96, + "learning_rate": 4.840679393599102e-05, + "loss": 0.108, + "step": 6810 + }, + { + "epoch": 0.96, + "learning_rate": 4.840632603406326e-05, + "loss": 0.0953, + "step": 6812 + }, + { + "epoch": 0.96, + "learning_rate": 4.840585813213551e-05, + "loss": 0.0895, + "step": 6814 + }, + { + "epoch": 0.96, + "learning_rate": 4.8405390230207755e-05, + "loss": 0.097, + "step": 6816 + }, + { + "epoch": 0.96, + "learning_rate": 4.8404922328279994e-05, + "loss": 0.0926, + "step": 6818 + }, + { + "epoch": 0.96, + "learning_rate": 4.840445442635224e-05, + "loss": 0.1324, + "step": 6820 + }, + { + "epoch": 0.96, + "learning_rate": 4.8403986524424486e-05, + "loss": 0.0832, + "step": 6822 + }, + { + "epoch": 0.96, + "learning_rate": 4.840351862249673e-05, + "loss": 0.097, + "step": 6824 + }, + { + "epoch": 0.96, + "learning_rate": 4.840305072056897e-05, + "loss": 0.1154, + "step": 6826 + }, + { + "epoch": 0.96, + "learning_rate": 4.8402582818641216e-05, + "loss": 0.1211, + "step": 6828 + }, + { + "epoch": 0.96, + "learning_rate": 4.840211491671346e-05, + "loss": 0.105, + "step": 6830 + }, + { + "epoch": 0.96, + "learning_rate": 4.840164701478571e-05, + "loss": 0.0973, + "step": 6832 + }, + { + "epoch": 0.96, + "learning_rate": 4.840117911285795e-05, + "loss": 0.0933, + "step": 6834 + }, + { + "epoch": 0.96, + "learning_rate": 4.840071121093019e-05, + "loss": 0.0872, + "step": 6836 + }, + { + "epoch": 0.96, + "learning_rate": 4.840024330900243e-05, + "loss": 0.1034, + "step": 6838 + }, + { + "epoch": 0.96, + "learning_rate": 4.839977540707468e-05, + "loss": 0.0869, + "step": 6840 + }, + { + "epoch": 0.96, + "learning_rate": 4.8399307505146924e-05, + "loss": 0.0798, + "step": 6842 + }, + { + "epoch": 0.96, + "learning_rate": 4.8398839603219163e-05, + "loss": 0.0998, + "step": 6844 + }, + { + "epoch": 0.96, + "learning_rate": 4.839837170129141e-05, + "loss": 0.0968, + "step": 6846 + }, + { + "epoch": 0.96, + "learning_rate": 4.8397903799363655e-05, + "loss": 0.1173, + "step": 6848 + }, + { + "epoch": 0.96, + "learning_rate": 4.83974358974359e-05, + "loss": 0.0919, + "step": 6850 + }, + { + "epoch": 0.96, + "learning_rate": 4.839696799550814e-05, + "loss": 0.1159, + "step": 6852 + }, + { + "epoch": 0.96, + "learning_rate": 4.8396500093580386e-05, + "loss": 0.0714, + "step": 6854 + }, + { + "epoch": 0.96, + "learning_rate": 4.839603219165263e-05, + "loss": 0.0924, + "step": 6856 + }, + { + "epoch": 0.96, + "learning_rate": 4.839556428972488e-05, + "loss": 0.1054, + "step": 6858 + }, + { + "epoch": 0.96, + "learning_rate": 4.839509638779712e-05, + "loss": 0.1118, + "step": 6860 + }, + { + "epoch": 0.96, + "learning_rate": 4.839462848586936e-05, + "loss": 0.0879, + "step": 6862 + }, + { + "epoch": 0.96, + "learning_rate": 4.839416058394161e-05, + "loss": 0.1111, + "step": 6864 + }, + { + "epoch": 0.96, + "learning_rate": 4.8393692682013855e-05, + "loss": 0.0892, + "step": 6866 + }, + { + "epoch": 0.96, + "learning_rate": 4.8393224780086094e-05, + "loss": 0.1077, + "step": 6868 + }, + { + "epoch": 0.96, + "learning_rate": 4.839275687815834e-05, + "loss": 0.1028, + "step": 6870 + }, + { + "epoch": 0.96, + "learning_rate": 4.839228897623058e-05, + "loss": 0.1059, + "step": 6872 + }, + { + "epoch": 0.96, + "learning_rate": 4.839182107430283e-05, + "loss": 0.0976, + "step": 6874 + }, + { + "epoch": 0.97, + "learning_rate": 4.839135317237507e-05, + "loss": 0.0977, + "step": 6876 + }, + { + "epoch": 0.97, + "learning_rate": 4.839088527044732e-05, + "loss": 0.0917, + "step": 6878 + }, + { + "epoch": 0.97, + "learning_rate": 4.8390417368519556e-05, + "loss": 0.0875, + "step": 6880 + }, + { + "epoch": 0.97, + "learning_rate": 4.83899494665918e-05, + "loss": 0.1239, + "step": 6882 + }, + { + "epoch": 0.97, + "learning_rate": 4.838948156466405e-05, + "loss": 0.0962, + "step": 6884 + }, + { + "epoch": 0.97, + "learning_rate": 4.8389013662736294e-05, + "loss": 0.105, + "step": 6886 + }, + { + "epoch": 0.97, + "learning_rate": 4.838854576080853e-05, + "loss": 0.102, + "step": 6888 + }, + { + "epoch": 0.97, + "learning_rate": 4.838807785888078e-05, + "loss": 0.0977, + "step": 6890 + }, + { + "epoch": 0.97, + "learning_rate": 4.8387609956953025e-05, + "loss": 0.1037, + "step": 6892 + }, + { + "epoch": 0.97, + "learning_rate": 4.838714205502527e-05, + "loss": 0.0885, + "step": 6894 + }, + { + "epoch": 0.97, + "learning_rate": 4.838667415309751e-05, + "loss": 0.0917, + "step": 6896 + }, + { + "epoch": 0.97, + "learning_rate": 4.8386206251169756e-05, + "loss": 0.1001, + "step": 6898 + }, + { + "epoch": 0.97, + "learning_rate": 4.8385738349242e-05, + "loss": 0.1313, + "step": 6900 + }, + { + "epoch": 0.97, + "learning_rate": 4.838527044731425e-05, + "loss": 0.1169, + "step": 6902 + }, + { + "epoch": 0.97, + "learning_rate": 4.8384802545386487e-05, + "loss": 0.1195, + "step": 6904 + }, + { + "epoch": 0.97, + "learning_rate": 4.838433464345873e-05, + "loss": 0.0921, + "step": 6906 + }, + { + "epoch": 0.97, + "learning_rate": 4.838386674153098e-05, + "loss": 0.1008, + "step": 6908 + }, + { + "epoch": 0.97, + "learning_rate": 4.8383398839603224e-05, + "loss": 0.0865, + "step": 6910 + }, + { + "epoch": 0.97, + "learning_rate": 4.8382930937675463e-05, + "loss": 0.0884, + "step": 6912 + }, + { + "epoch": 0.97, + "learning_rate": 4.838246303574771e-05, + "loss": 0.1269, + "step": 6914 + }, + { + "epoch": 0.97, + "learning_rate": 4.838199513381995e-05, + "loss": 0.0834, + "step": 6916 + }, + { + "epoch": 0.97, + "learning_rate": 4.83815272318922e-05, + "loss": 0.0979, + "step": 6918 + }, + { + "epoch": 0.97, + "learning_rate": 4.838105932996444e-05, + "loss": 0.0706, + "step": 6920 + }, + { + "epoch": 0.97, + "learning_rate": 4.8380591428036686e-05, + "loss": 0.1109, + "step": 6922 + }, + { + "epoch": 0.97, + "learning_rate": 4.8380123526108925e-05, + "loss": 0.0822, + "step": 6924 + }, + { + "epoch": 0.97, + "learning_rate": 4.837965562418118e-05, + "loss": 0.103, + "step": 6926 + }, + { + "epoch": 0.97, + "learning_rate": 4.837918772225342e-05, + "loss": 0.1162, + "step": 6928 + }, + { + "epoch": 0.97, + "learning_rate": 4.837871982032566e-05, + "loss": 0.0904, + "step": 6930 + }, + { + "epoch": 0.97, + "learning_rate": 4.83782519183979e-05, + "loss": 0.0865, + "step": 6932 + }, + { + "epoch": 0.97, + "learning_rate": 4.837778401647015e-05, + "loss": 0.1013, + "step": 6934 + }, + { + "epoch": 0.97, + "learning_rate": 4.8377316114542394e-05, + "loss": 0.0997, + "step": 6936 + }, + { + "epoch": 0.97, + "learning_rate": 4.837684821261464e-05, + "loss": 0.096, + "step": 6938 + }, + { + "epoch": 0.97, + "learning_rate": 4.837638031068688e-05, + "loss": 0.1282, + "step": 6940 + }, + { + "epoch": 0.97, + "learning_rate": 4.8375912408759125e-05, + "loss": 0.081, + "step": 6942 + }, + { + "epoch": 0.97, + "learning_rate": 4.837544450683137e-05, + "loss": 0.0763, + "step": 6944 + }, + { + "epoch": 0.98, + "learning_rate": 4.837497660490362e-05, + "loss": 0.0851, + "step": 6946 + }, + { + "epoch": 0.98, + "learning_rate": 4.8374508702975856e-05, + "loss": 0.1228, + "step": 6948 + }, + { + "epoch": 0.98, + "learning_rate": 4.83740408010481e-05, + "loss": 0.1094, + "step": 6950 + }, + { + "epoch": 0.98, + "learning_rate": 4.837357289912035e-05, + "loss": 0.1124, + "step": 6952 + }, + { + "epoch": 0.98, + "learning_rate": 4.8373104997192594e-05, + "loss": 0.0812, + "step": 6954 + }, + { + "epoch": 0.98, + "learning_rate": 4.837263709526483e-05, + "loss": 0.0816, + "step": 6956 + }, + { + "epoch": 0.98, + "learning_rate": 4.837216919333708e-05, + "loss": 0.1111, + "step": 6958 + }, + { + "epoch": 0.98, + "learning_rate": 4.8371701291409325e-05, + "loss": 0.1044, + "step": 6960 + }, + { + "epoch": 0.98, + "learning_rate": 4.837123338948157e-05, + "loss": 0.1142, + "step": 6962 + }, + { + "epoch": 0.98, + "learning_rate": 4.837076548755381e-05, + "loss": 0.1034, + "step": 6964 + }, + { + "epoch": 0.98, + "learning_rate": 4.8370297585626056e-05, + "loss": 0.1176, + "step": 6966 + }, + { + "epoch": 0.98, + "learning_rate": 4.8369829683698295e-05, + "loss": 0.0962, + "step": 6968 + }, + { + "epoch": 0.98, + "learning_rate": 4.836936178177055e-05, + "loss": 0.0932, + "step": 6970 + }, + { + "epoch": 0.98, + "learning_rate": 4.836889387984279e-05, + "loss": 0.1057, + "step": 6972 + }, + { + "epoch": 0.98, + "learning_rate": 4.836842597791503e-05, + "loss": 0.1024, + "step": 6974 + }, + { + "epoch": 0.98, + "learning_rate": 4.836795807598727e-05, + "loss": 0.1027, + "step": 6976 + }, + { + "epoch": 0.98, + "learning_rate": 4.8367490174059524e-05, + "loss": 0.1228, + "step": 6978 + }, + { + "epoch": 0.98, + "learning_rate": 4.8367022272131763e-05, + "loss": 0.0971, + "step": 6980 + }, + { + "epoch": 0.98, + "learning_rate": 4.836655437020401e-05, + "loss": 0.1094, + "step": 6982 + }, + { + "epoch": 0.98, + "learning_rate": 4.836608646827625e-05, + "loss": 0.0743, + "step": 6984 + }, + { + "epoch": 0.98, + "learning_rate": 4.8365618566348494e-05, + "loss": 0.0925, + "step": 6986 + }, + { + "epoch": 0.98, + "learning_rate": 4.836515066442074e-05, + "loss": 0.1026, + "step": 6988 + }, + { + "epoch": 0.98, + "learning_rate": 4.8364682762492986e-05, + "loss": 0.1369, + "step": 6990 + }, + { + "epoch": 0.98, + "learning_rate": 4.8364214860565225e-05, + "loss": 0.1028, + "step": 6992 + }, + { + "epoch": 0.98, + "learning_rate": 4.836374695863747e-05, + "loss": 0.1015, + "step": 6994 + }, + { + "epoch": 0.98, + "learning_rate": 4.836327905670972e-05, + "loss": 0.104, + "step": 6996 + }, + { + "epoch": 0.98, + "learning_rate": 4.836281115478196e-05, + "loss": 0.0902, + "step": 6998 + }, + { + "epoch": 0.98, + "learning_rate": 4.83623432528542e-05, + "loss": 0.1163, + "step": 7000 + }, + { + "epoch": 0.98, + "eval_gen_len": 29.0862, + "eval_loss": 1.0126045942306519, + "eval_meteor": 0.0443, + "eval_runtime": 14.844, + "eval_samples_per_second": 3.907, + "eval_steps_per_second": 0.539, + "step": 7000 + }, + { + "epoch": 0.98, + "learning_rate": 4.836187535092645e-05, + "loss": 0.1137, + "step": 7002 + }, + { + "epoch": 0.98, + "learning_rate": 4.8361407448998694e-05, + "loss": 0.0969, + "step": 7004 + }, + { + "epoch": 0.98, + "learning_rate": 4.836093954707094e-05, + "loss": 0.0938, + "step": 7006 + }, + { + "epoch": 0.98, + "learning_rate": 4.836047164514318e-05, + "loss": 0.0878, + "step": 7008 + }, + { + "epoch": 0.98, + "learning_rate": 4.8360003743215425e-05, + "loss": 0.0865, + "step": 7010 + }, + { + "epoch": 0.98, + "learning_rate": 4.8359535841287664e-05, + "loss": 0.1052, + "step": 7012 + }, + { + "epoch": 0.98, + "learning_rate": 4.835906793935992e-05, + "loss": 0.0923, + "step": 7014 + }, + { + "epoch": 0.98, + "learning_rate": 4.8358600037432156e-05, + "loss": 0.1033, + "step": 7016 + }, + { + "epoch": 0.99, + "learning_rate": 4.83581321355044e-05, + "loss": 0.099, + "step": 7018 + }, + { + "epoch": 0.99, + "learning_rate": 4.835766423357664e-05, + "loss": 0.119, + "step": 7020 + }, + { + "epoch": 0.99, + "learning_rate": 4.8357196331648894e-05, + "loss": 0.1074, + "step": 7022 + }, + { + "epoch": 0.99, + "learning_rate": 4.835672842972113e-05, + "loss": 0.1074, + "step": 7024 + }, + { + "epoch": 0.99, + "learning_rate": 4.835626052779338e-05, + "loss": 0.104, + "step": 7026 + }, + { + "epoch": 0.99, + "learning_rate": 4.835579262586562e-05, + "loss": 0.0993, + "step": 7028 + }, + { + "epoch": 0.99, + "learning_rate": 4.8355324723937864e-05, + "loss": 0.085, + "step": 7030 + }, + { + "epoch": 0.99, + "learning_rate": 4.835485682201011e-05, + "loss": 0.0896, + "step": 7032 + }, + { + "epoch": 0.99, + "learning_rate": 4.8354388920082356e-05, + "loss": 0.0996, + "step": 7034 + }, + { + "epoch": 0.99, + "learning_rate": 4.8353921018154595e-05, + "loss": 0.1204, + "step": 7036 + }, + { + "epoch": 0.99, + "learning_rate": 4.835345311622684e-05, + "loss": 0.0781, + "step": 7038 + }, + { + "epoch": 0.99, + "learning_rate": 4.835298521429909e-05, + "loss": 0.1396, + "step": 7040 + }, + { + "epoch": 0.99, + "learning_rate": 4.835251731237133e-05, + "loss": 0.1755, + "step": 7042 + }, + { + "epoch": 0.99, + "learning_rate": 4.835204941044357e-05, + "loss": 0.0937, + "step": 7044 + }, + { + "epoch": 0.99, + "learning_rate": 4.835158150851582e-05, + "loss": 0.0942, + "step": 7046 + }, + { + "epoch": 0.99, + "learning_rate": 4.8351113606588064e-05, + "loss": 0.1169, + "step": 7048 + }, + { + "epoch": 0.99, + "learning_rate": 4.835064570466031e-05, + "loss": 0.0962, + "step": 7050 + }, + { + "epoch": 0.99, + "learning_rate": 4.835017780273255e-05, + "loss": 0.1254, + "step": 7052 + }, + { + "epoch": 0.99, + "learning_rate": 4.8349709900804794e-05, + "loss": 0.1053, + "step": 7054 + }, + { + "epoch": 0.99, + "learning_rate": 4.834924199887704e-05, + "loss": 0.1188, + "step": 7056 + }, + { + "epoch": 0.99, + "learning_rate": 4.8348774096949286e-05, + "loss": 0.1038, + "step": 7058 + }, + { + "epoch": 0.99, + "learning_rate": 4.8348306195021525e-05, + "loss": 0.1068, + "step": 7060 + }, + { + "epoch": 0.99, + "learning_rate": 4.834783829309377e-05, + "loss": 0.1112, + "step": 7062 + }, + { + "epoch": 0.99, + "learning_rate": 4.834737039116601e-05, + "loss": 0.1352, + "step": 7064 + }, + { + "epoch": 0.99, + "learning_rate": 4.834690248923826e-05, + "loss": 0.1129, + "step": 7066 + }, + { + "epoch": 0.99, + "learning_rate": 4.83464345873105e-05, + "loss": 0.1124, + "step": 7068 + }, + { + "epoch": 0.99, + "learning_rate": 4.834596668538275e-05, + "loss": 0.1069, + "step": 7070 + }, + { + "epoch": 0.99, + "learning_rate": 4.834549878345499e-05, + "loss": 0.1167, + "step": 7072 + }, + { + "epoch": 0.99, + "learning_rate": 4.834503088152724e-05, + "loss": 0.0909, + "step": 7074 + }, + { + "epoch": 0.99, + "learning_rate": 4.834456297959948e-05, + "loss": 0.1275, + "step": 7076 + }, + { + "epoch": 0.99, + "learning_rate": 4.8344095077671725e-05, + "loss": 0.096, + "step": 7078 + }, + { + "epoch": 0.99, + "learning_rate": 4.8343627175743964e-05, + "loss": 0.1105, + "step": 7080 + }, + { + "epoch": 0.99, + "learning_rate": 4.834315927381621e-05, + "loss": 0.1029, + "step": 7082 + }, + { + "epoch": 0.99, + "learning_rate": 4.8342691371888456e-05, + "loss": 0.0912, + "step": 7084 + }, + { + "epoch": 0.99, + "learning_rate": 4.83422234699607e-05, + "loss": 0.0942, + "step": 7086 + }, + { + "epoch": 0.99, + "learning_rate": 4.834175556803294e-05, + "loss": 0.0853, + "step": 7088 + }, + { + "epoch": 1.0, + "learning_rate": 4.834128766610519e-05, + "loss": 0.0912, + "step": 7090 + }, + { + "epoch": 1.0, + "learning_rate": 4.834081976417743e-05, + "loss": 0.0714, + "step": 7092 + }, + { + "epoch": 1.0, + "learning_rate": 4.834035186224967e-05, + "loss": 0.0945, + "step": 7094 + }, + { + "epoch": 1.0, + "learning_rate": 4.833988396032192e-05, + "loss": 0.1026, + "step": 7096 + }, + { + "epoch": 1.0, + "learning_rate": 4.833941605839416e-05, + "loss": 0.1034, + "step": 7098 + }, + { + "epoch": 1.0, + "learning_rate": 4.833894815646641e-05, + "loss": 0.0926, + "step": 7100 + }, + { + "epoch": 1.0, + "learning_rate": 4.833848025453865e-05, + "loss": 0.085, + "step": 7102 + }, + { + "epoch": 1.0, + "learning_rate": 4.8338012352610895e-05, + "loss": 0.1048, + "step": 7104 + }, + { + "epoch": 1.0, + "learning_rate": 4.8337544450683134e-05, + "loss": 0.1001, + "step": 7106 + }, + { + "epoch": 1.0, + "learning_rate": 4.833707654875539e-05, + "loss": 0.0964, + "step": 7108 + }, + { + "epoch": 1.0, + "learning_rate": 4.8336608646827626e-05, + "loss": 0.1004, + "step": 7110 + }, + { + "epoch": 1.0, + "learning_rate": 4.833614074489987e-05, + "loss": 0.0852, + "step": 7112 + }, + { + "epoch": 1.0, + "learning_rate": 4.833567284297211e-05, + "loss": 0.0964, + "step": 7114 + }, + { + "epoch": 1.0, + "learning_rate": 4.833520494104436e-05, + "loss": 0.0953, + "step": 7116 + }, + { + "epoch": 1.0, + "learning_rate": 4.83347370391166e-05, + "loss": 0.1235, + "step": 7118 + }, + { + "epoch": 1.0, + "learning_rate": 4.833426913718885e-05, + "loss": 0.0937, + "step": 7120 + }, + { + "epoch": 1.0, + "learning_rate": 4.833380123526109e-05, + "loss": 0.1229, + "step": 7122 + }, + { + "epoch": 1.0, + "learning_rate": 4.8333567284297214e-05, + "loss": 0.1498, + "step": 7124 + }, + { + "epoch": 1.0, + "learning_rate": 4.833309938236946e-05, + "loss": 0.0874, + "step": 7126 + }, + { + "epoch": 1.0, + "learning_rate": 4.8332631480441706e-05, + "loss": 0.0612, + "step": 7128 + }, + { + "epoch": 1.0, + "learning_rate": 4.8332163578513945e-05, + "loss": 0.0723, + "step": 7130 + }, + { + "epoch": 1.0, + "learning_rate": 4.833169567658619e-05, + "loss": 0.0633, + "step": 7132 + }, + { + "epoch": 1.0, + "learning_rate": 4.833122777465843e-05, + "loss": 0.068, + "step": 7134 + }, + { + "epoch": 1.0, + "learning_rate": 4.833075987273068e-05, + "loss": 0.0644, + "step": 7136 + }, + { + "epoch": 1.0, + "learning_rate": 4.833029197080292e-05, + "loss": 0.059, + "step": 7138 + }, + { + "epoch": 1.0, + "learning_rate": 4.832982406887517e-05, + "loss": 0.0491, + "step": 7140 + }, + { + "epoch": 1.0, + "learning_rate": 4.832935616694741e-05, + "loss": 0.0526, + "step": 7142 + }, + { + "epoch": 1.0, + "learning_rate": 4.832888826501965e-05, + "loss": 0.0508, + "step": 7144 + }, + { + "epoch": 1.0, + "learning_rate": 4.83284203630919e-05, + "loss": 0.0626, + "step": 7146 + }, + { + "epoch": 1.0, + "learning_rate": 4.8327952461164145e-05, + "loss": 0.0754, + "step": 7148 + }, + { + "epoch": 1.0, + "learning_rate": 4.8327484559236384e-05, + "loss": 0.0685, + "step": 7150 + }, + { + "epoch": 1.0, + "learning_rate": 4.832701665730863e-05, + "loss": 0.0698, + "step": 7152 + }, + { + "epoch": 1.0, + "learning_rate": 4.8326548755380876e-05, + "loss": 0.0732, + "step": 7154 + }, + { + "epoch": 1.0, + "learning_rate": 4.832608085345312e-05, + "loss": 0.0628, + "step": 7156 + }, + { + "epoch": 1.0, + "learning_rate": 4.832561295152536e-05, + "loss": 0.075, + "step": 7158 + }, + { + "epoch": 1.01, + "learning_rate": 4.8325145049597607e-05, + "loss": 0.0582, + "step": 7160 + }, + { + "epoch": 1.01, + "learning_rate": 4.832467714766985e-05, + "loss": 0.063, + "step": 7162 + }, + { + "epoch": 1.01, + "learning_rate": 4.83242092457421e-05, + "loss": 0.0574, + "step": 7164 + }, + { + "epoch": 1.01, + "learning_rate": 4.832374134381434e-05, + "loss": 0.057, + "step": 7166 + }, + { + "epoch": 1.01, + "learning_rate": 4.8323273441886583e-05, + "loss": 0.0757, + "step": 7168 + }, + { + "epoch": 1.01, + "learning_rate": 4.832280553995883e-05, + "loss": 0.0547, + "step": 7170 + }, + { + "epoch": 1.01, + "learning_rate": 4.8322337638031075e-05, + "loss": 0.0701, + "step": 7172 + }, + { + "epoch": 1.01, + "learning_rate": 4.8321869736103314e-05, + "loss": 0.0587, + "step": 7174 + }, + { + "epoch": 1.01, + "learning_rate": 4.832140183417556e-05, + "loss": 0.0661, + "step": 7176 + }, + { + "epoch": 1.01, + "learning_rate": 4.83209339322478e-05, + "loss": 0.0653, + "step": 7178 + }, + { + "epoch": 1.01, + "learning_rate": 4.832046603032005e-05, + "loss": 0.0517, + "step": 7180 + }, + { + "epoch": 1.01, + "learning_rate": 4.831999812839229e-05, + "loss": 0.0517, + "step": 7182 + }, + { + "epoch": 1.01, + "learning_rate": 4.831953022646454e-05, + "loss": 0.0689, + "step": 7184 + }, + { + "epoch": 1.01, + "learning_rate": 4.8319062324536776e-05, + "loss": 0.0664, + "step": 7186 + }, + { + "epoch": 1.01, + "learning_rate": 4.831859442260903e-05, + "loss": 0.0978, + "step": 7188 + }, + { + "epoch": 1.01, + "learning_rate": 4.831812652068127e-05, + "loss": 0.0796, + "step": 7190 + }, + { + "epoch": 1.01, + "learning_rate": 4.8317658618753514e-05, + "loss": 0.0552, + "step": 7192 + }, + { + "epoch": 1.01, + "learning_rate": 4.831719071682575e-05, + "loss": 0.0539, + "step": 7194 + }, + { + "epoch": 1.01, + "learning_rate": 4.8316722814898e-05, + "loss": 0.0512, + "step": 7196 + }, + { + "epoch": 1.01, + "learning_rate": 4.8316254912970245e-05, + "loss": 0.0468, + "step": 7198 + }, + { + "epoch": 1.01, + "learning_rate": 4.831578701104249e-05, + "loss": 0.0708, + "step": 7200 + }, + { + "epoch": 1.01, + "learning_rate": 4.831531910911473e-05, + "loss": 0.0638, + "step": 7202 + }, + { + "epoch": 1.01, + "learning_rate": 4.8314851207186976e-05, + "loss": 0.0721, + "step": 7204 + }, + { + "epoch": 1.01, + "learning_rate": 4.831438330525922e-05, + "loss": 0.1117, + "step": 7206 + }, + { + "epoch": 1.01, + "learning_rate": 4.831391540333147e-05, + "loss": 0.0617, + "step": 7208 + }, + { + "epoch": 1.01, + "learning_rate": 4.831344750140371e-05, + "loss": 0.0466, + "step": 7210 + }, + { + "epoch": 1.01, + "learning_rate": 4.831297959947595e-05, + "loss": 0.064, + "step": 7212 + }, + { + "epoch": 1.01, + "learning_rate": 4.83125116975482e-05, + "loss": 0.0715, + "step": 7214 + }, + { + "epoch": 1.01, + "learning_rate": 4.8312043795620445e-05, + "loss": 0.0638, + "step": 7216 + }, + { + "epoch": 1.01, + "learning_rate": 4.8311575893692684e-05, + "loss": 0.0467, + "step": 7218 + }, + { + "epoch": 1.01, + "learning_rate": 4.831110799176492e-05, + "loss": 0.0772, + "step": 7220 + }, + { + "epoch": 1.01, + "learning_rate": 4.8310640089837176e-05, + "loss": 0.0694, + "step": 7222 + }, + { + "epoch": 1.01, + "learning_rate": 4.8310172187909415e-05, + "loss": 0.0674, + "step": 7224 + }, + { + "epoch": 1.01, + "learning_rate": 4.830970428598166e-05, + "loss": 0.0755, + "step": 7226 + }, + { + "epoch": 1.01, + "learning_rate": 4.83092363840539e-05, + "loss": 0.0662, + "step": 7228 + }, + { + "epoch": 1.01, + "learning_rate": 4.8308768482126146e-05, + "loss": 0.0614, + "step": 7230 + }, + { + "epoch": 1.02, + "learning_rate": 4.830830058019839e-05, + "loss": 0.0579, + "step": 7232 + }, + { + "epoch": 1.02, + "learning_rate": 4.830783267827064e-05, + "loss": 0.0823, + "step": 7234 + }, + { + "epoch": 1.02, + "learning_rate": 4.830736477634288e-05, + "loss": 0.0653, + "step": 7236 + }, + { + "epoch": 1.02, + "learning_rate": 4.830689687441512e-05, + "loss": 0.0713, + "step": 7238 + }, + { + "epoch": 1.02, + "learning_rate": 4.830642897248737e-05, + "loss": 0.0644, + "step": 7240 + }, + { + "epoch": 1.02, + "learning_rate": 4.8305961070559614e-05, + "loss": 0.0711, + "step": 7242 + }, + { + "epoch": 1.02, + "learning_rate": 4.8305493168631854e-05, + "loss": 0.0618, + "step": 7244 + }, + { + "epoch": 1.02, + "learning_rate": 4.83050252667041e-05, + "loss": 0.0537, + "step": 7246 + }, + { + "epoch": 1.02, + "learning_rate": 4.8304557364776345e-05, + "loss": 0.0449, + "step": 7248 + }, + { + "epoch": 1.02, + "learning_rate": 4.830408946284859e-05, + "loss": 0.0807, + "step": 7250 + }, + { + "epoch": 1.02, + "learning_rate": 4.830362156092083e-05, + "loss": 0.0597, + "step": 7252 + }, + { + "epoch": 1.02, + "learning_rate": 4.8303153658993076e-05, + "loss": 0.067, + "step": 7254 + }, + { + "epoch": 1.02, + "learning_rate": 4.830268575706532e-05, + "loss": 0.0769, + "step": 7256 + }, + { + "epoch": 1.02, + "learning_rate": 4.830221785513757e-05, + "loss": 0.0526, + "step": 7258 + }, + { + "epoch": 1.02, + "learning_rate": 4.830174995320981e-05, + "loss": 0.0624, + "step": 7260 + }, + { + "epoch": 1.02, + "learning_rate": 4.830128205128205e-05, + "loss": 0.0553, + "step": 7262 + }, + { + "epoch": 1.02, + "learning_rate": 4.830081414935429e-05, + "loss": 0.0978, + "step": 7264 + }, + { + "epoch": 1.02, + "learning_rate": 4.8300346247426545e-05, + "loss": 0.0651, + "step": 7266 + }, + { + "epoch": 1.02, + "learning_rate": 4.8299878345498784e-05, + "loss": 0.0584, + "step": 7268 + }, + { + "epoch": 1.02, + "learning_rate": 4.829941044357103e-05, + "loss": 0.066, + "step": 7270 + }, + { + "epoch": 1.02, + "learning_rate": 4.829894254164327e-05, + "loss": 0.0597, + "step": 7272 + }, + { + "epoch": 1.02, + "learning_rate": 4.829847463971552e-05, + "loss": 0.0579, + "step": 7274 + }, + { + "epoch": 1.02, + "learning_rate": 4.829800673778776e-05, + "loss": 0.0657, + "step": 7276 + }, + { + "epoch": 1.02, + "learning_rate": 4.829753883586001e-05, + "loss": 0.0558, + "step": 7278 + }, + { + "epoch": 1.02, + "learning_rate": 4.8297070933932246e-05, + "loss": 0.0477, + "step": 7280 + }, + { + "epoch": 1.02, + "learning_rate": 4.829660303200449e-05, + "loss": 0.0681, + "step": 7282 + }, + { + "epoch": 1.02, + "learning_rate": 4.829613513007674e-05, + "loss": 0.0579, + "step": 7284 + }, + { + "epoch": 1.02, + "learning_rate": 4.8295667228148984e-05, + "loss": 0.0725, + "step": 7286 + }, + { + "epoch": 1.02, + "learning_rate": 4.829519932622122e-05, + "loss": 0.0675, + "step": 7288 + }, + { + "epoch": 1.02, + "learning_rate": 4.829473142429347e-05, + "loss": 0.0747, + "step": 7290 + }, + { + "epoch": 1.02, + "learning_rate": 4.8294263522365715e-05, + "loss": 0.061, + "step": 7292 + }, + { + "epoch": 1.02, + "learning_rate": 4.829379562043796e-05, + "loss": 0.0767, + "step": 7294 + }, + { + "epoch": 1.02, + "learning_rate": 4.82933277185102e-05, + "loss": 0.0563, + "step": 7296 + }, + { + "epoch": 1.02, + "learning_rate": 4.8292859816582446e-05, + "loss": 0.0656, + "step": 7298 + }, + { + "epoch": 1.02, + "learning_rate": 4.829239191465469e-05, + "loss": 0.0674, + "step": 7300 + }, + { + "epoch": 1.02, + "learning_rate": 4.829192401272694e-05, + "loss": 0.0565, + "step": 7302 + }, + { + "epoch": 1.03, + "learning_rate": 4.829145611079918e-05, + "loss": 0.0848, + "step": 7304 + }, + { + "epoch": 1.03, + "learning_rate": 4.829098820887142e-05, + "loss": 0.0647, + "step": 7306 + }, + { + "epoch": 1.03, + "learning_rate": 4.829052030694366e-05, + "loss": 0.1279, + "step": 7308 + }, + { + "epoch": 1.03, + "learning_rate": 4.8290052405015914e-05, + "loss": 0.1035, + "step": 7310 + }, + { + "epoch": 1.03, + "learning_rate": 4.8289584503088154e-05, + "loss": 0.0651, + "step": 7312 + }, + { + "epoch": 1.03, + "learning_rate": 4.82891166011604e-05, + "loss": 0.0533, + "step": 7314 + }, + { + "epoch": 1.03, + "learning_rate": 4.828864869923264e-05, + "loss": 0.0539, + "step": 7316 + }, + { + "epoch": 1.03, + "learning_rate": 4.828818079730489e-05, + "loss": 0.0557, + "step": 7318 + }, + { + "epoch": 1.03, + "learning_rate": 4.828771289537713e-05, + "loss": 0.0614, + "step": 7320 + }, + { + "epoch": 1.03, + "learning_rate": 4.8287244993449376e-05, + "loss": 0.0697, + "step": 7322 + }, + { + "epoch": 1.03, + "learning_rate": 4.8286777091521616e-05, + "loss": 0.0604, + "step": 7324 + }, + { + "epoch": 1.03, + "learning_rate": 4.828630918959386e-05, + "loss": 0.0769, + "step": 7326 + }, + { + "epoch": 1.03, + "learning_rate": 4.828584128766611e-05, + "loss": 0.0574, + "step": 7328 + }, + { + "epoch": 1.03, + "learning_rate": 4.828537338573835e-05, + "loss": 0.0731, + "step": 7330 + }, + { + "epoch": 1.03, + "learning_rate": 4.828490548381059e-05, + "loss": 0.0723, + "step": 7332 + }, + { + "epoch": 1.03, + "learning_rate": 4.828443758188284e-05, + "loss": 0.0887, + "step": 7334 + }, + { + "epoch": 1.03, + "learning_rate": 4.8283969679955084e-05, + "loss": 0.0518, + "step": 7336 + }, + { + "epoch": 1.03, + "learning_rate": 4.828350177802733e-05, + "loss": 0.0574, + "step": 7338 + }, + { + "epoch": 1.03, + "learning_rate": 4.828303387609957e-05, + "loss": 0.0467, + "step": 7340 + }, + { + "epoch": 1.03, + "learning_rate": 4.8282565974171815e-05, + "loss": 0.0675, + "step": 7342 + }, + { + "epoch": 1.03, + "learning_rate": 4.828209807224406e-05, + "loss": 0.0528, + "step": 7344 + }, + { + "epoch": 1.03, + "learning_rate": 4.828163017031631e-05, + "loss": 0.0507, + "step": 7346 + }, + { + "epoch": 1.03, + "learning_rate": 4.8281162268388546e-05, + "loss": 0.0674, + "step": 7348 + }, + { + "epoch": 1.03, + "learning_rate": 4.828069436646079e-05, + "loss": 0.069, + "step": 7350 + }, + { + "epoch": 1.03, + "learning_rate": 4.828022646453304e-05, + "loss": 0.0562, + "step": 7352 + }, + { + "epoch": 1.03, + "learning_rate": 4.8279758562605284e-05, + "loss": 0.0706, + "step": 7354 + }, + { + "epoch": 1.03, + "learning_rate": 4.827929066067752e-05, + "loss": 0.0788, + "step": 7356 + }, + { + "epoch": 1.03, + "learning_rate": 4.827882275874977e-05, + "loss": 0.057, + "step": 7358 + }, + { + "epoch": 1.03, + "learning_rate": 4.827835485682201e-05, + "loss": 0.0555, + "step": 7360 + }, + { + "epoch": 1.03, + "learning_rate": 4.827788695489426e-05, + "loss": 0.0682, + "step": 7362 + }, + { + "epoch": 1.03, + "learning_rate": 4.82774190529665e-05, + "loss": 0.0704, + "step": 7364 + }, + { + "epoch": 1.03, + "learning_rate": 4.8276951151038746e-05, + "loss": 0.0572, + "step": 7366 + }, + { + "epoch": 1.03, + "learning_rate": 4.8276483249110985e-05, + "loss": 0.0653, + "step": 7368 + }, + { + "epoch": 1.03, + "learning_rate": 4.827601534718324e-05, + "loss": 0.0529, + "step": 7370 + }, + { + "epoch": 1.03, + "learning_rate": 4.827554744525548e-05, + "loss": 0.0839, + "step": 7372 + }, + { + "epoch": 1.04, + "learning_rate": 4.827507954332772e-05, + "loss": 0.054, + "step": 7374 + }, + { + "epoch": 1.04, + "learning_rate": 4.827461164139996e-05, + "loss": 0.0571, + "step": 7376 + }, + { + "epoch": 1.04, + "learning_rate": 4.827414373947221e-05, + "loss": 0.0714, + "step": 7378 + }, + { + "epoch": 1.04, + "learning_rate": 4.8273675837544454e-05, + "loss": 0.0643, + "step": 7380 + }, + { + "epoch": 1.04, + "learning_rate": 4.82732079356167e-05, + "loss": 0.0683, + "step": 7382 + }, + { + "epoch": 1.04, + "learning_rate": 4.827274003368894e-05, + "loss": 0.0637, + "step": 7384 + }, + { + "epoch": 1.04, + "learning_rate": 4.8272272131761185e-05, + "loss": 0.0618, + "step": 7386 + }, + { + "epoch": 1.04, + "learning_rate": 4.827180422983343e-05, + "loss": 0.0454, + "step": 7388 + }, + { + "epoch": 1.04, + "learning_rate": 4.8271336327905676e-05, + "loss": 0.0771, + "step": 7390 + }, + { + "epoch": 1.04, + "learning_rate": 4.8270868425977916e-05, + "loss": 0.0571, + "step": 7392 + }, + { + "epoch": 1.04, + "learning_rate": 4.827040052405016e-05, + "loss": 0.0552, + "step": 7394 + }, + { + "epoch": 1.04, + "learning_rate": 4.826993262212241e-05, + "loss": 0.0479, + "step": 7396 + }, + { + "epoch": 1.04, + "learning_rate": 4.826946472019465e-05, + "loss": 0.0548, + "step": 7398 + }, + { + "epoch": 1.04, + "learning_rate": 4.826899681826689e-05, + "loss": 0.0895, + "step": 7400 + }, + { + "epoch": 1.04, + "learning_rate": 4.826852891633914e-05, + "loss": 0.061, + "step": 7402 + }, + { + "epoch": 1.04, + "learning_rate": 4.8268061014411384e-05, + "loss": 0.0498, + "step": 7404 + }, + { + "epoch": 1.04, + "learning_rate": 4.826759311248363e-05, + "loss": 0.0818, + "step": 7406 + }, + { + "epoch": 1.04, + "learning_rate": 4.826712521055587e-05, + "loss": 0.0608, + "step": 7408 + }, + { + "epoch": 1.04, + "learning_rate": 4.8266657308628115e-05, + "loss": 0.0578, + "step": 7410 + }, + { + "epoch": 1.04, + "learning_rate": 4.8266189406700354e-05, + "loss": 0.0586, + "step": 7412 + }, + { + "epoch": 1.04, + "learning_rate": 4.826572150477261e-05, + "loss": 0.0822, + "step": 7414 + }, + { + "epoch": 1.04, + "learning_rate": 4.8265253602844846e-05, + "loss": 0.0717, + "step": 7416 + }, + { + "epoch": 1.04, + "learning_rate": 4.826478570091709e-05, + "loss": 0.0689, + "step": 7418 + }, + { + "epoch": 1.04, + "learning_rate": 4.826431779898933e-05, + "loss": 0.0631, + "step": 7420 + }, + { + "epoch": 1.04, + "learning_rate": 4.826384989706158e-05, + "loss": 0.0611, + "step": 7422 + }, + { + "epoch": 1.04, + "learning_rate": 4.826338199513382e-05, + "loss": 0.0666, + "step": 7424 + }, + { + "epoch": 1.04, + "learning_rate": 4.826291409320607e-05, + "loss": 0.0727, + "step": 7426 + }, + { + "epoch": 1.04, + "learning_rate": 4.826244619127831e-05, + "loss": 0.0587, + "step": 7428 + }, + { + "epoch": 1.04, + "learning_rate": 4.8261978289350554e-05, + "loss": 0.0682, + "step": 7430 + }, + { + "epoch": 1.04, + "learning_rate": 4.82615103874228e-05, + "loss": 0.0822, + "step": 7432 + }, + { + "epoch": 1.04, + "learning_rate": 4.8261042485495046e-05, + "loss": 0.0738, + "step": 7434 + }, + { + "epoch": 1.04, + "learning_rate": 4.8260574583567285e-05, + "loss": 0.0635, + "step": 7436 + }, + { + "epoch": 1.04, + "learning_rate": 4.826010668163953e-05, + "loss": 0.0888, + "step": 7438 + }, + { + "epoch": 1.04, + "learning_rate": 4.825963877971178e-05, + "loss": 0.0693, + "step": 7440 + }, + { + "epoch": 1.04, + "learning_rate": 4.825917087778402e-05, + "loss": 0.0491, + "step": 7442 + }, + { + "epoch": 1.04, + "learning_rate": 4.825870297585626e-05, + "loss": 0.0827, + "step": 7444 + }, + { + "epoch": 1.05, + "learning_rate": 4.825823507392851e-05, + "loss": 0.0647, + "step": 7446 + }, + { + "epoch": 1.05, + "learning_rate": 4.8257767172000754e-05, + "loss": 0.0726, + "step": 7448 + }, + { + "epoch": 1.05, + "learning_rate": 4.8257299270073e-05, + "loss": 0.0706, + "step": 7450 + }, + { + "epoch": 1.05, + "learning_rate": 4.825683136814524e-05, + "loss": 0.0626, + "step": 7452 + }, + { + "epoch": 1.05, + "learning_rate": 4.8256363466217485e-05, + "loss": 0.0643, + "step": 7454 + }, + { + "epoch": 1.05, + "learning_rate": 4.8255895564289724e-05, + "loss": 0.077, + "step": 7456 + }, + { + "epoch": 1.05, + "learning_rate": 4.8255427662361976e-05, + "loss": 0.0779, + "step": 7458 + }, + { + "epoch": 1.05, + "learning_rate": 4.8254959760434216e-05, + "loss": 0.0888, + "step": 7460 + }, + { + "epoch": 1.05, + "learning_rate": 4.825449185850646e-05, + "loss": 0.0717, + "step": 7462 + }, + { + "epoch": 1.05, + "learning_rate": 4.82540239565787e-05, + "loss": 0.0695, + "step": 7464 + }, + { + "epoch": 1.05, + "learning_rate": 4.825355605465095e-05, + "loss": 0.0626, + "step": 7466 + }, + { + "epoch": 1.05, + "learning_rate": 4.825308815272319e-05, + "loss": 0.0669, + "step": 7468 + }, + { + "epoch": 1.05, + "learning_rate": 4.825262025079544e-05, + "loss": 0.0651, + "step": 7470 + }, + { + "epoch": 1.05, + "learning_rate": 4.825215234886768e-05, + "loss": 0.0729, + "step": 7472 + }, + { + "epoch": 1.05, + "learning_rate": 4.825168444693992e-05, + "loss": 0.0771, + "step": 7474 + }, + { + "epoch": 1.05, + "learning_rate": 4.825121654501217e-05, + "loss": 0.06, + "step": 7476 + }, + { + "epoch": 1.05, + "learning_rate": 4.825074864308441e-05, + "loss": 0.09, + "step": 7478 + }, + { + "epoch": 1.05, + "learning_rate": 4.8250280741156654e-05, + "loss": 0.0725, + "step": 7480 + }, + { + "epoch": 1.05, + "learning_rate": 4.82498128392289e-05, + "loss": 0.0557, + "step": 7482 + }, + { + "epoch": 1.05, + "learning_rate": 4.8249344937301146e-05, + "loss": 0.0807, + "step": 7484 + }, + { + "epoch": 1.05, + "learning_rate": 4.8248877035373385e-05, + "loss": 0.0546, + "step": 7486 + }, + { + "epoch": 1.05, + "learning_rate": 4.824840913344563e-05, + "loss": 0.0702, + "step": 7488 + }, + { + "epoch": 1.05, + "learning_rate": 4.824794123151787e-05, + "loss": 0.056, + "step": 7490 + }, + { + "epoch": 1.05, + "learning_rate": 4.824747332959012e-05, + "loss": 0.0478, + "step": 7492 + }, + { + "epoch": 1.05, + "learning_rate": 4.824700542766236e-05, + "loss": 0.1009, + "step": 7494 + }, + { + "epoch": 1.05, + "learning_rate": 4.824653752573461e-05, + "loss": 0.0793, + "step": 7496 + }, + { + "epoch": 1.05, + "learning_rate": 4.824606962380685e-05, + "loss": 0.0483, + "step": 7498 + }, + { + "epoch": 1.05, + "learning_rate": 4.82456017218791e-05, + "loss": 0.0788, + "step": 7500 + }, + { + "epoch": 1.05, + "learning_rate": 4.824513381995134e-05, + "loss": 0.057, + "step": 7502 + }, + { + "epoch": 1.05, + "learning_rate": 4.8244665918023585e-05, + "loss": 0.0492, + "step": 7504 + }, + { + "epoch": 1.05, + "learning_rate": 4.8244198016095824e-05, + "loss": 0.0532, + "step": 7506 + }, + { + "epoch": 1.05, + "learning_rate": 4.824373011416807e-05, + "loss": 0.0536, + "step": 7508 + }, + { + "epoch": 1.05, + "learning_rate": 4.8243262212240316e-05, + "loss": 0.0543, + "step": 7510 + }, + { + "epoch": 1.05, + "learning_rate": 4.824279431031256e-05, + "loss": 0.062, + "step": 7512 + }, + { + "epoch": 1.05, + "learning_rate": 4.82423264083848e-05, + "loss": 0.0596, + "step": 7514 + }, + { + "epoch": 1.06, + "learning_rate": 4.824185850645705e-05, + "loss": 0.062, + "step": 7516 + }, + { + "epoch": 1.06, + "learning_rate": 4.824139060452929e-05, + "loss": 0.0675, + "step": 7518 + }, + { + "epoch": 1.06, + "learning_rate": 4.824092270260154e-05, + "loss": 0.086, + "step": 7520 + }, + { + "epoch": 1.06, + "learning_rate": 4.824045480067378e-05, + "loss": 0.066, + "step": 7522 + }, + { + "epoch": 1.06, + "learning_rate": 4.8239986898746024e-05, + "loss": 0.0588, + "step": 7524 + }, + { + "epoch": 1.06, + "learning_rate": 4.823951899681827e-05, + "loss": 0.0607, + "step": 7526 + }, + { + "epoch": 1.06, + "learning_rate": 4.8239051094890516e-05, + "loss": 0.0704, + "step": 7528 + }, + { + "epoch": 1.06, + "learning_rate": 4.8238583192962755e-05, + "loss": 0.0591, + "step": 7530 + }, + { + "epoch": 1.06, + "learning_rate": 4.8238115291035e-05, + "loss": 0.06, + "step": 7532 + }, + { + "epoch": 1.06, + "learning_rate": 4.8237647389107247e-05, + "loss": 0.0442, + "step": 7534 + }, + { + "epoch": 1.06, + "learning_rate": 4.823717948717949e-05, + "loss": 0.0712, + "step": 7536 + }, + { + "epoch": 1.06, + "learning_rate": 4.823671158525173e-05, + "loss": 0.0796, + "step": 7538 + }, + { + "epoch": 1.06, + "learning_rate": 4.823624368332398e-05, + "loss": 0.069, + "step": 7540 + }, + { + "epoch": 1.06, + "learning_rate": 4.8235775781396217e-05, + "loss": 0.0548, + "step": 7542 + }, + { + "epoch": 1.06, + "learning_rate": 4.823530787946847e-05, + "loss": 0.0547, + "step": 7544 + }, + { + "epoch": 1.06, + "learning_rate": 4.823483997754071e-05, + "loss": 0.0631, + "step": 7546 + }, + { + "epoch": 1.06, + "learning_rate": 4.8234372075612954e-05, + "loss": 0.0475, + "step": 7548 + }, + { + "epoch": 1.06, + "learning_rate": 4.8233904173685193e-05, + "loss": 0.0955, + "step": 7550 + }, + { + "epoch": 1.06, + "learning_rate": 4.8233436271757446e-05, + "loss": 0.0594, + "step": 7552 + }, + { + "epoch": 1.06, + "learning_rate": 4.8232968369829685e-05, + "loss": 0.0649, + "step": 7554 + }, + { + "epoch": 1.06, + "learning_rate": 4.823250046790193e-05, + "loss": 0.0786, + "step": 7556 + }, + { + "epoch": 1.06, + "learning_rate": 4.823203256597417e-05, + "loss": 0.0765, + "step": 7558 + }, + { + "epoch": 1.06, + "learning_rate": 4.8231564664046416e-05, + "loss": 0.0757, + "step": 7560 + }, + { + "epoch": 1.06, + "learning_rate": 4.823109676211866e-05, + "loss": 0.0754, + "step": 7562 + }, + { + "epoch": 1.06, + "learning_rate": 4.823062886019091e-05, + "loss": 0.0841, + "step": 7564 + }, + { + "epoch": 1.06, + "learning_rate": 4.823016095826315e-05, + "loss": 0.0519, + "step": 7566 + }, + { + "epoch": 1.06, + "learning_rate": 4.822969305633539e-05, + "loss": 0.0816, + "step": 7568 + }, + { + "epoch": 1.06, + "learning_rate": 4.822922515440764e-05, + "loss": 0.0679, + "step": 7570 + }, + { + "epoch": 1.06, + "learning_rate": 4.8228757252479885e-05, + "loss": 0.0718, + "step": 7572 + }, + { + "epoch": 1.06, + "learning_rate": 4.8228289350552124e-05, + "loss": 0.0536, + "step": 7574 + }, + { + "epoch": 1.06, + "learning_rate": 4.822782144862437e-05, + "loss": 0.0623, + "step": 7576 + }, + { + "epoch": 1.06, + "learning_rate": 4.8227353546696616e-05, + "loss": 0.0431, + "step": 7578 + }, + { + "epoch": 1.06, + "learning_rate": 4.822688564476886e-05, + "loss": 0.0596, + "step": 7580 + }, + { + "epoch": 1.06, + "learning_rate": 4.82264177428411e-05, + "loss": 0.0538, + "step": 7582 + }, + { + "epoch": 1.06, + "learning_rate": 4.822594984091335e-05, + "loss": 0.0893, + "step": 7584 + }, + { + "epoch": 1.06, + "learning_rate": 4.822548193898559e-05, + "loss": 0.0555, + "step": 7586 + }, + { + "epoch": 1.07, + "learning_rate": 4.822501403705784e-05, + "loss": 0.0734, + "step": 7588 + }, + { + "epoch": 1.07, + "learning_rate": 4.822454613513008e-05, + "loss": 0.0567, + "step": 7590 + }, + { + "epoch": 1.07, + "learning_rate": 4.8224078233202324e-05, + "loss": 0.064, + "step": 7592 + }, + { + "epoch": 1.07, + "learning_rate": 4.822361033127456e-05, + "loss": 0.065, + "step": 7594 + }, + { + "epoch": 1.07, + "learning_rate": 4.8223142429346816e-05, + "loss": 0.0534, + "step": 7596 + }, + { + "epoch": 1.07, + "learning_rate": 4.8222674527419055e-05, + "loss": 0.0747, + "step": 7598 + }, + { + "epoch": 1.07, + "learning_rate": 4.82222066254913e-05, + "loss": 0.0647, + "step": 7600 + }, + { + "epoch": 1.07, + "learning_rate": 4.822173872356354e-05, + "loss": 0.0856, + "step": 7602 + }, + { + "epoch": 1.07, + "learning_rate": 4.8221270821635786e-05, + "loss": 0.0707, + "step": 7604 + }, + { + "epoch": 1.07, + "learning_rate": 4.822080291970803e-05, + "loss": 0.0771, + "step": 7606 + }, + { + "epoch": 1.07, + "learning_rate": 4.822033501778028e-05, + "loss": 0.0515, + "step": 7608 + }, + { + "epoch": 1.07, + "learning_rate": 4.821986711585252e-05, + "loss": 0.0551, + "step": 7610 + }, + { + "epoch": 1.07, + "learning_rate": 4.821939921392476e-05, + "loss": 0.0631, + "step": 7612 + }, + { + "epoch": 1.07, + "learning_rate": 4.821893131199701e-05, + "loss": 0.0606, + "step": 7614 + }, + { + "epoch": 1.07, + "learning_rate": 4.8218463410069254e-05, + "loss": 0.0536, + "step": 7616 + }, + { + "epoch": 1.07, + "learning_rate": 4.8217995508141493e-05, + "loss": 0.0663, + "step": 7618 + }, + { + "epoch": 1.07, + "learning_rate": 4.821752760621374e-05, + "loss": 0.0762, + "step": 7620 + }, + { + "epoch": 1.07, + "learning_rate": 4.8217059704285985e-05, + "loss": 0.0706, + "step": 7622 + }, + { + "epoch": 1.07, + "learning_rate": 4.821659180235823e-05, + "loss": 0.0515, + "step": 7624 + }, + { + "epoch": 1.07, + "learning_rate": 4.821612390043047e-05, + "loss": 0.0671, + "step": 7626 + }, + { + "epoch": 1.07, + "learning_rate": 4.8215655998502716e-05, + "loss": 0.0682, + "step": 7628 + }, + { + "epoch": 1.07, + "learning_rate": 4.821518809657496e-05, + "loss": 0.0613, + "step": 7630 + }, + { + "epoch": 1.07, + "learning_rate": 4.821472019464721e-05, + "loss": 0.0691, + "step": 7632 + }, + { + "epoch": 1.07, + "learning_rate": 4.821425229271945e-05, + "loss": 0.0641, + "step": 7634 + }, + { + "epoch": 1.07, + "learning_rate": 4.821378439079169e-05, + "loss": 0.0649, + "step": 7636 + }, + { + "epoch": 1.07, + "learning_rate": 4.821331648886393e-05, + "loss": 0.0827, + "step": 7638 + }, + { + "epoch": 1.07, + "learning_rate": 4.8212848586936185e-05, + "loss": 0.0626, + "step": 7640 + }, + { + "epoch": 1.07, + "learning_rate": 4.8212380685008424e-05, + "loss": 0.0616, + "step": 7642 + }, + { + "epoch": 1.07, + "learning_rate": 4.821191278308067e-05, + "loss": 0.0842, + "step": 7644 + }, + { + "epoch": 1.07, + "learning_rate": 4.821144488115291e-05, + "loss": 0.0791, + "step": 7646 + }, + { + "epoch": 1.07, + "learning_rate": 4.821097697922516e-05, + "loss": 0.0669, + "step": 7648 + }, + { + "epoch": 1.07, + "learning_rate": 4.82105090772974e-05, + "loss": 0.0816, + "step": 7650 + }, + { + "epoch": 1.07, + "learning_rate": 4.821004117536965e-05, + "loss": 0.0793, + "step": 7652 + }, + { + "epoch": 1.07, + "learning_rate": 4.8209573273441886e-05, + "loss": 0.0845, + "step": 7654 + }, + { + "epoch": 1.07, + "learning_rate": 4.820910537151413e-05, + "loss": 0.0449, + "step": 7656 + }, + { + "epoch": 1.07, + "learning_rate": 4.820863746958638e-05, + "loss": 0.0511, + "step": 7658 + }, + { + "epoch": 1.08, + "learning_rate": 4.8208169567658624e-05, + "loss": 0.0566, + "step": 7660 + }, + { + "epoch": 1.08, + "learning_rate": 4.820770166573086e-05, + "loss": 0.0652, + "step": 7662 + }, + { + "epoch": 1.08, + "learning_rate": 4.820723376380311e-05, + "loss": 0.0824, + "step": 7664 + }, + { + "epoch": 1.08, + "learning_rate": 4.8206765861875355e-05, + "loss": 0.0643, + "step": 7666 + }, + { + "epoch": 1.08, + "learning_rate": 4.82062979599476e-05, + "loss": 0.0708, + "step": 7668 + }, + { + "epoch": 1.08, + "learning_rate": 4.820583005801984e-05, + "loss": 0.0625, + "step": 7670 + }, + { + "epoch": 1.08, + "learning_rate": 4.8205362156092086e-05, + "loss": 0.079, + "step": 7672 + }, + { + "epoch": 1.08, + "learning_rate": 4.820489425416433e-05, + "loss": 0.082, + "step": 7674 + }, + { + "epoch": 1.08, + "learning_rate": 4.820442635223658e-05, + "loss": 0.05, + "step": 7676 + }, + { + "epoch": 1.08, + "learning_rate": 4.820395845030882e-05, + "loss": 0.0489, + "step": 7678 + }, + { + "epoch": 1.08, + "learning_rate": 4.820349054838106e-05, + "loss": 0.0706, + "step": 7680 + }, + { + "epoch": 1.08, + "learning_rate": 4.820302264645331e-05, + "loss": 0.0685, + "step": 7682 + }, + { + "epoch": 1.08, + "learning_rate": 4.8202554744525554e-05, + "loss": 0.0767, + "step": 7684 + }, + { + "epoch": 1.08, + "learning_rate": 4.8202086842597794e-05, + "loss": 0.0713, + "step": 7686 + }, + { + "epoch": 1.08, + "learning_rate": 4.820161894067004e-05, + "loss": 0.0667, + "step": 7688 + }, + { + "epoch": 1.08, + "learning_rate": 4.820115103874228e-05, + "loss": 0.0843, + "step": 7690 + }, + { + "epoch": 1.08, + "learning_rate": 4.820068313681453e-05, + "loss": 0.0671, + "step": 7692 + }, + { + "epoch": 1.08, + "learning_rate": 4.820021523488677e-05, + "loss": 0.0672, + "step": 7694 + }, + { + "epoch": 1.08, + "learning_rate": 4.8199747332959016e-05, + "loss": 0.0553, + "step": 7696 + }, + { + "epoch": 1.08, + "learning_rate": 4.8199279431031255e-05, + "loss": 0.0708, + "step": 7698 + }, + { + "epoch": 1.08, + "learning_rate": 4.81988115291035e-05, + "loss": 0.0711, + "step": 7700 + }, + { + "epoch": 1.08, + "learning_rate": 4.819834362717575e-05, + "loss": 0.0792, + "step": 7702 + }, + { + "epoch": 1.08, + "learning_rate": 4.819787572524799e-05, + "loss": 0.0559, + "step": 7704 + }, + { + "epoch": 1.08, + "learning_rate": 4.819740782332023e-05, + "loss": 0.0718, + "step": 7706 + }, + { + "epoch": 1.08, + "learning_rate": 4.819693992139248e-05, + "loss": 0.0542, + "step": 7708 + }, + { + "epoch": 1.08, + "learning_rate": 4.8196472019464724e-05, + "loss": 0.0631, + "step": 7710 + }, + { + "epoch": 1.08, + "learning_rate": 4.819600411753697e-05, + "loss": 0.0861, + "step": 7712 + }, + { + "epoch": 1.08, + "learning_rate": 4.819553621560921e-05, + "loss": 0.0579, + "step": 7714 + }, + { + "epoch": 1.08, + "learning_rate": 4.8195068313681455e-05, + "loss": 0.0659, + "step": 7716 + }, + { + "epoch": 1.08, + "learning_rate": 4.81946004117537e-05, + "loss": 0.08, + "step": 7718 + }, + { + "epoch": 1.08, + "learning_rate": 4.819413250982595e-05, + "loss": 0.0688, + "step": 7720 + }, + { + "epoch": 1.08, + "learning_rate": 4.8193664607898186e-05, + "loss": 0.0613, + "step": 7722 + }, + { + "epoch": 1.08, + "learning_rate": 4.819319670597043e-05, + "loss": 0.0559, + "step": 7724 + }, + { + "epoch": 1.08, + "learning_rate": 4.819272880404268e-05, + "loss": 0.0671, + "step": 7726 + }, + { + "epoch": 1.08, + "learning_rate": 4.819226090211492e-05, + "loss": 0.0881, + "step": 7728 + }, + { + "epoch": 1.09, + "learning_rate": 4.819179300018716e-05, + "loss": 0.0615, + "step": 7730 + }, + { + "epoch": 1.09, + "learning_rate": 4.81913250982594e-05, + "loss": 0.0512, + "step": 7732 + }, + { + "epoch": 1.09, + "learning_rate": 4.819085719633165e-05, + "loss": 0.0774, + "step": 7734 + }, + { + "epoch": 1.09, + "learning_rate": 4.8190389294403894e-05, + "loss": 0.0809, + "step": 7736 + }, + { + "epoch": 1.09, + "learning_rate": 4.818992139247614e-05, + "loss": 0.0522, + "step": 7738 + }, + { + "epoch": 1.09, + "learning_rate": 4.818945349054838e-05, + "loss": 0.0971, + "step": 7740 + }, + { + "epoch": 1.09, + "learning_rate": 4.8188985588620625e-05, + "loss": 0.0542, + "step": 7742 + }, + { + "epoch": 1.09, + "learning_rate": 4.818851768669287e-05, + "loss": 0.0455, + "step": 7744 + }, + { + "epoch": 1.09, + "learning_rate": 4.818804978476512e-05, + "loss": 0.0589, + "step": 7746 + }, + { + "epoch": 1.09, + "learning_rate": 4.8187581882837356e-05, + "loss": 0.0563, + "step": 7748 + }, + { + "epoch": 1.09, + "learning_rate": 4.81871139809096e-05, + "loss": 0.0941, + "step": 7750 + }, + { + "epoch": 1.09, + "learning_rate": 4.818664607898185e-05, + "loss": 0.0742, + "step": 7752 + }, + { + "epoch": 1.09, + "learning_rate": 4.8186178177054094e-05, + "loss": 0.0508, + "step": 7754 + }, + { + "epoch": 1.09, + "learning_rate": 4.818571027512633e-05, + "loss": 0.0669, + "step": 7756 + }, + { + "epoch": 1.09, + "learning_rate": 4.818524237319858e-05, + "loss": 0.0881, + "step": 7758 + }, + { + "epoch": 1.09, + "learning_rate": 4.8184774471270824e-05, + "loss": 0.0674, + "step": 7760 + }, + { + "epoch": 1.09, + "learning_rate": 4.818430656934307e-05, + "loss": 0.0609, + "step": 7762 + }, + { + "epoch": 1.09, + "learning_rate": 4.818383866741531e-05, + "loss": 0.0733, + "step": 7764 + }, + { + "epoch": 1.09, + "learning_rate": 4.8183370765487555e-05, + "loss": 0.0565, + "step": 7766 + }, + { + "epoch": 1.09, + "learning_rate": 4.8182902863559795e-05, + "loss": 0.0612, + "step": 7768 + }, + { + "epoch": 1.09, + "learning_rate": 4.818243496163205e-05, + "loss": 0.0705, + "step": 7770 + }, + { + "epoch": 1.09, + "learning_rate": 4.8181967059704286e-05, + "loss": 0.0761, + "step": 7772 + }, + { + "epoch": 1.09, + "learning_rate": 4.818149915777653e-05, + "loss": 0.0654, + "step": 7774 + }, + { + "epoch": 1.09, + "learning_rate": 4.818103125584877e-05, + "loss": 0.0732, + "step": 7776 + }, + { + "epoch": 1.09, + "learning_rate": 4.8180563353921024e-05, + "loss": 0.0572, + "step": 7778 + }, + { + "epoch": 1.09, + "learning_rate": 4.818009545199326e-05, + "loss": 0.0724, + "step": 7780 + }, + { + "epoch": 1.09, + "learning_rate": 4.817962755006551e-05, + "loss": 0.0695, + "step": 7782 + }, + { + "epoch": 1.09, + "learning_rate": 4.817915964813775e-05, + "loss": 0.0701, + "step": 7784 + }, + { + "epoch": 1.09, + "learning_rate": 4.8178691746209994e-05, + "loss": 0.0616, + "step": 7786 + }, + { + "epoch": 1.09, + "learning_rate": 4.817822384428224e-05, + "loss": 0.07, + "step": 7788 + }, + { + "epoch": 1.09, + "learning_rate": 4.8177755942354486e-05, + "loss": 0.057, + "step": 7790 + }, + { + "epoch": 1.09, + "learning_rate": 4.8177288040426725e-05, + "loss": 0.0552, + "step": 7792 + }, + { + "epoch": 1.09, + "learning_rate": 4.817682013849897e-05, + "loss": 0.0599, + "step": 7794 + }, + { + "epoch": 1.09, + "learning_rate": 4.817635223657122e-05, + "loss": 0.0666, + "step": 7796 + }, + { + "epoch": 1.09, + "learning_rate": 4.817588433464346e-05, + "loss": 0.0762, + "step": 7798 + }, + { + "epoch": 1.09, + "learning_rate": 4.81754164327157e-05, + "loss": 0.078, + "step": 7800 + }, + { + "epoch": 1.1, + "learning_rate": 4.817494853078795e-05, + "loss": 0.0919, + "step": 7802 + }, + { + "epoch": 1.1, + "learning_rate": 4.8174480628860194e-05, + "loss": 0.0589, + "step": 7804 + }, + { + "epoch": 1.1, + "learning_rate": 4.817401272693244e-05, + "loss": 0.0794, + "step": 7806 + }, + { + "epoch": 1.1, + "learning_rate": 4.817354482500468e-05, + "loss": 0.0589, + "step": 7808 + }, + { + "epoch": 1.1, + "learning_rate": 4.8173076923076925e-05, + "loss": 0.06, + "step": 7810 + }, + { + "epoch": 1.1, + "learning_rate": 4.817260902114917e-05, + "loss": 0.0764, + "step": 7812 + }, + { + "epoch": 1.1, + "learning_rate": 4.817214111922142e-05, + "loss": 0.0708, + "step": 7814 + }, + { + "epoch": 1.1, + "learning_rate": 4.8171673217293656e-05, + "loss": 0.0765, + "step": 7816 + }, + { + "epoch": 1.1, + "learning_rate": 4.81712053153659e-05, + "loss": 0.0632, + "step": 7818 + }, + { + "epoch": 1.1, + "learning_rate": 4.817073741343814e-05, + "loss": 0.0682, + "step": 7820 + }, + { + "epoch": 1.1, + "learning_rate": 4.8170269511510394e-05, + "loss": 0.0869, + "step": 7822 + }, + { + "epoch": 1.1, + "learning_rate": 4.816980160958263e-05, + "loss": 0.0638, + "step": 7824 + }, + { + "epoch": 1.1, + "learning_rate": 4.816933370765488e-05, + "loss": 0.0583, + "step": 7826 + }, + { + "epoch": 1.1, + "learning_rate": 4.816886580572712e-05, + "loss": 0.0606, + "step": 7828 + }, + { + "epoch": 1.1, + "learning_rate": 4.816839790379937e-05, + "loss": 0.068, + "step": 7830 + }, + { + "epoch": 1.1, + "learning_rate": 4.816793000187161e-05, + "loss": 0.0684, + "step": 7832 + }, + { + "epoch": 1.1, + "learning_rate": 4.8167462099943855e-05, + "loss": 0.07, + "step": 7834 + }, + { + "epoch": 1.1, + "learning_rate": 4.8166994198016095e-05, + "loss": 0.0608, + "step": 7836 + }, + { + "epoch": 1.1, + "learning_rate": 4.816652629608834e-05, + "loss": 0.0557, + "step": 7838 + }, + { + "epoch": 1.1, + "learning_rate": 4.8166058394160586e-05, + "loss": 0.071, + "step": 7840 + }, + { + "epoch": 1.1, + "learning_rate": 4.816559049223283e-05, + "loss": 0.0844, + "step": 7842 + }, + { + "epoch": 1.1, + "learning_rate": 4.816512259030507e-05, + "loss": 0.0665, + "step": 7844 + }, + { + "epoch": 1.1, + "learning_rate": 4.816465468837732e-05, + "loss": 0.0669, + "step": 7846 + }, + { + "epoch": 1.1, + "learning_rate": 4.816418678644956e-05, + "loss": 0.0813, + "step": 7848 + }, + { + "epoch": 1.1, + "learning_rate": 4.816371888452181e-05, + "loss": 0.06, + "step": 7850 + }, + { + "epoch": 1.1, + "learning_rate": 4.816325098259405e-05, + "loss": 0.0722, + "step": 7852 + }, + { + "epoch": 1.1, + "learning_rate": 4.8162783080666294e-05, + "loss": 0.062, + "step": 7854 + }, + { + "epoch": 1.1, + "learning_rate": 4.816231517873854e-05, + "loss": 0.0721, + "step": 7856 + }, + { + "epoch": 1.1, + "learning_rate": 4.8161847276810786e-05, + "loss": 0.0635, + "step": 7858 + }, + { + "epoch": 1.1, + "learning_rate": 4.8161379374883025e-05, + "loss": 0.0516, + "step": 7860 + }, + { + "epoch": 1.1, + "learning_rate": 4.816091147295527e-05, + "loss": 0.0586, + "step": 7862 + }, + { + "epoch": 1.1, + "learning_rate": 4.816044357102752e-05, + "loss": 0.0866, + "step": 7864 + }, + { + "epoch": 1.1, + "learning_rate": 4.815997566909976e-05, + "loss": 0.1013, + "step": 7866 + }, + { + "epoch": 1.1, + "learning_rate": 4.8159507767172e-05, + "loss": 0.0496, + "step": 7868 + }, + { + "epoch": 1.1, + "learning_rate": 4.815903986524425e-05, + "loss": 0.056, + "step": 7870 + }, + { + "epoch": 1.1, + "learning_rate": 4.815857196331649e-05, + "loss": 0.0675, + "step": 7872 + }, + { + "epoch": 1.11, + "learning_rate": 4.815810406138874e-05, + "loss": 0.0529, + "step": 7874 + }, + { + "epoch": 1.11, + "learning_rate": 4.815763615946098e-05, + "loss": 0.0555, + "step": 7876 + }, + { + "epoch": 1.11, + "learning_rate": 4.8157168257533225e-05, + "loss": 0.054, + "step": 7878 + }, + { + "epoch": 1.11, + "learning_rate": 4.8156700355605464e-05, + "loss": 0.0417, + "step": 7880 + }, + { + "epoch": 1.11, + "learning_rate": 4.815623245367771e-05, + "loss": 0.0638, + "step": 7882 + }, + { + "epoch": 1.11, + "learning_rate": 4.8155764551749956e-05, + "loss": 0.0669, + "step": 7884 + }, + { + "epoch": 1.11, + "learning_rate": 4.81552966498222e-05, + "loss": 0.0656, + "step": 7886 + }, + { + "epoch": 1.11, + "learning_rate": 4.815482874789444e-05, + "loss": 0.0671, + "step": 7888 + }, + { + "epoch": 1.11, + "learning_rate": 4.815436084596669e-05, + "loss": 0.0731, + "step": 7890 + }, + { + "epoch": 1.11, + "learning_rate": 4.815389294403893e-05, + "loss": 0.0734, + "step": 7892 + }, + { + "epoch": 1.11, + "learning_rate": 4.815342504211118e-05, + "loss": 0.0656, + "step": 7894 + }, + { + "epoch": 1.11, + "learning_rate": 4.815295714018342e-05, + "loss": 0.0782, + "step": 7896 + }, + { + "epoch": 1.11, + "learning_rate": 4.8152489238255664e-05, + "loss": 0.0714, + "step": 7898 + }, + { + "epoch": 1.11, + "learning_rate": 4.815202133632791e-05, + "loss": 0.0672, + "step": 7900 + }, + { + "epoch": 1.11, + "learning_rate": 4.8151553434400155e-05, + "loss": 0.0649, + "step": 7902 + }, + { + "epoch": 1.11, + "learning_rate": 4.8151085532472395e-05, + "loss": 0.0899, + "step": 7904 + }, + { + "epoch": 1.11, + "learning_rate": 4.815061763054464e-05, + "loss": 0.0662, + "step": 7906 + }, + { + "epoch": 1.11, + "learning_rate": 4.8150149728616886e-05, + "loss": 0.0721, + "step": 7908 + }, + { + "epoch": 1.11, + "learning_rate": 4.814968182668913e-05, + "loss": 0.049, + "step": 7910 + }, + { + "epoch": 1.11, + "learning_rate": 4.814921392476137e-05, + "loss": 0.0706, + "step": 7912 + }, + { + "epoch": 1.11, + "learning_rate": 4.814874602283362e-05, + "loss": 0.0792, + "step": 7914 + }, + { + "epoch": 1.11, + "learning_rate": 4.8148278120905857e-05, + "loss": 0.0566, + "step": 7916 + }, + { + "epoch": 1.11, + "learning_rate": 4.814781021897811e-05, + "loss": 0.0589, + "step": 7918 + }, + { + "epoch": 1.11, + "learning_rate": 4.814734231705035e-05, + "loss": 0.064, + "step": 7920 + }, + { + "epoch": 1.11, + "learning_rate": 4.8146874415122594e-05, + "loss": 0.0704, + "step": 7922 + }, + { + "epoch": 1.11, + "learning_rate": 4.8146406513194833e-05, + "loss": 0.0591, + "step": 7924 + }, + { + "epoch": 1.11, + "learning_rate": 4.8145938611267086e-05, + "loss": 0.076, + "step": 7926 + }, + { + "epoch": 1.11, + "learning_rate": 4.8145470709339325e-05, + "loss": 0.0813, + "step": 7928 + }, + { + "epoch": 1.11, + "learning_rate": 4.814500280741157e-05, + "loss": 0.0558, + "step": 7930 + }, + { + "epoch": 1.11, + "learning_rate": 4.814453490548381e-05, + "loss": 0.0759, + "step": 7932 + }, + { + "epoch": 1.11, + "learning_rate": 4.8144067003556056e-05, + "loss": 0.0678, + "step": 7934 + }, + { + "epoch": 1.11, + "learning_rate": 4.81435991016283e-05, + "loss": 0.0739, + "step": 7936 + }, + { + "epoch": 1.11, + "learning_rate": 4.814313119970055e-05, + "loss": 0.0725, + "step": 7938 + }, + { + "epoch": 1.11, + "learning_rate": 4.814266329777279e-05, + "loss": 0.0775, + "step": 7940 + }, + { + "epoch": 1.11, + "learning_rate": 4.814219539584503e-05, + "loss": 0.0732, + "step": 7942 + }, + { + "epoch": 1.12, + "learning_rate": 4.814172749391728e-05, + "loss": 0.0624, + "step": 7944 + }, + { + "epoch": 1.12, + "learning_rate": 4.8141259591989525e-05, + "loss": 0.0711, + "step": 7946 + }, + { + "epoch": 1.12, + "learning_rate": 4.8140791690061764e-05, + "loss": 0.0646, + "step": 7948 + }, + { + "epoch": 1.12, + "learning_rate": 4.814032378813401e-05, + "loss": 0.0741, + "step": 7950 + }, + { + "epoch": 1.12, + "learning_rate": 4.8139855886206256e-05, + "loss": 0.059, + "step": 7952 + }, + { + "epoch": 1.12, + "learning_rate": 4.81393879842785e-05, + "loss": 0.0857, + "step": 7954 + }, + { + "epoch": 1.12, + "learning_rate": 4.813892008235074e-05, + "loss": 0.0581, + "step": 7956 + }, + { + "epoch": 1.12, + "learning_rate": 4.813845218042299e-05, + "loss": 0.0606, + "step": 7958 + }, + { + "epoch": 1.12, + "learning_rate": 4.813798427849523e-05, + "loss": 0.0702, + "step": 7960 + }, + { + "epoch": 1.12, + "learning_rate": 4.813751637656748e-05, + "loss": 0.0659, + "step": 7962 + }, + { + "epoch": 1.12, + "learning_rate": 4.813704847463972e-05, + "loss": 0.0766, + "step": 7964 + }, + { + "epoch": 1.12, + "learning_rate": 4.8136580572711964e-05, + "loss": 0.0675, + "step": 7966 + }, + { + "epoch": 1.12, + "learning_rate": 4.81361126707842e-05, + "loss": 0.0975, + "step": 7968 + }, + { + "epoch": 1.12, + "learning_rate": 4.8135644768856455e-05, + "loss": 0.0676, + "step": 7970 + }, + { + "epoch": 1.12, + "learning_rate": 4.8135176866928695e-05, + "loss": 0.0702, + "step": 7972 + }, + { + "epoch": 1.12, + "learning_rate": 4.813470896500094e-05, + "loss": 0.0593, + "step": 7974 + }, + { + "epoch": 1.12, + "learning_rate": 4.813424106307318e-05, + "loss": 0.0601, + "step": 7976 + }, + { + "epoch": 1.12, + "learning_rate": 4.8133773161145426e-05, + "loss": 0.067, + "step": 7978 + }, + { + "epoch": 1.12, + "learning_rate": 4.813330525921767e-05, + "loss": 0.0551, + "step": 7980 + }, + { + "epoch": 1.12, + "learning_rate": 4.813283735728991e-05, + "loss": 0.0796, + "step": 7982 + }, + { + "epoch": 1.12, + "learning_rate": 4.8132369455362157e-05, + "loss": 0.082, + "step": 7984 + }, + { + "epoch": 1.12, + "learning_rate": 4.81319015534344e-05, + "loss": 0.066, + "step": 7986 + }, + { + "epoch": 1.12, + "learning_rate": 4.813143365150665e-05, + "loss": 0.0799, + "step": 7988 + }, + { + "epoch": 1.12, + "learning_rate": 4.813096574957889e-05, + "loss": 0.071, + "step": 7990 + }, + { + "epoch": 1.12, + "learning_rate": 4.8130497847651133e-05, + "loss": 0.0782, + "step": 7992 + }, + { + "epoch": 1.12, + "learning_rate": 4.813002994572338e-05, + "loss": 0.0746, + "step": 7994 + }, + { + "epoch": 1.12, + "learning_rate": 4.8129562043795625e-05, + "loss": 0.088, + "step": 7996 + }, + { + "epoch": 1.12, + "learning_rate": 4.8129094141867864e-05, + "loss": 0.0548, + "step": 7998 + }, + { + "epoch": 1.12, + "learning_rate": 4.812862623994011e-05, + "loss": 0.0478, + "step": 8000 + }, + { + "epoch": 1.12, + "eval_gen_len": 30.9483, + "eval_loss": 1.0468480587005615, + "eval_meteor": 0.0439, + "eval_runtime": 14.548, + "eval_samples_per_second": 3.987, + "eval_steps_per_second": 0.55, + "step": 8000 + }, + { + "epoch": 1.12, + "learning_rate": 4.812815833801235e-05, + "loss": 0.05, + "step": 8002 + }, + { + "epoch": 1.12, + "learning_rate": 4.81276904360846e-05, + "loss": 0.0737, + "step": 8004 + }, + { + "epoch": 1.12, + "learning_rate": 4.812722253415684e-05, + "loss": 0.0783, + "step": 8006 + }, + { + "epoch": 1.12, + "learning_rate": 4.812675463222909e-05, + "loss": 0.0702, + "step": 8008 + }, + { + "epoch": 1.12, + "learning_rate": 4.8126286730301326e-05, + "loss": 0.0799, + "step": 8010 + }, + { + "epoch": 1.12, + "learning_rate": 4.812581882837357e-05, + "loss": 0.0586, + "step": 8012 + }, + { + "epoch": 1.12, + "learning_rate": 4.812535092644582e-05, + "loss": 0.0582, + "step": 8014 + }, + { + "epoch": 1.13, + "learning_rate": 4.8124883024518064e-05, + "loss": 0.0588, + "step": 8016 + }, + { + "epoch": 1.13, + "learning_rate": 4.81244151225903e-05, + "loss": 0.0802, + "step": 8018 + }, + { + "epoch": 1.13, + "learning_rate": 4.812394722066255e-05, + "loss": 0.0737, + "step": 8020 + }, + { + "epoch": 1.13, + "learning_rate": 4.8123479318734795e-05, + "loss": 0.0783, + "step": 8022 + }, + { + "epoch": 1.13, + "learning_rate": 4.812301141680704e-05, + "loss": 0.0774, + "step": 8024 + }, + { + "epoch": 1.13, + "learning_rate": 4.812254351487928e-05, + "loss": 0.0643, + "step": 8026 + }, + { + "epoch": 1.13, + "learning_rate": 4.8122075612951526e-05, + "loss": 0.0802, + "step": 8028 + }, + { + "epoch": 1.13, + "learning_rate": 4.812160771102377e-05, + "loss": 0.0561, + "step": 8030 + }, + { + "epoch": 1.13, + "learning_rate": 4.812113980909602e-05, + "loss": 0.0615, + "step": 8032 + }, + { + "epoch": 1.13, + "learning_rate": 4.812067190716826e-05, + "loss": 0.0594, + "step": 8034 + }, + { + "epoch": 1.13, + "learning_rate": 4.81202040052405e-05, + "loss": 0.0687, + "step": 8036 + }, + { + "epoch": 1.13, + "learning_rate": 4.811973610331275e-05, + "loss": 0.0634, + "step": 8038 + }, + { + "epoch": 1.13, + "learning_rate": 4.8119268201384995e-05, + "loss": 0.0599, + "step": 8040 + }, + { + "epoch": 1.13, + "learning_rate": 4.8118800299457234e-05, + "loss": 0.0585, + "step": 8042 + }, + { + "epoch": 1.13, + "learning_rate": 4.811833239752948e-05, + "loss": 0.0434, + "step": 8044 + }, + { + "epoch": 1.13, + "learning_rate": 4.811786449560172e-05, + "loss": 0.0796, + "step": 8046 + }, + { + "epoch": 1.13, + "learning_rate": 4.811739659367397e-05, + "loss": 0.0748, + "step": 8048 + }, + { + "epoch": 1.13, + "learning_rate": 4.811692869174621e-05, + "loss": 0.0529, + "step": 8050 + }, + { + "epoch": 1.13, + "learning_rate": 4.8116460789818457e-05, + "loss": 0.0687, + "step": 8052 + }, + { + "epoch": 1.13, + "learning_rate": 4.8115992887890696e-05, + "loss": 0.0586, + "step": 8054 + }, + { + "epoch": 1.13, + "learning_rate": 4.811552498596295e-05, + "loss": 0.0777, + "step": 8056 + }, + { + "epoch": 1.13, + "learning_rate": 4.811505708403519e-05, + "loss": 0.0652, + "step": 8058 + }, + { + "epoch": 1.13, + "learning_rate": 4.8114589182107433e-05, + "loss": 0.0725, + "step": 8060 + }, + { + "epoch": 1.13, + "learning_rate": 4.811412128017967e-05, + "loss": 0.0585, + "step": 8062 + }, + { + "epoch": 1.13, + "learning_rate": 4.811365337825192e-05, + "loss": 0.0635, + "step": 8064 + }, + { + "epoch": 1.13, + "learning_rate": 4.8113185476324164e-05, + "loss": 0.0741, + "step": 8066 + }, + { + "epoch": 1.13, + "learning_rate": 4.811271757439641e-05, + "loss": 0.0816, + "step": 8068 + }, + { + "epoch": 1.13, + "learning_rate": 4.811224967246865e-05, + "loss": 0.0608, + "step": 8070 + }, + { + "epoch": 1.13, + "learning_rate": 4.8111781770540895e-05, + "loss": 0.0733, + "step": 8072 + }, + { + "epoch": 1.13, + "learning_rate": 4.811131386861314e-05, + "loss": 0.0923, + "step": 8074 + }, + { + "epoch": 1.13, + "learning_rate": 4.811084596668539e-05, + "loss": 0.0607, + "step": 8076 + }, + { + "epoch": 1.13, + "learning_rate": 4.8110378064757626e-05, + "loss": 0.0649, + "step": 8078 + }, + { + "epoch": 1.13, + "learning_rate": 4.810991016282987e-05, + "loss": 0.0739, + "step": 8080 + }, + { + "epoch": 1.13, + "learning_rate": 4.810944226090212e-05, + "loss": 0.0613, + "step": 8082 + }, + { + "epoch": 1.13, + "learning_rate": 4.8108974358974364e-05, + "loss": 0.0989, + "step": 8084 + }, + { + "epoch": 1.14, + "learning_rate": 4.81085064570466e-05, + "loss": 0.0552, + "step": 8086 + }, + { + "epoch": 1.14, + "learning_rate": 4.810803855511885e-05, + "loss": 0.0609, + "step": 8088 + }, + { + "epoch": 1.14, + "learning_rate": 4.8107570653191095e-05, + "loss": 0.0698, + "step": 8090 + }, + { + "epoch": 1.14, + "learning_rate": 4.810710275126334e-05, + "loss": 0.0753, + "step": 8092 + }, + { + "epoch": 1.14, + "learning_rate": 4.810663484933558e-05, + "loss": 0.1152, + "step": 8094 + }, + { + "epoch": 1.14, + "learning_rate": 4.8106166947407826e-05, + "loss": 0.0786, + "step": 8096 + }, + { + "epoch": 1.14, + "learning_rate": 4.8105699045480065e-05, + "loss": 0.0664, + "step": 8098 + }, + { + "epoch": 1.14, + "learning_rate": 4.810523114355232e-05, + "loss": 0.0641, + "step": 8100 + }, + { + "epoch": 1.14, + "learning_rate": 4.810476324162456e-05, + "loss": 0.0837, + "step": 8102 + }, + { + "epoch": 1.14, + "learning_rate": 4.81042953396968e-05, + "loss": 0.0699, + "step": 8104 + }, + { + "epoch": 1.14, + "learning_rate": 4.810382743776904e-05, + "loss": 0.0884, + "step": 8106 + }, + { + "epoch": 1.14, + "learning_rate": 4.8103359535841295e-05, + "loss": 0.072, + "step": 8108 + }, + { + "epoch": 1.14, + "learning_rate": 4.8102891633913534e-05, + "loss": 0.0612, + "step": 8110 + }, + { + "epoch": 1.14, + "learning_rate": 4.810242373198578e-05, + "loss": 0.0544, + "step": 8112 + }, + { + "epoch": 1.14, + "learning_rate": 4.810195583005802e-05, + "loss": 0.0678, + "step": 8114 + }, + { + "epoch": 1.14, + "learning_rate": 4.8101487928130265e-05, + "loss": 0.07, + "step": 8116 + }, + { + "epoch": 1.14, + "learning_rate": 4.810102002620251e-05, + "loss": 0.0607, + "step": 8118 + }, + { + "epoch": 1.14, + "learning_rate": 4.8100552124274757e-05, + "loss": 0.0729, + "step": 8120 + }, + { + "epoch": 1.14, + "learning_rate": 4.8100084222346996e-05, + "loss": 0.0656, + "step": 8122 + }, + { + "epoch": 1.14, + "learning_rate": 4.809961632041924e-05, + "loss": 0.0468, + "step": 8124 + }, + { + "epoch": 1.14, + "learning_rate": 4.809914841849149e-05, + "loss": 0.0738, + "step": 8126 + }, + { + "epoch": 1.14, + "learning_rate": 4.8098680516563733e-05, + "loss": 0.0672, + "step": 8128 + }, + { + "epoch": 1.14, + "learning_rate": 4.809821261463597e-05, + "loss": 0.0643, + "step": 8130 + }, + { + "epoch": 1.14, + "learning_rate": 4.809774471270822e-05, + "loss": 0.0657, + "step": 8132 + }, + { + "epoch": 1.14, + "learning_rate": 4.8097276810780464e-05, + "loss": 0.0779, + "step": 8134 + }, + { + "epoch": 1.14, + "learning_rate": 4.809680890885271e-05, + "loss": 0.0668, + "step": 8136 + }, + { + "epoch": 1.14, + "learning_rate": 4.809634100692495e-05, + "loss": 0.067, + "step": 8138 + }, + { + "epoch": 1.14, + "learning_rate": 4.8095873104997195e-05, + "loss": 0.0589, + "step": 8140 + }, + { + "epoch": 1.14, + "learning_rate": 4.809540520306944e-05, + "loss": 0.0877, + "step": 8142 + }, + { + "epoch": 1.14, + "learning_rate": 4.809493730114169e-05, + "loss": 0.0608, + "step": 8144 + }, + { + "epoch": 1.14, + "learning_rate": 4.8094469399213926e-05, + "loss": 0.0513, + "step": 8146 + }, + { + "epoch": 1.14, + "learning_rate": 4.809400149728617e-05, + "loss": 0.0618, + "step": 8148 + }, + { + "epoch": 1.14, + "learning_rate": 4.809353359535841e-05, + "loss": 0.0819, + "step": 8150 + }, + { + "epoch": 1.14, + "learning_rate": 4.8093065693430664e-05, + "loss": 0.1021, + "step": 8152 + }, + { + "epoch": 1.14, + "learning_rate": 4.80925977915029e-05, + "loss": 0.0664, + "step": 8154 + }, + { + "epoch": 1.14, + "learning_rate": 4.809212988957515e-05, + "loss": 0.074, + "step": 8156 + }, + { + "epoch": 1.15, + "learning_rate": 4.809166198764739e-05, + "loss": 0.0618, + "step": 8158 + }, + { + "epoch": 1.15, + "learning_rate": 4.8091194085719634e-05, + "loss": 0.0829, + "step": 8160 + }, + { + "epoch": 1.15, + "learning_rate": 4.809072618379188e-05, + "loss": 0.0654, + "step": 8162 + }, + { + "epoch": 1.15, + "learning_rate": 4.8090258281864126e-05, + "loss": 0.0744, + "step": 8164 + }, + { + "epoch": 1.15, + "learning_rate": 4.8089790379936365e-05, + "loss": 0.083, + "step": 8166 + }, + { + "epoch": 1.15, + "learning_rate": 4.808932247800861e-05, + "loss": 0.0602, + "step": 8168 + }, + { + "epoch": 1.15, + "learning_rate": 4.808885457608086e-05, + "loss": 0.0708, + "step": 8170 + }, + { + "epoch": 1.15, + "learning_rate": 4.80883866741531e-05, + "loss": 0.0772, + "step": 8172 + }, + { + "epoch": 1.15, + "learning_rate": 4.808791877222534e-05, + "loss": 0.0678, + "step": 8174 + }, + { + "epoch": 1.15, + "learning_rate": 4.808745087029759e-05, + "loss": 0.0593, + "step": 8176 + }, + { + "epoch": 1.15, + "learning_rate": 4.8086982968369834e-05, + "loss": 0.0697, + "step": 8178 + }, + { + "epoch": 1.15, + "learning_rate": 4.808651506644208e-05, + "loss": 0.0583, + "step": 8180 + }, + { + "epoch": 1.15, + "learning_rate": 4.808604716451432e-05, + "loss": 0.1039, + "step": 8182 + }, + { + "epoch": 1.15, + "learning_rate": 4.8085579262586565e-05, + "loss": 0.0736, + "step": 8184 + }, + { + "epoch": 1.15, + "learning_rate": 4.808511136065881e-05, + "loss": 0.0623, + "step": 8186 + }, + { + "epoch": 1.15, + "learning_rate": 4.8084643458731057e-05, + "loss": 0.0537, + "step": 8188 + }, + { + "epoch": 1.15, + "learning_rate": 4.8084175556803296e-05, + "loss": 0.0584, + "step": 8190 + }, + { + "epoch": 1.15, + "learning_rate": 4.808370765487554e-05, + "loss": 0.0542, + "step": 8192 + }, + { + "epoch": 1.15, + "learning_rate": 4.808323975294778e-05, + "loss": 0.0765, + "step": 8194 + }, + { + "epoch": 1.15, + "learning_rate": 4.8082771851020033e-05, + "loss": 0.0796, + "step": 8196 + }, + { + "epoch": 1.15, + "learning_rate": 4.808230394909227e-05, + "loss": 0.0519, + "step": 8198 + }, + { + "epoch": 1.15, + "learning_rate": 4.808183604716452e-05, + "loss": 0.0654, + "step": 8200 + }, + { + "epoch": 1.15, + "learning_rate": 4.808136814523676e-05, + "loss": 0.0686, + "step": 8202 + }, + { + "epoch": 1.15, + "learning_rate": 4.808090024330901e-05, + "loss": 0.0627, + "step": 8204 + }, + { + "epoch": 1.15, + "learning_rate": 4.808043234138125e-05, + "loss": 0.0727, + "step": 8206 + }, + { + "epoch": 1.15, + "learning_rate": 4.8079964439453495e-05, + "loss": 0.0523, + "step": 8208 + }, + { + "epoch": 1.15, + "learning_rate": 4.8079496537525735e-05, + "loss": 0.0563, + "step": 8210 + }, + { + "epoch": 1.15, + "learning_rate": 4.807902863559798e-05, + "loss": 0.0895, + "step": 8212 + }, + { + "epoch": 1.15, + "learning_rate": 4.8078560733670226e-05, + "loss": 0.0592, + "step": 8214 + }, + { + "epoch": 1.15, + "learning_rate": 4.807809283174247e-05, + "loss": 0.0438, + "step": 8216 + }, + { + "epoch": 1.15, + "learning_rate": 4.807762492981471e-05, + "loss": 0.0556, + "step": 8218 + }, + { + "epoch": 1.15, + "learning_rate": 4.807715702788696e-05, + "loss": 0.063, + "step": 8220 + }, + { + "epoch": 1.15, + "learning_rate": 4.80766891259592e-05, + "loss": 0.0833, + "step": 8222 + }, + { + "epoch": 1.15, + "learning_rate": 4.807622122403145e-05, + "loss": 0.0723, + "step": 8224 + }, + { + "epoch": 1.15, + "learning_rate": 4.807575332210369e-05, + "loss": 0.051, + "step": 8226 + }, + { + "epoch": 1.15, + "learning_rate": 4.8075285420175934e-05, + "loss": 0.0813, + "step": 8228 + }, + { + "epoch": 1.16, + "learning_rate": 4.807481751824818e-05, + "loss": 0.0658, + "step": 8230 + }, + { + "epoch": 1.16, + "learning_rate": 4.807434961632042e-05, + "loss": 0.0587, + "step": 8232 + }, + { + "epoch": 1.16, + "learning_rate": 4.8073881714392665e-05, + "loss": 0.0742, + "step": 8234 + }, + { + "epoch": 1.16, + "learning_rate": 4.8073413812464904e-05, + "loss": 0.0613, + "step": 8236 + }, + { + "epoch": 1.16, + "learning_rate": 4.807294591053716e-05, + "loss": 0.0605, + "step": 8238 + }, + { + "epoch": 1.16, + "learning_rate": 4.8072478008609396e-05, + "loss": 0.0686, + "step": 8240 + }, + { + "epoch": 1.16, + "learning_rate": 4.807201010668164e-05, + "loss": 0.0761, + "step": 8242 + }, + { + "epoch": 1.16, + "learning_rate": 4.807154220475388e-05, + "loss": 0.0635, + "step": 8244 + }, + { + "epoch": 1.16, + "learning_rate": 4.807107430282613e-05, + "loss": 0.0915, + "step": 8246 + }, + { + "epoch": 1.16, + "learning_rate": 4.807060640089837e-05, + "loss": 0.0766, + "step": 8248 + }, + { + "epoch": 1.16, + "learning_rate": 4.807013849897062e-05, + "loss": 0.0665, + "step": 8250 + }, + { + "epoch": 1.16, + "learning_rate": 4.806967059704286e-05, + "loss": 0.0415, + "step": 8252 + }, + { + "epoch": 1.16, + "learning_rate": 4.8069202695115104e-05, + "loss": 0.0925, + "step": 8254 + }, + { + "epoch": 1.16, + "learning_rate": 4.806873479318735e-05, + "loss": 0.0664, + "step": 8256 + }, + { + "epoch": 1.16, + "learning_rate": 4.8068266891259596e-05, + "loss": 0.0654, + "step": 8258 + }, + { + "epoch": 1.16, + "learning_rate": 4.8067798989331835e-05, + "loss": 0.0634, + "step": 8260 + }, + { + "epoch": 1.16, + "learning_rate": 4.806733108740408e-05, + "loss": 0.0652, + "step": 8262 + }, + { + "epoch": 1.16, + "learning_rate": 4.806686318547633e-05, + "loss": 0.0618, + "step": 8264 + }, + { + "epoch": 1.16, + "learning_rate": 4.806639528354857e-05, + "loss": 0.0875, + "step": 8266 + }, + { + "epoch": 1.16, + "learning_rate": 4.806592738162081e-05, + "loss": 0.063, + "step": 8268 + }, + { + "epoch": 1.16, + "learning_rate": 4.806545947969306e-05, + "loss": 0.0749, + "step": 8270 + }, + { + "epoch": 1.16, + "learning_rate": 4.8064991577765304e-05, + "loss": 0.0755, + "step": 8272 + }, + { + "epoch": 1.16, + "learning_rate": 4.806452367583755e-05, + "loss": 0.0548, + "step": 8274 + }, + { + "epoch": 1.16, + "learning_rate": 4.806405577390979e-05, + "loss": 0.0591, + "step": 8276 + }, + { + "epoch": 1.16, + "learning_rate": 4.8063587871982035e-05, + "loss": 0.0816, + "step": 8278 + }, + { + "epoch": 1.16, + "learning_rate": 4.8063119970054274e-05, + "loss": 0.0765, + "step": 8280 + }, + { + "epoch": 1.16, + "learning_rate": 4.8062652068126526e-05, + "loss": 0.0692, + "step": 8282 + }, + { + "epoch": 1.16, + "learning_rate": 4.8062184166198766e-05, + "loss": 0.0758, + "step": 8284 + }, + { + "epoch": 1.16, + "learning_rate": 4.806171626427101e-05, + "loss": 0.0773, + "step": 8286 + }, + { + "epoch": 1.16, + "learning_rate": 4.806124836234325e-05, + "loss": 0.0591, + "step": 8288 + }, + { + "epoch": 1.16, + "learning_rate": 4.80607804604155e-05, + "loss": 0.0695, + "step": 8290 + }, + { + "epoch": 1.16, + "learning_rate": 4.806031255848774e-05, + "loss": 0.0794, + "step": 8292 + }, + { + "epoch": 1.16, + "learning_rate": 4.805984465655999e-05, + "loss": 0.0875, + "step": 8294 + }, + { + "epoch": 1.16, + "learning_rate": 4.805937675463223e-05, + "loss": 0.0798, + "step": 8296 + }, + { + "epoch": 1.16, + "learning_rate": 4.805890885270447e-05, + "loss": 0.0538, + "step": 8298 + }, + { + "epoch": 1.17, + "learning_rate": 4.805844095077672e-05, + "loss": 0.0534, + "step": 8300 + }, + { + "epoch": 1.17, + "learning_rate": 4.8057973048848965e-05, + "loss": 0.064, + "step": 8302 + }, + { + "epoch": 1.17, + "learning_rate": 4.8057505146921204e-05, + "loss": 0.0612, + "step": 8304 + }, + { + "epoch": 1.17, + "learning_rate": 4.805703724499345e-05, + "loss": 0.0589, + "step": 8306 + }, + { + "epoch": 1.17, + "learning_rate": 4.8056569343065696e-05, + "loss": 0.0721, + "step": 8308 + }, + { + "epoch": 1.17, + "learning_rate": 4.805610144113794e-05, + "loss": 0.0675, + "step": 8310 + }, + { + "epoch": 1.17, + "learning_rate": 4.805563353921018e-05, + "loss": 0.0714, + "step": 8312 + }, + { + "epoch": 1.17, + "learning_rate": 4.805516563728243e-05, + "loss": 0.069, + "step": 8314 + }, + { + "epoch": 1.17, + "learning_rate": 4.805469773535467e-05, + "loss": 0.09, + "step": 8316 + }, + { + "epoch": 1.17, + "learning_rate": 4.805422983342692e-05, + "loss": 0.0664, + "step": 8318 + }, + { + "epoch": 1.17, + "learning_rate": 4.805376193149916e-05, + "loss": 0.0713, + "step": 8320 + }, + { + "epoch": 1.17, + "learning_rate": 4.8053294029571404e-05, + "loss": 0.0632, + "step": 8322 + }, + { + "epoch": 1.17, + "learning_rate": 4.805282612764364e-05, + "loss": 0.0608, + "step": 8324 + }, + { + "epoch": 1.17, + "learning_rate": 4.8052358225715896e-05, + "loss": 0.0491, + "step": 8326 + }, + { + "epoch": 1.17, + "learning_rate": 4.8051890323788135e-05, + "loss": 0.0648, + "step": 8328 + }, + { + "epoch": 1.17, + "learning_rate": 4.805142242186038e-05, + "loss": 0.0589, + "step": 8330 + }, + { + "epoch": 1.17, + "learning_rate": 4.805095451993262e-05, + "loss": 0.0694, + "step": 8332 + }, + { + "epoch": 1.17, + "learning_rate": 4.805048661800487e-05, + "loss": 0.0937, + "step": 8334 + }, + { + "epoch": 1.17, + "learning_rate": 4.805001871607711e-05, + "loss": 0.0812, + "step": 8336 + }, + { + "epoch": 1.17, + "learning_rate": 4.804955081414936e-05, + "loss": 0.0723, + "step": 8338 + }, + { + "epoch": 1.17, + "learning_rate": 4.80490829122216e-05, + "loss": 0.0675, + "step": 8340 + }, + { + "epoch": 1.17, + "learning_rate": 4.804861501029384e-05, + "loss": 0.0632, + "step": 8342 + }, + { + "epoch": 1.17, + "learning_rate": 4.804814710836609e-05, + "loss": 0.0716, + "step": 8344 + }, + { + "epoch": 1.17, + "learning_rate": 4.8047679206438335e-05, + "loss": 0.0603, + "step": 8346 + }, + { + "epoch": 1.17, + "learning_rate": 4.8047211304510574e-05, + "loss": 0.0534, + "step": 8348 + }, + { + "epoch": 1.17, + "learning_rate": 4.804674340258282e-05, + "loss": 0.0696, + "step": 8350 + }, + { + "epoch": 1.17, + "learning_rate": 4.8046275500655066e-05, + "loss": 0.058, + "step": 8352 + }, + { + "epoch": 1.17, + "learning_rate": 4.804580759872731e-05, + "loss": 0.0739, + "step": 8354 + }, + { + "epoch": 1.17, + "learning_rate": 4.804533969679955e-05, + "loss": 0.0697, + "step": 8356 + }, + { + "epoch": 1.17, + "learning_rate": 4.8044871794871796e-05, + "loss": 0.0582, + "step": 8358 + }, + { + "epoch": 1.17, + "learning_rate": 4.804440389294404e-05, + "loss": 0.0533, + "step": 8360 + }, + { + "epoch": 1.17, + "learning_rate": 4.804393599101629e-05, + "loss": 0.086, + "step": 8362 + }, + { + "epoch": 1.17, + "learning_rate": 4.804346808908853e-05, + "loss": 0.0734, + "step": 8364 + }, + { + "epoch": 1.17, + "learning_rate": 4.804300018716077e-05, + "loss": 0.0668, + "step": 8366 + }, + { + "epoch": 1.17, + "learning_rate": 4.804253228523302e-05, + "loss": 0.0731, + "step": 8368 + }, + { + "epoch": 1.17, + "learning_rate": 4.8042064383305265e-05, + "loss": 0.0651, + "step": 8370 + }, + { + "epoch": 1.18, + "learning_rate": 4.8041596481377504e-05, + "loss": 0.0637, + "step": 8372 + }, + { + "epoch": 1.18, + "learning_rate": 4.804112857944975e-05, + "loss": 0.0726, + "step": 8374 + }, + { + "epoch": 1.18, + "learning_rate": 4.804066067752199e-05, + "loss": 0.0624, + "step": 8376 + }, + { + "epoch": 1.18, + "learning_rate": 4.804019277559424e-05, + "loss": 0.0543, + "step": 8378 + }, + { + "epoch": 1.18, + "learning_rate": 4.803972487366648e-05, + "loss": 0.0877, + "step": 8380 + }, + { + "epoch": 1.18, + "learning_rate": 4.803925697173873e-05, + "loss": 0.0859, + "step": 8382 + }, + { + "epoch": 1.18, + "learning_rate": 4.8038789069810966e-05, + "loss": 0.0646, + "step": 8384 + }, + { + "epoch": 1.18, + "learning_rate": 4.803832116788322e-05, + "loss": 0.0769, + "step": 8386 + }, + { + "epoch": 1.18, + "learning_rate": 4.803785326595546e-05, + "loss": 0.0606, + "step": 8388 + }, + { + "epoch": 1.18, + "learning_rate": 4.8037385364027704e-05, + "loss": 0.0608, + "step": 8390 + }, + { + "epoch": 1.18, + "learning_rate": 4.803691746209994e-05, + "loss": 0.0569, + "step": 8392 + }, + { + "epoch": 1.18, + "learning_rate": 4.803644956017219e-05, + "loss": 0.0498, + "step": 8394 + }, + { + "epoch": 1.18, + "learning_rate": 4.8035981658244435e-05, + "loss": 0.0683, + "step": 8396 + }, + { + "epoch": 1.18, + "learning_rate": 4.803551375631668e-05, + "loss": 0.0722, + "step": 8398 + }, + { + "epoch": 1.18, + "learning_rate": 4.803504585438892e-05, + "loss": 0.0843, + "step": 8400 + }, + { + "epoch": 1.18, + "learning_rate": 4.8034577952461166e-05, + "loss": 0.096, + "step": 8402 + }, + { + "epoch": 1.18, + "learning_rate": 4.803411005053341e-05, + "loss": 0.072, + "step": 8404 + }, + { + "epoch": 1.18, + "learning_rate": 4.803364214860566e-05, + "loss": 0.0681, + "step": 8406 + }, + { + "epoch": 1.18, + "learning_rate": 4.80331742466779e-05, + "loss": 0.0835, + "step": 8408 + }, + { + "epoch": 1.18, + "learning_rate": 4.803270634475014e-05, + "loss": 0.0779, + "step": 8410 + }, + { + "epoch": 1.18, + "learning_rate": 4.803223844282239e-05, + "loss": 0.087, + "step": 8412 + }, + { + "epoch": 1.18, + "learning_rate": 4.8031770540894635e-05, + "loss": 0.0525, + "step": 8414 + }, + { + "epoch": 1.18, + "learning_rate": 4.8031302638966874e-05, + "loss": 0.0683, + "step": 8416 + }, + { + "epoch": 1.18, + "learning_rate": 4.803083473703912e-05, + "loss": 0.0614, + "step": 8418 + }, + { + "epoch": 1.18, + "learning_rate": 4.8030366835111366e-05, + "loss": 0.0683, + "step": 8420 + }, + { + "epoch": 1.18, + "learning_rate": 4.802989893318361e-05, + "loss": 0.0632, + "step": 8422 + }, + { + "epoch": 1.18, + "learning_rate": 4.802943103125585e-05, + "loss": 0.0556, + "step": 8424 + }, + { + "epoch": 1.18, + "learning_rate": 4.8028963129328097e-05, + "loss": 0.0691, + "step": 8426 + }, + { + "epoch": 1.18, + "learning_rate": 4.8028495227400336e-05, + "loss": 0.0858, + "step": 8428 + }, + { + "epoch": 1.18, + "learning_rate": 4.802802732547259e-05, + "loss": 0.0655, + "step": 8430 + }, + { + "epoch": 1.18, + "learning_rate": 4.802755942354483e-05, + "loss": 0.0626, + "step": 8432 + }, + { + "epoch": 1.18, + "learning_rate": 4.802709152161707e-05, + "loss": 0.0795, + "step": 8434 + }, + { + "epoch": 1.18, + "learning_rate": 4.802662361968931e-05, + "loss": 0.0672, + "step": 8436 + }, + { + "epoch": 1.18, + "learning_rate": 4.802615571776156e-05, + "loss": 0.0803, + "step": 8438 + }, + { + "epoch": 1.18, + "learning_rate": 4.8025687815833804e-05, + "loss": 0.0785, + "step": 8440 + }, + { + "epoch": 1.19, + "learning_rate": 4.802521991390605e-05, + "loss": 0.0689, + "step": 8442 + }, + { + "epoch": 1.19, + "learning_rate": 4.802475201197829e-05, + "loss": 0.0563, + "step": 8444 + }, + { + "epoch": 1.19, + "learning_rate": 4.8024284110050535e-05, + "loss": 0.0754, + "step": 8446 + }, + { + "epoch": 1.19, + "learning_rate": 4.802381620812278e-05, + "loss": 0.0804, + "step": 8448 + }, + { + "epoch": 1.19, + "learning_rate": 4.802334830619503e-05, + "loss": 0.0577, + "step": 8450 + }, + { + "epoch": 1.19, + "learning_rate": 4.8022880404267266e-05, + "loss": 0.0665, + "step": 8452 + }, + { + "epoch": 1.19, + "learning_rate": 4.802241250233951e-05, + "loss": 0.062, + "step": 8454 + }, + { + "epoch": 1.19, + "learning_rate": 4.802194460041176e-05, + "loss": 0.0794, + "step": 8456 + }, + { + "epoch": 1.19, + "learning_rate": 4.8021476698484004e-05, + "loss": 0.0532, + "step": 8458 + }, + { + "epoch": 1.19, + "learning_rate": 4.802100879655624e-05, + "loss": 0.0724, + "step": 8460 + }, + { + "epoch": 1.19, + "learning_rate": 4.802054089462849e-05, + "loss": 0.0757, + "step": 8462 + }, + { + "epoch": 1.19, + "learning_rate": 4.8020072992700735e-05, + "loss": 0.0747, + "step": 8464 + }, + { + "epoch": 1.19, + "learning_rate": 4.801960509077298e-05, + "loss": 0.0794, + "step": 8466 + }, + { + "epoch": 1.19, + "learning_rate": 4.801913718884522e-05, + "loss": 0.0628, + "step": 8468 + }, + { + "epoch": 1.19, + "learning_rate": 4.8018669286917466e-05, + "loss": 0.0595, + "step": 8470 + }, + { + "epoch": 1.19, + "learning_rate": 4.8018201384989705e-05, + "loss": 0.0721, + "step": 8472 + }, + { + "epoch": 1.19, + "learning_rate": 4.801773348306196e-05, + "loss": 0.0644, + "step": 8474 + }, + { + "epoch": 1.19, + "learning_rate": 4.80172655811342e-05, + "loss": 0.0698, + "step": 8476 + }, + { + "epoch": 1.19, + "learning_rate": 4.801679767920644e-05, + "loss": 0.0772, + "step": 8478 + }, + { + "epoch": 1.19, + "learning_rate": 4.801632977727868e-05, + "loss": 0.0635, + "step": 8480 + }, + { + "epoch": 1.19, + "learning_rate": 4.8015861875350935e-05, + "loss": 0.0813, + "step": 8482 + }, + { + "epoch": 1.19, + "learning_rate": 4.8015393973423174e-05, + "loss": 0.0753, + "step": 8484 + }, + { + "epoch": 1.19, + "learning_rate": 4.801492607149541e-05, + "loss": 0.0805, + "step": 8486 + }, + { + "epoch": 1.19, + "learning_rate": 4.801445816956766e-05, + "loss": 0.0754, + "step": 8488 + }, + { + "epoch": 1.19, + "learning_rate": 4.8013990267639905e-05, + "loss": 0.0578, + "step": 8490 + }, + { + "epoch": 1.19, + "learning_rate": 4.801352236571215e-05, + "loss": 0.0782, + "step": 8492 + }, + { + "epoch": 1.19, + "learning_rate": 4.801305446378439e-05, + "loss": 0.0611, + "step": 8494 + }, + { + "epoch": 1.19, + "learning_rate": 4.8012586561856636e-05, + "loss": 0.0837, + "step": 8496 + }, + { + "epoch": 1.19, + "learning_rate": 4.801211865992888e-05, + "loss": 0.0929, + "step": 8498 + }, + { + "epoch": 1.19, + "learning_rate": 4.801165075800113e-05, + "loss": 0.0574, + "step": 8500 + }, + { + "epoch": 1.19, + "learning_rate": 4.801118285607337e-05, + "loss": 0.0785, + "step": 8502 + }, + { + "epoch": 1.19, + "learning_rate": 4.801071495414561e-05, + "loss": 0.0776, + "step": 8504 + }, + { + "epoch": 1.19, + "learning_rate": 4.801024705221785e-05, + "loss": 0.0673, + "step": 8506 + }, + { + "epoch": 1.19, + "learning_rate": 4.8009779150290104e-05, + "loss": 0.0857, + "step": 8508 + }, + { + "epoch": 1.19, + "learning_rate": 4.8009311248362343e-05, + "loss": 0.0825, + "step": 8510 + }, + { + "epoch": 1.19, + "learning_rate": 4.800884334643459e-05, + "loss": 0.0576, + "step": 8512 + }, + { + "epoch": 1.2, + "learning_rate": 4.800837544450683e-05, + "loss": 0.0612, + "step": 8514 + }, + { + "epoch": 1.2, + "learning_rate": 4.800790754257908e-05, + "loss": 0.0561, + "step": 8516 + }, + { + "epoch": 1.2, + "learning_rate": 4.800743964065132e-05, + "loss": 0.0654, + "step": 8518 + }, + { + "epoch": 1.2, + "learning_rate": 4.8006971738723566e-05, + "loss": 0.0605, + "step": 8520 + }, + { + "epoch": 1.2, + "learning_rate": 4.8006503836795805e-05, + "loss": 0.0577, + "step": 8522 + }, + { + "epoch": 1.2, + "learning_rate": 4.800603593486805e-05, + "loss": 0.0814, + "step": 8524 + }, + { + "epoch": 1.2, + "learning_rate": 4.80055680329403e-05, + "loss": 0.0724, + "step": 8526 + }, + { + "epoch": 1.2, + "learning_rate": 4.800510013101254e-05, + "loss": 0.0654, + "step": 8528 + }, + { + "epoch": 1.2, + "learning_rate": 4.800463222908478e-05, + "loss": 0.0666, + "step": 8530 + }, + { + "epoch": 1.2, + "learning_rate": 4.800416432715703e-05, + "loss": 0.0558, + "step": 8532 + }, + { + "epoch": 1.2, + "learning_rate": 4.8003696425229274e-05, + "loss": 0.0732, + "step": 8534 + }, + { + "epoch": 1.2, + "learning_rate": 4.800322852330152e-05, + "loss": 0.0567, + "step": 8536 + }, + { + "epoch": 1.2, + "learning_rate": 4.800276062137376e-05, + "loss": 0.0474, + "step": 8538 + }, + { + "epoch": 1.2, + "learning_rate": 4.8002292719446005e-05, + "loss": 0.0519, + "step": 8540 + }, + { + "epoch": 1.2, + "learning_rate": 4.800182481751825e-05, + "loss": 0.0582, + "step": 8542 + }, + { + "epoch": 1.2, + "learning_rate": 4.80013569155905e-05, + "loss": 0.0762, + "step": 8544 + }, + { + "epoch": 1.2, + "learning_rate": 4.8000889013662736e-05, + "loss": 0.0652, + "step": 8546 + }, + { + "epoch": 1.2, + "learning_rate": 4.800042111173498e-05, + "loss": 0.0581, + "step": 8548 + }, + { + "epoch": 1.2, + "learning_rate": 4.799995320980723e-05, + "loss": 0.0561, + "step": 8550 + }, + { + "epoch": 1.2, + "learning_rate": 4.7999485307879474e-05, + "loss": 0.0744, + "step": 8552 + }, + { + "epoch": 1.2, + "learning_rate": 4.799901740595171e-05, + "loss": 0.0756, + "step": 8554 + }, + { + "epoch": 1.2, + "learning_rate": 4.799854950402396e-05, + "loss": 0.0574, + "step": 8556 + }, + { + "epoch": 1.2, + "learning_rate": 4.79980816020962e-05, + "loss": 0.0673, + "step": 8558 + }, + { + "epoch": 1.2, + "learning_rate": 4.799761370016845e-05, + "loss": 0.112, + "step": 8560 + }, + { + "epoch": 1.2, + "learning_rate": 4.799714579824069e-05, + "loss": 0.0745, + "step": 8562 + }, + { + "epoch": 1.2, + "learning_rate": 4.7996677896312936e-05, + "loss": 0.0649, + "step": 8564 + }, + { + "epoch": 1.2, + "learning_rate": 4.7996209994385175e-05, + "loss": 0.0651, + "step": 8566 + }, + { + "epoch": 1.2, + "learning_rate": 4.799574209245743e-05, + "loss": 0.0765, + "step": 8568 + }, + { + "epoch": 1.2, + "learning_rate": 4.799527419052967e-05, + "loss": 0.0595, + "step": 8570 + }, + { + "epoch": 1.2, + "learning_rate": 4.799480628860191e-05, + "loss": 0.0711, + "step": 8572 + }, + { + "epoch": 1.2, + "learning_rate": 4.799433838667415e-05, + "loss": 0.0622, + "step": 8574 + }, + { + "epoch": 1.2, + "learning_rate": 4.79938704847464e-05, + "loss": 0.0687, + "step": 8576 + }, + { + "epoch": 1.2, + "learning_rate": 4.7993402582818644e-05, + "loss": 0.0675, + "step": 8578 + }, + { + "epoch": 1.2, + "learning_rate": 4.799293468089089e-05, + "loss": 0.0708, + "step": 8580 + }, + { + "epoch": 1.2, + "learning_rate": 4.799246677896313e-05, + "loss": 0.0816, + "step": 8582 + }, + { + "epoch": 1.2, + "learning_rate": 4.7991998877035374e-05, + "loss": 0.0767, + "step": 8584 + }, + { + "epoch": 1.21, + "learning_rate": 4.799153097510762e-05, + "loss": 0.0521, + "step": 8586 + }, + { + "epoch": 1.21, + "learning_rate": 4.7991063073179866e-05, + "loss": 0.0607, + "step": 8588 + }, + { + "epoch": 1.21, + "learning_rate": 4.7990595171252105e-05, + "loss": 0.0895, + "step": 8590 + }, + { + "epoch": 1.21, + "learning_rate": 4.799012726932435e-05, + "loss": 0.0655, + "step": 8592 + }, + { + "epoch": 1.21, + "learning_rate": 4.79896593673966e-05, + "loss": 0.068, + "step": 8594 + }, + { + "epoch": 1.21, + "learning_rate": 4.798919146546884e-05, + "loss": 0.0752, + "step": 8596 + }, + { + "epoch": 1.21, + "learning_rate": 4.798872356354108e-05, + "loss": 0.0816, + "step": 8598 + }, + { + "epoch": 1.21, + "learning_rate": 4.798825566161333e-05, + "loss": 0.0827, + "step": 8600 + }, + { + "epoch": 1.21, + "learning_rate": 4.798778775968557e-05, + "loss": 0.0787, + "step": 8602 + }, + { + "epoch": 1.21, + "learning_rate": 4.798731985775782e-05, + "loss": 0.0548, + "step": 8604 + }, + { + "epoch": 1.21, + "learning_rate": 4.798685195583006e-05, + "loss": 0.0545, + "step": 8606 + }, + { + "epoch": 1.21, + "learning_rate": 4.7986384053902305e-05, + "loss": 0.073, + "step": 8608 + }, + { + "epoch": 1.21, + "learning_rate": 4.7985916151974544e-05, + "loss": 0.0662, + "step": 8610 + }, + { + "epoch": 1.21, + "learning_rate": 4.79854482500468e-05, + "loss": 0.1088, + "step": 8612 + }, + { + "epoch": 1.21, + "learning_rate": 4.7984980348119036e-05, + "loss": 0.0798, + "step": 8614 + }, + { + "epoch": 1.21, + "learning_rate": 4.798451244619128e-05, + "loss": 0.0667, + "step": 8616 + }, + { + "epoch": 1.21, + "learning_rate": 4.798404454426352e-05, + "loss": 0.0683, + "step": 8618 + }, + { + "epoch": 1.21, + "learning_rate": 4.798357664233577e-05, + "loss": 0.0882, + "step": 8620 + }, + { + "epoch": 1.21, + "learning_rate": 4.798310874040801e-05, + "loss": 0.0718, + "step": 8622 + }, + { + "epoch": 1.21, + "learning_rate": 4.798264083848026e-05, + "loss": 0.0746, + "step": 8624 + }, + { + "epoch": 1.21, + "learning_rate": 4.79821729365525e-05, + "loss": 0.0832, + "step": 8626 + }, + { + "epoch": 1.21, + "learning_rate": 4.7981705034624744e-05, + "loss": 0.0532, + "step": 8628 + }, + { + "epoch": 1.21, + "learning_rate": 4.798123713269699e-05, + "loss": 0.0666, + "step": 8630 + }, + { + "epoch": 1.21, + "learning_rate": 4.7980769230769236e-05, + "loss": 0.0945, + "step": 8632 + }, + { + "epoch": 1.21, + "learning_rate": 4.7980301328841475e-05, + "loss": 0.0786, + "step": 8634 + }, + { + "epoch": 1.21, + "learning_rate": 4.797983342691372e-05, + "loss": 0.0674, + "step": 8636 + }, + { + "epoch": 1.21, + "learning_rate": 4.797936552498597e-05, + "loss": 0.0609, + "step": 8638 + }, + { + "epoch": 1.21, + "learning_rate": 4.797889762305821e-05, + "loss": 0.072, + "step": 8640 + }, + { + "epoch": 1.21, + "learning_rate": 4.797842972113045e-05, + "loss": 0.0711, + "step": 8642 + }, + { + "epoch": 1.21, + "learning_rate": 4.79779618192027e-05, + "loss": 0.0527, + "step": 8644 + }, + { + "epoch": 1.21, + "learning_rate": 4.7977493917274944e-05, + "loss": 0.0668, + "step": 8646 + }, + { + "epoch": 1.21, + "learning_rate": 4.797702601534719e-05, + "loss": 0.0771, + "step": 8648 + }, + { + "epoch": 1.21, + "learning_rate": 4.797655811341943e-05, + "loss": 0.0657, + "step": 8650 + }, + { + "epoch": 1.21, + "learning_rate": 4.7976090211491674e-05, + "loss": 0.0763, + "step": 8652 + }, + { + "epoch": 1.21, + "learning_rate": 4.7975622309563914e-05, + "loss": 0.0674, + "step": 8654 + }, + { + "epoch": 1.22, + "learning_rate": 4.7975154407636166e-05, + "loss": 0.0775, + "step": 8656 + }, + { + "epoch": 1.22, + "learning_rate": 4.7974686505708405e-05, + "loss": 0.0635, + "step": 8658 + }, + { + "epoch": 1.22, + "learning_rate": 4.797421860378065e-05, + "loss": 0.0639, + "step": 8660 + }, + { + "epoch": 1.22, + "learning_rate": 4.797375070185289e-05, + "loss": 0.0586, + "step": 8662 + }, + { + "epoch": 1.22, + "learning_rate": 4.797328279992514e-05, + "loss": 0.0707, + "step": 8664 + }, + { + "epoch": 1.22, + "learning_rate": 4.797281489799738e-05, + "loss": 0.0516, + "step": 8666 + }, + { + "epoch": 1.22, + "learning_rate": 4.797234699606963e-05, + "loss": 0.0704, + "step": 8668 + }, + { + "epoch": 1.22, + "learning_rate": 4.797187909414187e-05, + "loss": 0.0894, + "step": 8670 + }, + { + "epoch": 1.22, + "learning_rate": 4.797141119221411e-05, + "loss": 0.0601, + "step": 8672 + }, + { + "epoch": 1.22, + "learning_rate": 4.797094329028636e-05, + "loss": 0.0699, + "step": 8674 + }, + { + "epoch": 1.22, + "learning_rate": 4.7970475388358605e-05, + "loss": 0.0617, + "step": 8676 + }, + { + "epoch": 1.22, + "learning_rate": 4.7970007486430844e-05, + "loss": 0.0697, + "step": 8678 + }, + { + "epoch": 1.22, + "learning_rate": 4.796953958450309e-05, + "loss": 0.0829, + "step": 8680 + }, + { + "epoch": 1.22, + "learning_rate": 4.7969071682575336e-05, + "loss": 0.0682, + "step": 8682 + }, + { + "epoch": 1.22, + "learning_rate": 4.796860378064758e-05, + "loss": 0.0778, + "step": 8684 + }, + { + "epoch": 1.22, + "learning_rate": 4.796813587871982e-05, + "loss": 0.0773, + "step": 8686 + }, + { + "epoch": 1.22, + "learning_rate": 4.796766797679207e-05, + "loss": 0.0738, + "step": 8688 + }, + { + "epoch": 1.22, + "learning_rate": 4.796720007486431e-05, + "loss": 0.0564, + "step": 8690 + }, + { + "epoch": 1.22, + "learning_rate": 4.796673217293656e-05, + "loss": 0.0794, + "step": 8692 + }, + { + "epoch": 1.22, + "learning_rate": 4.79662642710088e-05, + "loss": 0.0731, + "step": 8694 + }, + { + "epoch": 1.22, + "learning_rate": 4.7965796369081044e-05, + "loss": 0.0756, + "step": 8696 + }, + { + "epoch": 1.22, + "learning_rate": 4.796532846715329e-05, + "loss": 0.0585, + "step": 8698 + }, + { + "epoch": 1.22, + "learning_rate": 4.7964860565225536e-05, + "loss": 0.065, + "step": 8700 + }, + { + "epoch": 1.22, + "learning_rate": 4.7964392663297775e-05, + "loss": 0.0476, + "step": 8702 + }, + { + "epoch": 1.22, + "learning_rate": 4.796392476137002e-05, + "loss": 0.0625, + "step": 8704 + }, + { + "epoch": 1.22, + "learning_rate": 4.796345685944226e-05, + "loss": 0.0746, + "step": 8706 + }, + { + "epoch": 1.22, + "learning_rate": 4.796298895751451e-05, + "loss": 0.0774, + "step": 8708 + }, + { + "epoch": 1.22, + "learning_rate": 4.796252105558675e-05, + "loss": 0.0473, + "step": 8710 + }, + { + "epoch": 1.22, + "learning_rate": 4.7962053153659e-05, + "loss": 0.0834, + "step": 8712 + }, + { + "epoch": 1.22, + "learning_rate": 4.796158525173124e-05, + "loss": 0.0597, + "step": 8714 + }, + { + "epoch": 1.22, + "learning_rate": 4.796111734980348e-05, + "loss": 0.0669, + "step": 8716 + }, + { + "epoch": 1.22, + "learning_rate": 4.796064944787573e-05, + "loss": 0.0567, + "step": 8718 + }, + { + "epoch": 1.22, + "learning_rate": 4.7960181545947974e-05, + "loss": 0.0691, + "step": 8720 + }, + { + "epoch": 1.22, + "learning_rate": 4.7959713644020214e-05, + "loss": 0.0713, + "step": 8722 + }, + { + "epoch": 1.22, + "learning_rate": 4.795924574209246e-05, + "loss": 0.0685, + "step": 8724 + }, + { + "epoch": 1.22, + "learning_rate": 4.7958777840164705e-05, + "loss": 0.0747, + "step": 8726 + }, + { + "epoch": 1.23, + "learning_rate": 4.795830993823695e-05, + "loss": 0.0593, + "step": 8728 + }, + { + "epoch": 1.23, + "learning_rate": 4.795784203630919e-05, + "loss": 0.0783, + "step": 8730 + }, + { + "epoch": 1.23, + "learning_rate": 4.7957374134381436e-05, + "loss": 0.0683, + "step": 8732 + }, + { + "epoch": 1.23, + "learning_rate": 4.795690623245368e-05, + "loss": 0.0755, + "step": 8734 + }, + { + "epoch": 1.23, + "learning_rate": 4.795643833052593e-05, + "loss": 0.0863, + "step": 8736 + }, + { + "epoch": 1.23, + "learning_rate": 4.795597042859817e-05, + "loss": 0.0483, + "step": 8738 + }, + { + "epoch": 1.23, + "learning_rate": 4.7955502526670407e-05, + "loss": 0.0703, + "step": 8740 + }, + { + "epoch": 1.23, + "learning_rate": 4.795503462474266e-05, + "loss": 0.0563, + "step": 8742 + }, + { + "epoch": 1.23, + "learning_rate": 4.79545667228149e-05, + "loss": 0.0479, + "step": 8744 + }, + { + "epoch": 1.23, + "learning_rate": 4.7954098820887144e-05, + "loss": 0.0641, + "step": 8746 + }, + { + "epoch": 1.23, + "learning_rate": 4.795363091895938e-05, + "loss": 0.0904, + "step": 8748 + }, + { + "epoch": 1.23, + "learning_rate": 4.795316301703163e-05, + "loss": 0.089, + "step": 8750 + }, + { + "epoch": 1.23, + "learning_rate": 4.7952695115103875e-05, + "loss": 0.0574, + "step": 8752 + }, + { + "epoch": 1.23, + "learning_rate": 4.795222721317612e-05, + "loss": 0.0807, + "step": 8754 + }, + { + "epoch": 1.23, + "learning_rate": 4.795175931124836e-05, + "loss": 0.0497, + "step": 8756 + }, + { + "epoch": 1.23, + "learning_rate": 4.7951291409320606e-05, + "loss": 0.1108, + "step": 8758 + }, + { + "epoch": 1.23, + "learning_rate": 4.795082350739285e-05, + "loss": 0.0649, + "step": 8760 + }, + { + "epoch": 1.23, + "learning_rate": 4.79503556054651e-05, + "loss": 0.0619, + "step": 8762 + }, + { + "epoch": 1.23, + "learning_rate": 4.794988770353734e-05, + "loss": 0.0562, + "step": 8764 + }, + { + "epoch": 1.23, + "learning_rate": 4.794941980160958e-05, + "loss": 0.0662, + "step": 8766 + }, + { + "epoch": 1.23, + "learning_rate": 4.794895189968183e-05, + "loss": 0.0562, + "step": 8768 + }, + { + "epoch": 1.23, + "learning_rate": 4.7948483997754075e-05, + "loss": 0.08, + "step": 8770 + }, + { + "epoch": 1.23, + "learning_rate": 4.7948016095826314e-05, + "loss": 0.0883, + "step": 8772 + }, + { + "epoch": 1.23, + "learning_rate": 4.794754819389856e-05, + "loss": 0.0709, + "step": 8774 + }, + { + "epoch": 1.23, + "learning_rate": 4.7947080291970806e-05, + "loss": 0.0658, + "step": 8776 + }, + { + "epoch": 1.23, + "learning_rate": 4.794661239004305e-05, + "loss": 0.0704, + "step": 8778 + }, + { + "epoch": 1.23, + "learning_rate": 4.794614448811529e-05, + "loss": 0.0966, + "step": 8780 + }, + { + "epoch": 1.23, + "learning_rate": 4.794567658618754e-05, + "loss": 0.081, + "step": 8782 + }, + { + "epoch": 1.23, + "learning_rate": 4.7945208684259776e-05, + "loss": 0.0678, + "step": 8784 + }, + { + "epoch": 1.23, + "learning_rate": 4.794474078233203e-05, + "loss": 0.074, + "step": 8786 + }, + { + "epoch": 1.23, + "learning_rate": 4.794427288040427e-05, + "loss": 0.0727, + "step": 8788 + }, + { + "epoch": 1.23, + "learning_rate": 4.7943804978476514e-05, + "loss": 0.0591, + "step": 8790 + }, + { + "epoch": 1.23, + "learning_rate": 4.794333707654875e-05, + "loss": 0.095, + "step": 8792 + }, + { + "epoch": 1.23, + "learning_rate": 4.7942869174621005e-05, + "loss": 0.064, + "step": 8794 + }, + { + "epoch": 1.23, + "learning_rate": 4.7942401272693245e-05, + "loss": 0.0724, + "step": 8796 + }, + { + "epoch": 1.23, + "learning_rate": 4.794193337076549e-05, + "loss": 0.0756, + "step": 8798 + }, + { + "epoch": 1.24, + "learning_rate": 4.794146546883773e-05, + "loss": 0.0755, + "step": 8800 + }, + { + "epoch": 1.24, + "learning_rate": 4.7940997566909976e-05, + "loss": 0.08, + "step": 8802 + }, + { + "epoch": 1.24, + "learning_rate": 4.794052966498222e-05, + "loss": 0.074, + "step": 8804 + }, + { + "epoch": 1.24, + "learning_rate": 4.794006176305447e-05, + "loss": 0.0783, + "step": 8806 + }, + { + "epoch": 1.24, + "learning_rate": 4.7939593861126707e-05, + "loss": 0.06, + "step": 8808 + }, + { + "epoch": 1.24, + "learning_rate": 4.793912595919895e-05, + "loss": 0.0711, + "step": 8810 + }, + { + "epoch": 1.24, + "learning_rate": 4.79386580572712e-05, + "loss": 0.0625, + "step": 8812 + }, + { + "epoch": 1.24, + "learning_rate": 4.7938190155343444e-05, + "loss": 0.0793, + "step": 8814 + }, + { + "epoch": 1.24, + "learning_rate": 4.7937722253415683e-05, + "loss": 0.0832, + "step": 8816 + }, + { + "epoch": 1.24, + "learning_rate": 4.793725435148793e-05, + "loss": 0.0637, + "step": 8818 + }, + { + "epoch": 1.24, + "learning_rate": 4.7936786449560175e-05, + "loss": 0.0923, + "step": 8820 + }, + { + "epoch": 1.24, + "learning_rate": 4.793631854763242e-05, + "loss": 0.0939, + "step": 8822 + }, + { + "epoch": 1.24, + "learning_rate": 4.793585064570466e-05, + "loss": 0.0704, + "step": 8824 + }, + { + "epoch": 1.24, + "learning_rate": 4.7935382743776906e-05, + "loss": 0.0715, + "step": 8826 + }, + { + "epoch": 1.24, + "learning_rate": 4.793491484184915e-05, + "loss": 0.0708, + "step": 8828 + }, + { + "epoch": 1.24, + "learning_rate": 4.79344469399214e-05, + "loss": 0.0725, + "step": 8830 + }, + { + "epoch": 1.24, + "learning_rate": 4.793397903799364e-05, + "loss": 0.0567, + "step": 8832 + }, + { + "epoch": 1.24, + "learning_rate": 4.793351113606588e-05, + "loss": 0.087, + "step": 8834 + }, + { + "epoch": 1.24, + "learning_rate": 4.793304323413812e-05, + "loss": 0.074, + "step": 8836 + }, + { + "epoch": 1.24, + "learning_rate": 4.7932575332210375e-05, + "loss": 0.0677, + "step": 8838 + }, + { + "epoch": 1.24, + "learning_rate": 4.7932107430282614e-05, + "loss": 0.0722, + "step": 8840 + }, + { + "epoch": 1.24, + "learning_rate": 4.793163952835486e-05, + "loss": 0.0724, + "step": 8842 + }, + { + "epoch": 1.24, + "learning_rate": 4.79311716264271e-05, + "loss": 0.0814, + "step": 8844 + }, + { + "epoch": 1.24, + "learning_rate": 4.793070372449935e-05, + "loss": 0.0862, + "step": 8846 + }, + { + "epoch": 1.24, + "learning_rate": 4.793023582257159e-05, + "loss": 0.0586, + "step": 8848 + }, + { + "epoch": 1.24, + "learning_rate": 4.792976792064384e-05, + "loss": 0.0689, + "step": 8850 + }, + { + "epoch": 1.24, + "learning_rate": 4.7929300018716076e-05, + "loss": 0.0714, + "step": 8852 + }, + { + "epoch": 1.24, + "learning_rate": 4.792883211678832e-05, + "loss": 0.0742, + "step": 8854 + }, + { + "epoch": 1.24, + "learning_rate": 4.792836421486057e-05, + "loss": 0.0689, + "step": 8856 + }, + { + "epoch": 1.24, + "learning_rate": 4.7927896312932814e-05, + "loss": 0.0753, + "step": 8858 + }, + { + "epoch": 1.24, + "learning_rate": 4.792742841100505e-05, + "loss": 0.0691, + "step": 8860 + }, + { + "epoch": 1.24, + "learning_rate": 4.79269605090773e-05, + "loss": 0.065, + "step": 8862 + }, + { + "epoch": 1.24, + "learning_rate": 4.7926492607149545e-05, + "loss": 0.0636, + "step": 8864 + }, + { + "epoch": 1.24, + "learning_rate": 4.792602470522179e-05, + "loss": 0.0698, + "step": 8866 + }, + { + "epoch": 1.24, + "learning_rate": 4.792555680329403e-05, + "loss": 0.0668, + "step": 8868 + }, + { + "epoch": 1.25, + "learning_rate": 4.7925088901366276e-05, + "loss": 0.0519, + "step": 8870 + }, + { + "epoch": 1.25, + "learning_rate": 4.792462099943852e-05, + "loss": 0.0669, + "step": 8872 + }, + { + "epoch": 1.25, + "learning_rate": 4.792415309751077e-05, + "loss": 0.0521, + "step": 8874 + }, + { + "epoch": 1.25, + "learning_rate": 4.7923685195583007e-05, + "loss": 0.0697, + "step": 8876 + }, + { + "epoch": 1.25, + "learning_rate": 4.792321729365525e-05, + "loss": 0.0792, + "step": 8878 + }, + { + "epoch": 1.25, + "learning_rate": 4.79227493917275e-05, + "loss": 0.1011, + "step": 8880 + }, + { + "epoch": 1.25, + "learning_rate": 4.7922281489799744e-05, + "loss": 0.078, + "step": 8882 + }, + { + "epoch": 1.25, + "learning_rate": 4.7921813587871983e-05, + "loss": 0.0777, + "step": 8884 + }, + { + "epoch": 1.25, + "learning_rate": 4.792134568594423e-05, + "loss": 0.0759, + "step": 8886 + }, + { + "epoch": 1.25, + "learning_rate": 4.792087778401647e-05, + "loss": 0.0692, + "step": 8888 + }, + { + "epoch": 1.25, + "learning_rate": 4.792040988208872e-05, + "loss": 0.1084, + "step": 8890 + }, + { + "epoch": 1.25, + "learning_rate": 4.791994198016096e-05, + "loss": 0.07, + "step": 8892 + }, + { + "epoch": 1.25, + "learning_rate": 4.7919474078233206e-05, + "loss": 0.0706, + "step": 8894 + }, + { + "epoch": 1.25, + "learning_rate": 4.7919006176305445e-05, + "loss": 0.0777, + "step": 8896 + }, + { + "epoch": 1.25, + "learning_rate": 4.791853827437769e-05, + "loss": 0.0781, + "step": 8898 + }, + { + "epoch": 1.25, + "learning_rate": 4.791807037244994e-05, + "loss": 0.0655, + "step": 8900 + }, + { + "epoch": 1.25, + "learning_rate": 4.791760247052218e-05, + "loss": 0.0752, + "step": 8902 + }, + { + "epoch": 1.25, + "learning_rate": 4.791713456859442e-05, + "loss": 0.0952, + "step": 8904 + }, + { + "epoch": 1.25, + "learning_rate": 4.791666666666667e-05, + "loss": 0.0797, + "step": 8906 + }, + { + "epoch": 1.25, + "learning_rate": 4.7916198764738914e-05, + "loss": 0.0725, + "step": 8908 + }, + { + "epoch": 1.25, + "learning_rate": 4.791573086281116e-05, + "loss": 0.0733, + "step": 8910 + }, + { + "epoch": 1.25, + "learning_rate": 4.79152629608834e-05, + "loss": 0.0604, + "step": 8912 + }, + { + "epoch": 1.25, + "learning_rate": 4.7914795058955645e-05, + "loss": 0.0751, + "step": 8914 + }, + { + "epoch": 1.25, + "learning_rate": 4.791432715702789e-05, + "loss": 0.0783, + "step": 8916 + }, + { + "epoch": 1.25, + "learning_rate": 4.791385925510014e-05, + "loss": 0.0816, + "step": 8918 + }, + { + "epoch": 1.25, + "learning_rate": 4.7913391353172376e-05, + "loss": 0.063, + "step": 8920 + }, + { + "epoch": 1.25, + "learning_rate": 4.791292345124462e-05, + "loss": 0.0801, + "step": 8922 + }, + { + "epoch": 1.25, + "learning_rate": 4.791245554931687e-05, + "loss": 0.0831, + "step": 8924 + }, + { + "epoch": 1.25, + "learning_rate": 4.7911987647389114e-05, + "loss": 0.085, + "step": 8926 + }, + { + "epoch": 1.25, + "learning_rate": 4.791151974546135e-05, + "loss": 0.0568, + "step": 8928 + }, + { + "epoch": 1.25, + "learning_rate": 4.79110518435336e-05, + "loss": 0.0749, + "step": 8930 + }, + { + "epoch": 1.25, + "learning_rate": 4.791058394160584e-05, + "loss": 0.0779, + "step": 8932 + }, + { + "epoch": 1.25, + "learning_rate": 4.791011603967809e-05, + "loss": 0.0623, + "step": 8934 + }, + { + "epoch": 1.25, + "learning_rate": 4.790964813775033e-05, + "loss": 0.0537, + "step": 8936 + }, + { + "epoch": 1.25, + "learning_rate": 4.7909180235822576e-05, + "loss": 0.0671, + "step": 8938 + }, + { + "epoch": 1.25, + "learning_rate": 4.7908712333894815e-05, + "loss": 0.0853, + "step": 8940 + }, + { + "epoch": 1.26, + "learning_rate": 4.790824443196707e-05, + "loss": 0.0844, + "step": 8942 + }, + { + "epoch": 1.26, + "learning_rate": 4.7907776530039307e-05, + "loss": 0.0767, + "step": 8944 + }, + { + "epoch": 1.26, + "learning_rate": 4.790730862811155e-05, + "loss": 0.0777, + "step": 8946 + }, + { + "epoch": 1.26, + "learning_rate": 4.790684072618379e-05, + "loss": 0.0656, + "step": 8948 + }, + { + "epoch": 1.26, + "learning_rate": 4.790637282425604e-05, + "loss": 0.0875, + "step": 8950 + }, + { + "epoch": 1.26, + "learning_rate": 4.7905904922328283e-05, + "loss": 0.0703, + "step": 8952 + }, + { + "epoch": 1.26, + "learning_rate": 4.790543702040053e-05, + "loss": 0.0772, + "step": 8954 + }, + { + "epoch": 1.26, + "learning_rate": 4.790496911847277e-05, + "loss": 0.082, + "step": 8956 + }, + { + "epoch": 1.26, + "learning_rate": 4.7904501216545014e-05, + "loss": 0.0669, + "step": 8958 + }, + { + "epoch": 1.26, + "learning_rate": 4.790403331461726e-05, + "loss": 0.0686, + "step": 8960 + }, + { + "epoch": 1.26, + "learning_rate": 4.7903565412689506e-05, + "loss": 0.061, + "step": 8962 + }, + { + "epoch": 1.26, + "learning_rate": 4.7903097510761745e-05, + "loss": 0.0885, + "step": 8964 + }, + { + "epoch": 1.26, + "learning_rate": 4.790262960883399e-05, + "loss": 0.0773, + "step": 8966 + }, + { + "epoch": 1.26, + "learning_rate": 4.790216170690624e-05, + "loss": 0.0892, + "step": 8968 + }, + { + "epoch": 1.26, + "learning_rate": 4.790169380497848e-05, + "loss": 0.083, + "step": 8970 + }, + { + "epoch": 1.26, + "learning_rate": 4.790122590305072e-05, + "loss": 0.0689, + "step": 8972 + }, + { + "epoch": 1.26, + "learning_rate": 4.790075800112297e-05, + "loss": 0.0794, + "step": 8974 + }, + { + "epoch": 1.26, + "learning_rate": 4.7900290099195214e-05, + "loss": 0.0811, + "step": 8976 + }, + { + "epoch": 1.26, + "learning_rate": 4.789982219726746e-05, + "loss": 0.0799, + "step": 8978 + }, + { + "epoch": 1.26, + "learning_rate": 4.78993542953397e-05, + "loss": 0.0745, + "step": 8980 + }, + { + "epoch": 1.26, + "learning_rate": 4.7898886393411945e-05, + "loss": 0.0651, + "step": 8982 + }, + { + "epoch": 1.26, + "learning_rate": 4.7898418491484184e-05, + "loss": 0.0807, + "step": 8984 + }, + { + "epoch": 1.26, + "learning_rate": 4.789795058955644e-05, + "loss": 0.0698, + "step": 8986 + }, + { + "epoch": 1.26, + "learning_rate": 4.7897482687628676e-05, + "loss": 0.0688, + "step": 8988 + }, + { + "epoch": 1.26, + "learning_rate": 4.789701478570092e-05, + "loss": 0.0598, + "step": 8990 + }, + { + "epoch": 1.26, + "learning_rate": 4.789654688377316e-05, + "loss": 0.0674, + "step": 8992 + }, + { + "epoch": 1.26, + "learning_rate": 4.789607898184541e-05, + "loss": 0.0773, + "step": 8994 + }, + { + "epoch": 1.26, + "learning_rate": 4.789561107991765e-05, + "loss": 0.0767, + "step": 8996 + }, + { + "epoch": 1.26, + "learning_rate": 4.789514317798989e-05, + "loss": 0.0646, + "step": 8998 + }, + { + "epoch": 1.26, + "learning_rate": 4.789467527606214e-05, + "loss": 0.0806, + "step": 9000 + }, + { + "epoch": 1.26, + "eval_gen_len": 30.1724, + "eval_loss": 1.0504204034805298, + "eval_meteor": 0.0494, + "eval_runtime": 14.954, + "eval_samples_per_second": 3.879, + "eval_steps_per_second": 0.535, + "step": 9000 + }, + { + "epoch": 1.26, + "learning_rate": 4.7894207374134384e-05, + "loss": 0.0568, + "step": 9002 + }, + { + "epoch": 1.26, + "learning_rate": 4.789373947220663e-05, + "loss": 0.0617, + "step": 9004 + }, + { + "epoch": 1.26, + "learning_rate": 4.789327157027887e-05, + "loss": 0.0897, + "step": 9006 + }, + { + "epoch": 1.26, + "learning_rate": 4.7892803668351115e-05, + "loss": 0.0402, + "step": 9008 + }, + { + "epoch": 1.26, + "learning_rate": 4.789233576642336e-05, + "loss": 0.0854, + "step": 9010 + }, + { + "epoch": 1.27, + "learning_rate": 4.7891867864495607e-05, + "loss": 0.0905, + "step": 9012 + }, + { + "epoch": 1.27, + "learning_rate": 4.7891399962567846e-05, + "loss": 0.0734, + "step": 9014 + }, + { + "epoch": 1.27, + "learning_rate": 4.789093206064009e-05, + "loss": 0.0816, + "step": 9016 + }, + { + "epoch": 1.27, + "learning_rate": 4.789046415871233e-05, + "loss": 0.0621, + "step": 9018 + }, + { + "epoch": 1.27, + "learning_rate": 4.7889996256784583e-05, + "loss": 0.0782, + "step": 9020 + }, + { + "epoch": 1.27, + "learning_rate": 4.788952835485682e-05, + "loss": 0.0572, + "step": 9022 + }, + { + "epoch": 1.27, + "learning_rate": 4.788906045292907e-05, + "loss": 0.0701, + "step": 9024 + }, + { + "epoch": 1.27, + "learning_rate": 4.788859255100131e-05, + "loss": 0.0729, + "step": 9026 + }, + { + "epoch": 1.27, + "learning_rate": 4.7888124649073554e-05, + "loss": 0.0794, + "step": 9028 + }, + { + "epoch": 1.27, + "learning_rate": 4.78876567471458e-05, + "loss": 0.0732, + "step": 9030 + }, + { + "epoch": 1.27, + "learning_rate": 4.7887188845218045e-05, + "loss": 0.1045, + "step": 9032 + }, + { + "epoch": 1.27, + "learning_rate": 4.7886720943290285e-05, + "loss": 0.0823, + "step": 9034 + }, + { + "epoch": 1.27, + "learning_rate": 4.788625304136253e-05, + "loss": 0.0742, + "step": 9036 + }, + { + "epoch": 1.27, + "learning_rate": 4.7885785139434776e-05, + "loss": 0.0572, + "step": 9038 + }, + { + "epoch": 1.27, + "learning_rate": 4.788531723750702e-05, + "loss": 0.0746, + "step": 9040 + }, + { + "epoch": 1.27, + "learning_rate": 4.788484933557926e-05, + "loss": 0.0704, + "step": 9042 + }, + { + "epoch": 1.27, + "learning_rate": 4.788438143365151e-05, + "loss": 0.1047, + "step": 9044 + }, + { + "epoch": 1.27, + "learning_rate": 4.788391353172375e-05, + "loss": 0.0751, + "step": 9046 + }, + { + "epoch": 1.27, + "learning_rate": 4.7883445629796e-05, + "loss": 0.0787, + "step": 9048 + }, + { + "epoch": 1.27, + "learning_rate": 4.788297772786824e-05, + "loss": 0.0724, + "step": 9050 + }, + { + "epoch": 1.27, + "learning_rate": 4.7882509825940484e-05, + "loss": 0.0762, + "step": 9052 + }, + { + "epoch": 1.27, + "learning_rate": 4.788204192401273e-05, + "loss": 0.074, + "step": 9054 + }, + { + "epoch": 1.27, + "learning_rate": 4.7881574022084976e-05, + "loss": 0.0829, + "step": 9056 + }, + { + "epoch": 1.27, + "learning_rate": 4.7881106120157215e-05, + "loss": 0.0561, + "step": 9058 + }, + { + "epoch": 1.27, + "learning_rate": 4.788063821822946e-05, + "loss": 0.0567, + "step": 9060 + }, + { + "epoch": 1.27, + "learning_rate": 4.78801703163017e-05, + "loss": 0.0665, + "step": 9062 + }, + { + "epoch": 1.27, + "learning_rate": 4.787970241437395e-05, + "loss": 0.0696, + "step": 9064 + }, + { + "epoch": 1.27, + "learning_rate": 4.787923451244619e-05, + "loss": 0.0614, + "step": 9066 + }, + { + "epoch": 1.27, + "learning_rate": 4.787876661051844e-05, + "loss": 0.0622, + "step": 9068 + }, + { + "epoch": 1.27, + "learning_rate": 4.787829870859068e-05, + "loss": 0.0754, + "step": 9070 + }, + { + "epoch": 1.27, + "learning_rate": 4.787783080666293e-05, + "loss": 0.0658, + "step": 9072 + }, + { + "epoch": 1.27, + "learning_rate": 4.787736290473517e-05, + "loss": 0.0959, + "step": 9074 + }, + { + "epoch": 1.27, + "learning_rate": 4.7876895002807415e-05, + "loss": 0.0837, + "step": 9076 + }, + { + "epoch": 1.27, + "learning_rate": 4.7876427100879654e-05, + "loss": 0.0675, + "step": 9078 + }, + { + "epoch": 1.27, + "learning_rate": 4.78759591989519e-05, + "loss": 0.0754, + "step": 9080 + }, + { + "epoch": 1.27, + "learning_rate": 4.7875491297024146e-05, + "loss": 0.0684, + "step": 9082 + }, + { + "epoch": 1.28, + "learning_rate": 4.787502339509639e-05, + "loss": 0.1071, + "step": 9084 + }, + { + "epoch": 1.28, + "learning_rate": 4.787455549316863e-05, + "loss": 0.0761, + "step": 9086 + }, + { + "epoch": 1.28, + "learning_rate": 4.787408759124088e-05, + "loss": 0.0935, + "step": 9088 + }, + { + "epoch": 1.28, + "learning_rate": 4.787361968931312e-05, + "loss": 0.0645, + "step": 9090 + }, + { + "epoch": 1.28, + "learning_rate": 4.787315178738537e-05, + "loss": 0.0616, + "step": 9092 + }, + { + "epoch": 1.28, + "learning_rate": 4.787268388545761e-05, + "loss": 0.0762, + "step": 9094 + }, + { + "epoch": 1.28, + "learning_rate": 4.7872215983529854e-05, + "loss": 0.0685, + "step": 9096 + }, + { + "epoch": 1.28, + "learning_rate": 4.78717480816021e-05, + "loss": 0.0776, + "step": 9098 + }, + { + "epoch": 1.28, + "learning_rate": 4.7871280179674345e-05, + "loss": 0.0725, + "step": 9100 + }, + { + "epoch": 1.28, + "learning_rate": 4.7870812277746585e-05, + "loss": 0.0693, + "step": 9102 + }, + { + "epoch": 1.28, + "learning_rate": 4.787034437581883e-05, + "loss": 0.0788, + "step": 9104 + }, + { + "epoch": 1.28, + "learning_rate": 4.7869876473891076e-05, + "loss": 0.0564, + "step": 9106 + }, + { + "epoch": 1.28, + "learning_rate": 4.786940857196332e-05, + "loss": 0.0688, + "step": 9108 + }, + { + "epoch": 1.28, + "learning_rate": 4.786894067003556e-05, + "loss": 0.0705, + "step": 9110 + }, + { + "epoch": 1.28, + "learning_rate": 4.786847276810781e-05, + "loss": 0.0418, + "step": 9112 + }, + { + "epoch": 1.28, + "learning_rate": 4.7868004866180046e-05, + "loss": 0.0612, + "step": 9114 + }, + { + "epoch": 1.28, + "learning_rate": 4.78675369642523e-05, + "loss": 0.0784, + "step": 9116 + }, + { + "epoch": 1.28, + "learning_rate": 4.786706906232454e-05, + "loss": 0.0913, + "step": 9118 + }, + { + "epoch": 1.28, + "learning_rate": 4.7866601160396784e-05, + "loss": 0.0921, + "step": 9120 + }, + { + "epoch": 1.28, + "learning_rate": 4.786613325846902e-05, + "loss": 0.089, + "step": 9122 + }, + { + "epoch": 1.28, + "learning_rate": 4.7865665356541276e-05, + "loss": 0.0623, + "step": 9124 + }, + { + "epoch": 1.28, + "learning_rate": 4.7865197454613515e-05, + "loss": 0.0994, + "step": 9126 + }, + { + "epoch": 1.28, + "learning_rate": 4.786472955268576e-05, + "loss": 0.0848, + "step": 9128 + }, + { + "epoch": 1.28, + "learning_rate": 4.7864261650758e-05, + "loss": 0.0723, + "step": 9130 + }, + { + "epoch": 1.28, + "learning_rate": 4.7863793748830246e-05, + "loss": 0.0634, + "step": 9132 + }, + { + "epoch": 1.28, + "learning_rate": 4.786332584690249e-05, + "loss": 0.0634, + "step": 9134 + }, + { + "epoch": 1.28, + "learning_rate": 4.786285794497474e-05, + "loss": 0.0679, + "step": 9136 + }, + { + "epoch": 1.28, + "learning_rate": 4.786239004304698e-05, + "loss": 0.0653, + "step": 9138 + }, + { + "epoch": 1.28, + "learning_rate": 4.786192214111922e-05, + "loss": 0.0844, + "step": 9140 + }, + { + "epoch": 1.28, + "learning_rate": 4.786145423919147e-05, + "loss": 0.0672, + "step": 9142 + }, + { + "epoch": 1.28, + "learning_rate": 4.7860986337263715e-05, + "loss": 0.0579, + "step": 9144 + }, + { + "epoch": 1.28, + "learning_rate": 4.7860518435335954e-05, + "loss": 0.0592, + "step": 9146 + }, + { + "epoch": 1.28, + "learning_rate": 4.78600505334082e-05, + "loss": 0.0571, + "step": 9148 + }, + { + "epoch": 1.28, + "learning_rate": 4.7859582631480446e-05, + "loss": 0.069, + "step": 9150 + }, + { + "epoch": 1.28, + "learning_rate": 4.785911472955269e-05, + "loss": 0.0774, + "step": 9152 + }, + { + "epoch": 1.28, + "learning_rate": 4.785864682762493e-05, + "loss": 0.1049, + "step": 9154 + }, + { + "epoch": 1.29, + "learning_rate": 4.785817892569718e-05, + "loss": 0.0677, + "step": 9156 + }, + { + "epoch": 1.29, + "learning_rate": 4.785771102376942e-05, + "loss": 0.0734, + "step": 9158 + }, + { + "epoch": 1.29, + "learning_rate": 4.785724312184167e-05, + "loss": 0.1023, + "step": 9160 + }, + { + "epoch": 1.29, + "learning_rate": 4.785677521991391e-05, + "loss": 0.0664, + "step": 9162 + }, + { + "epoch": 1.29, + "learning_rate": 4.7856307317986154e-05, + "loss": 0.0645, + "step": 9164 + }, + { + "epoch": 1.29, + "learning_rate": 4.785583941605839e-05, + "loss": 0.0904, + "step": 9166 + }, + { + "epoch": 1.29, + "learning_rate": 4.7855371514130645e-05, + "loss": 0.0822, + "step": 9168 + }, + { + "epoch": 1.29, + "learning_rate": 4.7854903612202885e-05, + "loss": 0.0716, + "step": 9170 + }, + { + "epoch": 1.29, + "learning_rate": 4.785443571027513e-05, + "loss": 0.0632, + "step": 9172 + }, + { + "epoch": 1.29, + "learning_rate": 4.785396780834737e-05, + "loss": 0.0616, + "step": 9174 + }, + { + "epoch": 1.29, + "learning_rate": 4.7853499906419615e-05, + "loss": 0.0643, + "step": 9176 + }, + { + "epoch": 1.29, + "learning_rate": 4.785303200449186e-05, + "loss": 0.0805, + "step": 9178 + }, + { + "epoch": 1.29, + "learning_rate": 4.785256410256411e-05, + "loss": 0.0551, + "step": 9180 + }, + { + "epoch": 1.29, + "learning_rate": 4.7852096200636346e-05, + "loss": 0.0611, + "step": 9182 + }, + { + "epoch": 1.29, + "learning_rate": 4.785162829870859e-05, + "loss": 0.0653, + "step": 9184 + }, + { + "epoch": 1.29, + "learning_rate": 4.785116039678084e-05, + "loss": 0.0741, + "step": 9186 + }, + { + "epoch": 1.29, + "learning_rate": 4.7850692494853084e-05, + "loss": 0.062, + "step": 9188 + }, + { + "epoch": 1.29, + "learning_rate": 4.785022459292532e-05, + "loss": 0.0667, + "step": 9190 + }, + { + "epoch": 1.29, + "learning_rate": 4.784975669099757e-05, + "loss": 0.0588, + "step": 9192 + }, + { + "epoch": 1.29, + "learning_rate": 4.7849288789069815e-05, + "loss": 0.0786, + "step": 9194 + }, + { + "epoch": 1.29, + "learning_rate": 4.784882088714206e-05, + "loss": 0.1006, + "step": 9196 + }, + { + "epoch": 1.29, + "learning_rate": 4.78483529852143e-05, + "loss": 0.0753, + "step": 9198 + }, + { + "epoch": 1.29, + "learning_rate": 4.7847885083286546e-05, + "loss": 0.0867, + "step": 9200 + }, + { + "epoch": 1.29, + "learning_rate": 4.784741718135879e-05, + "loss": 0.0679, + "step": 9202 + }, + { + "epoch": 1.29, + "learning_rate": 4.784694927943104e-05, + "loss": 0.0838, + "step": 9204 + }, + { + "epoch": 1.29, + "learning_rate": 4.784648137750328e-05, + "loss": 0.0921, + "step": 9206 + }, + { + "epoch": 1.29, + "learning_rate": 4.784601347557552e-05, + "loss": 0.0794, + "step": 9208 + }, + { + "epoch": 1.29, + "learning_rate": 4.784554557364776e-05, + "loss": 0.0599, + "step": 9210 + }, + { + "epoch": 1.29, + "learning_rate": 4.7845077671720015e-05, + "loss": 0.0683, + "step": 9212 + }, + { + "epoch": 1.29, + "learning_rate": 4.7844609769792254e-05, + "loss": 0.0855, + "step": 9214 + }, + { + "epoch": 1.29, + "learning_rate": 4.78441418678645e-05, + "loss": 0.0653, + "step": 9216 + }, + { + "epoch": 1.29, + "learning_rate": 4.784367396593674e-05, + "loss": 0.0585, + "step": 9218 + }, + { + "epoch": 1.29, + "learning_rate": 4.784320606400899e-05, + "loss": 0.081, + "step": 9220 + }, + { + "epoch": 1.29, + "learning_rate": 4.784273816208123e-05, + "loss": 0.0627, + "step": 9222 + }, + { + "epoch": 1.29, + "learning_rate": 4.784227026015348e-05, + "loss": 0.0702, + "step": 9224 + }, + { + "epoch": 1.3, + "learning_rate": 4.7841802358225716e-05, + "loss": 0.0584, + "step": 9226 + }, + { + "epoch": 1.3, + "learning_rate": 4.784133445629796e-05, + "loss": 0.1109, + "step": 9228 + }, + { + "epoch": 1.3, + "learning_rate": 4.784086655437021e-05, + "loss": 0.0746, + "step": 9230 + }, + { + "epoch": 1.3, + "learning_rate": 4.7840398652442454e-05, + "loss": 0.078, + "step": 9232 + }, + { + "epoch": 1.3, + "learning_rate": 4.783993075051469e-05, + "loss": 0.0719, + "step": 9234 + }, + { + "epoch": 1.3, + "learning_rate": 4.783946284858694e-05, + "loss": 0.059, + "step": 9236 + }, + { + "epoch": 1.3, + "learning_rate": 4.7838994946659185e-05, + "loss": 0.0641, + "step": 9238 + }, + { + "epoch": 1.3, + "learning_rate": 4.783852704473143e-05, + "loss": 0.0597, + "step": 9240 + }, + { + "epoch": 1.3, + "learning_rate": 4.783805914280367e-05, + "loss": 0.0759, + "step": 9242 + }, + { + "epoch": 1.3, + "learning_rate": 4.783759124087591e-05, + "loss": 0.0763, + "step": 9244 + }, + { + "epoch": 1.3, + "learning_rate": 4.783712333894816e-05, + "loss": 0.0799, + "step": 9246 + }, + { + "epoch": 1.3, + "learning_rate": 4.78366554370204e-05, + "loss": 0.0796, + "step": 9248 + }, + { + "epoch": 1.3, + "learning_rate": 4.7836187535092646e-05, + "loss": 0.0719, + "step": 9250 + }, + { + "epoch": 1.3, + "learning_rate": 4.7835719633164886e-05, + "loss": 0.0486, + "step": 9252 + }, + { + "epoch": 1.3, + "learning_rate": 4.783525173123714e-05, + "loss": 0.0624, + "step": 9254 + }, + { + "epoch": 1.3, + "learning_rate": 4.783478382930938e-05, + "loss": 0.073, + "step": 9256 + }, + { + "epoch": 1.3, + "learning_rate": 4.783431592738162e-05, + "loss": 0.057, + "step": 9258 + }, + { + "epoch": 1.3, + "learning_rate": 4.783384802545386e-05, + "loss": 0.073, + "step": 9260 + }, + { + "epoch": 1.3, + "learning_rate": 4.783338012352611e-05, + "loss": 0.0743, + "step": 9262 + }, + { + "epoch": 1.3, + "learning_rate": 4.7832912221598354e-05, + "loss": 0.0986, + "step": 9264 + }, + { + "epoch": 1.3, + "learning_rate": 4.78324443196706e-05, + "loss": 0.0742, + "step": 9266 + }, + { + "epoch": 1.3, + "learning_rate": 4.783197641774284e-05, + "loss": 0.0651, + "step": 9268 + }, + { + "epoch": 1.3, + "learning_rate": 4.7831508515815085e-05, + "loss": 0.08, + "step": 9270 + }, + { + "epoch": 1.3, + "learning_rate": 4.783104061388733e-05, + "loss": 0.0776, + "step": 9272 + }, + { + "epoch": 1.3, + "learning_rate": 4.783057271195958e-05, + "loss": 0.0654, + "step": 9274 + }, + { + "epoch": 1.3, + "learning_rate": 4.7830104810031816e-05, + "loss": 0.0775, + "step": 9276 + }, + { + "epoch": 1.3, + "learning_rate": 4.782963690810406e-05, + "loss": 0.0777, + "step": 9278 + }, + { + "epoch": 1.3, + "learning_rate": 4.782916900617631e-05, + "loss": 0.0784, + "step": 9280 + }, + { + "epoch": 1.3, + "learning_rate": 4.7828701104248554e-05, + "loss": 0.0645, + "step": 9282 + }, + { + "epoch": 1.3, + "learning_rate": 4.782823320232079e-05, + "loss": 0.0714, + "step": 9284 + }, + { + "epoch": 1.3, + "learning_rate": 4.782776530039304e-05, + "loss": 0.0904, + "step": 9286 + }, + { + "epoch": 1.3, + "learning_rate": 4.7827297398465285e-05, + "loss": 0.0768, + "step": 9288 + }, + { + "epoch": 1.3, + "learning_rate": 4.782682949653753e-05, + "loss": 0.0812, + "step": 9290 + }, + { + "epoch": 1.3, + "learning_rate": 4.782636159460977e-05, + "loss": 0.093, + "step": 9292 + }, + { + "epoch": 1.3, + "learning_rate": 4.7825893692682016e-05, + "loss": 0.1154, + "step": 9294 + }, + { + "epoch": 1.3, + "learning_rate": 4.7825425790754255e-05, + "loss": 0.0621, + "step": 9296 + }, + { + "epoch": 1.31, + "learning_rate": 4.782495788882651e-05, + "loss": 0.0908, + "step": 9298 + }, + { + "epoch": 1.31, + "learning_rate": 4.782448998689875e-05, + "loss": 0.0811, + "step": 9300 + }, + { + "epoch": 1.31, + "learning_rate": 4.782402208497099e-05, + "loss": 0.092, + "step": 9302 + }, + { + "epoch": 1.31, + "learning_rate": 4.782355418304323e-05, + "loss": 0.0684, + "step": 9304 + }, + { + "epoch": 1.31, + "learning_rate": 4.782308628111548e-05, + "loss": 0.0721, + "step": 9306 + }, + { + "epoch": 1.31, + "learning_rate": 4.7822618379187724e-05, + "loss": 0.0758, + "step": 9308 + }, + { + "epoch": 1.31, + "learning_rate": 4.782215047725997e-05, + "loss": 0.0707, + "step": 9310 + }, + { + "epoch": 1.31, + "learning_rate": 4.782168257533221e-05, + "loss": 0.0724, + "step": 9312 + }, + { + "epoch": 1.31, + "learning_rate": 4.7821214673404455e-05, + "loss": 0.0972, + "step": 9314 + }, + { + "epoch": 1.31, + "learning_rate": 4.78207467714767e-05, + "loss": 0.0708, + "step": 9316 + }, + { + "epoch": 1.31, + "learning_rate": 4.7820278869548946e-05, + "loss": 0.076, + "step": 9318 + }, + { + "epoch": 1.31, + "learning_rate": 4.7819810967621186e-05, + "loss": 0.0801, + "step": 9320 + }, + { + "epoch": 1.31, + "learning_rate": 4.781934306569343e-05, + "loss": 0.0643, + "step": 9322 + }, + { + "epoch": 1.31, + "learning_rate": 4.781887516376568e-05, + "loss": 0.0598, + "step": 9324 + }, + { + "epoch": 1.31, + "learning_rate": 4.781840726183792e-05, + "loss": 0.0849, + "step": 9326 + }, + { + "epoch": 1.31, + "learning_rate": 4.781793935991016e-05, + "loss": 0.0593, + "step": 9328 + }, + { + "epoch": 1.31, + "learning_rate": 4.781747145798241e-05, + "loss": 0.0835, + "step": 9330 + }, + { + "epoch": 1.31, + "learning_rate": 4.7817003556054654e-05, + "loss": 0.0569, + "step": 9332 + }, + { + "epoch": 1.31, + "learning_rate": 4.78165356541269e-05, + "loss": 0.0801, + "step": 9334 + }, + { + "epoch": 1.31, + "learning_rate": 4.781606775219914e-05, + "loss": 0.0816, + "step": 9336 + }, + { + "epoch": 1.31, + "learning_rate": 4.7815599850271385e-05, + "loss": 0.0702, + "step": 9338 + }, + { + "epoch": 1.31, + "learning_rate": 4.7815131948343624e-05, + "loss": 0.0764, + "step": 9340 + }, + { + "epoch": 1.31, + "learning_rate": 4.781466404641588e-05, + "loss": 0.0907, + "step": 9342 + }, + { + "epoch": 1.31, + "learning_rate": 4.7814196144488116e-05, + "loss": 0.0728, + "step": 9344 + }, + { + "epoch": 1.31, + "learning_rate": 4.781372824256036e-05, + "loss": 0.0776, + "step": 9346 + }, + { + "epoch": 1.31, + "learning_rate": 4.78132603406326e-05, + "loss": 0.0794, + "step": 9348 + }, + { + "epoch": 1.31, + "learning_rate": 4.7812792438704854e-05, + "loss": 0.0685, + "step": 9350 + }, + { + "epoch": 1.31, + "learning_rate": 4.781232453677709e-05, + "loss": 0.0847, + "step": 9352 + }, + { + "epoch": 1.31, + "learning_rate": 4.781185663484934e-05, + "loss": 0.0935, + "step": 9354 + }, + { + "epoch": 1.31, + "learning_rate": 4.781138873292158e-05, + "loss": 0.0531, + "step": 9356 + }, + { + "epoch": 1.31, + "learning_rate": 4.7810920830993824e-05, + "loss": 0.0529, + "step": 9358 + }, + { + "epoch": 1.31, + "learning_rate": 4.781045292906607e-05, + "loss": 0.0577, + "step": 9360 + }, + { + "epoch": 1.31, + "learning_rate": 4.7809985027138316e-05, + "loss": 0.0856, + "step": 9362 + }, + { + "epoch": 1.31, + "learning_rate": 4.7809517125210555e-05, + "loss": 0.0855, + "step": 9364 + }, + { + "epoch": 1.31, + "learning_rate": 4.78090492232828e-05, + "loss": 0.0776, + "step": 9366 + }, + { + "epoch": 1.31, + "learning_rate": 4.780858132135505e-05, + "loss": 0.0795, + "step": 9368 + }, + { + "epoch": 1.32, + "learning_rate": 4.780811341942729e-05, + "loss": 0.0794, + "step": 9370 + }, + { + "epoch": 1.32, + "learning_rate": 4.780764551749953e-05, + "loss": 0.0785, + "step": 9372 + }, + { + "epoch": 1.32, + "learning_rate": 4.780717761557178e-05, + "loss": 0.0669, + "step": 9374 + }, + { + "epoch": 1.32, + "learning_rate": 4.7806709713644024e-05, + "loss": 0.0855, + "step": 9376 + }, + { + "epoch": 1.32, + "learning_rate": 4.780624181171627e-05, + "loss": 0.0793, + "step": 9378 + }, + { + "epoch": 1.32, + "learning_rate": 4.780577390978851e-05, + "loss": 0.0725, + "step": 9380 + }, + { + "epoch": 1.32, + "learning_rate": 4.7805306007860755e-05, + "loss": 0.0715, + "step": 9382 + }, + { + "epoch": 1.32, + "learning_rate": 4.7804838105933e-05, + "loss": 0.0715, + "step": 9384 + }, + { + "epoch": 1.32, + "learning_rate": 4.7804370204005247e-05, + "loss": 0.0826, + "step": 9386 + }, + { + "epoch": 1.32, + "learning_rate": 4.7803902302077486e-05, + "loss": 0.0786, + "step": 9388 + }, + { + "epoch": 1.32, + "learning_rate": 4.780343440014973e-05, + "loss": 0.0659, + "step": 9390 + }, + { + "epoch": 1.32, + "learning_rate": 4.780296649822197e-05, + "loss": 0.0815, + "step": 9392 + }, + { + "epoch": 1.32, + "learning_rate": 4.780249859629422e-05, + "loss": 0.0662, + "step": 9394 + }, + { + "epoch": 1.32, + "learning_rate": 4.780203069436646e-05, + "loss": 0.0941, + "step": 9396 + }, + { + "epoch": 1.32, + "learning_rate": 4.780156279243871e-05, + "loss": 0.0717, + "step": 9398 + }, + { + "epoch": 1.32, + "learning_rate": 4.780109489051095e-05, + "loss": 0.0826, + "step": 9400 + }, + { + "epoch": 1.32, + "learning_rate": 4.78006269885832e-05, + "loss": 0.0819, + "step": 9402 + }, + { + "epoch": 1.32, + "learning_rate": 4.780015908665544e-05, + "loss": 0.0614, + "step": 9404 + }, + { + "epoch": 1.32, + "learning_rate": 4.7799691184727685e-05, + "loss": 0.0575, + "step": 9406 + }, + { + "epoch": 1.32, + "learning_rate": 4.7799223282799924e-05, + "loss": 0.0707, + "step": 9408 + }, + { + "epoch": 1.32, + "learning_rate": 4.779875538087217e-05, + "loss": 0.0745, + "step": 9410 + }, + { + "epoch": 1.32, + "learning_rate": 4.7798287478944416e-05, + "loss": 0.1053, + "step": 9412 + }, + { + "epoch": 1.32, + "learning_rate": 4.779781957701666e-05, + "loss": 0.0779, + "step": 9414 + }, + { + "epoch": 1.32, + "learning_rate": 4.77973516750889e-05, + "loss": 0.0562, + "step": 9416 + }, + { + "epoch": 1.32, + "learning_rate": 4.779688377316115e-05, + "loss": 0.0793, + "step": 9418 + }, + { + "epoch": 1.32, + "learning_rate": 4.779641587123339e-05, + "loss": 0.0771, + "step": 9420 + }, + { + "epoch": 1.32, + "learning_rate": 4.779594796930564e-05, + "loss": 0.0901, + "step": 9422 + }, + { + "epoch": 1.32, + "learning_rate": 4.779548006737788e-05, + "loss": 0.0701, + "step": 9424 + }, + { + "epoch": 1.32, + "learning_rate": 4.7795012165450124e-05, + "loss": 0.0783, + "step": 9426 + }, + { + "epoch": 1.32, + "learning_rate": 4.779454426352237e-05, + "loss": 0.0646, + "step": 9428 + }, + { + "epoch": 1.32, + "learning_rate": 4.7794076361594616e-05, + "loss": 0.0641, + "step": 9430 + }, + { + "epoch": 1.32, + "learning_rate": 4.7793608459666855e-05, + "loss": 0.0943, + "step": 9432 + }, + { + "epoch": 1.32, + "learning_rate": 4.77931405577391e-05, + "loss": 0.0689, + "step": 9434 + }, + { + "epoch": 1.32, + "learning_rate": 4.779267265581135e-05, + "loss": 0.0821, + "step": 9436 + }, + { + "epoch": 1.32, + "learning_rate": 4.779220475388359e-05, + "loss": 0.0845, + "step": 9438 + }, + { + "epoch": 1.33, + "learning_rate": 4.779173685195583e-05, + "loss": 0.059, + "step": 9440 + }, + { + "epoch": 1.33, + "learning_rate": 4.779126895002808e-05, + "loss": 0.0603, + "step": 9442 + }, + { + "epoch": 1.33, + "learning_rate": 4.779080104810032e-05, + "loss": 0.0869, + "step": 9444 + }, + { + "epoch": 1.33, + "learning_rate": 4.779033314617257e-05, + "loss": 0.1109, + "step": 9446 + }, + { + "epoch": 1.33, + "learning_rate": 4.778986524424481e-05, + "loss": 0.0692, + "step": 9448 + }, + { + "epoch": 1.33, + "learning_rate": 4.7789397342317055e-05, + "loss": 0.0742, + "step": 9450 + }, + { + "epoch": 1.33, + "learning_rate": 4.7788929440389294e-05, + "loss": 0.0545, + "step": 9452 + }, + { + "epoch": 1.33, + "learning_rate": 4.778846153846154e-05, + "loss": 0.0703, + "step": 9454 + }, + { + "epoch": 1.33, + "learning_rate": 4.7787993636533786e-05, + "loss": 0.0618, + "step": 9456 + }, + { + "epoch": 1.33, + "learning_rate": 4.778752573460603e-05, + "loss": 0.0836, + "step": 9458 + }, + { + "epoch": 1.33, + "learning_rate": 4.778705783267827e-05, + "loss": 0.0541, + "step": 9460 + }, + { + "epoch": 1.33, + "learning_rate": 4.778658993075052e-05, + "loss": 0.0667, + "step": 9462 + }, + { + "epoch": 1.33, + "learning_rate": 4.778612202882276e-05, + "loss": 0.0661, + "step": 9464 + }, + { + "epoch": 1.33, + "learning_rate": 4.778565412689501e-05, + "loss": 0.0839, + "step": 9466 + }, + { + "epoch": 1.33, + "learning_rate": 4.778518622496725e-05, + "loss": 0.0702, + "step": 9468 + }, + { + "epoch": 1.33, + "learning_rate": 4.7784718323039493e-05, + "loss": 0.0738, + "step": 9470 + }, + { + "epoch": 1.33, + "learning_rate": 4.778425042111174e-05, + "loss": 0.0702, + "step": 9472 + }, + { + "epoch": 1.33, + "learning_rate": 4.7783782519183985e-05, + "loss": 0.0947, + "step": 9474 + }, + { + "epoch": 1.33, + "learning_rate": 4.7783314617256224e-05, + "loss": 0.089, + "step": 9476 + }, + { + "epoch": 1.33, + "learning_rate": 4.778284671532847e-05, + "loss": 0.0641, + "step": 9478 + }, + { + "epoch": 1.33, + "learning_rate": 4.7782378813400716e-05, + "loss": 0.0864, + "step": 9480 + }, + { + "epoch": 1.33, + "learning_rate": 4.778191091147296e-05, + "loss": 0.0784, + "step": 9482 + }, + { + "epoch": 1.33, + "learning_rate": 4.77814430095452e-05, + "loss": 0.0937, + "step": 9484 + }, + { + "epoch": 1.33, + "learning_rate": 4.778097510761745e-05, + "loss": 0.0642, + "step": 9486 + }, + { + "epoch": 1.33, + "learning_rate": 4.7780507205689686e-05, + "loss": 0.0696, + "step": 9488 + }, + { + "epoch": 1.33, + "learning_rate": 4.778003930376194e-05, + "loss": 0.0641, + "step": 9490 + }, + { + "epoch": 1.33, + "learning_rate": 4.777957140183418e-05, + "loss": 0.0722, + "step": 9492 + }, + { + "epoch": 1.33, + "learning_rate": 4.7779103499906424e-05, + "loss": 0.0727, + "step": 9494 + }, + { + "epoch": 1.33, + "learning_rate": 4.777863559797866e-05, + "loss": 0.0869, + "step": 9496 + }, + { + "epoch": 1.33, + "learning_rate": 4.777816769605091e-05, + "loss": 0.0742, + "step": 9498 + }, + { + "epoch": 1.33, + "learning_rate": 4.7777699794123155e-05, + "loss": 0.0686, + "step": 9500 + }, + { + "epoch": 1.33, + "learning_rate": 4.7777231892195394e-05, + "loss": 0.0714, + "step": 9502 + }, + { + "epoch": 1.33, + "learning_rate": 4.777676399026764e-05, + "loss": 0.0534, + "step": 9504 + }, + { + "epoch": 1.33, + "learning_rate": 4.7776296088339886e-05, + "loss": 0.0602, + "step": 9506 + }, + { + "epoch": 1.33, + "learning_rate": 4.777582818641213e-05, + "loss": 0.0547, + "step": 9508 + }, + { + "epoch": 1.33, + "learning_rate": 4.777536028448437e-05, + "loss": 0.0715, + "step": 9510 + }, + { + "epoch": 1.34, + "learning_rate": 4.777489238255662e-05, + "loss": 0.0819, + "step": 9512 + }, + { + "epoch": 1.34, + "learning_rate": 4.777442448062886e-05, + "loss": 0.0779, + "step": 9514 + }, + { + "epoch": 1.34, + "learning_rate": 4.777395657870111e-05, + "loss": 0.0715, + "step": 9516 + }, + { + "epoch": 1.34, + "learning_rate": 4.777348867677335e-05, + "loss": 0.0691, + "step": 9518 + }, + { + "epoch": 1.34, + "learning_rate": 4.7773020774845594e-05, + "loss": 0.0671, + "step": 9520 + }, + { + "epoch": 1.34, + "learning_rate": 4.777255287291783e-05, + "loss": 0.0839, + "step": 9522 + }, + { + "epoch": 1.34, + "learning_rate": 4.7772084970990086e-05, + "loss": 0.0706, + "step": 9524 + }, + { + "epoch": 1.34, + "learning_rate": 4.7771617069062325e-05, + "loss": 0.1025, + "step": 9526 + }, + { + "epoch": 1.34, + "learning_rate": 4.777114916713457e-05, + "loss": 0.0597, + "step": 9528 + }, + { + "epoch": 1.34, + "learning_rate": 4.777068126520681e-05, + "loss": 0.0777, + "step": 9530 + }, + { + "epoch": 1.34, + "learning_rate": 4.777021336327906e-05, + "loss": 0.0769, + "step": 9532 + }, + { + "epoch": 1.34, + "learning_rate": 4.77697454613513e-05, + "loss": 0.0754, + "step": 9534 + }, + { + "epoch": 1.34, + "learning_rate": 4.776927755942355e-05, + "loss": 0.0726, + "step": 9536 + }, + { + "epoch": 1.34, + "learning_rate": 4.776880965749579e-05, + "loss": 0.0891, + "step": 9538 + }, + { + "epoch": 1.34, + "learning_rate": 4.776834175556803e-05, + "loss": 0.0779, + "step": 9540 + }, + { + "epoch": 1.34, + "learning_rate": 4.776787385364028e-05, + "loss": 0.0538, + "step": 9542 + }, + { + "epoch": 1.34, + "learning_rate": 4.7767405951712524e-05, + "loss": 0.0767, + "step": 9544 + }, + { + "epoch": 1.34, + "learning_rate": 4.7766938049784764e-05, + "loss": 0.0645, + "step": 9546 + }, + { + "epoch": 1.34, + "learning_rate": 4.776647014785701e-05, + "loss": 0.0704, + "step": 9548 + }, + { + "epoch": 1.34, + "learning_rate": 4.7766002245929255e-05, + "loss": 0.0526, + "step": 9550 + }, + { + "epoch": 1.34, + "learning_rate": 4.77655343440015e-05, + "loss": 0.0731, + "step": 9552 + }, + { + "epoch": 1.34, + "learning_rate": 4.776506644207374e-05, + "loss": 0.0616, + "step": 9554 + }, + { + "epoch": 1.34, + "learning_rate": 4.7764598540145986e-05, + "loss": 0.0945, + "step": 9556 + }, + { + "epoch": 1.34, + "learning_rate": 4.776413063821823e-05, + "loss": 0.0987, + "step": 9558 + }, + { + "epoch": 1.34, + "learning_rate": 4.776366273629048e-05, + "loss": 0.0715, + "step": 9560 + }, + { + "epoch": 1.34, + "learning_rate": 4.776319483436272e-05, + "loss": 0.0913, + "step": 9562 + }, + { + "epoch": 1.34, + "learning_rate": 4.776272693243496e-05, + "loss": 0.0794, + "step": 9564 + }, + { + "epoch": 1.34, + "learning_rate": 4.776225903050721e-05, + "loss": 0.0623, + "step": 9566 + }, + { + "epoch": 1.34, + "learning_rate": 4.7761791128579455e-05, + "loss": 0.05, + "step": 9568 + }, + { + "epoch": 1.34, + "learning_rate": 4.7761323226651694e-05, + "loss": 0.0791, + "step": 9570 + }, + { + "epoch": 1.34, + "learning_rate": 4.776085532472394e-05, + "loss": 0.0791, + "step": 9572 + }, + { + "epoch": 1.34, + "learning_rate": 4.776038742279618e-05, + "loss": 0.0999, + "step": 9574 + }, + { + "epoch": 1.34, + "learning_rate": 4.775991952086843e-05, + "loss": 0.0884, + "step": 9576 + }, + { + "epoch": 1.34, + "learning_rate": 4.775945161894067e-05, + "loss": 0.0693, + "step": 9578 + }, + { + "epoch": 1.34, + "learning_rate": 4.775898371701292e-05, + "loss": 0.0907, + "step": 9580 + }, + { + "epoch": 1.35, + "learning_rate": 4.7758515815085156e-05, + "loss": 0.0747, + "step": 9582 + }, + { + "epoch": 1.35, + "learning_rate": 4.775804791315741e-05, + "loss": 0.0725, + "step": 9584 + }, + { + "epoch": 1.35, + "learning_rate": 4.775758001122965e-05, + "loss": 0.0684, + "step": 9586 + }, + { + "epoch": 1.35, + "learning_rate": 4.7757112109301894e-05, + "loss": 0.065, + "step": 9588 + }, + { + "epoch": 1.35, + "learning_rate": 4.775664420737413e-05, + "loss": 0.0754, + "step": 9590 + }, + { + "epoch": 1.35, + "learning_rate": 4.775617630544638e-05, + "loss": 0.0679, + "step": 9592 + }, + { + "epoch": 1.35, + "learning_rate": 4.7755708403518625e-05, + "loss": 0.0682, + "step": 9594 + }, + { + "epoch": 1.35, + "learning_rate": 4.775524050159087e-05, + "loss": 0.0815, + "step": 9596 + }, + { + "epoch": 1.35, + "learning_rate": 4.775477259966311e-05, + "loss": 0.0898, + "step": 9598 + }, + { + "epoch": 1.35, + "learning_rate": 4.7754304697735356e-05, + "loss": 0.0839, + "step": 9600 + }, + { + "epoch": 1.35, + "learning_rate": 4.77538367958076e-05, + "loss": 0.083, + "step": 9602 + }, + { + "epoch": 1.35, + "learning_rate": 4.775336889387985e-05, + "loss": 0.0856, + "step": 9604 + }, + { + "epoch": 1.35, + "learning_rate": 4.775290099195209e-05, + "loss": 0.0677, + "step": 9606 + }, + { + "epoch": 1.35, + "learning_rate": 4.775243309002433e-05, + "loss": 0.0781, + "step": 9608 + }, + { + "epoch": 1.35, + "learning_rate": 4.775196518809658e-05, + "loss": 0.0991, + "step": 9610 + }, + { + "epoch": 1.35, + "learning_rate": 4.7751497286168824e-05, + "loss": 0.0913, + "step": 9612 + }, + { + "epoch": 1.35, + "learning_rate": 4.7751029384241064e-05, + "loss": 0.0608, + "step": 9614 + }, + { + "epoch": 1.35, + "learning_rate": 4.775056148231331e-05, + "loss": 0.0653, + "step": 9616 + }, + { + "epoch": 1.35, + "learning_rate": 4.775009358038555e-05, + "loss": 0.0609, + "step": 9618 + }, + { + "epoch": 1.35, + "learning_rate": 4.77496256784578e-05, + "loss": 0.0785, + "step": 9620 + }, + { + "epoch": 1.35, + "learning_rate": 4.774915777653004e-05, + "loss": 0.0784, + "step": 9622 + }, + { + "epoch": 1.35, + "learning_rate": 4.7748689874602286e-05, + "loss": 0.0833, + "step": 9624 + }, + { + "epoch": 1.35, + "learning_rate": 4.7748221972674526e-05, + "loss": 0.0717, + "step": 9626 + }, + { + "epoch": 1.35, + "learning_rate": 4.774775407074678e-05, + "loss": 0.0682, + "step": 9628 + }, + { + "epoch": 1.35, + "learning_rate": 4.774728616881902e-05, + "loss": 0.0794, + "step": 9630 + }, + { + "epoch": 1.35, + "learning_rate": 4.774681826689126e-05, + "loss": 0.0642, + "step": 9632 + }, + { + "epoch": 1.35, + "learning_rate": 4.77463503649635e-05, + "loss": 0.0813, + "step": 9634 + }, + { + "epoch": 1.35, + "learning_rate": 4.774588246303575e-05, + "loss": 0.063, + "step": 9636 + }, + { + "epoch": 1.35, + "learning_rate": 4.7745414561107994e-05, + "loss": 0.072, + "step": 9638 + }, + { + "epoch": 1.35, + "learning_rate": 4.774494665918024e-05, + "loss": 0.072, + "step": 9640 + }, + { + "epoch": 1.35, + "learning_rate": 4.774447875725248e-05, + "loss": 0.0679, + "step": 9642 + }, + { + "epoch": 1.35, + "learning_rate": 4.7744010855324725e-05, + "loss": 0.0886, + "step": 9644 + }, + { + "epoch": 1.35, + "learning_rate": 4.774354295339697e-05, + "loss": 0.0753, + "step": 9646 + }, + { + "epoch": 1.35, + "learning_rate": 4.774307505146922e-05, + "loss": 0.073, + "step": 9648 + }, + { + "epoch": 1.35, + "learning_rate": 4.7742607149541456e-05, + "loss": 0.0789, + "step": 9650 + }, + { + "epoch": 1.35, + "learning_rate": 4.77421392476137e-05, + "loss": 0.0915, + "step": 9652 + }, + { + "epoch": 1.36, + "learning_rate": 4.774167134568595e-05, + "loss": 0.0621, + "step": 9654 + }, + { + "epoch": 1.36, + "learning_rate": 4.7741203443758194e-05, + "loss": 0.0681, + "step": 9656 + }, + { + "epoch": 1.36, + "learning_rate": 4.774073554183043e-05, + "loss": 0.1235, + "step": 9658 + }, + { + "epoch": 1.36, + "learning_rate": 4.774026763990268e-05, + "loss": 0.0724, + "step": 9660 + }, + { + "epoch": 1.36, + "learning_rate": 4.7739799737974925e-05, + "loss": 0.0849, + "step": 9662 + }, + { + "epoch": 1.36, + "learning_rate": 4.773933183604717e-05, + "loss": 0.1005, + "step": 9664 + }, + { + "epoch": 1.36, + "learning_rate": 4.773886393411941e-05, + "loss": 0.0999, + "step": 9666 + }, + { + "epoch": 1.36, + "learning_rate": 4.7738396032191656e-05, + "loss": 0.0581, + "step": 9668 + }, + { + "epoch": 1.36, + "learning_rate": 4.7737928130263895e-05, + "loss": 0.0877, + "step": 9670 + }, + { + "epoch": 1.36, + "learning_rate": 4.773746022833615e-05, + "loss": 0.0776, + "step": 9672 + }, + { + "epoch": 1.36, + "learning_rate": 4.773699232640839e-05, + "loss": 0.0849, + "step": 9674 + }, + { + "epoch": 1.36, + "learning_rate": 4.773652442448063e-05, + "loss": 0.0882, + "step": 9676 + }, + { + "epoch": 1.36, + "learning_rate": 4.773605652255287e-05, + "loss": 0.074, + "step": 9678 + }, + { + "epoch": 1.36, + "learning_rate": 4.7735588620625125e-05, + "loss": 0.0653, + "step": 9680 + }, + { + "epoch": 1.36, + "learning_rate": 4.7735120718697364e-05, + "loss": 0.0706, + "step": 9682 + }, + { + "epoch": 1.36, + "learning_rate": 4.773465281676961e-05, + "loss": 0.0886, + "step": 9684 + }, + { + "epoch": 1.36, + "learning_rate": 4.773418491484185e-05, + "loss": 0.0674, + "step": 9686 + }, + { + "epoch": 1.36, + "learning_rate": 4.7733717012914095e-05, + "loss": 0.0827, + "step": 9688 + }, + { + "epoch": 1.36, + "learning_rate": 4.773324911098634e-05, + "loss": 0.0712, + "step": 9690 + }, + { + "epoch": 1.36, + "learning_rate": 4.7732781209058586e-05, + "loss": 0.064, + "step": 9692 + }, + { + "epoch": 1.36, + "learning_rate": 4.7732313307130826e-05, + "loss": 0.0863, + "step": 9694 + }, + { + "epoch": 1.36, + "learning_rate": 4.773184540520307e-05, + "loss": 0.0711, + "step": 9696 + }, + { + "epoch": 1.36, + "learning_rate": 4.773137750327532e-05, + "loss": 0.0988, + "step": 9698 + }, + { + "epoch": 1.36, + "learning_rate": 4.773090960134756e-05, + "loss": 0.0715, + "step": 9700 + }, + { + "epoch": 1.36, + "learning_rate": 4.77304416994198e-05, + "loss": 0.069, + "step": 9702 + }, + { + "epoch": 1.36, + "learning_rate": 4.772997379749205e-05, + "loss": 0.0843, + "step": 9704 + }, + { + "epoch": 1.36, + "learning_rate": 4.7729505895564294e-05, + "loss": 0.0505, + "step": 9706 + }, + { + "epoch": 1.36, + "learning_rate": 4.772903799363654e-05, + "loss": 0.0832, + "step": 9708 + }, + { + "epoch": 1.36, + "learning_rate": 4.772857009170878e-05, + "loss": 0.0674, + "step": 9710 + }, + { + "epoch": 1.36, + "learning_rate": 4.7728102189781025e-05, + "loss": 0.0918, + "step": 9712 + }, + { + "epoch": 1.36, + "learning_rate": 4.772763428785327e-05, + "loss": 0.062, + "step": 9714 + }, + { + "epoch": 1.36, + "learning_rate": 4.772716638592552e-05, + "loss": 0.0787, + "step": 9716 + }, + { + "epoch": 1.36, + "learning_rate": 4.7726698483997756e-05, + "loss": 0.1003, + "step": 9718 + }, + { + "epoch": 1.36, + "learning_rate": 4.772623058207e-05, + "loss": 0.0892, + "step": 9720 + }, + { + "epoch": 1.36, + "learning_rate": 4.772576268014224e-05, + "loss": 0.0901, + "step": 9722 + }, + { + "epoch": 1.36, + "learning_rate": 4.7725294778214494e-05, + "loss": 0.0626, + "step": 9724 + }, + { + "epoch": 1.37, + "learning_rate": 4.772482687628673e-05, + "loss": 0.0543, + "step": 9726 + }, + { + "epoch": 1.37, + "learning_rate": 4.772435897435898e-05, + "loss": 0.0589, + "step": 9728 + }, + { + "epoch": 1.37, + "learning_rate": 4.772389107243122e-05, + "loss": 0.0743, + "step": 9730 + }, + { + "epoch": 1.37, + "learning_rate": 4.7723423170503464e-05, + "loss": 0.0698, + "step": 9732 + }, + { + "epoch": 1.37, + "learning_rate": 4.772295526857571e-05, + "loss": 0.0566, + "step": 9734 + }, + { + "epoch": 1.37, + "learning_rate": 4.7722487366647956e-05, + "loss": 0.0756, + "step": 9736 + }, + { + "epoch": 1.37, + "learning_rate": 4.7722019464720195e-05, + "loss": 0.0818, + "step": 9738 + }, + { + "epoch": 1.37, + "learning_rate": 4.772155156279244e-05, + "loss": 0.08, + "step": 9740 + }, + { + "epoch": 1.37, + "learning_rate": 4.772108366086469e-05, + "loss": 0.0814, + "step": 9742 + }, + { + "epoch": 1.37, + "learning_rate": 4.772061575893693e-05, + "loss": 0.0714, + "step": 9744 + }, + { + "epoch": 1.37, + "learning_rate": 4.772014785700917e-05, + "loss": 0.0875, + "step": 9746 + }, + { + "epoch": 1.37, + "learning_rate": 4.771967995508142e-05, + "loss": 0.0761, + "step": 9748 + }, + { + "epoch": 1.37, + "learning_rate": 4.7719212053153664e-05, + "loss": 0.104, + "step": 9750 + }, + { + "epoch": 1.37, + "learning_rate": 4.77187441512259e-05, + "loss": 0.0747, + "step": 9752 + }, + { + "epoch": 1.37, + "learning_rate": 4.771827624929815e-05, + "loss": 0.0648, + "step": 9754 + }, + { + "epoch": 1.37, + "learning_rate": 4.771780834737039e-05, + "loss": 0.0821, + "step": 9756 + }, + { + "epoch": 1.37, + "learning_rate": 4.771734044544264e-05, + "loss": 0.075, + "step": 9758 + }, + { + "epoch": 1.37, + "learning_rate": 4.771687254351488e-05, + "loss": 0.0703, + "step": 9760 + }, + { + "epoch": 1.37, + "learning_rate": 4.7716404641587126e-05, + "loss": 0.0719, + "step": 9762 + }, + { + "epoch": 1.37, + "learning_rate": 4.7715936739659365e-05, + "loss": 0.0866, + "step": 9764 + }, + { + "epoch": 1.37, + "learning_rate": 4.771546883773161e-05, + "loss": 0.0786, + "step": 9766 + }, + { + "epoch": 1.37, + "learning_rate": 4.7715000935803857e-05, + "loss": 0.0608, + "step": 9768 + }, + { + "epoch": 1.37, + "learning_rate": 4.77145330338761e-05, + "loss": 0.0894, + "step": 9770 + }, + { + "epoch": 1.37, + "learning_rate": 4.771406513194834e-05, + "loss": 0.0729, + "step": 9772 + }, + { + "epoch": 1.37, + "learning_rate": 4.771359723002059e-05, + "loss": 0.0702, + "step": 9774 + }, + { + "epoch": 1.37, + "learning_rate": 4.7713129328092833e-05, + "loss": 0.0739, + "step": 9776 + }, + { + "epoch": 1.37, + "learning_rate": 4.771266142616508e-05, + "loss": 0.0684, + "step": 9778 + }, + { + "epoch": 1.37, + "learning_rate": 4.771219352423732e-05, + "loss": 0.0607, + "step": 9780 + }, + { + "epoch": 1.37, + "learning_rate": 4.7711725622309564e-05, + "loss": 0.0645, + "step": 9782 + }, + { + "epoch": 1.37, + "learning_rate": 4.771125772038181e-05, + "loss": 0.0874, + "step": 9784 + }, + { + "epoch": 1.37, + "learning_rate": 4.7710789818454056e-05, + "loss": 0.0736, + "step": 9786 + }, + { + "epoch": 1.37, + "learning_rate": 4.7710321916526295e-05, + "loss": 0.0786, + "step": 9788 + }, + { + "epoch": 1.37, + "learning_rate": 4.770985401459854e-05, + "loss": 0.0883, + "step": 9790 + }, + { + "epoch": 1.37, + "learning_rate": 4.770938611267079e-05, + "loss": 0.066, + "step": 9792 + }, + { + "epoch": 1.37, + "learning_rate": 4.770891821074303e-05, + "loss": 0.0771, + "step": 9794 + }, + { + "epoch": 1.38, + "learning_rate": 4.770845030881527e-05, + "loss": 0.0729, + "step": 9796 + }, + { + "epoch": 1.38, + "learning_rate": 4.770798240688752e-05, + "loss": 0.0639, + "step": 9798 + }, + { + "epoch": 1.38, + "learning_rate": 4.770751450495976e-05, + "loss": 0.0947, + "step": 9800 + }, + { + "epoch": 1.38, + "learning_rate": 4.770704660303201e-05, + "loss": 0.0579, + "step": 9802 + }, + { + "epoch": 1.38, + "learning_rate": 4.770657870110425e-05, + "loss": 0.0829, + "step": 9804 + }, + { + "epoch": 1.38, + "learning_rate": 4.7706110799176495e-05, + "loss": 0.0607, + "step": 9806 + }, + { + "epoch": 1.38, + "learning_rate": 4.7705642897248734e-05, + "loss": 0.086, + "step": 9808 + }, + { + "epoch": 1.38, + "learning_rate": 4.770517499532099e-05, + "loss": 0.0956, + "step": 9810 + }, + { + "epoch": 1.38, + "learning_rate": 4.7704707093393226e-05, + "loss": 0.0797, + "step": 9812 + }, + { + "epoch": 1.38, + "learning_rate": 4.770423919146547e-05, + "loss": 0.0675, + "step": 9814 + }, + { + "epoch": 1.38, + "learning_rate": 4.770377128953771e-05, + "loss": 0.0789, + "step": 9816 + }, + { + "epoch": 1.38, + "learning_rate": 4.770330338760996e-05, + "loss": 0.0852, + "step": 9818 + }, + { + "epoch": 1.38, + "learning_rate": 4.77028354856822e-05, + "loss": 0.0734, + "step": 9820 + }, + { + "epoch": 1.38, + "learning_rate": 4.770236758375445e-05, + "loss": 0.0697, + "step": 9822 + }, + { + "epoch": 1.38, + "learning_rate": 4.770189968182669e-05, + "loss": 0.0735, + "step": 9824 + }, + { + "epoch": 1.38, + "learning_rate": 4.7701431779898934e-05, + "loss": 0.075, + "step": 9826 + }, + { + "epoch": 1.38, + "learning_rate": 4.770096387797118e-05, + "loss": 0.0738, + "step": 9828 + }, + { + "epoch": 1.38, + "learning_rate": 4.7700495976043426e-05, + "loss": 0.0763, + "step": 9830 + }, + { + "epoch": 1.38, + "learning_rate": 4.7700028074115665e-05, + "loss": 0.0636, + "step": 9832 + }, + { + "epoch": 1.38, + "learning_rate": 4.769956017218791e-05, + "loss": 0.0753, + "step": 9834 + }, + { + "epoch": 1.38, + "learning_rate": 4.7699092270260157e-05, + "loss": 0.0515, + "step": 9836 + }, + { + "epoch": 1.38, + "learning_rate": 4.76986243683324e-05, + "loss": 0.0864, + "step": 9838 + }, + { + "epoch": 1.38, + "learning_rate": 4.769815646640464e-05, + "loss": 0.082, + "step": 9840 + }, + { + "epoch": 1.38, + "learning_rate": 4.769768856447689e-05, + "loss": 0.103, + "step": 9842 + }, + { + "epoch": 1.38, + "learning_rate": 4.7697220662549133e-05, + "loss": 0.0795, + "step": 9844 + }, + { + "epoch": 1.38, + "learning_rate": 4.769675276062138e-05, + "loss": 0.0762, + "step": 9846 + }, + { + "epoch": 1.38, + "learning_rate": 4.769628485869362e-05, + "loss": 0.0741, + "step": 9848 + }, + { + "epoch": 1.38, + "learning_rate": 4.7695816956765864e-05, + "loss": 0.0859, + "step": 9850 + }, + { + "epoch": 1.38, + "learning_rate": 4.7695349054838104e-05, + "loss": 0.0911, + "step": 9852 + }, + { + "epoch": 1.38, + "learning_rate": 4.7694881152910356e-05, + "loss": 0.0538, + "step": 9854 + }, + { + "epoch": 1.38, + "learning_rate": 4.7694413250982595e-05, + "loss": 0.0864, + "step": 9856 + }, + { + "epoch": 1.38, + "learning_rate": 4.769394534905484e-05, + "loss": 0.0714, + "step": 9858 + }, + { + "epoch": 1.38, + "learning_rate": 4.769347744712708e-05, + "loss": 0.076, + "step": 9860 + }, + { + "epoch": 1.38, + "learning_rate": 4.769300954519933e-05, + "loss": 0.0867, + "step": 9862 + }, + { + "epoch": 1.38, + "learning_rate": 4.769254164327157e-05, + "loss": 0.0533, + "step": 9864 + }, + { + "epoch": 1.38, + "learning_rate": 4.769207374134382e-05, + "loss": 0.0657, + "step": 9866 + }, + { + "epoch": 1.39, + "learning_rate": 4.769160583941606e-05, + "loss": 0.0729, + "step": 9868 + }, + { + "epoch": 1.39, + "learning_rate": 4.76911379374883e-05, + "loss": 0.0832, + "step": 9870 + }, + { + "epoch": 1.39, + "learning_rate": 4.769067003556055e-05, + "loss": 0.0678, + "step": 9872 + }, + { + "epoch": 1.39, + "learning_rate": 4.7690202133632795e-05, + "loss": 0.0666, + "step": 9874 + }, + { + "epoch": 1.39, + "learning_rate": 4.7689734231705034e-05, + "loss": 0.0718, + "step": 9876 + }, + { + "epoch": 1.39, + "learning_rate": 4.768926632977728e-05, + "loss": 0.112, + "step": 9878 + }, + { + "epoch": 1.39, + "learning_rate": 4.7688798427849526e-05, + "loss": 0.0844, + "step": 9880 + }, + { + "epoch": 1.39, + "learning_rate": 4.768833052592177e-05, + "loss": 0.0779, + "step": 9882 + }, + { + "epoch": 1.39, + "learning_rate": 4.768786262399401e-05, + "loss": 0.0621, + "step": 9884 + }, + { + "epoch": 1.39, + "learning_rate": 4.768739472206626e-05, + "loss": 0.0749, + "step": 9886 + }, + { + "epoch": 1.39, + "learning_rate": 4.76869268201385e-05, + "loss": 0.0599, + "step": 9888 + }, + { + "epoch": 1.39, + "learning_rate": 4.768645891821075e-05, + "loss": 0.0632, + "step": 9890 + }, + { + "epoch": 1.39, + "learning_rate": 4.768599101628299e-05, + "loss": 0.0714, + "step": 9892 + }, + { + "epoch": 1.39, + "learning_rate": 4.7685523114355234e-05, + "loss": 0.0759, + "step": 9894 + }, + { + "epoch": 1.39, + "learning_rate": 4.768505521242748e-05, + "loss": 0.0895, + "step": 9896 + }, + { + "epoch": 1.39, + "learning_rate": 4.7684587310499726e-05, + "loss": 0.0576, + "step": 9898 + }, + { + "epoch": 1.39, + "learning_rate": 4.7684119408571965e-05, + "loss": 0.0895, + "step": 9900 + }, + { + "epoch": 1.39, + "learning_rate": 4.768365150664421e-05, + "loss": 0.1024, + "step": 9902 + }, + { + "epoch": 1.39, + "learning_rate": 4.768318360471645e-05, + "loss": 0.0746, + "step": 9904 + }, + { + "epoch": 1.39, + "learning_rate": 4.76827157027887e-05, + "loss": 0.064, + "step": 9906 + }, + { + "epoch": 1.39, + "learning_rate": 4.768224780086094e-05, + "loss": 0.0898, + "step": 9908 + }, + { + "epoch": 1.39, + "learning_rate": 4.768177989893319e-05, + "loss": 0.0732, + "step": 9910 + }, + { + "epoch": 1.39, + "learning_rate": 4.768131199700543e-05, + "loss": 0.0689, + "step": 9912 + }, + { + "epoch": 1.39, + "learning_rate": 4.768084409507767e-05, + "loss": 0.0677, + "step": 9914 + }, + { + "epoch": 1.39, + "learning_rate": 4.768037619314992e-05, + "loss": 0.0953, + "step": 9916 + }, + { + "epoch": 1.39, + "learning_rate": 4.7679908291222164e-05, + "loss": 0.0761, + "step": 9918 + }, + { + "epoch": 1.39, + "learning_rate": 4.7679440389294404e-05, + "loss": 0.0828, + "step": 9920 + }, + { + "epoch": 1.39, + "learning_rate": 4.767897248736665e-05, + "loss": 0.0798, + "step": 9922 + }, + { + "epoch": 1.39, + "learning_rate": 4.7678504585438895e-05, + "loss": 0.0787, + "step": 9924 + }, + { + "epoch": 1.39, + "learning_rate": 4.767803668351114e-05, + "loss": 0.0637, + "step": 9926 + }, + { + "epoch": 1.39, + "learning_rate": 4.767756878158338e-05, + "loss": 0.0898, + "step": 9928 + }, + { + "epoch": 1.39, + "learning_rate": 4.7677100879655626e-05, + "loss": 0.0719, + "step": 9930 + }, + { + "epoch": 1.39, + "learning_rate": 4.767663297772787e-05, + "loss": 0.0693, + "step": 9932 + }, + { + "epoch": 1.39, + "learning_rate": 4.767616507580012e-05, + "loss": 0.0773, + "step": 9934 + }, + { + "epoch": 1.39, + "learning_rate": 4.767569717387236e-05, + "loss": 0.0691, + "step": 9936 + }, + { + "epoch": 1.4, + "learning_rate": 4.76752292719446e-05, + "loss": 0.0797, + "step": 9938 + }, + { + "epoch": 1.4, + "learning_rate": 4.767476137001685e-05, + "loss": 0.0809, + "step": 9940 + }, + { + "epoch": 1.4, + "learning_rate": 4.7674293468089095e-05, + "loss": 0.0595, + "step": 9942 + }, + { + "epoch": 1.4, + "learning_rate": 4.7673825566161334e-05, + "loss": 0.0704, + "step": 9944 + }, + { + "epoch": 1.4, + "learning_rate": 4.767335766423358e-05, + "loss": 0.0709, + "step": 9946 + }, + { + "epoch": 1.4, + "learning_rate": 4.767288976230582e-05, + "loss": 0.0863, + "step": 9948 + }, + { + "epoch": 1.4, + "learning_rate": 4.767242186037807e-05, + "loss": 0.0692, + "step": 9950 + }, + { + "epoch": 1.4, + "learning_rate": 4.767195395845031e-05, + "loss": 0.0904, + "step": 9952 + }, + { + "epoch": 1.4, + "learning_rate": 4.767148605652256e-05, + "loss": 0.0807, + "step": 9954 + }, + { + "epoch": 1.4, + "learning_rate": 4.7671018154594796e-05, + "loss": 0.0644, + "step": 9956 + }, + { + "epoch": 1.4, + "learning_rate": 4.767055025266705e-05, + "loss": 0.0687, + "step": 9958 + }, + { + "epoch": 1.4, + "learning_rate": 4.767008235073929e-05, + "loss": 0.0609, + "step": 9960 + }, + { + "epoch": 1.4, + "learning_rate": 4.7669614448811534e-05, + "loss": 0.0771, + "step": 9962 + }, + { + "epoch": 1.4, + "learning_rate": 4.766914654688377e-05, + "loss": 0.0836, + "step": 9964 + }, + { + "epoch": 1.4, + "learning_rate": 4.766867864495602e-05, + "loss": 0.08, + "step": 9966 + }, + { + "epoch": 1.4, + "learning_rate": 4.7668210743028265e-05, + "loss": 0.0753, + "step": 9968 + }, + { + "epoch": 1.4, + "learning_rate": 4.766774284110051e-05, + "loss": 0.0781, + "step": 9970 + }, + { + "epoch": 1.4, + "learning_rate": 4.766727493917275e-05, + "loss": 0.0704, + "step": 9972 + }, + { + "epoch": 1.4, + "learning_rate": 4.7666807037244996e-05, + "loss": 0.0707, + "step": 9974 + }, + { + "epoch": 1.4, + "learning_rate": 4.766633913531724e-05, + "loss": 0.0661, + "step": 9976 + }, + { + "epoch": 1.4, + "learning_rate": 4.766587123338949e-05, + "loss": 0.0683, + "step": 9978 + }, + { + "epoch": 1.4, + "learning_rate": 4.766540333146173e-05, + "loss": 0.0688, + "step": 9980 + }, + { + "epoch": 1.4, + "learning_rate": 4.766493542953397e-05, + "loss": 0.0902, + "step": 9982 + }, + { + "epoch": 1.4, + "learning_rate": 4.766446752760622e-05, + "loss": 0.0783, + "step": 9984 + }, + { + "epoch": 1.4, + "learning_rate": 4.7663999625678464e-05, + "loss": 0.0778, + "step": 9986 + }, + { + "epoch": 1.4, + "learning_rate": 4.7663531723750704e-05, + "loss": 0.0921, + "step": 9988 + }, + { + "epoch": 1.4, + "learning_rate": 4.766306382182295e-05, + "loss": 0.066, + "step": 9990 + }, + { + "epoch": 1.4, + "learning_rate": 4.7662595919895195e-05, + "loss": 0.0598, + "step": 9992 + }, + { + "epoch": 1.4, + "learning_rate": 4.766212801796744e-05, + "loss": 0.0784, + "step": 9994 + }, + { + "epoch": 1.4, + "learning_rate": 4.766166011603968e-05, + "loss": 0.0627, + "step": 9996 + }, + { + "epoch": 1.4, + "learning_rate": 4.7661192214111926e-05, + "loss": 0.0974, + "step": 9998 + }, + { + "epoch": 1.4, + "learning_rate": 4.7660724312184165e-05, + "loss": 0.09, + "step": 10000 + }, + { + "epoch": 1.4, + "eval_gen_len": 28.5517, + "eval_loss": 1.054301142692566, + "eval_meteor": 0.0451, + "eval_runtime": 13.332, + "eval_samples_per_second": 4.35, + "eval_steps_per_second": 0.6, + "step": 10000 + }, + { + "epoch": 1.4, + "learning_rate": 4.766025641025642e-05, + "loss": 0.0818, + "step": 10002 + }, + { + "epoch": 1.4, + "learning_rate": 4.765978850832866e-05, + "loss": 0.0792, + "step": 10004 + }, + { + "epoch": 1.4, + "learning_rate": 4.7659320606400896e-05, + "loss": 0.0772, + "step": 10006 + }, + { + "epoch": 1.4, + "learning_rate": 4.765885270447314e-05, + "loss": 0.0575, + "step": 10008 + }, + { + "epoch": 1.41, + "learning_rate": 4.765838480254539e-05, + "loss": 0.0836, + "step": 10010 + }, + { + "epoch": 1.41, + "learning_rate": 4.7657916900617634e-05, + "loss": 0.0748, + "step": 10012 + }, + { + "epoch": 1.41, + "learning_rate": 4.765744899868987e-05, + "loss": 0.0925, + "step": 10014 + }, + { + "epoch": 1.41, + "learning_rate": 4.765698109676212e-05, + "loss": 0.0833, + "step": 10016 + }, + { + "epoch": 1.41, + "learning_rate": 4.7656513194834365e-05, + "loss": 0.0727, + "step": 10018 + }, + { + "epoch": 1.41, + "learning_rate": 4.765604529290661e-05, + "loss": 0.0711, + "step": 10020 + }, + { + "epoch": 1.41, + "learning_rate": 4.765557739097885e-05, + "loss": 0.075, + "step": 10022 + }, + { + "epoch": 1.41, + "learning_rate": 4.7655109489051096e-05, + "loss": 0.0952, + "step": 10024 + }, + { + "epoch": 1.41, + "learning_rate": 4.765464158712334e-05, + "loss": 0.0643, + "step": 10026 + }, + { + "epoch": 1.41, + "learning_rate": 4.765417368519559e-05, + "loss": 0.0896, + "step": 10028 + }, + { + "epoch": 1.41, + "learning_rate": 4.765370578326783e-05, + "loss": 0.0653, + "step": 10030 + }, + { + "epoch": 1.41, + "learning_rate": 4.765323788134007e-05, + "loss": 0.0816, + "step": 10032 + }, + { + "epoch": 1.41, + "learning_rate": 4.765276997941231e-05, + "loss": 0.0766, + "step": 10034 + }, + { + "epoch": 1.41, + "learning_rate": 4.7652302077484565e-05, + "loss": 0.0978, + "step": 10036 + }, + { + "epoch": 1.41, + "learning_rate": 4.7651834175556804e-05, + "loss": 0.0816, + "step": 10038 + }, + { + "epoch": 1.41, + "learning_rate": 4.765136627362905e-05, + "loss": 0.0778, + "step": 10040 + }, + { + "epoch": 1.41, + "learning_rate": 4.765089837170129e-05, + "loss": 0.0833, + "step": 10042 + }, + { + "epoch": 1.41, + "learning_rate": 4.7650430469773535e-05, + "loss": 0.0851, + "step": 10044 + }, + { + "epoch": 1.41, + "learning_rate": 4.764996256784578e-05, + "loss": 0.063, + "step": 10046 + }, + { + "epoch": 1.41, + "learning_rate": 4.764949466591803e-05, + "loss": 0.0694, + "step": 10048 + }, + { + "epoch": 1.41, + "learning_rate": 4.7649026763990266e-05, + "loss": 0.0671, + "step": 10050 + }, + { + "epoch": 1.41, + "learning_rate": 4.764855886206251e-05, + "loss": 0.0687, + "step": 10052 + }, + { + "epoch": 1.41, + "learning_rate": 4.764809096013476e-05, + "loss": 0.0728, + "step": 10054 + }, + { + "epoch": 1.41, + "learning_rate": 4.7647623058207004e-05, + "loss": 0.0609, + "step": 10056 + }, + { + "epoch": 1.41, + "learning_rate": 4.764715515627924e-05, + "loss": 0.0868, + "step": 10058 + }, + { + "epoch": 1.41, + "learning_rate": 4.764668725435149e-05, + "loss": 0.0629, + "step": 10060 + }, + { + "epoch": 1.41, + "learning_rate": 4.7646219352423735e-05, + "loss": 0.0795, + "step": 10062 + }, + { + "epoch": 1.41, + "learning_rate": 4.764575145049598e-05, + "loss": 0.0821, + "step": 10064 + }, + { + "epoch": 1.41, + "learning_rate": 4.764528354856822e-05, + "loss": 0.0911, + "step": 10066 + }, + { + "epoch": 1.41, + "learning_rate": 4.7644815646640465e-05, + "loss": 0.0839, + "step": 10068 + }, + { + "epoch": 1.41, + "learning_rate": 4.764434774471271e-05, + "loss": 0.0989, + "step": 10070 + }, + { + "epoch": 1.41, + "learning_rate": 4.764387984278496e-05, + "loss": 0.0642, + "step": 10072 + }, + { + "epoch": 1.41, + "learning_rate": 4.7643411940857196e-05, + "loss": 0.074, + "step": 10074 + }, + { + "epoch": 1.41, + "learning_rate": 4.764294403892944e-05, + "loss": 0.0802, + "step": 10076 + }, + { + "epoch": 1.41, + "learning_rate": 4.764247613700168e-05, + "loss": 0.066, + "step": 10078 + }, + { + "epoch": 1.41, + "learning_rate": 4.7642008235073934e-05, + "loss": 0.063, + "step": 10080 + }, + { + "epoch": 1.42, + "learning_rate": 4.764154033314617e-05, + "loss": 0.0642, + "step": 10082 + }, + { + "epoch": 1.42, + "learning_rate": 4.764107243121842e-05, + "loss": 0.0953, + "step": 10084 + }, + { + "epoch": 1.42, + "learning_rate": 4.764060452929066e-05, + "loss": 0.0785, + "step": 10086 + }, + { + "epoch": 1.42, + "learning_rate": 4.764013662736291e-05, + "loss": 0.0715, + "step": 10088 + }, + { + "epoch": 1.42, + "learning_rate": 4.763966872543515e-05, + "loss": 0.0878, + "step": 10090 + }, + { + "epoch": 1.42, + "learning_rate": 4.7639200823507396e-05, + "loss": 0.0906, + "step": 10092 + }, + { + "epoch": 1.42, + "learning_rate": 4.7638732921579635e-05, + "loss": 0.0669, + "step": 10094 + }, + { + "epoch": 1.42, + "learning_rate": 4.763826501965188e-05, + "loss": 0.0847, + "step": 10096 + }, + { + "epoch": 1.42, + "learning_rate": 4.763779711772413e-05, + "loss": 0.1002, + "step": 10098 + }, + { + "epoch": 1.42, + "learning_rate": 4.763732921579637e-05, + "loss": 0.0795, + "step": 10100 + }, + { + "epoch": 1.42, + "learning_rate": 4.763686131386861e-05, + "loss": 0.0743, + "step": 10102 + }, + { + "epoch": 1.42, + "learning_rate": 4.763639341194086e-05, + "loss": 0.0795, + "step": 10104 + }, + { + "epoch": 1.42, + "learning_rate": 4.7635925510013104e-05, + "loss": 0.0786, + "step": 10106 + }, + { + "epoch": 1.42, + "learning_rate": 4.763545760808535e-05, + "loss": 0.0795, + "step": 10108 + }, + { + "epoch": 1.42, + "learning_rate": 4.763498970615759e-05, + "loss": 0.0643, + "step": 10110 + }, + { + "epoch": 1.42, + "learning_rate": 4.7634521804229835e-05, + "loss": 0.0751, + "step": 10112 + }, + { + "epoch": 1.42, + "learning_rate": 4.763405390230208e-05, + "loss": 0.0759, + "step": 10114 + }, + { + "epoch": 1.42, + "learning_rate": 4.763358600037433e-05, + "loss": 0.0764, + "step": 10116 + }, + { + "epoch": 1.42, + "learning_rate": 4.7633118098446566e-05, + "loss": 0.0866, + "step": 10118 + }, + { + "epoch": 1.42, + "learning_rate": 4.763265019651881e-05, + "loss": 0.0631, + "step": 10120 + }, + { + "epoch": 1.42, + "learning_rate": 4.763218229459106e-05, + "loss": 0.0834, + "step": 10122 + }, + { + "epoch": 1.42, + "learning_rate": 4.7631714392663304e-05, + "loss": 0.0667, + "step": 10124 + }, + { + "epoch": 1.42, + "learning_rate": 4.763124649073554e-05, + "loss": 0.0964, + "step": 10126 + }, + { + "epoch": 1.42, + "learning_rate": 4.763077858880779e-05, + "loss": 0.0615, + "step": 10128 + }, + { + "epoch": 1.42, + "learning_rate": 4.763031068688003e-05, + "loss": 0.0611, + "step": 10130 + }, + { + "epoch": 1.42, + "learning_rate": 4.762984278495228e-05, + "loss": 0.0695, + "step": 10132 + }, + { + "epoch": 1.42, + "learning_rate": 4.762937488302452e-05, + "loss": 0.0709, + "step": 10134 + }, + { + "epoch": 1.42, + "learning_rate": 4.7628906981096766e-05, + "loss": 0.0801, + "step": 10136 + }, + { + "epoch": 1.42, + "learning_rate": 4.7628439079169005e-05, + "loss": 0.0692, + "step": 10138 + }, + { + "epoch": 1.42, + "learning_rate": 4.762797117724126e-05, + "loss": 0.0708, + "step": 10140 + }, + { + "epoch": 1.42, + "learning_rate": 4.7627503275313496e-05, + "loss": 0.0976, + "step": 10142 + }, + { + "epoch": 1.42, + "learning_rate": 4.762703537338574e-05, + "loss": 0.0699, + "step": 10144 + }, + { + "epoch": 1.42, + "learning_rate": 4.762656747145798e-05, + "loss": 0.0636, + "step": 10146 + }, + { + "epoch": 1.42, + "learning_rate": 4.762609956953023e-05, + "loss": 0.0588, + "step": 10148 + }, + { + "epoch": 1.42, + "learning_rate": 4.762563166760247e-05, + "loss": 0.059, + "step": 10150 + }, + { + "epoch": 1.43, + "learning_rate": 4.762516376567472e-05, + "loss": 0.0797, + "step": 10152 + }, + { + "epoch": 1.43, + "learning_rate": 4.762469586374696e-05, + "loss": 0.0753, + "step": 10154 + }, + { + "epoch": 1.43, + "learning_rate": 4.7624227961819204e-05, + "loss": 0.0878, + "step": 10156 + }, + { + "epoch": 1.43, + "learning_rate": 4.762376005989145e-05, + "loss": 0.0826, + "step": 10158 + }, + { + "epoch": 1.43, + "learning_rate": 4.7623292157963696e-05, + "loss": 0.0902, + "step": 10160 + }, + { + "epoch": 1.43, + "learning_rate": 4.7622824256035935e-05, + "loss": 0.06, + "step": 10162 + }, + { + "epoch": 1.43, + "learning_rate": 4.762235635410818e-05, + "loss": 0.0776, + "step": 10164 + }, + { + "epoch": 1.43, + "learning_rate": 4.762188845218043e-05, + "loss": 0.0643, + "step": 10166 + }, + { + "epoch": 1.43, + "learning_rate": 4.762142055025267e-05, + "loss": 0.0829, + "step": 10168 + }, + { + "epoch": 1.43, + "learning_rate": 4.762095264832491e-05, + "loss": 0.0756, + "step": 10170 + }, + { + "epoch": 1.43, + "learning_rate": 4.762048474639716e-05, + "loss": 0.0951, + "step": 10172 + }, + { + "epoch": 1.43, + "learning_rate": 4.7620016844469404e-05, + "loss": 0.076, + "step": 10174 + }, + { + "epoch": 1.43, + "learning_rate": 4.761954894254165e-05, + "loss": 0.0738, + "step": 10176 + }, + { + "epoch": 1.43, + "learning_rate": 4.761908104061389e-05, + "loss": 0.0772, + "step": 10178 + }, + { + "epoch": 1.43, + "learning_rate": 4.7618613138686135e-05, + "loss": 0.0677, + "step": 10180 + }, + { + "epoch": 1.43, + "learning_rate": 4.7618145236758374e-05, + "loss": 0.0833, + "step": 10182 + }, + { + "epoch": 1.43, + "learning_rate": 4.761767733483063e-05, + "loss": 0.0759, + "step": 10184 + }, + { + "epoch": 1.43, + "learning_rate": 4.7617209432902866e-05, + "loss": 0.0751, + "step": 10186 + }, + { + "epoch": 1.43, + "learning_rate": 4.761674153097511e-05, + "loss": 0.0665, + "step": 10188 + }, + { + "epoch": 1.43, + "learning_rate": 4.761627362904735e-05, + "loss": 0.0852, + "step": 10190 + }, + { + "epoch": 1.43, + "learning_rate": 4.76158057271196e-05, + "loss": 0.0953, + "step": 10192 + }, + { + "epoch": 1.43, + "learning_rate": 4.761533782519184e-05, + "loss": 0.0823, + "step": 10194 + }, + { + "epoch": 1.43, + "learning_rate": 4.761486992326409e-05, + "loss": 0.0812, + "step": 10196 + }, + { + "epoch": 1.43, + "learning_rate": 4.761440202133633e-05, + "loss": 0.0773, + "step": 10198 + }, + { + "epoch": 1.43, + "learning_rate": 4.7613934119408574e-05, + "loss": 0.0887, + "step": 10200 + }, + { + "epoch": 1.43, + "learning_rate": 4.761346621748082e-05, + "loss": 0.0602, + "step": 10202 + }, + { + "epoch": 1.43, + "learning_rate": 4.7612998315553066e-05, + "loss": 0.0776, + "step": 10204 + }, + { + "epoch": 1.43, + "learning_rate": 4.7612530413625305e-05, + "loss": 0.1012, + "step": 10206 + }, + { + "epoch": 1.43, + "learning_rate": 4.761206251169755e-05, + "loss": 0.098, + "step": 10208 + }, + { + "epoch": 1.43, + "learning_rate": 4.7611594609769796e-05, + "loss": 0.0564, + "step": 10210 + }, + { + "epoch": 1.43, + "learning_rate": 4.761112670784204e-05, + "loss": 0.115, + "step": 10212 + }, + { + "epoch": 1.43, + "learning_rate": 4.761065880591428e-05, + "loss": 0.0707, + "step": 10214 + }, + { + "epoch": 1.43, + "learning_rate": 4.761019090398653e-05, + "loss": 0.0586, + "step": 10216 + }, + { + "epoch": 1.43, + "learning_rate": 4.760972300205877e-05, + "loss": 0.0837, + "step": 10218 + }, + { + "epoch": 1.43, + "learning_rate": 4.760925510013102e-05, + "loss": 0.057, + "step": 10220 + }, + { + "epoch": 1.43, + "learning_rate": 4.760878719820326e-05, + "loss": 0.0619, + "step": 10222 + }, + { + "epoch": 1.44, + "learning_rate": 4.7608319296275504e-05, + "loss": 0.0778, + "step": 10224 + }, + { + "epoch": 1.44, + "learning_rate": 4.7607851394347743e-05, + "loss": 0.0855, + "step": 10226 + }, + { + "epoch": 1.44, + "learning_rate": 4.7607383492419996e-05, + "loss": 0.0645, + "step": 10228 + }, + { + "epoch": 1.44, + "learning_rate": 4.7606915590492235e-05, + "loss": 0.0837, + "step": 10230 + }, + { + "epoch": 1.44, + "learning_rate": 4.760644768856448e-05, + "loss": 0.0745, + "step": 10232 + }, + { + "epoch": 1.44, + "learning_rate": 4.760597978663672e-05, + "loss": 0.0639, + "step": 10234 + }, + { + "epoch": 1.44, + "learning_rate": 4.760551188470897e-05, + "loss": 0.0886, + "step": 10236 + }, + { + "epoch": 1.44, + "learning_rate": 4.760504398278121e-05, + "loss": 0.0946, + "step": 10238 + }, + { + "epoch": 1.44, + "learning_rate": 4.760457608085346e-05, + "loss": 0.0819, + "step": 10240 + }, + { + "epoch": 1.44, + "learning_rate": 4.76041081789257e-05, + "loss": 0.0925, + "step": 10242 + }, + { + "epoch": 1.44, + "learning_rate": 4.760364027699794e-05, + "loss": 0.0561, + "step": 10244 + }, + { + "epoch": 1.44, + "learning_rate": 4.760317237507019e-05, + "loss": 0.0709, + "step": 10246 + }, + { + "epoch": 1.44, + "learning_rate": 4.7602704473142435e-05, + "loss": 0.064, + "step": 10248 + }, + { + "epoch": 1.44, + "learning_rate": 4.7602236571214674e-05, + "loss": 0.047, + "step": 10250 + }, + { + "epoch": 1.44, + "learning_rate": 4.760176866928692e-05, + "loss": 0.0794, + "step": 10252 + }, + { + "epoch": 1.44, + "learning_rate": 4.7601300767359166e-05, + "loss": 0.0786, + "step": 10254 + }, + { + "epoch": 1.44, + "learning_rate": 4.760083286543141e-05, + "loss": 0.0785, + "step": 10256 + }, + { + "epoch": 1.44, + "learning_rate": 4.760036496350365e-05, + "loss": 0.0695, + "step": 10258 + }, + { + "epoch": 1.44, + "learning_rate": 4.759989706157589e-05, + "loss": 0.0832, + "step": 10260 + }, + { + "epoch": 1.44, + "learning_rate": 4.759942915964814e-05, + "loss": 0.0776, + "step": 10262 + }, + { + "epoch": 1.44, + "learning_rate": 4.759896125772038e-05, + "loss": 0.0844, + "step": 10264 + }, + { + "epoch": 1.44, + "learning_rate": 4.759849335579263e-05, + "loss": 0.0635, + "step": 10266 + }, + { + "epoch": 1.44, + "learning_rate": 4.759802545386487e-05, + "loss": 0.0904, + "step": 10268 + }, + { + "epoch": 1.44, + "learning_rate": 4.759755755193712e-05, + "loss": 0.0815, + "step": 10270 + }, + { + "epoch": 1.44, + "learning_rate": 4.759708965000936e-05, + "loss": 0.0622, + "step": 10272 + }, + { + "epoch": 1.44, + "learning_rate": 4.7596621748081605e-05, + "loss": 0.0852, + "step": 10274 + }, + { + "epoch": 1.44, + "learning_rate": 4.7596153846153844e-05, + "loss": 0.0782, + "step": 10276 + }, + { + "epoch": 1.44, + "learning_rate": 4.759568594422609e-05, + "loss": 0.0733, + "step": 10278 + }, + { + "epoch": 1.44, + "learning_rate": 4.7595218042298336e-05, + "loss": 0.0831, + "step": 10280 + }, + { + "epoch": 1.44, + "learning_rate": 4.759475014037058e-05, + "loss": 0.069, + "step": 10282 + }, + { + "epoch": 1.44, + "learning_rate": 4.759428223844282e-05, + "loss": 0.0582, + "step": 10284 + }, + { + "epoch": 1.44, + "learning_rate": 4.7593814336515067e-05, + "loss": 0.0581, + "step": 10286 + }, + { + "epoch": 1.44, + "learning_rate": 4.759334643458731e-05, + "loss": 0.0904, + "step": 10288 + }, + { + "epoch": 1.44, + "learning_rate": 4.759287853265956e-05, + "loss": 0.0674, + "step": 10290 + }, + { + "epoch": 1.44, + "learning_rate": 4.75924106307318e-05, + "loss": 0.068, + "step": 10292 + }, + { + "epoch": 1.44, + "learning_rate": 4.7591942728804043e-05, + "loss": 0.0824, + "step": 10294 + }, + { + "epoch": 1.45, + "learning_rate": 4.759147482687629e-05, + "loss": 0.0638, + "step": 10296 + }, + { + "epoch": 1.45, + "learning_rate": 4.7591006924948535e-05, + "loss": 0.0851, + "step": 10298 + }, + { + "epoch": 1.45, + "learning_rate": 4.7590539023020774e-05, + "loss": 0.0628, + "step": 10300 + }, + { + "epoch": 1.45, + "learning_rate": 4.759007112109302e-05, + "loss": 0.0814, + "step": 10302 + }, + { + "epoch": 1.45, + "learning_rate": 4.7589603219165266e-05, + "loss": 0.0811, + "step": 10304 + }, + { + "epoch": 1.45, + "learning_rate": 4.758913531723751e-05, + "loss": 0.0766, + "step": 10306 + }, + { + "epoch": 1.45, + "learning_rate": 4.758866741530975e-05, + "loss": 0.0863, + "step": 10308 + }, + { + "epoch": 1.45, + "learning_rate": 4.7588199513382e-05, + "loss": 0.072, + "step": 10310 + }, + { + "epoch": 1.45, + "learning_rate": 4.7587731611454236e-05, + "loss": 0.065, + "step": 10312 + }, + { + "epoch": 1.45, + "learning_rate": 4.758726370952649e-05, + "loss": 0.0783, + "step": 10314 + }, + { + "epoch": 1.45, + "learning_rate": 4.758679580759873e-05, + "loss": 0.0976, + "step": 10316 + }, + { + "epoch": 1.45, + "learning_rate": 4.7586327905670974e-05, + "loss": 0.0812, + "step": 10318 + }, + { + "epoch": 1.45, + "learning_rate": 4.758586000374321e-05, + "loss": 0.0647, + "step": 10320 + }, + { + "epoch": 1.45, + "learning_rate": 4.758539210181546e-05, + "loss": 0.0655, + "step": 10322 + }, + { + "epoch": 1.45, + "learning_rate": 4.7584924199887705e-05, + "loss": 0.0794, + "step": 10324 + }, + { + "epoch": 1.45, + "learning_rate": 4.758445629795995e-05, + "loss": 0.0911, + "step": 10326 + }, + { + "epoch": 1.45, + "learning_rate": 4.758398839603219e-05, + "loss": 0.0695, + "step": 10328 + }, + { + "epoch": 1.45, + "learning_rate": 4.7583520494104436e-05, + "loss": 0.0937, + "step": 10330 + }, + { + "epoch": 1.45, + "learning_rate": 4.758305259217668e-05, + "loss": 0.0858, + "step": 10332 + }, + { + "epoch": 1.45, + "learning_rate": 4.758258469024893e-05, + "loss": 0.086, + "step": 10334 + }, + { + "epoch": 1.45, + "learning_rate": 4.758211678832117e-05, + "loss": 0.0736, + "step": 10336 + }, + { + "epoch": 1.45, + "learning_rate": 4.758164888639341e-05, + "loss": 0.0772, + "step": 10338 + }, + { + "epoch": 1.45, + "learning_rate": 4.758118098446566e-05, + "loss": 0.069, + "step": 10340 + }, + { + "epoch": 1.45, + "learning_rate": 4.7580713082537905e-05, + "loss": 0.0619, + "step": 10342 + }, + { + "epoch": 1.45, + "learning_rate": 4.7580245180610144e-05, + "loss": 0.0741, + "step": 10344 + }, + { + "epoch": 1.45, + "learning_rate": 4.757977727868239e-05, + "loss": 0.067, + "step": 10346 + }, + { + "epoch": 1.45, + "learning_rate": 4.7579309376754636e-05, + "loss": 0.1078, + "step": 10348 + }, + { + "epoch": 1.45, + "learning_rate": 4.757884147482688e-05, + "loss": 0.0737, + "step": 10350 + }, + { + "epoch": 1.45, + "learning_rate": 4.757837357289912e-05, + "loss": 0.0616, + "step": 10352 + }, + { + "epoch": 1.45, + "learning_rate": 4.7577905670971367e-05, + "loss": 0.0721, + "step": 10354 + }, + { + "epoch": 1.45, + "learning_rate": 4.7577437769043606e-05, + "loss": 0.0865, + "step": 10356 + }, + { + "epoch": 1.45, + "learning_rate": 4.757696986711586e-05, + "loss": 0.0837, + "step": 10358 + }, + { + "epoch": 1.45, + "learning_rate": 4.75765019651881e-05, + "loss": 0.0855, + "step": 10360 + }, + { + "epoch": 1.45, + "learning_rate": 4.7576034063260343e-05, + "loss": 0.0708, + "step": 10362 + }, + { + "epoch": 1.45, + "learning_rate": 4.757556616133258e-05, + "loss": 0.087, + "step": 10364 + }, + { + "epoch": 1.46, + "learning_rate": 4.7575098259404835e-05, + "loss": 0.0723, + "step": 10366 + }, + { + "epoch": 1.46, + "learning_rate": 4.7574630357477074e-05, + "loss": 0.0713, + "step": 10368 + }, + { + "epoch": 1.46, + "learning_rate": 4.757416245554932e-05, + "loss": 0.0699, + "step": 10370 + }, + { + "epoch": 1.46, + "learning_rate": 4.757369455362156e-05, + "loss": 0.0911, + "step": 10372 + }, + { + "epoch": 1.46, + "learning_rate": 4.7573226651693805e-05, + "loss": 0.0783, + "step": 10374 + }, + { + "epoch": 1.46, + "learning_rate": 4.757275874976605e-05, + "loss": 0.0519, + "step": 10376 + }, + { + "epoch": 1.46, + "learning_rate": 4.75722908478383e-05, + "loss": 0.0662, + "step": 10378 + }, + { + "epoch": 1.46, + "learning_rate": 4.7571822945910536e-05, + "loss": 0.0599, + "step": 10380 + }, + { + "epoch": 1.46, + "learning_rate": 4.757135504398278e-05, + "loss": 0.067, + "step": 10382 + }, + { + "epoch": 1.46, + "learning_rate": 4.757088714205503e-05, + "loss": 0.0655, + "step": 10384 + }, + { + "epoch": 1.46, + "learning_rate": 4.7570419240127274e-05, + "loss": 0.1293, + "step": 10386 + }, + { + "epoch": 1.46, + "learning_rate": 4.756995133819951e-05, + "loss": 0.0776, + "step": 10388 + }, + { + "epoch": 1.46, + "learning_rate": 4.756948343627176e-05, + "loss": 0.0816, + "step": 10390 + }, + { + "epoch": 1.46, + "learning_rate": 4.7569015534344005e-05, + "loss": 0.0835, + "step": 10392 + }, + { + "epoch": 1.46, + "learning_rate": 4.756854763241625e-05, + "loss": 0.0476, + "step": 10394 + }, + { + "epoch": 1.46, + "learning_rate": 4.756807973048849e-05, + "loss": 0.0549, + "step": 10396 + }, + { + "epoch": 1.46, + "learning_rate": 4.7567611828560736e-05, + "loss": 0.1, + "step": 10398 + }, + { + "epoch": 1.46, + "learning_rate": 4.756714392663298e-05, + "loss": 0.0943, + "step": 10400 + }, + { + "epoch": 1.46, + "learning_rate": 4.756667602470523e-05, + "loss": 0.0853, + "step": 10402 + }, + { + "epoch": 1.46, + "learning_rate": 4.756620812277747e-05, + "loss": 0.0702, + "step": 10404 + }, + { + "epoch": 1.46, + "learning_rate": 4.756574022084971e-05, + "loss": 0.0667, + "step": 10406 + }, + { + "epoch": 1.46, + "learning_rate": 4.756527231892195e-05, + "loss": 0.0767, + "step": 10408 + }, + { + "epoch": 1.46, + "learning_rate": 4.7564804416994205e-05, + "loss": 0.0769, + "step": 10410 + }, + { + "epoch": 1.46, + "learning_rate": 4.7564336515066444e-05, + "loss": 0.0684, + "step": 10412 + }, + { + "epoch": 1.46, + "learning_rate": 4.756386861313869e-05, + "loss": 0.0733, + "step": 10414 + }, + { + "epoch": 1.46, + "learning_rate": 4.756340071121093e-05, + "loss": 0.0682, + "step": 10416 + }, + { + "epoch": 1.46, + "learning_rate": 4.756293280928318e-05, + "loss": 0.0649, + "step": 10418 + }, + { + "epoch": 1.46, + "learning_rate": 4.756246490735542e-05, + "loss": 0.0797, + "step": 10420 + }, + { + "epoch": 1.46, + "learning_rate": 4.756199700542767e-05, + "loss": 0.0649, + "step": 10422 + }, + { + "epoch": 1.46, + "learning_rate": 4.7561529103499906e-05, + "loss": 0.0745, + "step": 10424 + }, + { + "epoch": 1.46, + "learning_rate": 4.756106120157215e-05, + "loss": 0.0737, + "step": 10426 + }, + { + "epoch": 1.46, + "learning_rate": 4.75605932996444e-05, + "loss": 0.0702, + "step": 10428 + }, + { + "epoch": 1.46, + "learning_rate": 4.7560125397716643e-05, + "loss": 0.0813, + "step": 10430 + }, + { + "epoch": 1.46, + "learning_rate": 4.755965749578888e-05, + "loss": 0.071, + "step": 10432 + }, + { + "epoch": 1.46, + "learning_rate": 4.755918959386113e-05, + "loss": 0.0971, + "step": 10434 + }, + { + "epoch": 1.46, + "learning_rate": 4.7558721691933374e-05, + "loss": 0.0881, + "step": 10436 + }, + { + "epoch": 1.47, + "learning_rate": 4.755825379000562e-05, + "loss": 0.0977, + "step": 10438 + }, + { + "epoch": 1.47, + "learning_rate": 4.755778588807786e-05, + "loss": 0.0615, + "step": 10440 + }, + { + "epoch": 1.47, + "learning_rate": 4.7557317986150105e-05, + "loss": 0.0871, + "step": 10442 + }, + { + "epoch": 1.47, + "learning_rate": 4.755685008422235e-05, + "loss": 0.094, + "step": 10444 + }, + { + "epoch": 1.47, + "learning_rate": 4.75563821822946e-05, + "loss": 0.0777, + "step": 10446 + }, + { + "epoch": 1.47, + "learning_rate": 4.7555914280366836e-05, + "loss": 0.0763, + "step": 10448 + }, + { + "epoch": 1.47, + "learning_rate": 4.755544637843908e-05, + "loss": 0.0814, + "step": 10450 + }, + { + "epoch": 1.47, + "learning_rate": 4.755497847651133e-05, + "loss": 0.1032, + "step": 10452 + }, + { + "epoch": 1.47, + "learning_rate": 4.7554510574583574e-05, + "loss": 0.0849, + "step": 10454 + }, + { + "epoch": 1.47, + "learning_rate": 4.755404267265581e-05, + "loss": 0.0768, + "step": 10456 + }, + { + "epoch": 1.47, + "learning_rate": 4.755357477072806e-05, + "loss": 0.08, + "step": 10458 + }, + { + "epoch": 1.47, + "learning_rate": 4.75531068688003e-05, + "loss": 0.0926, + "step": 10460 + }, + { + "epoch": 1.47, + "learning_rate": 4.755263896687255e-05, + "loss": 0.0698, + "step": 10462 + }, + { + "epoch": 1.47, + "learning_rate": 4.755217106494479e-05, + "loss": 0.077, + "step": 10464 + }, + { + "epoch": 1.47, + "learning_rate": 4.7551703163017036e-05, + "loss": 0.0722, + "step": 10466 + }, + { + "epoch": 1.47, + "learning_rate": 4.7551235261089275e-05, + "loss": 0.0538, + "step": 10468 + }, + { + "epoch": 1.47, + "learning_rate": 4.755076735916152e-05, + "loss": 0.1016, + "step": 10470 + }, + { + "epoch": 1.47, + "learning_rate": 4.755029945723377e-05, + "loss": 0.0763, + "step": 10472 + }, + { + "epoch": 1.47, + "learning_rate": 4.754983155530601e-05, + "loss": 0.0851, + "step": 10474 + }, + { + "epoch": 1.47, + "learning_rate": 4.754936365337825e-05, + "loss": 0.081, + "step": 10476 + }, + { + "epoch": 1.47, + "learning_rate": 4.75488957514505e-05, + "loss": 0.0741, + "step": 10478 + }, + { + "epoch": 1.47, + "learning_rate": 4.7548427849522744e-05, + "loss": 0.0784, + "step": 10480 + }, + { + "epoch": 1.47, + "learning_rate": 4.754795994759499e-05, + "loss": 0.0804, + "step": 10482 + }, + { + "epoch": 1.47, + "learning_rate": 4.754749204566723e-05, + "loss": 0.0797, + "step": 10484 + }, + { + "epoch": 1.47, + "learning_rate": 4.7547024143739475e-05, + "loss": 0.0931, + "step": 10486 + }, + { + "epoch": 1.47, + "learning_rate": 4.754655624181172e-05, + "loss": 0.0706, + "step": 10488 + }, + { + "epoch": 1.47, + "learning_rate": 4.754608833988397e-05, + "loss": 0.072, + "step": 10490 + }, + { + "epoch": 1.47, + "learning_rate": 4.7545620437956206e-05, + "loss": 0.0801, + "step": 10492 + }, + { + "epoch": 1.47, + "learning_rate": 4.754515253602845e-05, + "loss": 0.074, + "step": 10494 + }, + { + "epoch": 1.47, + "learning_rate": 4.75446846341007e-05, + "loss": 0.0835, + "step": 10496 + }, + { + "epoch": 1.47, + "learning_rate": 4.7544216732172944e-05, + "loss": 0.0612, + "step": 10498 + }, + { + "epoch": 1.47, + "learning_rate": 4.754374883024518e-05, + "loss": 0.0785, + "step": 10500 + }, + { + "epoch": 1.47, + "learning_rate": 4.754328092831743e-05, + "loss": 0.0584, + "step": 10502 + }, + { + "epoch": 1.47, + "learning_rate": 4.754281302638967e-05, + "loss": 0.0711, + "step": 10504 + }, + { + "epoch": 1.47, + "learning_rate": 4.754234512446192e-05, + "loss": 0.079, + "step": 10506 + }, + { + "epoch": 1.48, + "learning_rate": 4.754187722253416e-05, + "loss": 0.0791, + "step": 10508 + }, + { + "epoch": 1.48, + "learning_rate": 4.75414093206064e-05, + "loss": 0.0831, + "step": 10510 + }, + { + "epoch": 1.48, + "learning_rate": 4.7540941418678645e-05, + "loss": 0.0661, + "step": 10512 + }, + { + "epoch": 1.48, + "learning_rate": 4.754047351675089e-05, + "loss": 0.0787, + "step": 10514 + }, + { + "epoch": 1.48, + "learning_rate": 4.7540005614823136e-05, + "loss": 0.0951, + "step": 10516 + }, + { + "epoch": 1.48, + "learning_rate": 4.7539537712895376e-05, + "loss": 0.0659, + "step": 10518 + }, + { + "epoch": 1.48, + "learning_rate": 4.753906981096762e-05, + "loss": 0.0759, + "step": 10520 + }, + { + "epoch": 1.48, + "learning_rate": 4.753860190903987e-05, + "loss": 0.073, + "step": 10522 + }, + { + "epoch": 1.48, + "learning_rate": 4.753813400711211e-05, + "loss": 0.0576, + "step": 10524 + }, + { + "epoch": 1.48, + "learning_rate": 4.753766610518435e-05, + "loss": 0.0819, + "step": 10526 + }, + { + "epoch": 1.48, + "learning_rate": 4.75371982032566e-05, + "loss": 0.0805, + "step": 10528 + }, + { + "epoch": 1.48, + "learning_rate": 4.7536730301328844e-05, + "loss": 0.0822, + "step": 10530 + }, + { + "epoch": 1.48, + "learning_rate": 4.753626239940109e-05, + "loss": 0.0725, + "step": 10532 + }, + { + "epoch": 1.48, + "learning_rate": 4.753579449747333e-05, + "loss": 0.0829, + "step": 10534 + }, + { + "epoch": 1.48, + "learning_rate": 4.7535326595545575e-05, + "loss": 0.0851, + "step": 10536 + }, + { + "epoch": 1.48, + "learning_rate": 4.7534858693617814e-05, + "loss": 0.0814, + "step": 10538 + }, + { + "epoch": 1.48, + "learning_rate": 4.753439079169007e-05, + "loss": 0.0964, + "step": 10540 + }, + { + "epoch": 1.48, + "learning_rate": 4.7533922889762306e-05, + "loss": 0.0887, + "step": 10542 + }, + { + "epoch": 1.48, + "learning_rate": 4.753345498783455e-05, + "loss": 0.0728, + "step": 10544 + }, + { + "epoch": 1.48, + "learning_rate": 4.753298708590679e-05, + "loss": 0.0643, + "step": 10546 + }, + { + "epoch": 1.48, + "learning_rate": 4.7532519183979044e-05, + "loss": 0.0664, + "step": 10548 + }, + { + "epoch": 1.48, + "learning_rate": 4.753205128205128e-05, + "loss": 0.0892, + "step": 10550 + }, + { + "epoch": 1.48, + "learning_rate": 4.753158338012353e-05, + "loss": 0.0759, + "step": 10552 + }, + { + "epoch": 1.48, + "learning_rate": 4.753111547819577e-05, + "loss": 0.0717, + "step": 10554 + }, + { + "epoch": 1.48, + "learning_rate": 4.7530647576268014e-05, + "loss": 0.0964, + "step": 10556 + }, + { + "epoch": 1.48, + "learning_rate": 4.753017967434026e-05, + "loss": 0.0866, + "step": 10558 + }, + { + "epoch": 1.48, + "learning_rate": 4.7529711772412506e-05, + "loss": 0.076, + "step": 10560 + }, + { + "epoch": 1.48, + "learning_rate": 4.7529243870484745e-05, + "loss": 0.0838, + "step": 10562 + }, + { + "epoch": 1.48, + "learning_rate": 4.752877596855699e-05, + "loss": 0.0642, + "step": 10564 + }, + { + "epoch": 1.48, + "learning_rate": 4.752830806662924e-05, + "loss": 0.0777, + "step": 10566 + }, + { + "epoch": 1.48, + "learning_rate": 4.752784016470148e-05, + "loss": 0.0674, + "step": 10568 + }, + { + "epoch": 1.48, + "learning_rate": 4.752737226277372e-05, + "loss": 0.0672, + "step": 10570 + }, + { + "epoch": 1.48, + "learning_rate": 4.752690436084597e-05, + "loss": 0.0698, + "step": 10572 + }, + { + "epoch": 1.48, + "learning_rate": 4.7526436458918214e-05, + "loss": 0.0635, + "step": 10574 + }, + { + "epoch": 1.48, + "learning_rate": 4.752596855699046e-05, + "loss": 0.0889, + "step": 10576 + }, + { + "epoch": 1.48, + "learning_rate": 4.75255006550627e-05, + "loss": 0.0894, + "step": 10578 + }, + { + "epoch": 1.49, + "learning_rate": 4.7525032753134945e-05, + "loss": 0.0573, + "step": 10580 + }, + { + "epoch": 1.49, + "learning_rate": 4.752456485120719e-05, + "loss": 0.0707, + "step": 10582 + }, + { + "epoch": 1.49, + "learning_rate": 4.7524096949279436e-05, + "loss": 0.0701, + "step": 10584 + }, + { + "epoch": 1.49, + "learning_rate": 4.7523629047351676e-05, + "loss": 0.066, + "step": 10586 + }, + { + "epoch": 1.49, + "learning_rate": 4.752316114542392e-05, + "loss": 0.0738, + "step": 10588 + }, + { + "epoch": 1.49, + "learning_rate": 4.752269324349616e-05, + "loss": 0.073, + "step": 10590 + }, + { + "epoch": 1.49, + "learning_rate": 4.752222534156841e-05, + "loss": 0.0956, + "step": 10592 + }, + { + "epoch": 1.49, + "learning_rate": 4.752175743964065e-05, + "loss": 0.0718, + "step": 10594 + }, + { + "epoch": 1.49, + "learning_rate": 4.75212895377129e-05, + "loss": 0.0701, + "step": 10596 + }, + { + "epoch": 1.49, + "learning_rate": 4.752082163578514e-05, + "loss": 0.0723, + "step": 10598 + }, + { + "epoch": 1.49, + "learning_rate": 4.752035373385738e-05, + "loss": 0.0728, + "step": 10600 + }, + { + "epoch": 1.49, + "learning_rate": 4.751988583192963e-05, + "loss": 0.0656, + "step": 10602 + }, + { + "epoch": 1.49, + "learning_rate": 4.7519417930001875e-05, + "loss": 0.0809, + "step": 10604 + }, + { + "epoch": 1.49, + "learning_rate": 4.7518950028074114e-05, + "loss": 0.0736, + "step": 10606 + }, + { + "epoch": 1.49, + "learning_rate": 4.751848212614636e-05, + "loss": 0.0861, + "step": 10608 + }, + { + "epoch": 1.49, + "learning_rate": 4.7518014224218606e-05, + "loss": 0.0679, + "step": 10610 + }, + { + "epoch": 1.49, + "learning_rate": 4.751754632229085e-05, + "loss": 0.0859, + "step": 10612 + }, + { + "epoch": 1.49, + "learning_rate": 4.751707842036309e-05, + "loss": 0.0857, + "step": 10614 + }, + { + "epoch": 1.49, + "learning_rate": 4.751661051843534e-05, + "loss": 0.0977, + "step": 10616 + }, + { + "epoch": 1.49, + "learning_rate": 4.751614261650758e-05, + "loss": 0.0763, + "step": 10618 + }, + { + "epoch": 1.49, + "learning_rate": 4.751567471457983e-05, + "loss": 0.0703, + "step": 10620 + }, + { + "epoch": 1.49, + "learning_rate": 4.751520681265207e-05, + "loss": 0.0713, + "step": 10622 + }, + { + "epoch": 1.49, + "learning_rate": 4.7514738910724314e-05, + "loss": 0.0648, + "step": 10624 + }, + { + "epoch": 1.49, + "learning_rate": 4.751427100879656e-05, + "loss": 0.0707, + "step": 10626 + }, + { + "epoch": 1.49, + "learning_rate": 4.7513803106868806e-05, + "loss": 0.077, + "step": 10628 + }, + { + "epoch": 1.49, + "learning_rate": 4.7513335204941045e-05, + "loss": 0.102, + "step": 10630 + }, + { + "epoch": 1.49, + "learning_rate": 4.751286730301329e-05, + "loss": 0.067, + "step": 10632 + }, + { + "epoch": 1.49, + "learning_rate": 4.751239940108553e-05, + "loss": 0.0642, + "step": 10634 + }, + { + "epoch": 1.49, + "learning_rate": 4.751193149915778e-05, + "loss": 0.0691, + "step": 10636 + }, + { + "epoch": 1.49, + "learning_rate": 4.751146359723002e-05, + "loss": 0.0809, + "step": 10638 + }, + { + "epoch": 1.49, + "learning_rate": 4.751099569530227e-05, + "loss": 0.0847, + "step": 10640 + }, + { + "epoch": 1.49, + "learning_rate": 4.751052779337451e-05, + "loss": 0.069, + "step": 10642 + }, + { + "epoch": 1.49, + "learning_rate": 4.751005989144676e-05, + "loss": 0.0711, + "step": 10644 + }, + { + "epoch": 1.49, + "learning_rate": 4.7509591989519e-05, + "loss": 0.0763, + "step": 10646 + }, + { + "epoch": 1.49, + "learning_rate": 4.7509124087591245e-05, + "loss": 0.0765, + "step": 10648 + }, + { + "epoch": 1.49, + "learning_rate": 4.7508656185663484e-05, + "loss": 0.1077, + "step": 10650 + }, + { + "epoch": 1.5, + "learning_rate": 4.750818828373573e-05, + "loss": 0.0587, + "step": 10652 + }, + { + "epoch": 1.5, + "learning_rate": 4.7507720381807976e-05, + "loss": 0.0733, + "step": 10654 + }, + { + "epoch": 1.5, + "learning_rate": 4.750725247988022e-05, + "loss": 0.0625, + "step": 10656 + }, + { + "epoch": 1.5, + "learning_rate": 4.750678457795246e-05, + "loss": 0.0723, + "step": 10658 + }, + { + "epoch": 1.5, + "learning_rate": 4.7506316676024707e-05, + "loss": 0.0725, + "step": 10660 + }, + { + "epoch": 1.5, + "learning_rate": 4.750584877409695e-05, + "loss": 0.0743, + "step": 10662 + }, + { + "epoch": 1.5, + "learning_rate": 4.75053808721692e-05, + "loss": 0.0622, + "step": 10664 + }, + { + "epoch": 1.5, + "learning_rate": 4.750491297024144e-05, + "loss": 0.1071, + "step": 10666 + }, + { + "epoch": 1.5, + "learning_rate": 4.7504445068313683e-05, + "loss": 0.0714, + "step": 10668 + }, + { + "epoch": 1.5, + "learning_rate": 4.750397716638593e-05, + "loss": 0.05, + "step": 10670 + }, + { + "epoch": 1.5, + "learning_rate": 4.7503509264458175e-05, + "loss": 0.0744, + "step": 10672 + }, + { + "epoch": 1.5, + "learning_rate": 4.7503041362530414e-05, + "loss": 0.0863, + "step": 10674 + }, + { + "epoch": 1.5, + "learning_rate": 4.750257346060266e-05, + "loss": 0.1183, + "step": 10676 + }, + { + "epoch": 1.5, + "learning_rate": 4.7502105558674906e-05, + "loss": 0.1107, + "step": 10678 + }, + { + "epoch": 1.5, + "learning_rate": 4.750163765674715e-05, + "loss": 0.0695, + "step": 10680 + }, + { + "epoch": 1.5, + "learning_rate": 4.750116975481939e-05, + "loss": 0.0695, + "step": 10682 + }, + { + "epoch": 1.5, + "learning_rate": 4.750070185289164e-05, + "loss": 0.0654, + "step": 10684 + }, + { + "epoch": 1.5, + "learning_rate": 4.7500233950963876e-05, + "loss": 0.0953, + "step": 10686 + }, + { + "epoch": 1.5, + "learning_rate": 4.749976604903613e-05, + "loss": 0.0743, + "step": 10688 + }, + { + "epoch": 1.5, + "learning_rate": 4.749929814710837e-05, + "loss": 0.073, + "step": 10690 + }, + { + "epoch": 1.5, + "learning_rate": 4.7498830245180614e-05, + "loss": 0.0664, + "step": 10692 + }, + { + "epoch": 1.5, + "learning_rate": 4.749836234325285e-05, + "loss": 0.0779, + "step": 10694 + }, + { + "epoch": 1.5, + "learning_rate": 4.7497894441325106e-05, + "loss": 0.0899, + "step": 10696 + }, + { + "epoch": 1.5, + "learning_rate": 4.7497426539397345e-05, + "loss": 0.0873, + "step": 10698 + }, + { + "epoch": 1.5, + "learning_rate": 4.749695863746959e-05, + "loss": 0.0767, + "step": 10700 + }, + { + "epoch": 1.5, + "learning_rate": 4.749649073554183e-05, + "loss": 0.0691, + "step": 10702 + }, + { + "epoch": 1.5, + "learning_rate": 4.7496022833614076e-05, + "loss": 0.0822, + "step": 10704 + }, + { + "epoch": 1.5, + "learning_rate": 4.749555493168632e-05, + "loss": 0.0739, + "step": 10706 + }, + { + "epoch": 1.5, + "learning_rate": 4.749508702975857e-05, + "loss": 0.0483, + "step": 10708 + }, + { + "epoch": 1.5, + "learning_rate": 4.749461912783081e-05, + "loss": 0.0882, + "step": 10710 + }, + { + "epoch": 1.5, + "learning_rate": 4.749415122590305e-05, + "loss": 0.0757, + "step": 10712 + }, + { + "epoch": 1.5, + "learning_rate": 4.74936833239753e-05, + "loss": 0.0726, + "step": 10714 + }, + { + "epoch": 1.5, + "learning_rate": 4.7493215422047545e-05, + "loss": 0.0577, + "step": 10716 + }, + { + "epoch": 1.5, + "learning_rate": 4.7492747520119784e-05, + "loss": 0.0701, + "step": 10718 + }, + { + "epoch": 1.5, + "learning_rate": 4.749227961819203e-05, + "loss": 0.1074, + "step": 10720 + }, + { + "epoch": 1.51, + "learning_rate": 4.7491811716264276e-05, + "loss": 0.0624, + "step": 10722 + }, + { + "epoch": 1.51, + "learning_rate": 4.749134381433652e-05, + "loss": 0.0882, + "step": 10724 + }, + { + "epoch": 1.51, + "learning_rate": 4.749087591240876e-05, + "loss": 0.0645, + "step": 10726 + }, + { + "epoch": 1.51, + "learning_rate": 4.7490408010481007e-05, + "loss": 0.0711, + "step": 10728 + }, + { + "epoch": 1.51, + "learning_rate": 4.748994010855325e-05, + "loss": 0.0796, + "step": 10730 + }, + { + "epoch": 1.51, + "learning_rate": 4.74894722066255e-05, + "loss": 0.0767, + "step": 10732 + }, + { + "epoch": 1.51, + "learning_rate": 4.748900430469774e-05, + "loss": 0.0687, + "step": 10734 + }, + { + "epoch": 1.51, + "learning_rate": 4.7488536402769983e-05, + "loss": 0.084, + "step": 10736 + }, + { + "epoch": 1.51, + "learning_rate": 4.748806850084222e-05, + "loss": 0.067, + "step": 10738 + }, + { + "epoch": 1.51, + "learning_rate": 4.7487600598914475e-05, + "loss": 0.0952, + "step": 10740 + }, + { + "epoch": 1.51, + "learning_rate": 4.7487132696986714e-05, + "loss": 0.0873, + "step": 10742 + }, + { + "epoch": 1.51, + "learning_rate": 4.748666479505896e-05, + "loss": 0.0961, + "step": 10744 + }, + { + "epoch": 1.51, + "learning_rate": 4.74861968931312e-05, + "loss": 0.0729, + "step": 10746 + }, + { + "epoch": 1.51, + "learning_rate": 4.7485728991203445e-05, + "loss": 0.0772, + "step": 10748 + }, + { + "epoch": 1.51, + "learning_rate": 4.748526108927569e-05, + "loss": 0.0884, + "step": 10750 + }, + { + "epoch": 1.51, + "learning_rate": 4.748479318734794e-05, + "loss": 0.0868, + "step": 10752 + }, + { + "epoch": 1.51, + "learning_rate": 4.7484325285420176e-05, + "loss": 0.0791, + "step": 10754 + }, + { + "epoch": 1.51, + "learning_rate": 4.748385738349242e-05, + "loss": 0.0874, + "step": 10756 + }, + { + "epoch": 1.51, + "learning_rate": 4.748338948156467e-05, + "loss": 0.0987, + "step": 10758 + }, + { + "epoch": 1.51, + "learning_rate": 4.7482921579636914e-05, + "loss": 0.0765, + "step": 10760 + }, + { + "epoch": 1.51, + "learning_rate": 4.748245367770915e-05, + "loss": 0.0673, + "step": 10762 + }, + { + "epoch": 1.51, + "learning_rate": 4.74819857757814e-05, + "loss": 0.0719, + "step": 10764 + }, + { + "epoch": 1.51, + "learning_rate": 4.7481517873853645e-05, + "loss": 0.0771, + "step": 10766 + }, + { + "epoch": 1.51, + "learning_rate": 4.7481049971925884e-05, + "loss": 0.0686, + "step": 10768 + }, + { + "epoch": 1.51, + "learning_rate": 4.748058206999813e-05, + "loss": 0.0705, + "step": 10770 + }, + { + "epoch": 1.51, + "learning_rate": 4.748011416807037e-05, + "loss": 0.0792, + "step": 10772 + }, + { + "epoch": 1.51, + "learning_rate": 4.747964626614262e-05, + "loss": 0.072, + "step": 10774 + }, + { + "epoch": 1.51, + "learning_rate": 4.747917836421486e-05, + "loss": 0.06, + "step": 10776 + }, + { + "epoch": 1.51, + "learning_rate": 4.747871046228711e-05, + "loss": 0.1011, + "step": 10778 + }, + { + "epoch": 1.51, + "learning_rate": 4.7478242560359346e-05, + "loss": 0.0713, + "step": 10780 + }, + { + "epoch": 1.51, + "learning_rate": 4.747777465843159e-05, + "loss": 0.0661, + "step": 10782 + }, + { + "epoch": 1.51, + "learning_rate": 4.747730675650384e-05, + "loss": 0.0876, + "step": 10784 + }, + { + "epoch": 1.51, + "learning_rate": 4.7476838854576084e-05, + "loss": 0.0917, + "step": 10786 + }, + { + "epoch": 1.51, + "learning_rate": 4.747637095264832e-05, + "loss": 0.1038, + "step": 10788 + }, + { + "epoch": 1.51, + "learning_rate": 4.747590305072057e-05, + "loss": 0.0869, + "step": 10790 + }, + { + "epoch": 1.51, + "learning_rate": 4.7475435148792815e-05, + "loss": 0.0682, + "step": 10792 + }, + { + "epoch": 1.52, + "learning_rate": 4.747496724686506e-05, + "loss": 0.1075, + "step": 10794 + }, + { + "epoch": 1.52, + "learning_rate": 4.74744993449373e-05, + "loss": 0.0597, + "step": 10796 + }, + { + "epoch": 1.52, + "learning_rate": 4.7474031443009546e-05, + "loss": 0.0841, + "step": 10798 + }, + { + "epoch": 1.52, + "learning_rate": 4.747356354108179e-05, + "loss": 0.0933, + "step": 10800 + }, + { + "epoch": 1.52, + "learning_rate": 4.747309563915404e-05, + "loss": 0.0673, + "step": 10802 + }, + { + "epoch": 1.52, + "learning_rate": 4.747262773722628e-05, + "loss": 0.0675, + "step": 10804 + }, + { + "epoch": 1.52, + "learning_rate": 4.747215983529852e-05, + "loss": 0.0756, + "step": 10806 + }, + { + "epoch": 1.52, + "learning_rate": 4.747169193337077e-05, + "loss": 0.07, + "step": 10808 + }, + { + "epoch": 1.52, + "learning_rate": 4.7471224031443014e-05, + "loss": 0.0655, + "step": 10810 + }, + { + "epoch": 1.52, + "learning_rate": 4.7470756129515254e-05, + "loss": 0.074, + "step": 10812 + }, + { + "epoch": 1.52, + "learning_rate": 4.74702882275875e-05, + "loss": 0.0651, + "step": 10814 + }, + { + "epoch": 1.52, + "learning_rate": 4.746982032565974e-05, + "loss": 0.0483, + "step": 10816 + }, + { + "epoch": 1.52, + "learning_rate": 4.746935242373199e-05, + "loss": 0.0795, + "step": 10818 + }, + { + "epoch": 1.52, + "learning_rate": 4.746888452180423e-05, + "loss": 0.0669, + "step": 10820 + }, + { + "epoch": 1.52, + "learning_rate": 4.7468416619876476e-05, + "loss": 0.0668, + "step": 10822 + }, + { + "epoch": 1.52, + "learning_rate": 4.7467948717948715e-05, + "loss": 0.0602, + "step": 10824 + }, + { + "epoch": 1.52, + "learning_rate": 4.746748081602097e-05, + "loss": 0.0764, + "step": 10826 + }, + { + "epoch": 1.52, + "learning_rate": 4.746701291409321e-05, + "loss": 0.0816, + "step": 10828 + }, + { + "epoch": 1.52, + "learning_rate": 4.746654501216545e-05, + "loss": 0.1023, + "step": 10830 + }, + { + "epoch": 1.52, + "learning_rate": 4.746607711023769e-05, + "loss": 0.0804, + "step": 10832 + }, + { + "epoch": 1.52, + "learning_rate": 4.746560920830994e-05, + "loss": 0.1046, + "step": 10834 + }, + { + "epoch": 1.52, + "learning_rate": 4.7465141306382184e-05, + "loss": 0.0669, + "step": 10836 + }, + { + "epoch": 1.52, + "learning_rate": 4.746467340445443e-05, + "loss": 0.0675, + "step": 10838 + }, + { + "epoch": 1.52, + "learning_rate": 4.746420550252667e-05, + "loss": 0.1118, + "step": 10840 + }, + { + "epoch": 1.52, + "learning_rate": 4.7463737600598915e-05, + "loss": 0.0717, + "step": 10842 + }, + { + "epoch": 1.52, + "learning_rate": 4.746326969867116e-05, + "loss": 0.0678, + "step": 10844 + }, + { + "epoch": 1.52, + "learning_rate": 4.746280179674341e-05, + "loss": 0.0864, + "step": 10846 + }, + { + "epoch": 1.52, + "learning_rate": 4.7462333894815646e-05, + "loss": 0.0957, + "step": 10848 + }, + { + "epoch": 1.52, + "learning_rate": 4.746186599288789e-05, + "loss": 0.0651, + "step": 10850 + }, + { + "epoch": 1.52, + "learning_rate": 4.746139809096014e-05, + "loss": 0.0623, + "step": 10852 + }, + { + "epoch": 1.52, + "learning_rate": 4.7460930189032384e-05, + "loss": 0.0774, + "step": 10854 + }, + { + "epoch": 1.52, + "learning_rate": 4.746046228710462e-05, + "loss": 0.0969, + "step": 10856 + }, + { + "epoch": 1.52, + "learning_rate": 4.745999438517687e-05, + "loss": 0.0752, + "step": 10858 + }, + { + "epoch": 1.52, + "learning_rate": 4.7459526483249115e-05, + "loss": 0.0795, + "step": 10860 + }, + { + "epoch": 1.52, + "learning_rate": 4.745905858132136e-05, + "loss": 0.0802, + "step": 10862 + }, + { + "epoch": 1.52, + "learning_rate": 4.74585906793936e-05, + "loss": 0.0815, + "step": 10864 + }, + { + "epoch": 1.53, + "learning_rate": 4.7458122777465846e-05, + "loss": 0.0612, + "step": 10866 + }, + { + "epoch": 1.53, + "learning_rate": 4.7457654875538085e-05, + "loss": 0.0575, + "step": 10868 + }, + { + "epoch": 1.53, + "learning_rate": 4.745718697361034e-05, + "loss": 0.0899, + "step": 10870 + }, + { + "epoch": 1.53, + "learning_rate": 4.745671907168258e-05, + "loss": 0.082, + "step": 10872 + }, + { + "epoch": 1.53, + "learning_rate": 4.745625116975482e-05, + "loss": 0.084, + "step": 10874 + }, + { + "epoch": 1.53, + "learning_rate": 4.745578326782706e-05, + "loss": 0.0872, + "step": 10876 + }, + { + "epoch": 1.53, + "learning_rate": 4.7455315365899314e-05, + "loss": 0.0752, + "step": 10878 + }, + { + "epoch": 1.53, + "learning_rate": 4.7454847463971554e-05, + "loss": 0.0753, + "step": 10880 + }, + { + "epoch": 1.53, + "learning_rate": 4.74543795620438e-05, + "loss": 0.0819, + "step": 10882 + }, + { + "epoch": 1.53, + "learning_rate": 4.745391166011604e-05, + "loss": 0.0843, + "step": 10884 + }, + { + "epoch": 1.53, + "learning_rate": 4.7453443758188284e-05, + "loss": 0.098, + "step": 10886 + }, + { + "epoch": 1.53, + "learning_rate": 4.745297585626053e-05, + "loss": 0.0839, + "step": 10888 + }, + { + "epoch": 1.53, + "learning_rate": 4.7452507954332776e-05, + "loss": 0.0642, + "step": 10890 + }, + { + "epoch": 1.53, + "learning_rate": 4.7452040052405015e-05, + "loss": 0.0771, + "step": 10892 + }, + { + "epoch": 1.53, + "learning_rate": 4.745157215047726e-05, + "loss": 0.08, + "step": 10894 + }, + { + "epoch": 1.53, + "learning_rate": 4.745110424854951e-05, + "loss": 0.0784, + "step": 10896 + }, + { + "epoch": 1.53, + "learning_rate": 4.745063634662175e-05, + "loss": 0.0784, + "step": 10898 + }, + { + "epoch": 1.53, + "learning_rate": 4.745016844469399e-05, + "loss": 0.0807, + "step": 10900 + }, + { + "epoch": 1.53, + "learning_rate": 4.744970054276624e-05, + "loss": 0.0517, + "step": 10902 + }, + { + "epoch": 1.53, + "learning_rate": 4.7449232640838484e-05, + "loss": 0.0802, + "step": 10904 + }, + { + "epoch": 1.53, + "learning_rate": 4.744876473891073e-05, + "loss": 0.0746, + "step": 10906 + }, + { + "epoch": 1.53, + "learning_rate": 4.744829683698297e-05, + "loss": 0.0678, + "step": 10908 + }, + { + "epoch": 1.53, + "learning_rate": 4.7447828935055215e-05, + "loss": 0.0581, + "step": 10910 + }, + { + "epoch": 1.53, + "learning_rate": 4.7447361033127454e-05, + "loss": 0.0775, + "step": 10912 + }, + { + "epoch": 1.53, + "learning_rate": 4.744689313119971e-05, + "loss": 0.0733, + "step": 10914 + }, + { + "epoch": 1.53, + "learning_rate": 4.7446425229271946e-05, + "loss": 0.0898, + "step": 10916 + }, + { + "epoch": 1.53, + "learning_rate": 4.744595732734419e-05, + "loss": 0.1295, + "step": 10918 + }, + { + "epoch": 1.53, + "learning_rate": 4.744548942541643e-05, + "loss": 0.0819, + "step": 10920 + }, + { + "epoch": 1.53, + "learning_rate": 4.7445021523488684e-05, + "loss": 0.0753, + "step": 10922 + }, + { + "epoch": 1.53, + "learning_rate": 4.744455362156092e-05, + "loss": 0.0874, + "step": 10924 + }, + { + "epoch": 1.53, + "learning_rate": 4.744408571963317e-05, + "loss": 0.0692, + "step": 10926 + }, + { + "epoch": 1.53, + "learning_rate": 4.744361781770541e-05, + "loss": 0.1035, + "step": 10928 + }, + { + "epoch": 1.53, + "learning_rate": 4.7443149915777654e-05, + "loss": 0.0888, + "step": 10930 + }, + { + "epoch": 1.53, + "learning_rate": 4.74426820138499e-05, + "loss": 0.0741, + "step": 10932 + }, + { + "epoch": 1.53, + "learning_rate": 4.7442214111922146e-05, + "loss": 0.0764, + "step": 10934 + }, + { + "epoch": 1.54, + "learning_rate": 4.7441746209994385e-05, + "loss": 0.0679, + "step": 10936 + }, + { + "epoch": 1.54, + "learning_rate": 4.744127830806663e-05, + "loss": 0.0854, + "step": 10938 + }, + { + "epoch": 1.54, + "learning_rate": 4.744081040613888e-05, + "loss": 0.0817, + "step": 10940 + }, + { + "epoch": 1.54, + "learning_rate": 4.744034250421112e-05, + "loss": 0.0852, + "step": 10942 + }, + { + "epoch": 1.54, + "learning_rate": 4.743987460228336e-05, + "loss": 0.0856, + "step": 10944 + }, + { + "epoch": 1.54, + "learning_rate": 4.743940670035561e-05, + "loss": 0.0787, + "step": 10946 + }, + { + "epoch": 1.54, + "learning_rate": 4.7438938798427854e-05, + "loss": 0.0854, + "step": 10948 + }, + { + "epoch": 1.54, + "learning_rate": 4.74384708965001e-05, + "loss": 0.0775, + "step": 10950 + }, + { + "epoch": 1.54, + "learning_rate": 4.743800299457234e-05, + "loss": 0.0794, + "step": 10952 + }, + { + "epoch": 1.54, + "learning_rate": 4.7437535092644585e-05, + "loss": 0.0756, + "step": 10954 + }, + { + "epoch": 1.54, + "learning_rate": 4.743706719071683e-05, + "loss": 0.085, + "step": 10956 + }, + { + "epoch": 1.54, + "learning_rate": 4.7436599288789076e-05, + "loss": 0.0747, + "step": 10958 + }, + { + "epoch": 1.54, + "learning_rate": 4.7436131386861315e-05, + "loss": 0.0997, + "step": 10960 + }, + { + "epoch": 1.54, + "learning_rate": 4.743566348493356e-05, + "loss": 0.0884, + "step": 10962 + }, + { + "epoch": 1.54, + "learning_rate": 4.74351955830058e-05, + "loss": 0.1031, + "step": 10964 + }, + { + "epoch": 1.54, + "learning_rate": 4.743472768107805e-05, + "loss": 0.0583, + "step": 10966 + }, + { + "epoch": 1.54, + "learning_rate": 4.743425977915029e-05, + "loss": 0.0806, + "step": 10968 + }, + { + "epoch": 1.54, + "learning_rate": 4.743379187722254e-05, + "loss": 0.0795, + "step": 10970 + }, + { + "epoch": 1.54, + "learning_rate": 4.743332397529478e-05, + "loss": 0.0618, + "step": 10972 + }, + { + "epoch": 1.54, + "learning_rate": 4.743285607336703e-05, + "loss": 0.0768, + "step": 10974 + }, + { + "epoch": 1.54, + "learning_rate": 4.743238817143927e-05, + "loss": 0.0965, + "step": 10976 + }, + { + "epoch": 1.54, + "learning_rate": 4.7431920269511515e-05, + "loss": 0.0713, + "step": 10978 + }, + { + "epoch": 1.54, + "learning_rate": 4.7431452367583754e-05, + "loss": 0.072, + "step": 10980 + }, + { + "epoch": 1.54, + "learning_rate": 4.7430984465656e-05, + "loss": 0.0749, + "step": 10982 + }, + { + "epoch": 1.54, + "learning_rate": 4.7430516563728246e-05, + "loss": 0.0773, + "step": 10984 + }, + { + "epoch": 1.54, + "learning_rate": 4.743004866180049e-05, + "loss": 0.0777, + "step": 10986 + }, + { + "epoch": 1.54, + "learning_rate": 4.742958075987273e-05, + "loss": 0.0919, + "step": 10988 + }, + { + "epoch": 1.54, + "learning_rate": 4.742911285794498e-05, + "loss": 0.0824, + "step": 10990 + }, + { + "epoch": 1.54, + "learning_rate": 4.742864495601722e-05, + "loss": 0.0676, + "step": 10992 + }, + { + "epoch": 1.54, + "learning_rate": 4.742817705408947e-05, + "loss": 0.0821, + "step": 10994 + }, + { + "epoch": 1.54, + "learning_rate": 4.742770915216171e-05, + "loss": 0.0648, + "step": 10996 + }, + { + "epoch": 1.54, + "learning_rate": 4.7427241250233954e-05, + "loss": 0.0847, + "step": 10998 + }, + { + "epoch": 1.54, + "learning_rate": 4.74267733483062e-05, + "loss": 0.0878, + "step": 11000 + }, + { + "epoch": 1.54, + "eval_gen_len": 29.569, + "eval_loss": 1.0419230461120605, + "eval_meteor": 0.0426, + "eval_runtime": 14.397, + "eval_samples_per_second": 4.029, + "eval_steps_per_second": 0.556, + "step": 11000 + }, + { + "epoch": 1.54, + "learning_rate": 4.7426305446378446e-05, + "loss": 0.0881, + "step": 11002 + }, + { + "epoch": 1.54, + "learning_rate": 4.7425837544450685e-05, + "loss": 0.0838, + "step": 11004 + }, + { + "epoch": 1.54, + "learning_rate": 4.742536964252293e-05, + "loss": 0.0721, + "step": 11006 + }, + { + "epoch": 1.55, + "learning_rate": 4.742490174059518e-05, + "loss": 0.1109, + "step": 11008 + }, + { + "epoch": 1.55, + "learning_rate": 4.742443383866742e-05, + "loss": 0.1078, + "step": 11010 + }, + { + "epoch": 1.55, + "learning_rate": 4.742396593673966e-05, + "loss": 0.0749, + "step": 11012 + }, + { + "epoch": 1.55, + "learning_rate": 4.742349803481191e-05, + "loss": 0.0613, + "step": 11014 + }, + { + "epoch": 1.55, + "learning_rate": 4.742303013288415e-05, + "loss": 0.0906, + "step": 11016 + }, + { + "epoch": 1.55, + "learning_rate": 4.742256223095639e-05, + "loss": 0.0931, + "step": 11018 + }, + { + "epoch": 1.55, + "learning_rate": 4.742209432902864e-05, + "loss": 0.0662, + "step": 11020 + }, + { + "epoch": 1.55, + "learning_rate": 4.742162642710088e-05, + "loss": 0.0639, + "step": 11022 + }, + { + "epoch": 1.55, + "learning_rate": 4.7421158525173124e-05, + "loss": 0.0959, + "step": 11024 + }, + { + "epoch": 1.55, + "learning_rate": 4.742069062324537e-05, + "loss": 0.096, + "step": 11026 + }, + { + "epoch": 1.55, + "learning_rate": 4.7420222721317615e-05, + "loss": 0.0694, + "step": 11028 + }, + { + "epoch": 1.55, + "learning_rate": 4.7419754819389855e-05, + "loss": 0.0707, + "step": 11030 + }, + { + "epoch": 1.55, + "learning_rate": 4.74192869174621e-05, + "loss": 0.068, + "step": 11032 + }, + { + "epoch": 1.55, + "learning_rate": 4.7418819015534346e-05, + "loss": 0.075, + "step": 11034 + }, + { + "epoch": 1.55, + "learning_rate": 4.741835111360659e-05, + "loss": 0.0659, + "step": 11036 + }, + { + "epoch": 1.55, + "learning_rate": 4.741788321167883e-05, + "loss": 0.0663, + "step": 11038 + }, + { + "epoch": 1.55, + "learning_rate": 4.741741530975108e-05, + "loss": 0.1099, + "step": 11040 + }, + { + "epoch": 1.55, + "learning_rate": 4.741694740782332e-05, + "loss": 0.0676, + "step": 11042 + }, + { + "epoch": 1.55, + "learning_rate": 4.741647950589557e-05, + "loss": 0.0628, + "step": 11044 + }, + { + "epoch": 1.55, + "learning_rate": 4.741601160396781e-05, + "loss": 0.0762, + "step": 11046 + }, + { + "epoch": 1.55, + "learning_rate": 4.7415543702040054e-05, + "loss": 0.1015, + "step": 11048 + }, + { + "epoch": 1.55, + "learning_rate": 4.7415075800112293e-05, + "loss": 0.0709, + "step": 11050 + }, + { + "epoch": 1.55, + "learning_rate": 4.7414607898184546e-05, + "loss": 0.0794, + "step": 11052 + }, + { + "epoch": 1.55, + "learning_rate": 4.7414139996256785e-05, + "loss": 0.097, + "step": 11054 + }, + { + "epoch": 1.55, + "learning_rate": 4.741367209432903e-05, + "loss": 0.0749, + "step": 11056 + }, + { + "epoch": 1.55, + "learning_rate": 4.741320419240127e-05, + "loss": 0.0902, + "step": 11058 + }, + { + "epoch": 1.55, + "learning_rate": 4.7412736290473516e-05, + "loss": 0.0622, + "step": 11060 + }, + { + "epoch": 1.55, + "learning_rate": 4.741226838854576e-05, + "loss": 0.0613, + "step": 11062 + }, + { + "epoch": 1.55, + "learning_rate": 4.741180048661801e-05, + "loss": 0.0799, + "step": 11064 + }, + { + "epoch": 1.55, + "learning_rate": 4.741133258469025e-05, + "loss": 0.0979, + "step": 11066 + }, + { + "epoch": 1.55, + "learning_rate": 4.741086468276249e-05, + "loss": 0.0748, + "step": 11068 + }, + { + "epoch": 1.55, + "learning_rate": 4.741039678083474e-05, + "loss": 0.0949, + "step": 11070 + }, + { + "epoch": 1.55, + "learning_rate": 4.7409928878906985e-05, + "loss": 0.0605, + "step": 11072 + }, + { + "epoch": 1.55, + "learning_rate": 4.7409460976979224e-05, + "loss": 0.0771, + "step": 11074 + }, + { + "epoch": 1.55, + "learning_rate": 4.740899307505147e-05, + "loss": 0.0686, + "step": 11076 + }, + { + "epoch": 1.56, + "learning_rate": 4.7408525173123716e-05, + "loss": 0.0721, + "step": 11078 + }, + { + "epoch": 1.56, + "learning_rate": 4.740805727119596e-05, + "loss": 0.0767, + "step": 11080 + }, + { + "epoch": 1.56, + "learning_rate": 4.74075893692682e-05, + "loss": 0.0871, + "step": 11082 + }, + { + "epoch": 1.56, + "learning_rate": 4.740712146734045e-05, + "loss": 0.0645, + "step": 11084 + }, + { + "epoch": 1.56, + "learning_rate": 4.740665356541269e-05, + "loss": 0.0693, + "step": 11086 + }, + { + "epoch": 1.56, + "learning_rate": 4.740618566348494e-05, + "loss": 0.0795, + "step": 11088 + }, + { + "epoch": 1.56, + "learning_rate": 4.740571776155718e-05, + "loss": 0.0766, + "step": 11090 + }, + { + "epoch": 1.56, + "learning_rate": 4.7405249859629424e-05, + "loss": 0.0742, + "step": 11092 + }, + { + "epoch": 1.56, + "learning_rate": 4.740478195770166e-05, + "loss": 0.0835, + "step": 11094 + }, + { + "epoch": 1.56, + "learning_rate": 4.7404314055773916e-05, + "loss": 0.1059, + "step": 11096 + }, + { + "epoch": 1.56, + "learning_rate": 4.7403846153846155e-05, + "loss": 0.0674, + "step": 11098 + }, + { + "epoch": 1.56, + "learning_rate": 4.74033782519184e-05, + "loss": 0.1083, + "step": 11100 + }, + { + "epoch": 1.56, + "learning_rate": 4.740291034999064e-05, + "loss": 0.0774, + "step": 11102 + }, + { + "epoch": 1.56, + "learning_rate": 4.740244244806289e-05, + "loss": 0.0647, + "step": 11104 + }, + { + "epoch": 1.56, + "learning_rate": 4.740197454613513e-05, + "loss": 0.0657, + "step": 11106 + }, + { + "epoch": 1.56, + "learning_rate": 4.740150664420738e-05, + "loss": 0.0769, + "step": 11108 + }, + { + "epoch": 1.56, + "learning_rate": 4.7401038742279617e-05, + "loss": 0.0954, + "step": 11110 + }, + { + "epoch": 1.56, + "learning_rate": 4.740057084035186e-05, + "loss": 0.0713, + "step": 11112 + }, + { + "epoch": 1.56, + "learning_rate": 4.740010293842411e-05, + "loss": 0.1011, + "step": 11114 + }, + { + "epoch": 1.56, + "learning_rate": 4.7399635036496354e-05, + "loss": 0.0694, + "step": 11116 + }, + { + "epoch": 1.56, + "learning_rate": 4.7399167134568593e-05, + "loss": 0.0856, + "step": 11118 + }, + { + "epoch": 1.56, + "learning_rate": 4.739869923264084e-05, + "loss": 0.0795, + "step": 11120 + }, + { + "epoch": 1.56, + "learning_rate": 4.7398231330713085e-05, + "loss": 0.075, + "step": 11122 + }, + { + "epoch": 1.56, + "learning_rate": 4.739776342878533e-05, + "loss": 0.0653, + "step": 11124 + }, + { + "epoch": 1.56, + "learning_rate": 4.739729552685757e-05, + "loss": 0.079, + "step": 11126 + }, + { + "epoch": 1.56, + "learning_rate": 4.7396827624929816e-05, + "loss": 0.0973, + "step": 11128 + }, + { + "epoch": 1.56, + "learning_rate": 4.739635972300206e-05, + "loss": 0.0547, + "step": 11130 + }, + { + "epoch": 1.56, + "learning_rate": 4.739589182107431e-05, + "loss": 0.1045, + "step": 11132 + }, + { + "epoch": 1.56, + "learning_rate": 4.739542391914655e-05, + "loss": 0.0797, + "step": 11134 + }, + { + "epoch": 1.56, + "learning_rate": 4.739495601721879e-05, + "loss": 0.0819, + "step": 11136 + }, + { + "epoch": 1.56, + "learning_rate": 4.739448811529104e-05, + "loss": 0.0838, + "step": 11138 + }, + { + "epoch": 1.56, + "learning_rate": 4.7394020213363285e-05, + "loss": 0.0873, + "step": 11140 + }, + { + "epoch": 1.56, + "learning_rate": 4.7393552311435524e-05, + "loss": 0.0934, + "step": 11142 + }, + { + "epoch": 1.56, + "learning_rate": 4.739308440950777e-05, + "loss": 0.071, + "step": 11144 + }, + { + "epoch": 1.56, + "learning_rate": 4.739261650758001e-05, + "loss": 0.0773, + "step": 11146 + }, + { + "epoch": 1.56, + "learning_rate": 4.739214860565226e-05, + "loss": 0.0948, + "step": 11148 + }, + { + "epoch": 1.57, + "learning_rate": 4.73916807037245e-05, + "loss": 0.0941, + "step": 11150 + }, + { + "epoch": 1.57, + "learning_rate": 4.739121280179675e-05, + "loss": 0.082, + "step": 11152 + }, + { + "epoch": 1.57, + "learning_rate": 4.7390744899868986e-05, + "loss": 0.0725, + "step": 11154 + }, + { + "epoch": 1.57, + "learning_rate": 4.739027699794124e-05, + "loss": 0.0982, + "step": 11156 + }, + { + "epoch": 1.57, + "learning_rate": 4.738980909601348e-05, + "loss": 0.0918, + "step": 11158 + }, + { + "epoch": 1.57, + "learning_rate": 4.7389341194085724e-05, + "loss": 0.0903, + "step": 11160 + }, + { + "epoch": 1.57, + "learning_rate": 4.738887329215796e-05, + "loss": 0.0882, + "step": 11162 + }, + { + "epoch": 1.57, + "learning_rate": 4.738840539023021e-05, + "loss": 0.0734, + "step": 11164 + }, + { + "epoch": 1.57, + "learning_rate": 4.7387937488302455e-05, + "loss": 0.0719, + "step": 11166 + }, + { + "epoch": 1.57, + "learning_rate": 4.73874695863747e-05, + "loss": 0.088, + "step": 11168 + }, + { + "epoch": 1.57, + "learning_rate": 4.738700168444694e-05, + "loss": 0.073, + "step": 11170 + }, + { + "epoch": 1.57, + "learning_rate": 4.7386533782519186e-05, + "loss": 0.0931, + "step": 11172 + }, + { + "epoch": 1.57, + "learning_rate": 4.738606588059143e-05, + "loss": 0.1425, + "step": 11174 + }, + { + "epoch": 1.57, + "learning_rate": 4.738559797866368e-05, + "loss": 0.064, + "step": 11176 + }, + { + "epoch": 1.57, + "learning_rate": 4.7385130076735917e-05, + "loss": 0.0759, + "step": 11178 + }, + { + "epoch": 1.57, + "learning_rate": 4.738466217480816e-05, + "loss": 0.1033, + "step": 11180 + }, + { + "epoch": 1.57, + "learning_rate": 4.738419427288041e-05, + "loss": 0.0938, + "step": 11182 + }, + { + "epoch": 1.57, + "learning_rate": 4.7383726370952654e-05, + "loss": 0.0601, + "step": 11184 + }, + { + "epoch": 1.57, + "learning_rate": 4.7383258469024893e-05, + "loss": 0.0816, + "step": 11186 + }, + { + "epoch": 1.57, + "learning_rate": 4.738279056709714e-05, + "loss": 0.0874, + "step": 11188 + }, + { + "epoch": 1.57, + "learning_rate": 4.7382322665169385e-05, + "loss": 0.0709, + "step": 11190 + }, + { + "epoch": 1.57, + "learning_rate": 4.738185476324163e-05, + "loss": 0.0896, + "step": 11192 + }, + { + "epoch": 1.57, + "learning_rate": 4.738138686131387e-05, + "loss": 0.0777, + "step": 11194 + }, + { + "epoch": 1.57, + "learning_rate": 4.7380918959386116e-05, + "loss": 0.0628, + "step": 11196 + }, + { + "epoch": 1.57, + "learning_rate": 4.7380451057458355e-05, + "loss": 0.1021, + "step": 11198 + }, + { + "epoch": 1.57, + "learning_rate": 4.737998315553061e-05, + "loss": 0.073, + "step": 11200 + }, + { + "epoch": 1.57, + "learning_rate": 4.737951525360285e-05, + "loss": 0.0818, + "step": 11202 + }, + { + "epoch": 1.57, + "learning_rate": 4.737904735167509e-05, + "loss": 0.0749, + "step": 11204 + }, + { + "epoch": 1.57, + "learning_rate": 4.737857944974733e-05, + "loss": 0.0763, + "step": 11206 + }, + { + "epoch": 1.57, + "learning_rate": 4.737811154781958e-05, + "loss": 0.069, + "step": 11208 + }, + { + "epoch": 1.57, + "learning_rate": 4.7377643645891824e-05, + "loss": 0.0722, + "step": 11210 + }, + { + "epoch": 1.57, + "learning_rate": 4.737717574396407e-05, + "loss": 0.0952, + "step": 11212 + }, + { + "epoch": 1.57, + "learning_rate": 4.737670784203631e-05, + "loss": 0.0932, + "step": 11214 + }, + { + "epoch": 1.57, + "learning_rate": 4.7376239940108555e-05, + "loss": 0.0806, + "step": 11216 + }, + { + "epoch": 1.57, + "learning_rate": 4.73757720381808e-05, + "loss": 0.0695, + "step": 11218 + }, + { + "epoch": 1.57, + "learning_rate": 4.737530413625305e-05, + "loss": 0.0598, + "step": 11220 + }, + { + "epoch": 1.58, + "learning_rate": 4.7374836234325286e-05, + "loss": 0.1067, + "step": 11222 + }, + { + "epoch": 1.58, + "learning_rate": 4.737436833239753e-05, + "loss": 0.0689, + "step": 11224 + }, + { + "epoch": 1.58, + "learning_rate": 4.737390043046978e-05, + "loss": 0.1037, + "step": 11226 + }, + { + "epoch": 1.58, + "learning_rate": 4.7373432528542024e-05, + "loss": 0.0744, + "step": 11228 + }, + { + "epoch": 1.58, + "learning_rate": 4.737296462661426e-05, + "loss": 0.0758, + "step": 11230 + }, + { + "epoch": 1.58, + "learning_rate": 4.737249672468651e-05, + "loss": 0.0667, + "step": 11232 + }, + { + "epoch": 1.58, + "learning_rate": 4.7372028822758755e-05, + "loss": 0.0683, + "step": 11234 + }, + { + "epoch": 1.58, + "learning_rate": 4.7371560920831e-05, + "loss": 0.0885, + "step": 11236 + }, + { + "epoch": 1.58, + "learning_rate": 4.737109301890324e-05, + "loss": 0.0687, + "step": 11238 + }, + { + "epoch": 1.58, + "learning_rate": 4.7370625116975486e-05, + "loss": 0.0814, + "step": 11240 + }, + { + "epoch": 1.58, + "learning_rate": 4.7370157215047725e-05, + "loss": 0.0836, + "step": 11242 + }, + { + "epoch": 1.58, + "learning_rate": 4.736968931311998e-05, + "loss": 0.0732, + "step": 11244 + }, + { + "epoch": 1.58, + "learning_rate": 4.7369221411192217e-05, + "loss": 0.0917, + "step": 11246 + }, + { + "epoch": 1.58, + "learning_rate": 4.736875350926446e-05, + "loss": 0.0698, + "step": 11248 + }, + { + "epoch": 1.58, + "learning_rate": 4.73682856073367e-05, + "loss": 0.0857, + "step": 11250 + }, + { + "epoch": 1.58, + "learning_rate": 4.7367817705408954e-05, + "loss": 0.0953, + "step": 11252 + }, + { + "epoch": 1.58, + "learning_rate": 4.7367349803481193e-05, + "loss": 0.0474, + "step": 11254 + }, + { + "epoch": 1.58, + "learning_rate": 4.736688190155344e-05, + "loss": 0.0712, + "step": 11256 + }, + { + "epoch": 1.58, + "learning_rate": 4.736641399962568e-05, + "loss": 0.0865, + "step": 11258 + }, + { + "epoch": 1.58, + "learning_rate": 4.7365946097697924e-05, + "loss": 0.0683, + "step": 11260 + }, + { + "epoch": 1.58, + "learning_rate": 4.736547819577017e-05, + "loss": 0.0531, + "step": 11262 + }, + { + "epoch": 1.58, + "learning_rate": 4.7365010293842416e-05, + "loss": 0.078, + "step": 11264 + }, + { + "epoch": 1.58, + "learning_rate": 4.7364542391914655e-05, + "loss": 0.1029, + "step": 11266 + }, + { + "epoch": 1.58, + "learning_rate": 4.73640744899869e-05, + "loss": 0.0774, + "step": 11268 + }, + { + "epoch": 1.58, + "learning_rate": 4.736360658805915e-05, + "loss": 0.0738, + "step": 11270 + }, + { + "epoch": 1.58, + "learning_rate": 4.7363138686131386e-05, + "loss": 0.084, + "step": 11272 + }, + { + "epoch": 1.58, + "learning_rate": 4.736267078420363e-05, + "loss": 0.0607, + "step": 11274 + }, + { + "epoch": 1.58, + "learning_rate": 4.736220288227587e-05, + "loss": 0.0743, + "step": 11276 + }, + { + "epoch": 1.58, + "learning_rate": 4.7361734980348124e-05, + "loss": 0.0675, + "step": 11278 + }, + { + "epoch": 1.58, + "learning_rate": 4.736126707842036e-05, + "loss": 0.0873, + "step": 11280 + }, + { + "epoch": 1.58, + "learning_rate": 4.736079917649261e-05, + "loss": 0.0808, + "step": 11282 + }, + { + "epoch": 1.58, + "learning_rate": 4.736033127456485e-05, + "loss": 0.0797, + "step": 11284 + }, + { + "epoch": 1.58, + "learning_rate": 4.73598633726371e-05, + "loss": 0.0756, + "step": 11286 + }, + { + "epoch": 1.58, + "learning_rate": 4.735939547070934e-05, + "loss": 0.0828, + "step": 11288 + }, + { + "epoch": 1.58, + "learning_rate": 4.7358927568781586e-05, + "loss": 0.0776, + "step": 11290 + }, + { + "epoch": 1.59, + "learning_rate": 4.7358459666853825e-05, + "loss": 0.1002, + "step": 11292 + }, + { + "epoch": 1.59, + "learning_rate": 4.735799176492607e-05, + "loss": 0.0764, + "step": 11294 + }, + { + "epoch": 1.59, + "learning_rate": 4.735752386299832e-05, + "loss": 0.0853, + "step": 11296 + }, + { + "epoch": 1.59, + "learning_rate": 4.735705596107056e-05, + "loss": 0.0628, + "step": 11298 + }, + { + "epoch": 1.59, + "learning_rate": 4.73565880591428e-05, + "loss": 0.0834, + "step": 11300 + }, + { + "epoch": 1.59, + "learning_rate": 4.735612015721505e-05, + "loss": 0.0747, + "step": 11302 + }, + { + "epoch": 1.59, + "learning_rate": 4.7355652255287294e-05, + "loss": 0.0772, + "step": 11304 + }, + { + "epoch": 1.59, + "learning_rate": 4.735518435335954e-05, + "loss": 0.0777, + "step": 11306 + }, + { + "epoch": 1.59, + "learning_rate": 4.735471645143178e-05, + "loss": 0.0768, + "step": 11308 + }, + { + "epoch": 1.59, + "learning_rate": 4.7354248549504025e-05, + "loss": 0.0784, + "step": 11310 + }, + { + "epoch": 1.59, + "learning_rate": 4.735378064757627e-05, + "loss": 0.0723, + "step": 11312 + }, + { + "epoch": 1.59, + "learning_rate": 4.735331274564852e-05, + "loss": 0.0832, + "step": 11314 + }, + { + "epoch": 1.59, + "learning_rate": 4.7352844843720756e-05, + "loss": 0.086, + "step": 11316 + }, + { + "epoch": 1.59, + "learning_rate": 4.7352376941793e-05, + "loss": 0.087, + "step": 11318 + }, + { + "epoch": 1.59, + "learning_rate": 4.735190903986525e-05, + "loss": 0.0731, + "step": 11320 + }, + { + "epoch": 1.59, + "learning_rate": 4.7351441137937493e-05, + "loss": 0.0677, + "step": 11322 + }, + { + "epoch": 1.59, + "learning_rate": 4.735097323600973e-05, + "loss": 0.0751, + "step": 11324 + }, + { + "epoch": 1.59, + "learning_rate": 4.735050533408198e-05, + "loss": 0.071, + "step": 11326 + }, + { + "epoch": 1.59, + "learning_rate": 4.735003743215422e-05, + "loss": 0.0663, + "step": 11328 + }, + { + "epoch": 1.59, + "learning_rate": 4.734956953022647e-05, + "loss": 0.0683, + "step": 11330 + }, + { + "epoch": 1.59, + "learning_rate": 4.734910162829871e-05, + "loss": 0.0889, + "step": 11332 + }, + { + "epoch": 1.59, + "learning_rate": 4.7348633726370955e-05, + "loss": 0.0515, + "step": 11334 + }, + { + "epoch": 1.59, + "learning_rate": 4.7348165824443195e-05, + "loss": 0.0791, + "step": 11336 + }, + { + "epoch": 1.59, + "learning_rate": 4.734769792251544e-05, + "loss": 0.0804, + "step": 11338 + }, + { + "epoch": 1.59, + "learning_rate": 4.7347230020587686e-05, + "loss": 0.0884, + "step": 11340 + }, + { + "epoch": 1.59, + "learning_rate": 4.734676211865993e-05, + "loss": 0.0555, + "step": 11342 + }, + { + "epoch": 1.59, + "learning_rate": 4.734629421673217e-05, + "loss": 0.0682, + "step": 11344 + }, + { + "epoch": 1.59, + "learning_rate": 4.734582631480442e-05, + "loss": 0.0737, + "step": 11346 + }, + { + "epoch": 1.59, + "learning_rate": 4.734535841287666e-05, + "loss": 0.0816, + "step": 11348 + }, + { + "epoch": 1.59, + "learning_rate": 4.734489051094891e-05, + "loss": 0.1033, + "step": 11350 + }, + { + "epoch": 1.59, + "learning_rate": 4.734442260902115e-05, + "loss": 0.0915, + "step": 11352 + }, + { + "epoch": 1.59, + "learning_rate": 4.7343954707093394e-05, + "loss": 0.0765, + "step": 11354 + }, + { + "epoch": 1.59, + "learning_rate": 4.734348680516564e-05, + "loss": 0.0822, + "step": 11356 + }, + { + "epoch": 1.59, + "learning_rate": 4.7343018903237886e-05, + "loss": 0.0814, + "step": 11358 + }, + { + "epoch": 1.59, + "learning_rate": 4.7342551001310125e-05, + "loss": 0.0828, + "step": 11360 + }, + { + "epoch": 1.59, + "learning_rate": 4.734208309938237e-05, + "loss": 0.0481, + "step": 11362 + }, + { + "epoch": 1.6, + "learning_rate": 4.734161519745462e-05, + "loss": 0.0683, + "step": 11364 + }, + { + "epoch": 1.6, + "learning_rate": 4.734114729552686e-05, + "loss": 0.0599, + "step": 11366 + }, + { + "epoch": 1.6, + "learning_rate": 4.73406793935991e-05, + "loss": 0.0598, + "step": 11368 + }, + { + "epoch": 1.6, + "learning_rate": 4.734021149167135e-05, + "loss": 0.0633, + "step": 11370 + }, + { + "epoch": 1.6, + "learning_rate": 4.733974358974359e-05, + "loss": 0.0766, + "step": 11372 + }, + { + "epoch": 1.6, + "learning_rate": 4.733927568781584e-05, + "loss": 0.0828, + "step": 11374 + }, + { + "epoch": 1.6, + "learning_rate": 4.733880778588808e-05, + "loss": 0.0787, + "step": 11376 + }, + { + "epoch": 1.6, + "learning_rate": 4.7338339883960325e-05, + "loss": 0.074, + "step": 11378 + }, + { + "epoch": 1.6, + "learning_rate": 4.7337871982032564e-05, + "loss": 0.0945, + "step": 11380 + }, + { + "epoch": 1.6, + "learning_rate": 4.733740408010482e-05, + "loss": 0.0493, + "step": 11382 + }, + { + "epoch": 1.6, + "learning_rate": 4.7336936178177056e-05, + "loss": 0.0778, + "step": 11384 + }, + { + "epoch": 1.6, + "learning_rate": 4.73364682762493e-05, + "loss": 0.0715, + "step": 11386 + }, + { + "epoch": 1.6, + "learning_rate": 4.733600037432154e-05, + "loss": 0.0661, + "step": 11388 + }, + { + "epoch": 1.6, + "learning_rate": 4.733553247239379e-05, + "loss": 0.0892, + "step": 11390 + }, + { + "epoch": 1.6, + "learning_rate": 4.733506457046603e-05, + "loss": 0.0931, + "step": 11392 + }, + { + "epoch": 1.6, + "learning_rate": 4.733459666853828e-05, + "loss": 0.0753, + "step": 11394 + }, + { + "epoch": 1.6, + "learning_rate": 4.733412876661052e-05, + "loss": 0.0888, + "step": 11396 + }, + { + "epoch": 1.6, + "learning_rate": 4.7333660864682764e-05, + "loss": 0.0854, + "step": 11398 + }, + { + "epoch": 1.6, + "learning_rate": 4.733319296275501e-05, + "loss": 0.0715, + "step": 11400 + }, + { + "epoch": 1.6, + "learning_rate": 4.7332725060827255e-05, + "loss": 0.089, + "step": 11402 + }, + { + "epoch": 1.6, + "learning_rate": 4.7332257158899495e-05, + "loss": 0.0884, + "step": 11404 + }, + { + "epoch": 1.6, + "learning_rate": 4.733178925697174e-05, + "loss": 0.0781, + "step": 11406 + }, + { + "epoch": 1.6, + "learning_rate": 4.7331321355043986e-05, + "loss": 0.0712, + "step": 11408 + }, + { + "epoch": 1.6, + "learning_rate": 4.733085345311623e-05, + "loss": 0.064, + "step": 11410 + }, + { + "epoch": 1.6, + "learning_rate": 4.733038555118847e-05, + "loss": 0.0692, + "step": 11412 + }, + { + "epoch": 1.6, + "learning_rate": 4.732991764926072e-05, + "loss": 0.085, + "step": 11414 + }, + { + "epoch": 1.6, + "learning_rate": 4.732944974733296e-05, + "loss": 0.0743, + "step": 11416 + }, + { + "epoch": 1.6, + "learning_rate": 4.732898184540521e-05, + "loss": 0.0652, + "step": 11418 + }, + { + "epoch": 1.6, + "learning_rate": 4.732851394347745e-05, + "loss": 0.0898, + "step": 11420 + }, + { + "epoch": 1.6, + "learning_rate": 4.7328046041549694e-05, + "loss": 0.1076, + "step": 11422 + }, + { + "epoch": 1.6, + "learning_rate": 4.732757813962193e-05, + "loss": 0.073, + "step": 11424 + }, + { + "epoch": 1.6, + "learning_rate": 4.7327110237694186e-05, + "loss": 0.0987, + "step": 11426 + }, + { + "epoch": 1.6, + "learning_rate": 4.7326642335766425e-05, + "loss": 0.0615, + "step": 11428 + }, + { + "epoch": 1.6, + "learning_rate": 4.732617443383867e-05, + "loss": 0.0756, + "step": 11430 + }, + { + "epoch": 1.6, + "learning_rate": 4.732570653191091e-05, + "loss": 0.0616, + "step": 11432 + }, + { + "epoch": 1.6, + "learning_rate": 4.732523862998316e-05, + "loss": 0.0834, + "step": 11434 + }, + { + "epoch": 1.61, + "learning_rate": 4.73247707280554e-05, + "loss": 0.0648, + "step": 11436 + }, + { + "epoch": 1.61, + "learning_rate": 4.732430282612765e-05, + "loss": 0.079, + "step": 11438 + }, + { + "epoch": 1.61, + "learning_rate": 4.732383492419989e-05, + "loss": 0.0773, + "step": 11440 + }, + { + "epoch": 1.61, + "learning_rate": 4.732336702227213e-05, + "loss": 0.0984, + "step": 11442 + }, + { + "epoch": 1.61, + "learning_rate": 4.732289912034438e-05, + "loss": 0.0939, + "step": 11444 + }, + { + "epoch": 1.61, + "learning_rate": 4.7322431218416625e-05, + "loss": 0.0867, + "step": 11446 + }, + { + "epoch": 1.61, + "learning_rate": 4.7321963316488864e-05, + "loss": 0.0737, + "step": 11448 + }, + { + "epoch": 1.61, + "learning_rate": 4.732149541456111e-05, + "loss": 0.062, + "step": 11450 + }, + { + "epoch": 1.61, + "learning_rate": 4.7321027512633356e-05, + "loss": 0.0977, + "step": 11452 + }, + { + "epoch": 1.61, + "learning_rate": 4.73205596107056e-05, + "loss": 0.0768, + "step": 11454 + }, + { + "epoch": 1.61, + "learning_rate": 4.732009170877784e-05, + "loss": 0.0847, + "step": 11456 + }, + { + "epoch": 1.61, + "learning_rate": 4.731962380685009e-05, + "loss": 0.0885, + "step": 11458 + }, + { + "epoch": 1.61, + "learning_rate": 4.731915590492233e-05, + "loss": 0.1158, + "step": 11460 + }, + { + "epoch": 1.61, + "learning_rate": 4.731868800299458e-05, + "loss": 0.0986, + "step": 11462 + }, + { + "epoch": 1.61, + "learning_rate": 4.731822010106682e-05, + "loss": 0.088, + "step": 11464 + }, + { + "epoch": 1.61, + "learning_rate": 4.7317752199139064e-05, + "loss": 0.0747, + "step": 11466 + }, + { + "epoch": 1.61, + "learning_rate": 4.731728429721131e-05, + "loss": 0.0672, + "step": 11468 + }, + { + "epoch": 1.61, + "learning_rate": 4.7316816395283555e-05, + "loss": 0.0751, + "step": 11470 + }, + { + "epoch": 1.61, + "learning_rate": 4.7316348493355795e-05, + "loss": 0.0903, + "step": 11472 + }, + { + "epoch": 1.61, + "learning_rate": 4.731588059142804e-05, + "loss": 0.0736, + "step": 11474 + }, + { + "epoch": 1.61, + "learning_rate": 4.731541268950028e-05, + "loss": 0.0677, + "step": 11476 + }, + { + "epoch": 1.61, + "learning_rate": 4.731494478757253e-05, + "loss": 0.0791, + "step": 11478 + }, + { + "epoch": 1.61, + "learning_rate": 4.731447688564477e-05, + "loss": 0.0899, + "step": 11480 + }, + { + "epoch": 1.61, + "learning_rate": 4.731400898371702e-05, + "loss": 0.0824, + "step": 11482 + }, + { + "epoch": 1.61, + "learning_rate": 4.7313541081789256e-05, + "loss": 0.0991, + "step": 11484 + }, + { + "epoch": 1.61, + "learning_rate": 4.73130731798615e-05, + "loss": 0.0627, + "step": 11486 + }, + { + "epoch": 1.61, + "learning_rate": 4.731260527793375e-05, + "loss": 0.1129, + "step": 11488 + }, + { + "epoch": 1.61, + "learning_rate": 4.7312137376005994e-05, + "loss": 0.0945, + "step": 11490 + }, + { + "epoch": 1.61, + "learning_rate": 4.731166947407823e-05, + "loss": 0.0911, + "step": 11492 + }, + { + "epoch": 1.61, + "learning_rate": 4.731120157215048e-05, + "loss": 0.0818, + "step": 11494 + }, + { + "epoch": 1.61, + "learning_rate": 4.7310733670222725e-05, + "loss": 0.0887, + "step": 11496 + }, + { + "epoch": 1.61, + "learning_rate": 4.731026576829497e-05, + "loss": 0.0907, + "step": 11498 + }, + { + "epoch": 1.61, + "learning_rate": 4.730979786636721e-05, + "loss": 0.0902, + "step": 11500 + }, + { + "epoch": 1.61, + "learning_rate": 4.7309329964439456e-05, + "loss": 0.081, + "step": 11502 + }, + { + "epoch": 1.61, + "learning_rate": 4.73088620625117e-05, + "loss": 0.0923, + "step": 11504 + }, + { + "epoch": 1.62, + "learning_rate": 4.730839416058395e-05, + "loss": 0.0881, + "step": 11506 + }, + { + "epoch": 1.62, + "learning_rate": 4.730792625865619e-05, + "loss": 0.1113, + "step": 11508 + }, + { + "epoch": 1.62, + "learning_rate": 4.730745835672843e-05, + "loss": 0.0689, + "step": 11510 + }, + { + "epoch": 1.62, + "learning_rate": 4.730699045480068e-05, + "loss": 0.0657, + "step": 11512 + }, + { + "epoch": 1.62, + "learning_rate": 4.7306522552872925e-05, + "loss": 0.0717, + "step": 11514 + }, + { + "epoch": 1.62, + "learning_rate": 4.7306054650945164e-05, + "loss": 0.1176, + "step": 11516 + }, + { + "epoch": 1.62, + "learning_rate": 4.730558674901741e-05, + "loss": 0.0772, + "step": 11518 + }, + { + "epoch": 1.62, + "learning_rate": 4.730511884708965e-05, + "loss": 0.0606, + "step": 11520 + }, + { + "epoch": 1.62, + "learning_rate": 4.73046509451619e-05, + "loss": 0.0775, + "step": 11522 + }, + { + "epoch": 1.62, + "learning_rate": 4.730418304323414e-05, + "loss": 0.0849, + "step": 11524 + }, + { + "epoch": 1.62, + "learning_rate": 4.730371514130638e-05, + "loss": 0.0774, + "step": 11526 + }, + { + "epoch": 1.62, + "learning_rate": 4.7303247239378626e-05, + "loss": 0.0619, + "step": 11528 + }, + { + "epoch": 1.62, + "learning_rate": 4.730277933745087e-05, + "loss": 0.0864, + "step": 11530 + }, + { + "epoch": 1.62, + "learning_rate": 4.730231143552312e-05, + "loss": 0.0747, + "step": 11532 + }, + { + "epoch": 1.62, + "learning_rate": 4.730184353359536e-05, + "loss": 0.0905, + "step": 11534 + }, + { + "epoch": 1.62, + "learning_rate": 4.73013756316676e-05, + "loss": 0.0724, + "step": 11536 + }, + { + "epoch": 1.62, + "learning_rate": 4.730090772973985e-05, + "loss": 0.0693, + "step": 11538 + }, + { + "epoch": 1.62, + "learning_rate": 4.7300439827812095e-05, + "loss": 0.0869, + "step": 11540 + }, + { + "epoch": 1.62, + "learning_rate": 4.7299971925884334e-05, + "loss": 0.069, + "step": 11542 + }, + { + "epoch": 1.62, + "learning_rate": 4.729950402395658e-05, + "loss": 0.068, + "step": 11544 + }, + { + "epoch": 1.62, + "learning_rate": 4.7299036122028826e-05, + "loss": 0.0752, + "step": 11546 + }, + { + "epoch": 1.62, + "learning_rate": 4.729856822010107e-05, + "loss": 0.0795, + "step": 11548 + }, + { + "epoch": 1.62, + "learning_rate": 4.729810031817331e-05, + "loss": 0.071, + "step": 11550 + }, + { + "epoch": 1.62, + "learning_rate": 4.7297632416245557e-05, + "loss": 0.0766, + "step": 11552 + }, + { + "epoch": 1.62, + "learning_rate": 4.7297164514317796e-05, + "loss": 0.0614, + "step": 11554 + }, + { + "epoch": 1.62, + "learning_rate": 4.729669661239005e-05, + "loss": 0.0749, + "step": 11556 + }, + { + "epoch": 1.62, + "learning_rate": 4.729622871046229e-05, + "loss": 0.0758, + "step": 11558 + }, + { + "epoch": 1.62, + "learning_rate": 4.729576080853453e-05, + "loss": 0.0642, + "step": 11560 + }, + { + "epoch": 1.62, + "learning_rate": 4.729529290660677e-05, + "loss": 0.0569, + "step": 11562 + }, + { + "epoch": 1.62, + "learning_rate": 4.7294825004679025e-05, + "loss": 0.0919, + "step": 11564 + }, + { + "epoch": 1.62, + "learning_rate": 4.7294357102751264e-05, + "loss": 0.0871, + "step": 11566 + }, + { + "epoch": 1.62, + "learning_rate": 4.729388920082351e-05, + "loss": 0.0754, + "step": 11568 + }, + { + "epoch": 1.62, + "learning_rate": 4.729342129889575e-05, + "loss": 0.091, + "step": 11570 + }, + { + "epoch": 1.62, + "learning_rate": 4.7292953396967995e-05, + "loss": 0.0681, + "step": 11572 + }, + { + "epoch": 1.62, + "learning_rate": 4.729248549504024e-05, + "loss": 0.055, + "step": 11574 + }, + { + "epoch": 1.62, + "learning_rate": 4.729201759311249e-05, + "loss": 0.0793, + "step": 11576 + }, + { + "epoch": 1.63, + "learning_rate": 4.7291549691184726e-05, + "loss": 0.0748, + "step": 11578 + }, + { + "epoch": 1.63, + "learning_rate": 4.729108178925697e-05, + "loss": 0.0647, + "step": 11580 + }, + { + "epoch": 1.63, + "learning_rate": 4.729061388732922e-05, + "loss": 0.1023, + "step": 11582 + }, + { + "epoch": 1.63, + "learning_rate": 4.7290145985401464e-05, + "loss": 0.0651, + "step": 11584 + }, + { + "epoch": 1.63, + "learning_rate": 4.72896780834737e-05, + "loss": 0.0554, + "step": 11586 + }, + { + "epoch": 1.63, + "learning_rate": 4.728921018154595e-05, + "loss": 0.0867, + "step": 11588 + }, + { + "epoch": 1.63, + "learning_rate": 4.7288742279618195e-05, + "loss": 0.0707, + "step": 11590 + }, + { + "epoch": 1.63, + "learning_rate": 4.728827437769044e-05, + "loss": 0.0832, + "step": 11592 + }, + { + "epoch": 1.63, + "learning_rate": 4.728780647576268e-05, + "loss": 0.0965, + "step": 11594 + }, + { + "epoch": 1.63, + "learning_rate": 4.7287338573834926e-05, + "loss": 0.08, + "step": 11596 + }, + { + "epoch": 1.63, + "learning_rate": 4.728687067190717e-05, + "loss": 0.0704, + "step": 11598 + }, + { + "epoch": 1.63, + "learning_rate": 4.728640276997942e-05, + "loss": 0.0839, + "step": 11600 + }, + { + "epoch": 1.63, + "learning_rate": 4.728593486805166e-05, + "loss": 0.0665, + "step": 11602 + }, + { + "epoch": 1.63, + "learning_rate": 4.72854669661239e-05, + "loss": 0.0895, + "step": 11604 + }, + { + "epoch": 1.63, + "learning_rate": 4.728499906419614e-05, + "loss": 0.0749, + "step": 11606 + }, + { + "epoch": 1.63, + "learning_rate": 4.7284531162268395e-05, + "loss": 0.0692, + "step": 11608 + }, + { + "epoch": 1.63, + "learning_rate": 4.7284063260340634e-05, + "loss": 0.0591, + "step": 11610 + }, + { + "epoch": 1.63, + "learning_rate": 4.728359535841288e-05, + "loss": 0.0833, + "step": 11612 + }, + { + "epoch": 1.63, + "learning_rate": 4.728312745648512e-05, + "loss": 0.0784, + "step": 11614 + }, + { + "epoch": 1.63, + "learning_rate": 4.7282659554557365e-05, + "loss": 0.0692, + "step": 11616 + }, + { + "epoch": 1.63, + "learning_rate": 4.728219165262961e-05, + "loss": 0.0848, + "step": 11618 + }, + { + "epoch": 1.63, + "learning_rate": 4.7281723750701857e-05, + "loss": 0.0873, + "step": 11620 + }, + { + "epoch": 1.63, + "learning_rate": 4.7281255848774096e-05, + "loss": 0.0795, + "step": 11622 + }, + { + "epoch": 1.63, + "learning_rate": 4.728078794684634e-05, + "loss": 0.0778, + "step": 11624 + }, + { + "epoch": 1.63, + "learning_rate": 4.728032004491859e-05, + "loss": 0.078, + "step": 11626 + }, + { + "epoch": 1.63, + "learning_rate": 4.7279852142990833e-05, + "loss": 0.0792, + "step": 11628 + }, + { + "epoch": 1.63, + "learning_rate": 4.727938424106307e-05, + "loss": 0.098, + "step": 11630 + }, + { + "epoch": 1.63, + "learning_rate": 4.727891633913532e-05, + "loss": 0.0812, + "step": 11632 + }, + { + "epoch": 1.63, + "learning_rate": 4.7278448437207564e-05, + "loss": 0.0792, + "step": 11634 + }, + { + "epoch": 1.63, + "learning_rate": 4.727798053527981e-05, + "loss": 0.0957, + "step": 11636 + }, + { + "epoch": 1.63, + "learning_rate": 4.727751263335205e-05, + "loss": 0.0758, + "step": 11638 + }, + { + "epoch": 1.63, + "learning_rate": 4.7277044731424295e-05, + "loss": 0.0885, + "step": 11640 + }, + { + "epoch": 1.63, + "learning_rate": 4.727657682949654e-05, + "loss": 0.0937, + "step": 11642 + }, + { + "epoch": 1.63, + "learning_rate": 4.727610892756879e-05, + "loss": 0.0795, + "step": 11644 + }, + { + "epoch": 1.63, + "learning_rate": 4.7275641025641026e-05, + "loss": 0.0655, + "step": 11646 + }, + { + "epoch": 1.64, + "learning_rate": 4.727517312371327e-05, + "loss": 0.0676, + "step": 11648 + }, + { + "epoch": 1.64, + "learning_rate": 4.727470522178551e-05, + "loss": 0.064, + "step": 11650 + }, + { + "epoch": 1.64, + "learning_rate": 4.7274237319857764e-05, + "loss": 0.0745, + "step": 11652 + }, + { + "epoch": 1.64, + "learning_rate": 4.727376941793e-05, + "loss": 0.0913, + "step": 11654 + }, + { + "epoch": 1.64, + "learning_rate": 4.727330151600225e-05, + "loss": 0.0831, + "step": 11656 + }, + { + "epoch": 1.64, + "learning_rate": 4.727283361407449e-05, + "loss": 0.0773, + "step": 11658 + }, + { + "epoch": 1.64, + "learning_rate": 4.727236571214674e-05, + "loss": 0.0705, + "step": 11660 + }, + { + "epoch": 1.64, + "learning_rate": 4.727189781021898e-05, + "loss": 0.092, + "step": 11662 + }, + { + "epoch": 1.64, + "learning_rate": 4.7271429908291226e-05, + "loss": 0.0741, + "step": 11664 + }, + { + "epoch": 1.64, + "learning_rate": 4.7270962006363465e-05, + "loss": 0.0664, + "step": 11666 + }, + { + "epoch": 1.64, + "learning_rate": 4.727049410443571e-05, + "loss": 0.0743, + "step": 11668 + }, + { + "epoch": 1.64, + "learning_rate": 4.727002620250796e-05, + "loss": 0.0884, + "step": 11670 + }, + { + "epoch": 1.64, + "learning_rate": 4.72695583005802e-05, + "loss": 0.0837, + "step": 11672 + }, + { + "epoch": 1.64, + "learning_rate": 4.726909039865244e-05, + "loss": 0.0726, + "step": 11674 + }, + { + "epoch": 1.64, + "learning_rate": 4.726862249672469e-05, + "loss": 0.0882, + "step": 11676 + }, + { + "epoch": 1.64, + "learning_rate": 4.7268154594796934e-05, + "loss": 0.0856, + "step": 11678 + }, + { + "epoch": 1.64, + "learning_rate": 4.726768669286918e-05, + "loss": 0.0879, + "step": 11680 + }, + { + "epoch": 1.64, + "learning_rate": 4.726721879094142e-05, + "loss": 0.0768, + "step": 11682 + }, + { + "epoch": 1.64, + "learning_rate": 4.7266750889013665e-05, + "loss": 0.0636, + "step": 11684 + }, + { + "epoch": 1.64, + "learning_rate": 4.726628298708591e-05, + "loss": 0.0611, + "step": 11686 + }, + { + "epoch": 1.64, + "learning_rate": 4.7265815085158157e-05, + "loss": 0.0699, + "step": 11688 + }, + { + "epoch": 1.64, + "learning_rate": 4.7265347183230396e-05, + "loss": 0.0718, + "step": 11690 + }, + { + "epoch": 1.64, + "learning_rate": 4.726487928130264e-05, + "loss": 0.0774, + "step": 11692 + }, + { + "epoch": 1.64, + "learning_rate": 4.726441137937489e-05, + "loss": 0.0655, + "step": 11694 + }, + { + "epoch": 1.64, + "learning_rate": 4.7263943477447133e-05, + "loss": 0.0833, + "step": 11696 + }, + { + "epoch": 1.64, + "learning_rate": 4.726347557551937e-05, + "loss": 0.0958, + "step": 11698 + }, + { + "epoch": 1.64, + "learning_rate": 4.726300767359162e-05, + "loss": 0.0883, + "step": 11700 + }, + { + "epoch": 1.64, + "learning_rate": 4.726253977166386e-05, + "loss": 0.0853, + "step": 11702 + }, + { + "epoch": 1.64, + "learning_rate": 4.726207186973611e-05, + "loss": 0.0585, + "step": 11704 + }, + { + "epoch": 1.64, + "learning_rate": 4.726160396780835e-05, + "loss": 0.0812, + "step": 11706 + }, + { + "epoch": 1.64, + "learning_rate": 4.7261136065880595e-05, + "loss": 0.082, + "step": 11708 + }, + { + "epoch": 1.64, + "learning_rate": 4.7260668163952834e-05, + "loss": 0.0982, + "step": 11710 + }, + { + "epoch": 1.64, + "learning_rate": 4.726020026202509e-05, + "loss": 0.0671, + "step": 11712 + }, + { + "epoch": 1.64, + "learning_rate": 4.7259732360097326e-05, + "loss": 0.0791, + "step": 11714 + }, + { + "epoch": 1.64, + "learning_rate": 4.725926445816957e-05, + "loss": 0.1029, + "step": 11716 + }, + { + "epoch": 1.64, + "learning_rate": 4.725879655624181e-05, + "loss": 0.0689, + "step": 11718 + }, + { + "epoch": 1.65, + "learning_rate": 4.725832865431406e-05, + "loss": 0.0912, + "step": 11720 + }, + { + "epoch": 1.65, + "learning_rate": 4.72578607523863e-05, + "loss": 0.072, + "step": 11722 + }, + { + "epoch": 1.65, + "learning_rate": 4.725739285045855e-05, + "loss": 0.0864, + "step": 11724 + }, + { + "epoch": 1.65, + "learning_rate": 4.725692494853079e-05, + "loss": 0.0821, + "step": 11726 + }, + { + "epoch": 1.65, + "learning_rate": 4.7256457046603034e-05, + "loss": 0.0626, + "step": 11728 + }, + { + "epoch": 1.65, + "learning_rate": 4.725598914467528e-05, + "loss": 0.099, + "step": 11730 + }, + { + "epoch": 1.65, + "learning_rate": 4.7255521242747526e-05, + "loss": 0.0964, + "step": 11732 + }, + { + "epoch": 1.65, + "learning_rate": 4.7255053340819765e-05, + "loss": 0.0935, + "step": 11734 + }, + { + "epoch": 1.65, + "learning_rate": 4.725458543889201e-05, + "loss": 0.0822, + "step": 11736 + }, + { + "epoch": 1.65, + "learning_rate": 4.725411753696426e-05, + "loss": 0.0706, + "step": 11738 + }, + { + "epoch": 1.65, + "learning_rate": 4.72536496350365e-05, + "loss": 0.065, + "step": 11740 + }, + { + "epoch": 1.65, + "learning_rate": 4.725318173310874e-05, + "loss": 0.1036, + "step": 11742 + }, + { + "epoch": 1.65, + "learning_rate": 4.725271383118099e-05, + "loss": 0.0634, + "step": 11744 + }, + { + "epoch": 1.65, + "learning_rate": 4.7252245929253234e-05, + "loss": 0.0652, + "step": 11746 + }, + { + "epoch": 1.65, + "learning_rate": 4.725177802732548e-05, + "loss": 0.0994, + "step": 11748 + }, + { + "epoch": 1.65, + "learning_rate": 4.725131012539772e-05, + "loss": 0.0934, + "step": 11750 + }, + { + "epoch": 1.65, + "learning_rate": 4.7250842223469965e-05, + "loss": 0.0844, + "step": 11752 + }, + { + "epoch": 1.65, + "learning_rate": 4.7250374321542204e-05, + "loss": 0.0772, + "step": 11754 + }, + { + "epoch": 1.65, + "learning_rate": 4.7249906419614457e-05, + "loss": 0.1009, + "step": 11756 + }, + { + "epoch": 1.65, + "learning_rate": 4.7249438517686696e-05, + "loss": 0.086, + "step": 11758 + }, + { + "epoch": 1.65, + "learning_rate": 4.724897061575894e-05, + "loss": 0.0648, + "step": 11760 + }, + { + "epoch": 1.65, + "learning_rate": 4.724850271383118e-05, + "loss": 0.0779, + "step": 11762 + }, + { + "epoch": 1.65, + "learning_rate": 4.724803481190343e-05, + "loss": 0.0707, + "step": 11764 + }, + { + "epoch": 1.65, + "learning_rate": 4.724756690997567e-05, + "loss": 0.076, + "step": 11766 + }, + { + "epoch": 1.65, + "learning_rate": 4.724709900804792e-05, + "loss": 0.0667, + "step": 11768 + }, + { + "epoch": 1.65, + "learning_rate": 4.724663110612016e-05, + "loss": 0.0736, + "step": 11770 + }, + { + "epoch": 1.65, + "learning_rate": 4.7246163204192404e-05, + "loss": 0.0739, + "step": 11772 + }, + { + "epoch": 1.65, + "learning_rate": 4.724569530226465e-05, + "loss": 0.0869, + "step": 11774 + }, + { + "epoch": 1.65, + "learning_rate": 4.724522740033689e-05, + "loss": 0.0745, + "step": 11776 + }, + { + "epoch": 1.65, + "learning_rate": 4.7244759498409134e-05, + "loss": 0.0821, + "step": 11778 + }, + { + "epoch": 1.65, + "learning_rate": 4.724429159648138e-05, + "loss": 0.0757, + "step": 11780 + }, + { + "epoch": 1.65, + "learning_rate": 4.7243823694553626e-05, + "loss": 0.0797, + "step": 11782 + }, + { + "epoch": 1.65, + "learning_rate": 4.7243355792625865e-05, + "loss": 0.0739, + "step": 11784 + }, + { + "epoch": 1.65, + "learning_rate": 4.724288789069811e-05, + "loss": 0.0636, + "step": 11786 + }, + { + "epoch": 1.65, + "learning_rate": 4.724241998877035e-05, + "loss": 0.0643, + "step": 11788 + }, + { + "epoch": 1.65, + "learning_rate": 4.72419520868426e-05, + "loss": 0.1214, + "step": 11790 + }, + { + "epoch": 1.66, + "learning_rate": 4.724148418491484e-05, + "loss": 0.0793, + "step": 11792 + }, + { + "epoch": 1.66, + "learning_rate": 4.724101628298709e-05, + "loss": 0.0764, + "step": 11794 + }, + { + "epoch": 1.66, + "learning_rate": 4.724054838105933e-05, + "loss": 0.0554, + "step": 11796 + }, + { + "epoch": 1.66, + "learning_rate": 4.724008047913157e-05, + "loss": 0.0658, + "step": 11798 + }, + { + "epoch": 1.66, + "learning_rate": 4.723961257720382e-05, + "loss": 0.0616, + "step": 11800 + }, + { + "epoch": 1.66, + "learning_rate": 4.7239144675276065e-05, + "loss": 0.0725, + "step": 11802 + }, + { + "epoch": 1.66, + "learning_rate": 4.7238676773348304e-05, + "loss": 0.0815, + "step": 11804 + }, + { + "epoch": 1.66, + "learning_rate": 4.723820887142055e-05, + "loss": 0.0898, + "step": 11806 + }, + { + "epoch": 1.66, + "learning_rate": 4.7237740969492796e-05, + "loss": 0.0748, + "step": 11808 + }, + { + "epoch": 1.66, + "learning_rate": 4.723727306756504e-05, + "loss": 0.0658, + "step": 11810 + }, + { + "epoch": 1.66, + "learning_rate": 4.723680516563728e-05, + "loss": 0.0806, + "step": 11812 + }, + { + "epoch": 1.66, + "learning_rate": 4.723633726370953e-05, + "loss": 0.0626, + "step": 11814 + }, + { + "epoch": 1.66, + "learning_rate": 4.723586936178177e-05, + "loss": 0.0744, + "step": 11816 + }, + { + "epoch": 1.66, + "learning_rate": 4.723540145985402e-05, + "loss": 0.0747, + "step": 11818 + }, + { + "epoch": 1.66, + "learning_rate": 4.723493355792626e-05, + "loss": 0.0838, + "step": 11820 + }, + { + "epoch": 1.66, + "learning_rate": 4.7234465655998504e-05, + "loss": 0.0588, + "step": 11822 + }, + { + "epoch": 1.66, + "learning_rate": 4.723399775407075e-05, + "loss": 0.0908, + "step": 11824 + }, + { + "epoch": 1.66, + "learning_rate": 4.7233529852142996e-05, + "loss": 0.1086, + "step": 11826 + }, + { + "epoch": 1.66, + "learning_rate": 4.7233061950215235e-05, + "loss": 0.0839, + "step": 11828 + }, + { + "epoch": 1.66, + "learning_rate": 4.723259404828748e-05, + "loss": 0.0733, + "step": 11830 + }, + { + "epoch": 1.66, + "learning_rate": 4.723212614635972e-05, + "loss": 0.0764, + "step": 11832 + }, + { + "epoch": 1.66, + "learning_rate": 4.723165824443197e-05, + "loss": 0.085, + "step": 11834 + }, + { + "epoch": 1.66, + "learning_rate": 4.723119034250421e-05, + "loss": 0.1012, + "step": 11836 + }, + { + "epoch": 1.66, + "learning_rate": 4.723072244057646e-05, + "loss": 0.0772, + "step": 11838 + }, + { + "epoch": 1.66, + "learning_rate": 4.72302545386487e-05, + "loss": 0.0957, + "step": 11840 + }, + { + "epoch": 1.66, + "learning_rate": 4.722978663672095e-05, + "loss": 0.0894, + "step": 11842 + }, + { + "epoch": 1.66, + "learning_rate": 4.722931873479319e-05, + "loss": 0.0849, + "step": 11844 + }, + { + "epoch": 1.66, + "learning_rate": 4.7228850832865435e-05, + "loss": 0.0983, + "step": 11846 + }, + { + "epoch": 1.66, + "learning_rate": 4.7228382930937674e-05, + "loss": 0.0646, + "step": 11848 + }, + { + "epoch": 1.66, + "learning_rate": 4.722791502900992e-05, + "loss": 0.0889, + "step": 11850 + }, + { + "epoch": 1.66, + "learning_rate": 4.7227447127082165e-05, + "loss": 0.0802, + "step": 11852 + }, + { + "epoch": 1.66, + "learning_rate": 4.722697922515441e-05, + "loss": 0.0982, + "step": 11854 + }, + { + "epoch": 1.66, + "learning_rate": 4.722651132322665e-05, + "loss": 0.0758, + "step": 11856 + }, + { + "epoch": 1.66, + "learning_rate": 4.7226043421298896e-05, + "loss": 0.0803, + "step": 11858 + }, + { + "epoch": 1.66, + "learning_rate": 4.722557551937114e-05, + "loss": 0.0862, + "step": 11860 + }, + { + "epoch": 1.67, + "learning_rate": 4.722510761744339e-05, + "loss": 0.0828, + "step": 11862 + }, + { + "epoch": 1.67, + "learning_rate": 4.722463971551563e-05, + "loss": 0.0986, + "step": 11864 + }, + { + "epoch": 1.67, + "learning_rate": 4.722417181358787e-05, + "loss": 0.0898, + "step": 11866 + }, + { + "epoch": 1.67, + "learning_rate": 4.722370391166012e-05, + "loss": 0.0875, + "step": 11868 + }, + { + "epoch": 1.67, + "learning_rate": 4.7223236009732365e-05, + "loss": 0.0923, + "step": 11870 + }, + { + "epoch": 1.67, + "learning_rate": 4.7222768107804604e-05, + "loss": 0.0614, + "step": 11872 + }, + { + "epoch": 1.67, + "learning_rate": 4.722230020587685e-05, + "loss": 0.0825, + "step": 11874 + }, + { + "epoch": 1.67, + "learning_rate": 4.7221832303949096e-05, + "loss": 0.0857, + "step": 11876 + }, + { + "epoch": 1.67, + "learning_rate": 4.722136440202134e-05, + "loss": 0.0788, + "step": 11878 + }, + { + "epoch": 1.67, + "learning_rate": 4.722089650009358e-05, + "loss": 0.0658, + "step": 11880 + }, + { + "epoch": 1.67, + "learning_rate": 4.722042859816583e-05, + "loss": 0.0642, + "step": 11882 + }, + { + "epoch": 1.67, + "learning_rate": 4.7219960696238066e-05, + "loss": 0.0727, + "step": 11884 + }, + { + "epoch": 1.67, + "learning_rate": 4.721949279431032e-05, + "loss": 0.071, + "step": 11886 + }, + { + "epoch": 1.67, + "learning_rate": 4.721902489238256e-05, + "loss": 0.1086, + "step": 11888 + }, + { + "epoch": 1.67, + "learning_rate": 4.7218556990454804e-05, + "loss": 0.0937, + "step": 11890 + }, + { + "epoch": 1.67, + "learning_rate": 4.721808908852704e-05, + "loss": 0.0744, + "step": 11892 + }, + { + "epoch": 1.67, + "learning_rate": 4.7217621186599296e-05, + "loss": 0.0981, + "step": 11894 + }, + { + "epoch": 1.67, + "learning_rate": 4.7217153284671535e-05, + "loss": 0.0722, + "step": 11896 + }, + { + "epoch": 1.67, + "learning_rate": 4.721668538274378e-05, + "loss": 0.091, + "step": 11898 + }, + { + "epoch": 1.67, + "learning_rate": 4.721621748081602e-05, + "loss": 0.0834, + "step": 11900 + }, + { + "epoch": 1.67, + "learning_rate": 4.7215749578888266e-05, + "loss": 0.0942, + "step": 11902 + }, + { + "epoch": 1.67, + "learning_rate": 4.721528167696051e-05, + "loss": 0.0841, + "step": 11904 + }, + { + "epoch": 1.67, + "learning_rate": 4.721481377503276e-05, + "loss": 0.0716, + "step": 11906 + }, + { + "epoch": 1.67, + "learning_rate": 4.7214345873105e-05, + "loss": 0.0618, + "step": 11908 + }, + { + "epoch": 1.67, + "learning_rate": 4.721387797117724e-05, + "loss": 0.0806, + "step": 11910 + }, + { + "epoch": 1.67, + "learning_rate": 4.721341006924949e-05, + "loss": 0.0829, + "step": 11912 + }, + { + "epoch": 1.67, + "learning_rate": 4.7212942167321735e-05, + "loss": 0.0685, + "step": 11914 + }, + { + "epoch": 1.67, + "learning_rate": 4.7212474265393974e-05, + "loss": 0.067, + "step": 11916 + }, + { + "epoch": 1.67, + "learning_rate": 4.721200636346622e-05, + "loss": 0.0713, + "step": 11918 + }, + { + "epoch": 1.67, + "learning_rate": 4.7211538461538465e-05, + "loss": 0.0798, + "step": 11920 + }, + { + "epoch": 1.67, + "learning_rate": 4.721107055961071e-05, + "loss": 0.0572, + "step": 11922 + }, + { + "epoch": 1.67, + "learning_rate": 4.721060265768295e-05, + "loss": 0.0808, + "step": 11924 + }, + { + "epoch": 1.67, + "learning_rate": 4.7210134755755196e-05, + "loss": 0.076, + "step": 11926 + }, + { + "epoch": 1.67, + "learning_rate": 4.7209666853827436e-05, + "loss": 0.0811, + "step": 11928 + }, + { + "epoch": 1.67, + "learning_rate": 4.720919895189969e-05, + "loss": 0.0944, + "step": 11930 + }, + { + "epoch": 1.67, + "learning_rate": 4.720873104997193e-05, + "loss": 0.066, + "step": 11932 + }, + { + "epoch": 1.68, + "learning_rate": 4.720826314804417e-05, + "loss": 0.0877, + "step": 11934 + }, + { + "epoch": 1.68, + "learning_rate": 4.720779524611641e-05, + "loss": 0.0935, + "step": 11936 + }, + { + "epoch": 1.68, + "learning_rate": 4.7207327344188665e-05, + "loss": 0.078, + "step": 11938 + }, + { + "epoch": 1.68, + "learning_rate": 4.7206859442260904e-05, + "loss": 0.081, + "step": 11940 + }, + { + "epoch": 1.68, + "learning_rate": 4.720639154033315e-05, + "loss": 0.0885, + "step": 11942 + }, + { + "epoch": 1.68, + "learning_rate": 4.720592363840539e-05, + "loss": 0.0958, + "step": 11944 + }, + { + "epoch": 1.68, + "learning_rate": 4.7205455736477635e-05, + "loss": 0.0868, + "step": 11946 + }, + { + "epoch": 1.68, + "learning_rate": 4.720498783454988e-05, + "loss": 0.0872, + "step": 11948 + }, + { + "epoch": 1.68, + "learning_rate": 4.720451993262213e-05, + "loss": 0.0728, + "step": 11950 + }, + { + "epoch": 1.68, + "learning_rate": 4.7204052030694366e-05, + "loss": 0.0711, + "step": 11952 + }, + { + "epoch": 1.68, + "learning_rate": 4.720358412876661e-05, + "loss": 0.0688, + "step": 11954 + }, + { + "epoch": 1.68, + "learning_rate": 4.720311622683886e-05, + "loss": 0.0639, + "step": 11956 + }, + { + "epoch": 1.68, + "learning_rate": 4.7202648324911104e-05, + "loss": 0.0766, + "step": 11958 + }, + { + "epoch": 1.68, + "learning_rate": 4.720218042298334e-05, + "loss": 0.0897, + "step": 11960 + }, + { + "epoch": 1.68, + "learning_rate": 4.720171252105559e-05, + "loss": 0.1089, + "step": 11962 + }, + { + "epoch": 1.68, + "learning_rate": 4.7201244619127835e-05, + "loss": 0.0947, + "step": 11964 + }, + { + "epoch": 1.68, + "learning_rate": 4.720077671720008e-05, + "loss": 0.0831, + "step": 11966 + }, + { + "epoch": 1.68, + "learning_rate": 4.720030881527232e-05, + "loss": 0.0735, + "step": 11968 + }, + { + "epoch": 1.68, + "learning_rate": 4.7199840913344566e-05, + "loss": 0.1122, + "step": 11970 + }, + { + "epoch": 1.68, + "learning_rate": 4.719937301141681e-05, + "loss": 0.0753, + "step": 11972 + }, + { + "epoch": 1.68, + "learning_rate": 4.719890510948906e-05, + "loss": 0.0854, + "step": 11974 + }, + { + "epoch": 1.68, + "learning_rate": 4.71984372075613e-05, + "loss": 0.0752, + "step": 11976 + }, + { + "epoch": 1.68, + "learning_rate": 4.719796930563354e-05, + "loss": 0.0778, + "step": 11978 + }, + { + "epoch": 1.68, + "learning_rate": 4.719750140370578e-05, + "loss": 0.0834, + "step": 11980 + }, + { + "epoch": 1.68, + "learning_rate": 4.7197033501778035e-05, + "loss": 0.0861, + "step": 11982 + }, + { + "epoch": 1.68, + "learning_rate": 4.7196565599850274e-05, + "loss": 0.0943, + "step": 11984 + }, + { + "epoch": 1.68, + "learning_rate": 4.719609769792252e-05, + "loss": 0.0579, + "step": 11986 + }, + { + "epoch": 1.68, + "learning_rate": 4.719562979599476e-05, + "loss": 0.0936, + "step": 11988 + }, + { + "epoch": 1.68, + "learning_rate": 4.719516189406701e-05, + "loss": 0.0587, + "step": 11990 + }, + { + "epoch": 1.68, + "learning_rate": 4.719469399213925e-05, + "loss": 0.085, + "step": 11992 + }, + { + "epoch": 1.68, + "learning_rate": 4.7194226090211496e-05, + "loss": 0.0986, + "step": 11994 + }, + { + "epoch": 1.68, + "learning_rate": 4.7193758188283736e-05, + "loss": 0.0844, + "step": 11996 + }, + { + "epoch": 1.68, + "learning_rate": 4.719329028635598e-05, + "loss": 0.0631, + "step": 11998 + }, + { + "epoch": 1.68, + "learning_rate": 4.719282238442823e-05, + "loss": 0.0657, + "step": 12000 + }, + { + "epoch": 1.68, + "eval_gen_len": 28.4483, + "eval_loss": 1.0475629568099976, + "eval_meteor": 0.0375, + "eval_runtime": 14.051, + "eval_samples_per_second": 4.128, + "eval_steps_per_second": 0.569, + "step": 12000 + }, + { + "epoch": 1.68, + "learning_rate": 4.719235448250047e-05, + "loss": 0.0606, + "step": 12002 + }, + { + "epoch": 1.69, + "learning_rate": 4.719188658057271e-05, + "loss": 0.0709, + "step": 12004 + }, + { + "epoch": 1.69, + "learning_rate": 4.719141867864496e-05, + "loss": 0.071, + "step": 12006 + }, + { + "epoch": 1.69, + "learning_rate": 4.7190950776717204e-05, + "loss": 0.0771, + "step": 12008 + }, + { + "epoch": 1.69, + "learning_rate": 4.719048287478945e-05, + "loss": 0.0841, + "step": 12010 + }, + { + "epoch": 1.69, + "learning_rate": 4.719001497286169e-05, + "loss": 0.0795, + "step": 12012 + }, + { + "epoch": 1.69, + "learning_rate": 4.7189547070933935e-05, + "loss": 0.0765, + "step": 12014 + }, + { + "epoch": 1.69, + "learning_rate": 4.718907916900618e-05, + "loss": 0.0778, + "step": 12016 + }, + { + "epoch": 1.69, + "learning_rate": 4.718861126707843e-05, + "loss": 0.0827, + "step": 12018 + }, + { + "epoch": 1.69, + "learning_rate": 4.7188143365150666e-05, + "loss": 0.0841, + "step": 12020 + }, + { + "epoch": 1.69, + "learning_rate": 4.718767546322291e-05, + "loss": 0.0687, + "step": 12022 + }, + { + "epoch": 1.69, + "learning_rate": 4.718720756129516e-05, + "loss": 0.0862, + "step": 12024 + }, + { + "epoch": 1.69, + "learning_rate": 4.7186739659367404e-05, + "loss": 0.0736, + "step": 12026 + }, + { + "epoch": 1.69, + "learning_rate": 4.718627175743964e-05, + "loss": 0.0894, + "step": 12028 + }, + { + "epoch": 1.69, + "learning_rate": 4.718580385551188e-05, + "loss": 0.0997, + "step": 12030 + }, + { + "epoch": 1.69, + "learning_rate": 4.718533595358413e-05, + "loss": 0.0545, + "step": 12032 + }, + { + "epoch": 1.69, + "learning_rate": 4.7184868051656374e-05, + "loss": 0.0907, + "step": 12034 + }, + { + "epoch": 1.69, + "learning_rate": 4.718440014972862e-05, + "loss": 0.098, + "step": 12036 + }, + { + "epoch": 1.69, + "learning_rate": 4.718393224780086e-05, + "loss": 0.0891, + "step": 12038 + }, + { + "epoch": 1.69, + "learning_rate": 4.7183464345873105e-05, + "loss": 0.0955, + "step": 12040 + }, + { + "epoch": 1.69, + "learning_rate": 4.718299644394535e-05, + "loss": 0.0897, + "step": 12042 + }, + { + "epoch": 1.69, + "learning_rate": 4.71825285420176e-05, + "loss": 0.0927, + "step": 12044 + }, + { + "epoch": 1.69, + "learning_rate": 4.7182060640089836e-05, + "loss": 0.0754, + "step": 12046 + }, + { + "epoch": 1.69, + "learning_rate": 4.718159273816208e-05, + "loss": 0.0868, + "step": 12048 + }, + { + "epoch": 1.69, + "learning_rate": 4.718112483623433e-05, + "loss": 0.0834, + "step": 12050 + }, + { + "epoch": 1.69, + "learning_rate": 4.7180656934306574e-05, + "loss": 0.0663, + "step": 12052 + }, + { + "epoch": 1.69, + "learning_rate": 4.718018903237881e-05, + "loss": 0.0725, + "step": 12054 + }, + { + "epoch": 1.69, + "learning_rate": 4.717972113045106e-05, + "loss": 0.0634, + "step": 12056 + }, + { + "epoch": 1.69, + "learning_rate": 4.7179253228523305e-05, + "loss": 0.0943, + "step": 12058 + }, + { + "epoch": 1.69, + "learning_rate": 4.717878532659555e-05, + "loss": 0.0955, + "step": 12060 + }, + { + "epoch": 1.69, + "learning_rate": 4.717831742466779e-05, + "loss": 0.0678, + "step": 12062 + }, + { + "epoch": 1.69, + "learning_rate": 4.7177849522740036e-05, + "loss": 0.0826, + "step": 12064 + }, + { + "epoch": 1.69, + "learning_rate": 4.7177381620812275e-05, + "loss": 0.0751, + "step": 12066 + }, + { + "epoch": 1.69, + "learning_rate": 4.717691371888453e-05, + "loss": 0.0981, + "step": 12068 + }, + { + "epoch": 1.69, + "learning_rate": 4.7176445816956767e-05, + "loss": 0.0855, + "step": 12070 + }, + { + "epoch": 1.69, + "learning_rate": 4.717597791502901e-05, + "loss": 0.0867, + "step": 12072 + }, + { + "epoch": 1.69, + "learning_rate": 4.717551001310125e-05, + "loss": 0.094, + "step": 12074 + }, + { + "epoch": 1.7, + "learning_rate": 4.71750421111735e-05, + "loss": 0.0927, + "step": 12076 + }, + { + "epoch": 1.7, + "learning_rate": 4.7174574209245743e-05, + "loss": 0.086, + "step": 12078 + }, + { + "epoch": 1.7, + "learning_rate": 4.717410630731799e-05, + "loss": 0.0841, + "step": 12080 + }, + { + "epoch": 1.7, + "learning_rate": 4.717363840539023e-05, + "loss": 0.0949, + "step": 12082 + }, + { + "epoch": 1.7, + "learning_rate": 4.7173170503462474e-05, + "loss": 0.0747, + "step": 12084 + }, + { + "epoch": 1.7, + "learning_rate": 4.717270260153472e-05, + "loss": 0.0669, + "step": 12086 + }, + { + "epoch": 1.7, + "learning_rate": 4.7172234699606966e-05, + "loss": 0.0851, + "step": 12088 + }, + { + "epoch": 1.7, + "learning_rate": 4.7171766797679205e-05, + "loss": 0.072, + "step": 12090 + }, + { + "epoch": 1.7, + "learning_rate": 4.717129889575145e-05, + "loss": 0.0783, + "step": 12092 + }, + { + "epoch": 1.7, + "learning_rate": 4.71708309938237e-05, + "loss": 0.0915, + "step": 12094 + }, + { + "epoch": 1.7, + "learning_rate": 4.717036309189594e-05, + "loss": 0.0717, + "step": 12096 + }, + { + "epoch": 1.7, + "learning_rate": 4.716989518996818e-05, + "loss": 0.1175, + "step": 12098 + }, + { + "epoch": 1.7, + "learning_rate": 4.716942728804043e-05, + "loss": 0.101, + "step": 12100 + }, + { + "epoch": 1.7, + "learning_rate": 4.7168959386112674e-05, + "loss": 0.0834, + "step": 12102 + }, + { + "epoch": 1.7, + "learning_rate": 4.716849148418492e-05, + "loss": 0.0789, + "step": 12104 + }, + { + "epoch": 1.7, + "learning_rate": 4.716802358225716e-05, + "loss": 0.0702, + "step": 12106 + }, + { + "epoch": 1.7, + "learning_rate": 4.7167555680329405e-05, + "loss": 0.1083, + "step": 12108 + }, + { + "epoch": 1.7, + "learning_rate": 4.7167087778401644e-05, + "loss": 0.086, + "step": 12110 + }, + { + "epoch": 1.7, + "learning_rate": 4.71666198764739e-05, + "loss": 0.0903, + "step": 12112 + }, + { + "epoch": 1.7, + "learning_rate": 4.7166151974546136e-05, + "loss": 0.0627, + "step": 12114 + }, + { + "epoch": 1.7, + "learning_rate": 4.716568407261838e-05, + "loss": 0.0804, + "step": 12116 + }, + { + "epoch": 1.7, + "learning_rate": 4.716521617069062e-05, + "loss": 0.0791, + "step": 12118 + }, + { + "epoch": 1.7, + "learning_rate": 4.7164748268762874e-05, + "loss": 0.0845, + "step": 12120 + }, + { + "epoch": 1.7, + "learning_rate": 4.716428036683511e-05, + "loss": 0.0908, + "step": 12122 + }, + { + "epoch": 1.7, + "learning_rate": 4.716381246490736e-05, + "loss": 0.0899, + "step": 12124 + }, + { + "epoch": 1.7, + "learning_rate": 4.71633445629796e-05, + "loss": 0.0888, + "step": 12126 + }, + { + "epoch": 1.7, + "learning_rate": 4.7162876661051844e-05, + "loss": 0.0692, + "step": 12128 + }, + { + "epoch": 1.7, + "learning_rate": 4.716240875912409e-05, + "loss": 0.0769, + "step": 12130 + }, + { + "epoch": 1.7, + "learning_rate": 4.7161940857196336e-05, + "loss": 0.1018, + "step": 12132 + }, + { + "epoch": 1.7, + "learning_rate": 4.7161472955268575e-05, + "loss": 0.0935, + "step": 12134 + }, + { + "epoch": 1.7, + "learning_rate": 4.716100505334082e-05, + "loss": 0.0946, + "step": 12136 + }, + { + "epoch": 1.7, + "learning_rate": 4.7160537151413067e-05, + "loss": 0.0889, + "step": 12138 + }, + { + "epoch": 1.7, + "learning_rate": 4.716006924948531e-05, + "loss": 0.107, + "step": 12140 + }, + { + "epoch": 1.7, + "learning_rate": 4.715960134755755e-05, + "loss": 0.0577, + "step": 12142 + }, + { + "epoch": 1.7, + "learning_rate": 4.71591334456298e-05, + "loss": 0.0901, + "step": 12144 + }, + { + "epoch": 1.7, + "learning_rate": 4.7158665543702043e-05, + "loss": 0.0711, + "step": 12146 + }, + { + "epoch": 1.71, + "learning_rate": 4.715819764177429e-05, + "loss": 0.0928, + "step": 12148 + }, + { + "epoch": 1.71, + "learning_rate": 4.715772973984653e-05, + "loss": 0.0766, + "step": 12150 + }, + { + "epoch": 1.71, + "learning_rate": 4.7157261837918774e-05, + "loss": 0.0681, + "step": 12152 + }, + { + "epoch": 1.71, + "learning_rate": 4.715679393599102e-05, + "loss": 0.0737, + "step": 12154 + }, + { + "epoch": 1.71, + "learning_rate": 4.7156326034063266e-05, + "loss": 0.095, + "step": 12156 + }, + { + "epoch": 1.71, + "learning_rate": 4.7155858132135505e-05, + "loss": 0.0668, + "step": 12158 + }, + { + "epoch": 1.71, + "learning_rate": 4.715539023020775e-05, + "loss": 0.0609, + "step": 12160 + }, + { + "epoch": 1.71, + "learning_rate": 4.715492232827999e-05, + "loss": 0.0812, + "step": 12162 + }, + { + "epoch": 1.71, + "learning_rate": 4.715445442635224e-05, + "loss": 0.0752, + "step": 12164 + }, + { + "epoch": 1.71, + "learning_rate": 4.715398652442448e-05, + "loss": 0.0787, + "step": 12166 + }, + { + "epoch": 1.71, + "learning_rate": 4.715351862249673e-05, + "loss": 0.0929, + "step": 12168 + }, + { + "epoch": 1.71, + "learning_rate": 4.715305072056897e-05, + "loss": 0.0748, + "step": 12170 + }, + { + "epoch": 1.71, + "learning_rate": 4.715258281864122e-05, + "loss": 0.0749, + "step": 12172 + }, + { + "epoch": 1.71, + "learning_rate": 4.715211491671346e-05, + "loss": 0.0839, + "step": 12174 + }, + { + "epoch": 1.71, + "learning_rate": 4.7151647014785705e-05, + "loss": 0.0936, + "step": 12176 + }, + { + "epoch": 1.71, + "learning_rate": 4.7151179112857944e-05, + "loss": 0.0704, + "step": 12178 + }, + { + "epoch": 1.71, + "learning_rate": 4.715071121093019e-05, + "loss": 0.0778, + "step": 12180 + }, + { + "epoch": 1.71, + "learning_rate": 4.7150243309002436e-05, + "loss": 0.109, + "step": 12182 + }, + { + "epoch": 1.71, + "learning_rate": 4.714977540707468e-05, + "loss": 0.0578, + "step": 12184 + }, + { + "epoch": 1.71, + "learning_rate": 4.714930750514692e-05, + "loss": 0.0732, + "step": 12186 + }, + { + "epoch": 1.71, + "learning_rate": 4.714883960321917e-05, + "loss": 0.0868, + "step": 12188 + }, + { + "epoch": 1.71, + "learning_rate": 4.714837170129141e-05, + "loss": 0.0722, + "step": 12190 + }, + { + "epoch": 1.71, + "learning_rate": 4.714790379936366e-05, + "loss": 0.0947, + "step": 12192 + }, + { + "epoch": 1.71, + "learning_rate": 4.71474358974359e-05, + "loss": 0.0965, + "step": 12194 + }, + { + "epoch": 1.71, + "learning_rate": 4.7146967995508144e-05, + "loss": 0.0617, + "step": 12196 + }, + { + "epoch": 1.71, + "learning_rate": 4.714650009358039e-05, + "loss": 0.0687, + "step": 12198 + }, + { + "epoch": 1.71, + "learning_rate": 4.7146032191652636e-05, + "loss": 0.0615, + "step": 12200 + }, + { + "epoch": 1.71, + "learning_rate": 4.7145564289724875e-05, + "loss": 0.0919, + "step": 12202 + }, + { + "epoch": 1.71, + "learning_rate": 4.714509638779712e-05, + "loss": 0.0799, + "step": 12204 + }, + { + "epoch": 1.71, + "learning_rate": 4.714462848586936e-05, + "loss": 0.0799, + "step": 12206 + }, + { + "epoch": 1.71, + "learning_rate": 4.714416058394161e-05, + "loss": 0.1178, + "step": 12208 + }, + { + "epoch": 1.71, + "learning_rate": 4.714369268201385e-05, + "loss": 0.0729, + "step": 12210 + }, + { + "epoch": 1.71, + "learning_rate": 4.71432247800861e-05, + "loss": 0.0889, + "step": 12212 + }, + { + "epoch": 1.71, + "learning_rate": 4.714275687815834e-05, + "loss": 0.0741, + "step": 12214 + }, + { + "epoch": 1.71, + "learning_rate": 4.714228897623059e-05, + "loss": 0.0819, + "step": 12216 + }, + { + "epoch": 1.72, + "learning_rate": 4.714182107430283e-05, + "loss": 0.0892, + "step": 12218 + }, + { + "epoch": 1.72, + "learning_rate": 4.7141353172375074e-05, + "loss": 0.0782, + "step": 12220 + }, + { + "epoch": 1.72, + "learning_rate": 4.7140885270447314e-05, + "loss": 0.1023, + "step": 12222 + }, + { + "epoch": 1.72, + "learning_rate": 4.714041736851956e-05, + "loss": 0.0975, + "step": 12224 + }, + { + "epoch": 1.72, + "learning_rate": 4.7139949466591805e-05, + "loss": 0.0752, + "step": 12226 + }, + { + "epoch": 1.72, + "learning_rate": 4.713948156466405e-05, + "loss": 0.0733, + "step": 12228 + }, + { + "epoch": 1.72, + "learning_rate": 4.713901366273629e-05, + "loss": 0.0735, + "step": 12230 + }, + { + "epoch": 1.72, + "learning_rate": 4.7138545760808536e-05, + "loss": 0.0606, + "step": 12232 + }, + { + "epoch": 1.72, + "learning_rate": 4.713807785888078e-05, + "loss": 0.0787, + "step": 12234 + }, + { + "epoch": 1.72, + "learning_rate": 4.713760995695303e-05, + "loss": 0.0747, + "step": 12236 + }, + { + "epoch": 1.72, + "learning_rate": 4.713714205502527e-05, + "loss": 0.077, + "step": 12238 + }, + { + "epoch": 1.72, + "learning_rate": 4.713667415309751e-05, + "loss": 0.0584, + "step": 12240 + }, + { + "epoch": 1.72, + "learning_rate": 4.713620625116976e-05, + "loss": 0.082, + "step": 12242 + }, + { + "epoch": 1.72, + "learning_rate": 4.7135738349242005e-05, + "loss": 0.0814, + "step": 12244 + }, + { + "epoch": 1.72, + "learning_rate": 4.7135270447314244e-05, + "loss": 0.0826, + "step": 12246 + }, + { + "epoch": 1.72, + "learning_rate": 4.713480254538649e-05, + "loss": 0.0768, + "step": 12248 + }, + { + "epoch": 1.72, + "learning_rate": 4.7134334643458736e-05, + "loss": 0.0752, + "step": 12250 + }, + { + "epoch": 1.72, + "learning_rate": 4.713386674153098e-05, + "loss": 0.0939, + "step": 12252 + }, + { + "epoch": 1.72, + "learning_rate": 4.713339883960322e-05, + "loss": 0.0845, + "step": 12254 + }, + { + "epoch": 1.72, + "learning_rate": 4.713293093767547e-05, + "loss": 0.1071, + "step": 12256 + }, + { + "epoch": 1.72, + "learning_rate": 4.7132463035747706e-05, + "loss": 0.0772, + "step": 12258 + }, + { + "epoch": 1.72, + "learning_rate": 4.713199513381996e-05, + "loss": 0.0782, + "step": 12260 + }, + { + "epoch": 1.72, + "learning_rate": 4.71315272318922e-05, + "loss": 0.0823, + "step": 12262 + }, + { + "epoch": 1.72, + "learning_rate": 4.7131059329964444e-05, + "loss": 0.0587, + "step": 12264 + }, + { + "epoch": 1.72, + "learning_rate": 4.713059142803668e-05, + "loss": 0.0788, + "step": 12266 + }, + { + "epoch": 1.72, + "learning_rate": 4.7130123526108936e-05, + "loss": 0.0772, + "step": 12268 + }, + { + "epoch": 1.72, + "learning_rate": 4.7129655624181175e-05, + "loss": 0.0952, + "step": 12270 + }, + { + "epoch": 1.72, + "learning_rate": 4.712918772225342e-05, + "loss": 0.0765, + "step": 12272 + }, + { + "epoch": 1.72, + "learning_rate": 4.712871982032566e-05, + "loss": 0.0812, + "step": 12274 + }, + { + "epoch": 1.72, + "learning_rate": 4.7128251918397906e-05, + "loss": 0.088, + "step": 12276 + }, + { + "epoch": 1.72, + "learning_rate": 4.712778401647015e-05, + "loss": 0.0745, + "step": 12278 + }, + { + "epoch": 1.72, + "learning_rate": 4.71273161145424e-05, + "loss": 0.1049, + "step": 12280 + }, + { + "epoch": 1.72, + "learning_rate": 4.712684821261464e-05, + "loss": 0.0813, + "step": 12282 + }, + { + "epoch": 1.72, + "learning_rate": 4.712638031068688e-05, + "loss": 0.1017, + "step": 12284 + }, + { + "epoch": 1.72, + "learning_rate": 4.712591240875913e-05, + "loss": 0.074, + "step": 12286 + }, + { + "epoch": 1.72, + "learning_rate": 4.712544450683137e-05, + "loss": 0.085, + "step": 12288 + }, + { + "epoch": 1.73, + "learning_rate": 4.7124976604903614e-05, + "loss": 0.1091, + "step": 12290 + }, + { + "epoch": 1.73, + "learning_rate": 4.712450870297585e-05, + "loss": 0.0696, + "step": 12292 + }, + { + "epoch": 1.73, + "learning_rate": 4.7124040801048105e-05, + "loss": 0.056, + "step": 12294 + }, + { + "epoch": 1.73, + "learning_rate": 4.7123572899120345e-05, + "loss": 0.0998, + "step": 12296 + }, + { + "epoch": 1.73, + "learning_rate": 4.712310499719259e-05, + "loss": 0.0656, + "step": 12298 + }, + { + "epoch": 1.73, + "learning_rate": 4.712263709526483e-05, + "loss": 0.0775, + "step": 12300 + }, + { + "epoch": 1.73, + "learning_rate": 4.712216919333708e-05, + "loss": 0.0605, + "step": 12302 + }, + { + "epoch": 1.73, + "learning_rate": 4.712170129140932e-05, + "loss": 0.1085, + "step": 12304 + }, + { + "epoch": 1.73, + "learning_rate": 4.712123338948157e-05, + "loss": 0.0705, + "step": 12306 + }, + { + "epoch": 1.73, + "learning_rate": 4.7120765487553806e-05, + "loss": 0.0794, + "step": 12308 + }, + { + "epoch": 1.73, + "learning_rate": 4.712029758562605e-05, + "loss": 0.0838, + "step": 12310 + }, + { + "epoch": 1.73, + "learning_rate": 4.71198296836983e-05, + "loss": 0.0954, + "step": 12312 + }, + { + "epoch": 1.73, + "learning_rate": 4.7119361781770544e-05, + "loss": 0.0668, + "step": 12314 + }, + { + "epoch": 1.73, + "learning_rate": 4.711889387984278e-05, + "loss": 0.0723, + "step": 12316 + }, + { + "epoch": 1.73, + "learning_rate": 4.711842597791503e-05, + "loss": 0.0811, + "step": 12318 + }, + { + "epoch": 1.73, + "learning_rate": 4.7117958075987275e-05, + "loss": 0.0921, + "step": 12320 + }, + { + "epoch": 1.73, + "learning_rate": 4.711749017405952e-05, + "loss": 0.1172, + "step": 12322 + }, + { + "epoch": 1.73, + "learning_rate": 4.711702227213176e-05, + "loss": 0.0791, + "step": 12324 + }, + { + "epoch": 1.73, + "learning_rate": 4.7116554370204006e-05, + "loss": 0.0773, + "step": 12326 + }, + { + "epoch": 1.73, + "learning_rate": 4.711608646827625e-05, + "loss": 0.0753, + "step": 12328 + }, + { + "epoch": 1.73, + "learning_rate": 4.71156185663485e-05, + "loss": 0.0813, + "step": 12330 + }, + { + "epoch": 1.73, + "learning_rate": 4.711515066442074e-05, + "loss": 0.0911, + "step": 12332 + }, + { + "epoch": 1.73, + "learning_rate": 4.711468276249298e-05, + "loss": 0.1135, + "step": 12334 + }, + { + "epoch": 1.73, + "learning_rate": 4.711421486056523e-05, + "loss": 0.0854, + "step": 12336 + }, + { + "epoch": 1.73, + "learning_rate": 4.7113746958637475e-05, + "loss": 0.0765, + "step": 12338 + }, + { + "epoch": 1.73, + "learning_rate": 4.7113279056709714e-05, + "loss": 0.0631, + "step": 12340 + }, + { + "epoch": 1.73, + "learning_rate": 4.711281115478196e-05, + "loss": 0.0707, + "step": 12342 + }, + { + "epoch": 1.73, + "learning_rate": 4.71123432528542e-05, + "loss": 0.0709, + "step": 12344 + }, + { + "epoch": 1.73, + "learning_rate": 4.711187535092645e-05, + "loss": 0.1139, + "step": 12346 + }, + { + "epoch": 1.73, + "learning_rate": 4.711140744899869e-05, + "loss": 0.0931, + "step": 12348 + }, + { + "epoch": 1.73, + "learning_rate": 4.711093954707094e-05, + "loss": 0.0723, + "step": 12350 + }, + { + "epoch": 1.73, + "learning_rate": 4.7110471645143176e-05, + "loss": 0.082, + "step": 12352 + }, + { + "epoch": 1.73, + "learning_rate": 4.711000374321542e-05, + "loss": 0.0655, + "step": 12354 + }, + { + "epoch": 1.73, + "learning_rate": 4.710953584128767e-05, + "loss": 0.0797, + "step": 12356 + }, + { + "epoch": 1.73, + "learning_rate": 4.7109067939359914e-05, + "loss": 0.0938, + "step": 12358 + }, + { + "epoch": 1.73, + "learning_rate": 4.710860003743215e-05, + "loss": 0.0988, + "step": 12360 + }, + { + "epoch": 1.74, + "learning_rate": 4.71081321355044e-05, + "loss": 0.0772, + "step": 12362 + }, + { + "epoch": 1.74, + "learning_rate": 4.7107664233576645e-05, + "loss": 0.0752, + "step": 12364 + }, + { + "epoch": 1.74, + "learning_rate": 4.710719633164889e-05, + "loss": 0.0749, + "step": 12366 + }, + { + "epoch": 1.74, + "learning_rate": 4.710672842972113e-05, + "loss": 0.087, + "step": 12368 + }, + { + "epoch": 1.74, + "learning_rate": 4.7106260527793376e-05, + "loss": 0.0851, + "step": 12370 + }, + { + "epoch": 1.74, + "learning_rate": 4.710579262586562e-05, + "loss": 0.1129, + "step": 12372 + }, + { + "epoch": 1.74, + "learning_rate": 4.710532472393787e-05, + "loss": 0.0716, + "step": 12374 + }, + { + "epoch": 1.74, + "learning_rate": 4.7104856822010106e-05, + "loss": 0.0682, + "step": 12376 + }, + { + "epoch": 1.74, + "learning_rate": 4.710438892008235e-05, + "loss": 0.0663, + "step": 12378 + }, + { + "epoch": 1.74, + "learning_rate": 4.71039210181546e-05, + "loss": 0.0867, + "step": 12380 + }, + { + "epoch": 1.74, + "learning_rate": 4.7103453116226844e-05, + "loss": 0.0715, + "step": 12382 + }, + { + "epoch": 1.74, + "learning_rate": 4.710298521429908e-05, + "loss": 0.0789, + "step": 12384 + }, + { + "epoch": 1.74, + "learning_rate": 4.710251731237133e-05, + "loss": 0.0809, + "step": 12386 + }, + { + "epoch": 1.74, + "learning_rate": 4.710204941044357e-05, + "loss": 0.0786, + "step": 12388 + }, + { + "epoch": 1.74, + "learning_rate": 4.710158150851582e-05, + "loss": 0.0854, + "step": 12390 + }, + { + "epoch": 1.74, + "learning_rate": 4.710111360658806e-05, + "loss": 0.1066, + "step": 12392 + }, + { + "epoch": 1.74, + "learning_rate": 4.7100645704660306e-05, + "loss": 0.0754, + "step": 12394 + }, + { + "epoch": 1.74, + "learning_rate": 4.7100177802732545e-05, + "loss": 0.0738, + "step": 12396 + }, + { + "epoch": 1.74, + "learning_rate": 4.70997099008048e-05, + "loss": 0.0855, + "step": 12398 + }, + { + "epoch": 1.74, + "learning_rate": 4.709924199887704e-05, + "loss": 0.0652, + "step": 12400 + }, + { + "epoch": 1.74, + "learning_rate": 4.709877409694928e-05, + "loss": 0.0815, + "step": 12402 + }, + { + "epoch": 1.74, + "learning_rate": 4.709830619502152e-05, + "loss": 0.0983, + "step": 12404 + }, + { + "epoch": 1.74, + "learning_rate": 4.709783829309377e-05, + "loss": 0.0887, + "step": 12406 + }, + { + "epoch": 1.74, + "learning_rate": 4.7097370391166014e-05, + "loss": 0.0949, + "step": 12408 + }, + { + "epoch": 1.74, + "learning_rate": 4.709690248923826e-05, + "loss": 0.0763, + "step": 12410 + }, + { + "epoch": 1.74, + "learning_rate": 4.70964345873105e-05, + "loss": 0.0612, + "step": 12412 + }, + { + "epoch": 1.74, + "learning_rate": 4.7095966685382745e-05, + "loss": 0.0726, + "step": 12414 + }, + { + "epoch": 1.74, + "learning_rate": 4.709549878345499e-05, + "loss": 0.0978, + "step": 12416 + }, + { + "epoch": 1.74, + "learning_rate": 4.709503088152724e-05, + "loss": 0.0781, + "step": 12418 + }, + { + "epoch": 1.74, + "learning_rate": 4.7094562979599476e-05, + "loss": 0.1019, + "step": 12420 + }, + { + "epoch": 1.74, + "learning_rate": 4.709409507767172e-05, + "loss": 0.074, + "step": 12422 + }, + { + "epoch": 1.74, + "learning_rate": 4.709362717574397e-05, + "loss": 0.0949, + "step": 12424 + }, + { + "epoch": 1.74, + "learning_rate": 4.7093159273816214e-05, + "loss": 0.0798, + "step": 12426 + }, + { + "epoch": 1.74, + "learning_rate": 4.709269137188845e-05, + "loss": 0.0883, + "step": 12428 + }, + { + "epoch": 1.74, + "learning_rate": 4.70922234699607e-05, + "loss": 0.0895, + "step": 12430 + }, + { + "epoch": 1.75, + "learning_rate": 4.7091755568032945e-05, + "loss": 0.0651, + "step": 12432 + }, + { + "epoch": 1.75, + "learning_rate": 4.709128766610519e-05, + "loss": 0.0896, + "step": 12434 + }, + { + "epoch": 1.75, + "learning_rate": 4.709081976417743e-05, + "loss": 0.0778, + "step": 12436 + }, + { + "epoch": 1.75, + "learning_rate": 4.7090351862249676e-05, + "loss": 0.0979, + "step": 12438 + }, + { + "epoch": 1.75, + "learning_rate": 4.7089883960321915e-05, + "loss": 0.0776, + "step": 12440 + }, + { + "epoch": 1.75, + "learning_rate": 4.708941605839417e-05, + "loss": 0.0702, + "step": 12442 + }, + { + "epoch": 1.75, + "learning_rate": 4.7088948156466407e-05, + "loss": 0.0927, + "step": 12444 + }, + { + "epoch": 1.75, + "learning_rate": 4.708848025453865e-05, + "loss": 0.1335, + "step": 12446 + }, + { + "epoch": 1.75, + "learning_rate": 4.708801235261089e-05, + "loss": 0.0757, + "step": 12448 + }, + { + "epoch": 1.75, + "learning_rate": 4.7087544450683144e-05, + "loss": 0.0841, + "step": 12450 + }, + { + "epoch": 1.75, + "learning_rate": 4.708707654875538e-05, + "loss": 0.1079, + "step": 12452 + }, + { + "epoch": 1.75, + "learning_rate": 4.708660864682763e-05, + "loss": 0.0901, + "step": 12454 + }, + { + "epoch": 1.75, + "learning_rate": 4.708614074489987e-05, + "loss": 0.0774, + "step": 12456 + }, + { + "epoch": 1.75, + "learning_rate": 4.7085672842972114e-05, + "loss": 0.0933, + "step": 12458 + }, + { + "epoch": 1.75, + "learning_rate": 4.708520494104436e-05, + "loss": 0.0999, + "step": 12460 + }, + { + "epoch": 1.75, + "learning_rate": 4.7084737039116606e-05, + "loss": 0.0647, + "step": 12462 + }, + { + "epoch": 1.75, + "learning_rate": 4.7084269137188845e-05, + "loss": 0.1042, + "step": 12464 + }, + { + "epoch": 1.75, + "learning_rate": 4.708380123526109e-05, + "loss": 0.0931, + "step": 12466 + }, + { + "epoch": 1.75, + "learning_rate": 4.708333333333334e-05, + "loss": 0.087, + "step": 12468 + }, + { + "epoch": 1.75, + "learning_rate": 4.708286543140558e-05, + "loss": 0.0654, + "step": 12470 + }, + { + "epoch": 1.75, + "learning_rate": 4.708239752947782e-05, + "loss": 0.1034, + "step": 12472 + }, + { + "epoch": 1.75, + "learning_rate": 4.708192962755007e-05, + "loss": 0.0683, + "step": 12474 + }, + { + "epoch": 1.75, + "learning_rate": 4.7081461725622314e-05, + "loss": 0.0623, + "step": 12476 + }, + { + "epoch": 1.75, + "learning_rate": 4.708099382369456e-05, + "loss": 0.0858, + "step": 12478 + }, + { + "epoch": 1.75, + "learning_rate": 4.70805259217668e-05, + "loss": 0.0856, + "step": 12480 + }, + { + "epoch": 1.75, + "learning_rate": 4.7080058019839045e-05, + "loss": 0.0915, + "step": 12482 + }, + { + "epoch": 1.75, + "learning_rate": 4.707959011791129e-05, + "loss": 0.0699, + "step": 12484 + }, + { + "epoch": 1.75, + "learning_rate": 4.707912221598354e-05, + "loss": 0.0971, + "step": 12486 + }, + { + "epoch": 1.75, + "learning_rate": 4.7078654314055776e-05, + "loss": 0.0924, + "step": 12488 + }, + { + "epoch": 1.75, + "learning_rate": 4.707818641212802e-05, + "loss": 0.079, + "step": 12490 + }, + { + "epoch": 1.75, + "learning_rate": 4.707771851020026e-05, + "loss": 0.1105, + "step": 12492 + }, + { + "epoch": 1.75, + "learning_rate": 4.7077250608272514e-05, + "loss": 0.0796, + "step": 12494 + }, + { + "epoch": 1.75, + "learning_rate": 4.707678270634475e-05, + "loss": 0.0816, + "step": 12496 + }, + { + "epoch": 1.75, + "learning_rate": 4.7076314804417e-05, + "loss": 0.0782, + "step": 12498 + }, + { + "epoch": 1.75, + "learning_rate": 4.707584690248924e-05, + "loss": 0.0678, + "step": 12500 + }, + { + "epoch": 1.75, + "learning_rate": 4.7075379000561484e-05, + "loss": 0.0701, + "step": 12502 + }, + { + "epoch": 1.76, + "learning_rate": 4.707491109863373e-05, + "loss": 0.0743, + "step": 12504 + }, + { + "epoch": 1.76, + "learning_rate": 4.7074443196705976e-05, + "loss": 0.0765, + "step": 12506 + }, + { + "epoch": 1.76, + "learning_rate": 4.7073975294778215e-05, + "loss": 0.0664, + "step": 12508 + }, + { + "epoch": 1.76, + "learning_rate": 4.707350739285046e-05, + "loss": 0.0884, + "step": 12510 + }, + { + "epoch": 1.76, + "learning_rate": 4.7073039490922707e-05, + "loss": 0.0774, + "step": 12512 + }, + { + "epoch": 1.76, + "learning_rate": 4.707257158899495e-05, + "loss": 0.0648, + "step": 12514 + }, + { + "epoch": 1.76, + "learning_rate": 4.707210368706719e-05, + "loss": 0.0909, + "step": 12516 + }, + { + "epoch": 1.76, + "learning_rate": 4.707163578513944e-05, + "loss": 0.1022, + "step": 12518 + }, + { + "epoch": 1.76, + "learning_rate": 4.7071167883211683e-05, + "loss": 0.0714, + "step": 12520 + }, + { + "epoch": 1.76, + "learning_rate": 4.707069998128393e-05, + "loss": 0.0633, + "step": 12522 + }, + { + "epoch": 1.76, + "learning_rate": 4.707023207935617e-05, + "loss": 0.0935, + "step": 12524 + }, + { + "epoch": 1.76, + "learning_rate": 4.7069764177428414e-05, + "loss": 0.0655, + "step": 12526 + }, + { + "epoch": 1.76, + "learning_rate": 4.706929627550066e-05, + "loss": 0.0806, + "step": 12528 + }, + { + "epoch": 1.76, + "learning_rate": 4.7068828373572906e-05, + "loss": 0.0893, + "step": 12530 + }, + { + "epoch": 1.76, + "learning_rate": 4.7068360471645145e-05, + "loss": 0.079, + "step": 12532 + }, + { + "epoch": 1.76, + "learning_rate": 4.706789256971739e-05, + "loss": 0.0798, + "step": 12534 + }, + { + "epoch": 1.76, + "learning_rate": 4.706742466778963e-05, + "loss": 0.0623, + "step": 12536 + }, + { + "epoch": 1.76, + "learning_rate": 4.7066956765861876e-05, + "loss": 0.0825, + "step": 12538 + }, + { + "epoch": 1.76, + "learning_rate": 4.706648886393412e-05, + "loss": 0.0769, + "step": 12540 + }, + { + "epoch": 1.76, + "learning_rate": 4.706602096200636e-05, + "loss": 0.0712, + "step": 12542 + }, + { + "epoch": 1.76, + "learning_rate": 4.706555306007861e-05, + "loss": 0.0721, + "step": 12544 + }, + { + "epoch": 1.76, + "learning_rate": 4.706508515815085e-05, + "loss": 0.0735, + "step": 12546 + }, + { + "epoch": 1.76, + "learning_rate": 4.70646172562231e-05, + "loss": 0.0798, + "step": 12548 + }, + { + "epoch": 1.76, + "learning_rate": 4.706414935429534e-05, + "loss": 0.11, + "step": 12550 + }, + { + "epoch": 1.76, + "learning_rate": 4.7063681452367584e-05, + "loss": 0.085, + "step": 12552 + }, + { + "epoch": 1.76, + "learning_rate": 4.706321355043983e-05, + "loss": 0.0641, + "step": 12554 + }, + { + "epoch": 1.76, + "learning_rate": 4.7062745648512076e-05, + "loss": 0.0903, + "step": 12556 + }, + { + "epoch": 1.76, + "learning_rate": 4.7062277746584315e-05, + "loss": 0.0814, + "step": 12558 + }, + { + "epoch": 1.76, + "learning_rate": 4.706180984465656e-05, + "loss": 0.075, + "step": 12560 + }, + { + "epoch": 1.76, + "learning_rate": 4.706134194272881e-05, + "loss": 0.0837, + "step": 12562 + }, + { + "epoch": 1.76, + "learning_rate": 4.706087404080105e-05, + "loss": 0.0807, + "step": 12564 + }, + { + "epoch": 1.76, + "learning_rate": 4.706040613887329e-05, + "loss": 0.102, + "step": 12566 + }, + { + "epoch": 1.76, + "learning_rate": 4.705993823694554e-05, + "loss": 0.0906, + "step": 12568 + }, + { + "epoch": 1.76, + "learning_rate": 4.705947033501778e-05, + "loss": 0.081, + "step": 12570 + }, + { + "epoch": 1.76, + "learning_rate": 4.705900243309003e-05, + "loss": 0.0921, + "step": 12572 + }, + { + "epoch": 1.77, + "learning_rate": 4.705853453116227e-05, + "loss": 0.093, + "step": 12574 + }, + { + "epoch": 1.77, + "learning_rate": 4.7058066629234515e-05, + "loss": 0.0976, + "step": 12576 + }, + { + "epoch": 1.77, + "learning_rate": 4.7057598727306754e-05, + "loss": 0.0803, + "step": 12578 + }, + { + "epoch": 1.77, + "learning_rate": 4.7057130825379007e-05, + "loss": 0.0962, + "step": 12580 + }, + { + "epoch": 1.77, + "learning_rate": 4.7056662923451246e-05, + "loss": 0.0884, + "step": 12582 + }, + { + "epoch": 1.77, + "learning_rate": 4.705619502152349e-05, + "loss": 0.0813, + "step": 12584 + }, + { + "epoch": 1.77, + "learning_rate": 4.705572711959573e-05, + "loss": 0.07, + "step": 12586 + }, + { + "epoch": 1.77, + "learning_rate": 4.705525921766798e-05, + "loss": 0.0996, + "step": 12588 + }, + { + "epoch": 1.77, + "learning_rate": 4.705479131574022e-05, + "loss": 0.0847, + "step": 12590 + }, + { + "epoch": 1.77, + "learning_rate": 4.705432341381247e-05, + "loss": 0.0983, + "step": 12592 + }, + { + "epoch": 1.77, + "learning_rate": 4.705385551188471e-05, + "loss": 0.0751, + "step": 12594 + }, + { + "epoch": 1.77, + "learning_rate": 4.7053387609956954e-05, + "loss": 0.081, + "step": 12596 + }, + { + "epoch": 1.77, + "learning_rate": 4.70529197080292e-05, + "loss": 0.0744, + "step": 12598 + }, + { + "epoch": 1.77, + "learning_rate": 4.7052451806101445e-05, + "loss": 0.0707, + "step": 12600 + }, + { + "epoch": 1.77, + "learning_rate": 4.7051983904173684e-05, + "loss": 0.0723, + "step": 12602 + }, + { + "epoch": 1.77, + "learning_rate": 4.705151600224593e-05, + "loss": 0.0709, + "step": 12604 + }, + { + "epoch": 1.77, + "learning_rate": 4.7051048100318176e-05, + "loss": 0.0801, + "step": 12606 + }, + { + "epoch": 1.77, + "learning_rate": 4.705058019839042e-05, + "loss": 0.0836, + "step": 12608 + }, + { + "epoch": 1.77, + "learning_rate": 4.705011229646266e-05, + "loss": 0.0852, + "step": 12610 + }, + { + "epoch": 1.77, + "learning_rate": 4.704964439453491e-05, + "loss": 0.0622, + "step": 12612 + }, + { + "epoch": 1.77, + "learning_rate": 4.704917649260715e-05, + "loss": 0.0797, + "step": 12614 + }, + { + "epoch": 1.77, + "learning_rate": 4.70487085906794e-05, + "loss": 0.084, + "step": 12616 + }, + { + "epoch": 1.77, + "learning_rate": 4.704824068875164e-05, + "loss": 0.0826, + "step": 12618 + }, + { + "epoch": 1.77, + "learning_rate": 4.7047772786823884e-05, + "loss": 0.1171, + "step": 12620 + }, + { + "epoch": 1.77, + "learning_rate": 4.704730488489612e-05, + "loss": 0.0766, + "step": 12622 + }, + { + "epoch": 1.77, + "learning_rate": 4.7046836982968376e-05, + "loss": 0.0831, + "step": 12624 + }, + { + "epoch": 1.77, + "learning_rate": 4.7046369081040615e-05, + "loss": 0.098, + "step": 12626 + }, + { + "epoch": 1.77, + "learning_rate": 4.704590117911286e-05, + "loss": 0.0922, + "step": 12628 + }, + { + "epoch": 1.77, + "learning_rate": 4.70454332771851e-05, + "loss": 0.0845, + "step": 12630 + }, + { + "epoch": 1.77, + "learning_rate": 4.7044965375257346e-05, + "loss": 0.0798, + "step": 12632 + }, + { + "epoch": 1.77, + "learning_rate": 4.704449747332959e-05, + "loss": 0.0753, + "step": 12634 + }, + { + "epoch": 1.77, + "learning_rate": 4.704402957140184e-05, + "loss": 0.0997, + "step": 12636 + }, + { + "epoch": 1.77, + "learning_rate": 4.704356166947408e-05, + "loss": 0.0875, + "step": 12638 + }, + { + "epoch": 1.77, + "learning_rate": 4.704309376754632e-05, + "loss": 0.0841, + "step": 12640 + }, + { + "epoch": 1.77, + "learning_rate": 4.704262586561857e-05, + "loss": 0.0661, + "step": 12642 + }, + { + "epoch": 1.77, + "learning_rate": 4.7042157963690815e-05, + "loss": 0.1243, + "step": 12644 + }, + { + "epoch": 1.78, + "learning_rate": 4.7041690061763054e-05, + "loss": 0.0647, + "step": 12646 + }, + { + "epoch": 1.78, + "learning_rate": 4.70412221598353e-05, + "loss": 0.0703, + "step": 12648 + }, + { + "epoch": 1.78, + "learning_rate": 4.7040754257907546e-05, + "loss": 0.0813, + "step": 12650 + }, + { + "epoch": 1.78, + "learning_rate": 4.704028635597979e-05, + "loss": 0.0737, + "step": 12652 + }, + { + "epoch": 1.78, + "learning_rate": 4.703981845405203e-05, + "loss": 0.0834, + "step": 12654 + }, + { + "epoch": 1.78, + "learning_rate": 4.703935055212428e-05, + "loss": 0.0771, + "step": 12656 + }, + { + "epoch": 1.78, + "learning_rate": 4.703888265019652e-05, + "loss": 0.0748, + "step": 12658 + }, + { + "epoch": 1.78, + "learning_rate": 4.703841474826877e-05, + "loss": 0.0708, + "step": 12660 + }, + { + "epoch": 1.78, + "learning_rate": 4.703794684634101e-05, + "loss": 0.0635, + "step": 12662 + }, + { + "epoch": 1.78, + "learning_rate": 4.7037478944413254e-05, + "loss": 0.0806, + "step": 12664 + }, + { + "epoch": 1.78, + "learning_rate": 4.703701104248549e-05, + "loss": 0.0738, + "step": 12666 + }, + { + "epoch": 1.78, + "learning_rate": 4.7036543140557745e-05, + "loss": 0.0633, + "step": 12668 + }, + { + "epoch": 1.78, + "learning_rate": 4.7036075238629984e-05, + "loss": 0.1057, + "step": 12670 + }, + { + "epoch": 1.78, + "learning_rate": 4.703560733670223e-05, + "loss": 0.0974, + "step": 12672 + }, + { + "epoch": 1.78, + "learning_rate": 4.703513943477447e-05, + "loss": 0.0805, + "step": 12674 + }, + { + "epoch": 1.78, + "learning_rate": 4.703467153284672e-05, + "loss": 0.1314, + "step": 12676 + }, + { + "epoch": 1.78, + "learning_rate": 4.703420363091896e-05, + "loss": 0.068, + "step": 12678 + }, + { + "epoch": 1.78, + "learning_rate": 4.703373572899121e-05, + "loss": 0.1, + "step": 12680 + }, + { + "epoch": 1.78, + "learning_rate": 4.7033267827063446e-05, + "loss": 0.1086, + "step": 12682 + }, + { + "epoch": 1.78, + "learning_rate": 4.703279992513569e-05, + "loss": 0.0704, + "step": 12684 + }, + { + "epoch": 1.78, + "learning_rate": 4.703233202320794e-05, + "loss": 0.0832, + "step": 12686 + }, + { + "epoch": 1.78, + "learning_rate": 4.7031864121280184e-05, + "loss": 0.0761, + "step": 12688 + }, + { + "epoch": 1.78, + "learning_rate": 4.703139621935242e-05, + "loss": 0.0811, + "step": 12690 + }, + { + "epoch": 1.78, + "learning_rate": 4.703092831742467e-05, + "loss": 0.0842, + "step": 12692 + }, + { + "epoch": 1.78, + "learning_rate": 4.7030460415496915e-05, + "loss": 0.0915, + "step": 12694 + }, + { + "epoch": 1.78, + "learning_rate": 4.702999251356916e-05, + "loss": 0.0922, + "step": 12696 + }, + { + "epoch": 1.78, + "learning_rate": 4.70295246116414e-05, + "loss": 0.0962, + "step": 12698 + }, + { + "epoch": 1.78, + "learning_rate": 4.7029056709713646e-05, + "loss": 0.0808, + "step": 12700 + }, + { + "epoch": 1.78, + "learning_rate": 4.702858880778589e-05, + "loss": 0.0769, + "step": 12702 + }, + { + "epoch": 1.78, + "learning_rate": 4.702812090585814e-05, + "loss": 0.0768, + "step": 12704 + }, + { + "epoch": 1.78, + "learning_rate": 4.702765300393038e-05, + "loss": 0.1067, + "step": 12706 + }, + { + "epoch": 1.78, + "learning_rate": 4.702718510200262e-05, + "loss": 0.0711, + "step": 12708 + }, + { + "epoch": 1.78, + "learning_rate": 4.702671720007487e-05, + "loss": 0.0673, + "step": 12710 + }, + { + "epoch": 1.78, + "learning_rate": 4.7026249298147115e-05, + "loss": 0.087, + "step": 12712 + }, + { + "epoch": 1.78, + "learning_rate": 4.7025781396219354e-05, + "loss": 0.0766, + "step": 12714 + }, + { + "epoch": 1.78, + "learning_rate": 4.70253134942916e-05, + "loss": 0.079, + "step": 12716 + }, + { + "epoch": 1.79, + "learning_rate": 4.702484559236384e-05, + "loss": 0.0754, + "step": 12718 + }, + { + "epoch": 1.79, + "learning_rate": 4.702437769043609e-05, + "loss": 0.073, + "step": 12720 + }, + { + "epoch": 1.79, + "learning_rate": 4.702390978850833e-05, + "loss": 0.066, + "step": 12722 + }, + { + "epoch": 1.79, + "learning_rate": 4.702344188658058e-05, + "loss": 0.0853, + "step": 12724 + }, + { + "epoch": 1.79, + "learning_rate": 4.7022973984652816e-05, + "loss": 0.0668, + "step": 12726 + }, + { + "epoch": 1.79, + "learning_rate": 4.702250608272507e-05, + "loss": 0.0669, + "step": 12728 + }, + { + "epoch": 1.79, + "learning_rate": 4.702203818079731e-05, + "loss": 0.0992, + "step": 12730 + }, + { + "epoch": 1.79, + "learning_rate": 4.7021570278869554e-05, + "loss": 0.0826, + "step": 12732 + }, + { + "epoch": 1.79, + "learning_rate": 4.702110237694179e-05, + "loss": 0.0781, + "step": 12734 + }, + { + "epoch": 1.79, + "learning_rate": 4.702063447501404e-05, + "loss": 0.0865, + "step": 12736 + }, + { + "epoch": 1.79, + "learning_rate": 4.7020166573086284e-05, + "loss": 0.0927, + "step": 12738 + }, + { + "epoch": 1.79, + "learning_rate": 4.701969867115853e-05, + "loss": 0.0905, + "step": 12740 + }, + { + "epoch": 1.79, + "learning_rate": 4.701923076923077e-05, + "loss": 0.0795, + "step": 12742 + }, + { + "epoch": 1.79, + "learning_rate": 4.7018762867303015e-05, + "loss": 0.0623, + "step": 12744 + }, + { + "epoch": 1.79, + "learning_rate": 4.701829496537526e-05, + "loss": 0.0874, + "step": 12746 + }, + { + "epoch": 1.79, + "learning_rate": 4.701782706344751e-05, + "loss": 0.0702, + "step": 12748 + }, + { + "epoch": 1.79, + "learning_rate": 4.7017359161519746e-05, + "loss": 0.0783, + "step": 12750 + }, + { + "epoch": 1.79, + "learning_rate": 4.701689125959199e-05, + "loss": 0.0527, + "step": 12752 + }, + { + "epoch": 1.79, + "learning_rate": 4.701642335766424e-05, + "loss": 0.0763, + "step": 12754 + }, + { + "epoch": 1.79, + "learning_rate": 4.7015955455736484e-05, + "loss": 0.0902, + "step": 12756 + }, + { + "epoch": 1.79, + "learning_rate": 4.701548755380872e-05, + "loss": 0.0715, + "step": 12758 + }, + { + "epoch": 1.79, + "learning_rate": 4.701501965188097e-05, + "loss": 0.0658, + "step": 12760 + }, + { + "epoch": 1.79, + "learning_rate": 4.7014551749953215e-05, + "loss": 0.0704, + "step": 12762 + }, + { + "epoch": 1.79, + "learning_rate": 4.701408384802546e-05, + "loss": 0.0947, + "step": 12764 + }, + { + "epoch": 1.79, + "learning_rate": 4.70136159460977e-05, + "loss": 0.0937, + "step": 12766 + }, + { + "epoch": 1.79, + "learning_rate": 4.7013148044169946e-05, + "loss": 0.0731, + "step": 12768 + }, + { + "epoch": 1.79, + "learning_rate": 4.7012680142242185e-05, + "loss": 0.0846, + "step": 12770 + }, + { + "epoch": 1.79, + "learning_rate": 4.701221224031444e-05, + "loss": 0.0867, + "step": 12772 + }, + { + "epoch": 1.79, + "learning_rate": 4.701174433838668e-05, + "loss": 0.0654, + "step": 12774 + }, + { + "epoch": 1.79, + "learning_rate": 4.701127643645892e-05, + "loss": 0.086, + "step": 12776 + }, + { + "epoch": 1.79, + "learning_rate": 4.701080853453116e-05, + "loss": 0.0658, + "step": 12778 + }, + { + "epoch": 1.79, + "learning_rate": 4.701034063260341e-05, + "loss": 0.0805, + "step": 12780 + }, + { + "epoch": 1.79, + "learning_rate": 4.7009872730675654e-05, + "loss": 0.0665, + "step": 12782 + }, + { + "epoch": 1.79, + "learning_rate": 4.70094048287479e-05, + "loss": 0.0894, + "step": 12784 + }, + { + "epoch": 1.79, + "learning_rate": 4.700893692682014e-05, + "loss": 0.0769, + "step": 12786 + }, + { + "epoch": 1.8, + "learning_rate": 4.7008469024892385e-05, + "loss": 0.0898, + "step": 12788 + }, + { + "epoch": 1.8, + "learning_rate": 4.700800112296463e-05, + "loss": 0.0977, + "step": 12790 + }, + { + "epoch": 1.8, + "learning_rate": 4.700753322103687e-05, + "loss": 0.0745, + "step": 12792 + }, + { + "epoch": 1.8, + "learning_rate": 4.7007065319109116e-05, + "loss": 0.0907, + "step": 12794 + }, + { + "epoch": 1.8, + "learning_rate": 4.700659741718136e-05, + "loss": 0.0701, + "step": 12796 + }, + { + "epoch": 1.8, + "learning_rate": 4.700612951525361e-05, + "loss": 0.0849, + "step": 12798 + }, + { + "epoch": 1.8, + "learning_rate": 4.700566161332585e-05, + "loss": 0.1005, + "step": 12800 + }, + { + "epoch": 1.8, + "learning_rate": 4.700519371139809e-05, + "loss": 0.0653, + "step": 12802 + }, + { + "epoch": 1.8, + "learning_rate": 4.700472580947033e-05, + "loss": 0.0792, + "step": 12804 + }, + { + "epoch": 1.8, + "learning_rate": 4.7004257907542585e-05, + "loss": 0.1003, + "step": 12806 + }, + { + "epoch": 1.8, + "learning_rate": 4.7003790005614824e-05, + "loss": 0.1019, + "step": 12808 + }, + { + "epoch": 1.8, + "learning_rate": 4.700332210368707e-05, + "loss": 0.1073, + "step": 12810 + }, + { + "epoch": 1.8, + "learning_rate": 4.700285420175931e-05, + "loss": 0.0973, + "step": 12812 + }, + { + "epoch": 1.8, + "learning_rate": 4.7002386299831555e-05, + "loss": 0.1095, + "step": 12814 + }, + { + "epoch": 1.8, + "learning_rate": 4.70019183979038e-05, + "loss": 0.0908, + "step": 12816 + }, + { + "epoch": 1.8, + "learning_rate": 4.7001450495976046e-05, + "loss": 0.1097, + "step": 12818 + }, + { + "epoch": 1.8, + "learning_rate": 4.7000982594048286e-05, + "loss": 0.088, + "step": 12820 + }, + { + "epoch": 1.8, + "learning_rate": 4.700051469212053e-05, + "loss": 0.0861, + "step": 12822 + }, + { + "epoch": 1.8, + "learning_rate": 4.700004679019278e-05, + "loss": 0.0737, + "step": 12824 + }, + { + "epoch": 1.8, + "learning_rate": 4.699957888826502e-05, + "loss": 0.082, + "step": 12826 + }, + { + "epoch": 1.8, + "learning_rate": 4.699911098633726e-05, + "loss": 0.0848, + "step": 12828 + }, + { + "epoch": 1.8, + "learning_rate": 4.699864308440951e-05, + "loss": 0.1033, + "step": 12830 + }, + { + "epoch": 1.8, + "learning_rate": 4.6998175182481754e-05, + "loss": 0.091, + "step": 12832 + }, + { + "epoch": 1.8, + "learning_rate": 4.6997707280554e-05, + "loss": 0.0813, + "step": 12834 + }, + { + "epoch": 1.8, + "learning_rate": 4.699723937862624e-05, + "loss": 0.0833, + "step": 12836 + }, + { + "epoch": 1.8, + "learning_rate": 4.6996771476698485e-05, + "loss": 0.0817, + "step": 12838 + }, + { + "epoch": 1.8, + "learning_rate": 4.699630357477073e-05, + "loss": 0.07, + "step": 12840 + }, + { + "epoch": 1.8, + "learning_rate": 4.699583567284298e-05, + "loss": 0.0913, + "step": 12842 + }, + { + "epoch": 1.8, + "learning_rate": 4.6995367770915216e-05, + "loss": 0.0844, + "step": 12844 + }, + { + "epoch": 1.8, + "learning_rate": 4.699489986898746e-05, + "loss": 0.0987, + "step": 12846 + }, + { + "epoch": 1.8, + "learning_rate": 4.69944319670597e-05, + "loss": 0.0743, + "step": 12848 + }, + { + "epoch": 1.8, + "learning_rate": 4.6993964065131954e-05, + "loss": 0.1001, + "step": 12850 + }, + { + "epoch": 1.8, + "learning_rate": 4.699349616320419e-05, + "loss": 0.0953, + "step": 12852 + }, + { + "epoch": 1.8, + "learning_rate": 4.699302826127644e-05, + "loss": 0.0662, + "step": 12854 + }, + { + "epoch": 1.8, + "learning_rate": 4.699256035934868e-05, + "loss": 0.0551, + "step": 12856 + }, + { + "epoch": 1.8, + "learning_rate": 4.699209245742093e-05, + "loss": 0.101, + "step": 12858 + }, + { + "epoch": 1.81, + "learning_rate": 4.699162455549317e-05, + "loss": 0.083, + "step": 12860 + }, + { + "epoch": 1.81, + "learning_rate": 4.6991156653565416e-05, + "loss": 0.077, + "step": 12862 + }, + { + "epoch": 1.81, + "learning_rate": 4.6990688751637655e-05, + "loss": 0.0892, + "step": 12864 + }, + { + "epoch": 1.81, + "learning_rate": 4.69902208497099e-05, + "loss": 0.0972, + "step": 12866 + }, + { + "epoch": 1.81, + "learning_rate": 4.698975294778215e-05, + "loss": 0.0699, + "step": 12868 + }, + { + "epoch": 1.81, + "learning_rate": 4.698928504585439e-05, + "loss": 0.0878, + "step": 12870 + }, + { + "epoch": 1.81, + "learning_rate": 4.698881714392663e-05, + "loss": 0.0886, + "step": 12872 + }, + { + "epoch": 1.81, + "learning_rate": 4.698834924199888e-05, + "loss": 0.0696, + "step": 12874 + }, + { + "epoch": 1.81, + "learning_rate": 4.6987881340071124e-05, + "loss": 0.0991, + "step": 12876 + }, + { + "epoch": 1.81, + "learning_rate": 4.698741343814337e-05, + "loss": 0.0653, + "step": 12878 + }, + { + "epoch": 1.81, + "learning_rate": 4.698694553621561e-05, + "loss": 0.074, + "step": 12880 + }, + { + "epoch": 1.81, + "learning_rate": 4.6986477634287855e-05, + "loss": 0.0856, + "step": 12882 + }, + { + "epoch": 1.81, + "learning_rate": 4.69860097323601e-05, + "loss": 0.0951, + "step": 12884 + }, + { + "epoch": 1.81, + "learning_rate": 4.6985541830432346e-05, + "loss": 0.0908, + "step": 12886 + }, + { + "epoch": 1.81, + "learning_rate": 4.6985073928504586e-05, + "loss": 0.0951, + "step": 12888 + }, + { + "epoch": 1.81, + "learning_rate": 4.698460602657683e-05, + "loss": 0.0747, + "step": 12890 + }, + { + "epoch": 1.81, + "learning_rate": 4.698413812464908e-05, + "loss": 0.0823, + "step": 12892 + }, + { + "epoch": 1.81, + "learning_rate": 4.698367022272132e-05, + "loss": 0.0784, + "step": 12894 + }, + { + "epoch": 1.81, + "learning_rate": 4.698320232079356e-05, + "loss": 0.0788, + "step": 12896 + }, + { + "epoch": 1.81, + "learning_rate": 4.698273441886581e-05, + "loss": 0.0754, + "step": 12898 + }, + { + "epoch": 1.81, + "learning_rate": 4.698226651693805e-05, + "loss": 0.08, + "step": 12900 + }, + { + "epoch": 1.81, + "learning_rate": 4.69817986150103e-05, + "loss": 0.1191, + "step": 12902 + }, + { + "epoch": 1.81, + "learning_rate": 4.698133071308254e-05, + "loss": 0.0671, + "step": 12904 + }, + { + "epoch": 1.81, + "learning_rate": 4.6980862811154785e-05, + "loss": 0.0759, + "step": 12906 + }, + { + "epoch": 1.81, + "learning_rate": 4.6980394909227024e-05, + "loss": 0.0832, + "step": 12908 + }, + { + "epoch": 1.81, + "learning_rate": 4.697992700729927e-05, + "loss": 0.0873, + "step": 12910 + }, + { + "epoch": 1.81, + "learning_rate": 4.6979459105371516e-05, + "loss": 0.0718, + "step": 12912 + }, + { + "epoch": 1.81, + "learning_rate": 4.697899120344376e-05, + "loss": 0.0744, + "step": 12914 + }, + { + "epoch": 1.81, + "learning_rate": 4.6978523301516e-05, + "loss": 0.0865, + "step": 12916 + }, + { + "epoch": 1.81, + "learning_rate": 4.697805539958825e-05, + "loss": 0.0906, + "step": 12918 + }, + { + "epoch": 1.81, + "learning_rate": 4.697758749766049e-05, + "loss": 0.1134, + "step": 12920 + }, + { + "epoch": 1.81, + "learning_rate": 4.697711959573274e-05, + "loss": 0.0586, + "step": 12922 + }, + { + "epoch": 1.81, + "learning_rate": 4.697665169380498e-05, + "loss": 0.0672, + "step": 12924 + }, + { + "epoch": 1.81, + "learning_rate": 4.6976183791877224e-05, + "loss": 0.0727, + "step": 12926 + }, + { + "epoch": 1.81, + "learning_rate": 4.697571588994947e-05, + "loss": 0.0823, + "step": 12928 + }, + { + "epoch": 1.81, + "learning_rate": 4.6975247988021716e-05, + "loss": 0.0665, + "step": 12930 + }, + { + "epoch": 1.82, + "learning_rate": 4.6974780086093955e-05, + "loss": 0.0841, + "step": 12932 + }, + { + "epoch": 1.82, + "learning_rate": 4.69743121841662e-05, + "loss": 0.073, + "step": 12934 + }, + { + "epoch": 1.82, + "learning_rate": 4.697384428223845e-05, + "loss": 0.0805, + "step": 12936 + }, + { + "epoch": 1.82, + "learning_rate": 4.697337638031069e-05, + "loss": 0.0971, + "step": 12938 + }, + { + "epoch": 1.82, + "learning_rate": 4.697290847838293e-05, + "loss": 0.0782, + "step": 12940 + }, + { + "epoch": 1.82, + "learning_rate": 4.697244057645518e-05, + "loss": 0.0666, + "step": 12942 + }, + { + "epoch": 1.82, + "learning_rate": 4.697197267452742e-05, + "loss": 0.0874, + "step": 12944 + }, + { + "epoch": 1.82, + "learning_rate": 4.697150477259967e-05, + "loss": 0.087, + "step": 12946 + }, + { + "epoch": 1.82, + "learning_rate": 4.697103687067191e-05, + "loss": 0.067, + "step": 12948 + }, + { + "epoch": 1.82, + "learning_rate": 4.6970568968744155e-05, + "loss": 0.0841, + "step": 12950 + }, + { + "epoch": 1.82, + "learning_rate": 4.6970101066816394e-05, + "loss": 0.0915, + "step": 12952 + }, + { + "epoch": 1.82, + "learning_rate": 4.6969633164888646e-05, + "loss": 0.0671, + "step": 12954 + }, + { + "epoch": 1.82, + "learning_rate": 4.6969165262960886e-05, + "loss": 0.0947, + "step": 12956 + }, + { + "epoch": 1.82, + "learning_rate": 4.696869736103313e-05, + "loss": 0.0843, + "step": 12958 + }, + { + "epoch": 1.82, + "learning_rate": 4.696822945910537e-05, + "loss": 0.0736, + "step": 12960 + }, + { + "epoch": 1.82, + "learning_rate": 4.6967761557177617e-05, + "loss": 0.0629, + "step": 12962 + }, + { + "epoch": 1.82, + "learning_rate": 4.696729365524986e-05, + "loss": 0.119, + "step": 12964 + }, + { + "epoch": 1.82, + "learning_rate": 4.696682575332211e-05, + "loss": 0.0911, + "step": 12966 + }, + { + "epoch": 1.82, + "learning_rate": 4.696635785139435e-05, + "loss": 0.0837, + "step": 12968 + }, + { + "epoch": 1.82, + "learning_rate": 4.6965889949466593e-05, + "loss": 0.0801, + "step": 12970 + }, + { + "epoch": 1.82, + "learning_rate": 4.696542204753884e-05, + "loss": 0.07, + "step": 12972 + }, + { + "epoch": 1.82, + "learning_rate": 4.6964954145611085e-05, + "loss": 0.0721, + "step": 12974 + }, + { + "epoch": 1.82, + "learning_rate": 4.6964486243683324e-05, + "loss": 0.0727, + "step": 12976 + }, + { + "epoch": 1.82, + "learning_rate": 4.696401834175557e-05, + "loss": 0.092, + "step": 12978 + }, + { + "epoch": 1.82, + "learning_rate": 4.6963550439827816e-05, + "loss": 0.0906, + "step": 12980 + }, + { + "epoch": 1.82, + "learning_rate": 4.696308253790006e-05, + "loss": 0.0842, + "step": 12982 + }, + { + "epoch": 1.82, + "learning_rate": 4.69626146359723e-05, + "loss": 0.0721, + "step": 12984 + }, + { + "epoch": 1.82, + "learning_rate": 4.696214673404455e-05, + "loss": 0.0861, + "step": 12986 + }, + { + "epoch": 1.82, + "learning_rate": 4.696167883211679e-05, + "loss": 0.0657, + "step": 12988 + }, + { + "epoch": 1.82, + "learning_rate": 4.696121093018904e-05, + "loss": 0.0798, + "step": 12990 + }, + { + "epoch": 1.82, + "learning_rate": 4.696074302826128e-05, + "loss": 0.1028, + "step": 12992 + }, + { + "epoch": 1.82, + "learning_rate": 4.6960275126333524e-05, + "loss": 0.0893, + "step": 12994 + }, + { + "epoch": 1.82, + "learning_rate": 4.695980722440576e-05, + "loss": 0.0804, + "step": 12996 + }, + { + "epoch": 1.82, + "learning_rate": 4.6959339322478016e-05, + "loss": 0.071, + "step": 12998 + }, + { + "epoch": 1.82, + "learning_rate": 4.6958871420550255e-05, + "loss": 0.0995, + "step": 13000 + }, + { + "epoch": 1.82, + "eval_gen_len": 29.2069, + "eval_loss": 1.0517786741256714, + "eval_meteor": 0.0455, + "eval_runtime": 14.382, + "eval_samples_per_second": 4.033, + "eval_steps_per_second": 0.556, + "step": 13000 + }, + { + "epoch": 1.83, + "learning_rate": 4.69584035186225e-05, + "loss": 0.0864, + "step": 13002 + }, + { + "epoch": 1.83, + "learning_rate": 4.695793561669474e-05, + "loss": 0.062, + "step": 13004 + }, + { + "epoch": 1.83, + "learning_rate": 4.695746771476699e-05, + "loss": 0.0822, + "step": 13006 + }, + { + "epoch": 1.83, + "learning_rate": 4.695699981283923e-05, + "loss": 0.071, + "step": 13008 + }, + { + "epoch": 1.83, + "learning_rate": 4.695653191091148e-05, + "loss": 0.1013, + "step": 13010 + }, + { + "epoch": 1.83, + "learning_rate": 4.695606400898372e-05, + "loss": 0.0641, + "step": 13012 + }, + { + "epoch": 1.83, + "learning_rate": 4.695559610705596e-05, + "loss": 0.0776, + "step": 13014 + }, + { + "epoch": 1.83, + "learning_rate": 4.695512820512821e-05, + "loss": 0.081, + "step": 13016 + }, + { + "epoch": 1.83, + "learning_rate": 4.6954660303200455e-05, + "loss": 0.0809, + "step": 13018 + }, + { + "epoch": 1.83, + "learning_rate": 4.6954192401272694e-05, + "loss": 0.0897, + "step": 13020 + }, + { + "epoch": 1.83, + "learning_rate": 4.695372449934494e-05, + "loss": 0.0887, + "step": 13022 + }, + { + "epoch": 1.83, + "learning_rate": 4.6953256597417186e-05, + "loss": 0.0946, + "step": 13024 + }, + { + "epoch": 1.83, + "learning_rate": 4.695278869548943e-05, + "loss": 0.0753, + "step": 13026 + }, + { + "epoch": 1.83, + "learning_rate": 4.695232079356167e-05, + "loss": 0.079, + "step": 13028 + }, + { + "epoch": 1.83, + "learning_rate": 4.6951852891633917e-05, + "loss": 0.0981, + "step": 13030 + }, + { + "epoch": 1.83, + "learning_rate": 4.695138498970616e-05, + "loss": 0.1006, + "step": 13032 + }, + { + "epoch": 1.83, + "learning_rate": 4.695091708777841e-05, + "loss": 0.0621, + "step": 13034 + }, + { + "epoch": 1.83, + "learning_rate": 4.695044918585065e-05, + "loss": 0.0791, + "step": 13036 + }, + { + "epoch": 1.83, + "learning_rate": 4.6949981283922893e-05, + "loss": 0.1089, + "step": 13038 + }, + { + "epoch": 1.83, + "learning_rate": 4.694951338199514e-05, + "loss": 0.0858, + "step": 13040 + }, + { + "epoch": 1.83, + "learning_rate": 4.694904548006738e-05, + "loss": 0.0971, + "step": 13042 + }, + { + "epoch": 1.83, + "learning_rate": 4.6948577578139624e-05, + "loss": 0.0803, + "step": 13044 + }, + { + "epoch": 1.83, + "learning_rate": 4.6948109676211864e-05, + "loss": 0.0635, + "step": 13046 + }, + { + "epoch": 1.83, + "learning_rate": 4.694764177428411e-05, + "loss": 0.0969, + "step": 13048 + }, + { + "epoch": 1.83, + "learning_rate": 4.6947173872356355e-05, + "loss": 0.0941, + "step": 13050 + }, + { + "epoch": 1.83, + "learning_rate": 4.69467059704286e-05, + "loss": 0.0975, + "step": 13052 + }, + { + "epoch": 1.83, + "learning_rate": 4.694623806850084e-05, + "loss": 0.0821, + "step": 13054 + }, + { + "epoch": 1.83, + "learning_rate": 4.6945770166573086e-05, + "loss": 0.0905, + "step": 13056 + }, + { + "epoch": 1.83, + "learning_rate": 4.694530226464533e-05, + "loss": 0.0847, + "step": 13058 + }, + { + "epoch": 1.83, + "learning_rate": 4.694483436271758e-05, + "loss": 0.0875, + "step": 13060 + }, + { + "epoch": 1.83, + "learning_rate": 4.694436646078982e-05, + "loss": 0.0798, + "step": 13062 + }, + { + "epoch": 1.83, + "learning_rate": 4.694389855886206e-05, + "loss": 0.0898, + "step": 13064 + }, + { + "epoch": 1.83, + "learning_rate": 4.694343065693431e-05, + "loss": 0.0901, + "step": 13066 + }, + { + "epoch": 1.83, + "learning_rate": 4.6942962755006555e-05, + "loss": 0.1074, + "step": 13068 + }, + { + "epoch": 1.83, + "learning_rate": 4.6942494853078794e-05, + "loss": 0.0895, + "step": 13070 + }, + { + "epoch": 1.83, + "learning_rate": 4.694202695115104e-05, + "loss": 0.0819, + "step": 13072 + }, + { + "epoch": 1.84, + "learning_rate": 4.6941559049223286e-05, + "loss": 0.1017, + "step": 13074 + }, + { + "epoch": 1.84, + "learning_rate": 4.694109114729553e-05, + "loss": 0.0857, + "step": 13076 + }, + { + "epoch": 1.84, + "learning_rate": 4.694062324536777e-05, + "loss": 0.0768, + "step": 13078 + }, + { + "epoch": 1.84, + "learning_rate": 4.694015534344002e-05, + "loss": 0.0742, + "step": 13080 + }, + { + "epoch": 1.84, + "learning_rate": 4.6939687441512256e-05, + "loss": 0.1015, + "step": 13082 + }, + { + "epoch": 1.84, + "learning_rate": 4.693921953958451e-05, + "loss": 0.0989, + "step": 13084 + }, + { + "epoch": 1.84, + "learning_rate": 4.693875163765675e-05, + "loss": 0.0934, + "step": 13086 + }, + { + "epoch": 1.84, + "learning_rate": 4.6938283735728994e-05, + "loss": 0.0767, + "step": 13088 + }, + { + "epoch": 1.84, + "learning_rate": 4.693781583380123e-05, + "loss": 0.0917, + "step": 13090 + }, + { + "epoch": 1.84, + "learning_rate": 4.693734793187348e-05, + "loss": 0.0758, + "step": 13092 + }, + { + "epoch": 1.84, + "learning_rate": 4.6936880029945725e-05, + "loss": 0.1066, + "step": 13094 + }, + { + "epoch": 1.84, + "learning_rate": 4.693641212801797e-05, + "loss": 0.0982, + "step": 13096 + }, + { + "epoch": 1.84, + "learning_rate": 4.693594422609021e-05, + "loss": 0.0868, + "step": 13098 + }, + { + "epoch": 1.84, + "learning_rate": 4.6935476324162456e-05, + "loss": 0.0634, + "step": 13100 + }, + { + "epoch": 1.84, + "learning_rate": 4.69350084222347e-05, + "loss": 0.0647, + "step": 13102 + }, + { + "epoch": 1.84, + "learning_rate": 4.693454052030695e-05, + "loss": 0.075, + "step": 13104 + }, + { + "epoch": 1.84, + "learning_rate": 4.693407261837919e-05, + "loss": 0.08, + "step": 13106 + }, + { + "epoch": 1.84, + "learning_rate": 4.693360471645143e-05, + "loss": 0.0741, + "step": 13108 + }, + { + "epoch": 1.84, + "learning_rate": 4.693313681452368e-05, + "loss": 0.0889, + "step": 13110 + }, + { + "epoch": 1.84, + "learning_rate": 4.6932668912595924e-05, + "loss": 0.0747, + "step": 13112 + }, + { + "epoch": 1.84, + "learning_rate": 4.6932201010668164e-05, + "loss": 0.071, + "step": 13114 + }, + { + "epoch": 1.84, + "learning_rate": 4.693173310874041e-05, + "loss": 0.0843, + "step": 13116 + }, + { + "epoch": 1.84, + "learning_rate": 4.6931265206812655e-05, + "loss": 0.0856, + "step": 13118 + }, + { + "epoch": 1.84, + "learning_rate": 4.69307973048849e-05, + "loss": 0.1067, + "step": 13120 + }, + { + "epoch": 1.84, + "learning_rate": 4.693032940295714e-05, + "loss": 0.0665, + "step": 13122 + }, + { + "epoch": 1.84, + "learning_rate": 4.6929861501029386e-05, + "loss": 0.0728, + "step": 13124 + }, + { + "epoch": 1.84, + "learning_rate": 4.6929393599101625e-05, + "loss": 0.0911, + "step": 13126 + }, + { + "epoch": 1.84, + "learning_rate": 4.692892569717388e-05, + "loss": 0.1013, + "step": 13128 + }, + { + "epoch": 1.84, + "learning_rate": 4.692845779524612e-05, + "loss": 0.0608, + "step": 13130 + }, + { + "epoch": 1.84, + "learning_rate": 4.692798989331836e-05, + "loss": 0.0821, + "step": 13132 + }, + { + "epoch": 1.84, + "learning_rate": 4.69275219913906e-05, + "loss": 0.1001, + "step": 13134 + }, + { + "epoch": 1.84, + "learning_rate": 4.6927054089462855e-05, + "loss": 0.0867, + "step": 13136 + }, + { + "epoch": 1.84, + "learning_rate": 4.6926586187535094e-05, + "loss": 0.0828, + "step": 13138 + }, + { + "epoch": 1.84, + "learning_rate": 4.692611828560734e-05, + "loss": 0.0871, + "step": 13140 + }, + { + "epoch": 1.84, + "learning_rate": 4.692565038367958e-05, + "loss": 0.095, + "step": 13142 + }, + { + "epoch": 1.85, + "learning_rate": 4.6925182481751825e-05, + "loss": 0.0667, + "step": 13144 + }, + { + "epoch": 1.85, + "learning_rate": 4.692471457982407e-05, + "loss": 0.0967, + "step": 13146 + }, + { + "epoch": 1.85, + "learning_rate": 4.692424667789632e-05, + "loss": 0.0768, + "step": 13148 + }, + { + "epoch": 1.85, + "learning_rate": 4.6923778775968556e-05, + "loss": 0.0924, + "step": 13150 + }, + { + "epoch": 1.85, + "learning_rate": 4.69233108740408e-05, + "loss": 0.0602, + "step": 13152 + }, + { + "epoch": 1.85, + "learning_rate": 4.692284297211305e-05, + "loss": 0.098, + "step": 13154 + }, + { + "epoch": 1.85, + "learning_rate": 4.6922375070185294e-05, + "loss": 0.0871, + "step": 13156 + }, + { + "epoch": 1.85, + "learning_rate": 4.692190716825753e-05, + "loss": 0.0651, + "step": 13158 + }, + { + "epoch": 1.85, + "learning_rate": 4.692167321729366e-05, + "loss": 0.169, + "step": 13160 + }, + { + "epoch": 1.85, + "learning_rate": 4.69212053153659e-05, + "loss": 0.0942, + "step": 13162 + }, + { + "epoch": 1.85, + "learning_rate": 4.692073741343815e-05, + "loss": 0.0786, + "step": 13164 + }, + { + "epoch": 1.85, + "learning_rate": 4.692026951151039e-05, + "loss": 0.0576, + "step": 13166 + }, + { + "epoch": 1.85, + "learning_rate": 4.6919801609582636e-05, + "loss": 0.0804, + "step": 13168 + }, + { + "epoch": 1.85, + "learning_rate": 4.6919333707654875e-05, + "loss": 0.0876, + "step": 13170 + }, + { + "epoch": 1.85, + "learning_rate": 4.691886580572712e-05, + "loss": 0.0728, + "step": 13172 + }, + { + "epoch": 1.85, + "learning_rate": 4.691839790379937e-05, + "loss": 0.0686, + "step": 13174 + }, + { + "epoch": 1.85, + "learning_rate": 4.6917930001871606e-05, + "loss": 0.0839, + "step": 13176 + }, + { + "epoch": 1.85, + "learning_rate": 4.691746209994385e-05, + "loss": 0.0708, + "step": 13178 + }, + { + "epoch": 1.85, + "learning_rate": 4.69169941980161e-05, + "loss": 0.0886, + "step": 13180 + }, + { + "epoch": 1.85, + "learning_rate": 4.6916526296088344e-05, + "loss": 0.0977, + "step": 13182 + }, + { + "epoch": 1.85, + "learning_rate": 4.691605839416058e-05, + "loss": 0.0758, + "step": 13184 + }, + { + "epoch": 1.85, + "learning_rate": 4.691559049223283e-05, + "loss": 0.0708, + "step": 13186 + }, + { + "epoch": 1.85, + "learning_rate": 4.6915122590305075e-05, + "loss": 0.0796, + "step": 13188 + }, + { + "epoch": 1.85, + "learning_rate": 4.691465468837732e-05, + "loss": 0.0808, + "step": 13190 + }, + { + "epoch": 1.85, + "learning_rate": 4.691418678644956e-05, + "loss": 0.0826, + "step": 13192 + }, + { + "epoch": 1.85, + "learning_rate": 4.6913718884521806e-05, + "loss": 0.0899, + "step": 13194 + }, + { + "epoch": 1.85, + "learning_rate": 4.6913250982594045e-05, + "loss": 0.0659, + "step": 13196 + }, + { + "epoch": 1.85, + "learning_rate": 4.69127830806663e-05, + "loss": 0.0826, + "step": 13198 + }, + { + "epoch": 1.85, + "learning_rate": 4.691231517873854e-05, + "loss": 0.0948, + "step": 13200 + }, + { + "epoch": 1.85, + "learning_rate": 4.691184727681078e-05, + "loss": 0.074, + "step": 13202 + }, + { + "epoch": 1.85, + "learning_rate": 4.691137937488302e-05, + "loss": 0.0961, + "step": 13204 + }, + { + "epoch": 1.85, + "learning_rate": 4.691091147295527e-05, + "loss": 0.0726, + "step": 13206 + }, + { + "epoch": 1.85, + "learning_rate": 4.6910443571027514e-05, + "loss": 0.0844, + "step": 13208 + }, + { + "epoch": 1.85, + "learning_rate": 4.690997566909976e-05, + "loss": 0.0673, + "step": 13210 + }, + { + "epoch": 1.85, + "learning_rate": 4.6909507767172e-05, + "loss": 0.0706, + "step": 13212 + }, + { + "epoch": 1.85, + "learning_rate": 4.6909039865244245e-05, + "loss": 0.1078, + "step": 13214 + }, + { + "epoch": 1.86, + "learning_rate": 4.690857196331649e-05, + "loss": 0.0711, + "step": 13216 + }, + { + "epoch": 1.86, + "learning_rate": 4.6908104061388737e-05, + "loss": 0.0799, + "step": 13218 + }, + { + "epoch": 1.86, + "learning_rate": 4.6907636159460976e-05, + "loss": 0.071, + "step": 13220 + }, + { + "epoch": 1.86, + "learning_rate": 4.690716825753322e-05, + "loss": 0.0848, + "step": 13222 + }, + { + "epoch": 1.86, + "learning_rate": 4.690670035560547e-05, + "loss": 0.0854, + "step": 13224 + }, + { + "epoch": 1.86, + "learning_rate": 4.6906232453677713e-05, + "loss": 0.0851, + "step": 13226 + }, + { + "epoch": 1.86, + "learning_rate": 4.690576455174995e-05, + "loss": 0.0686, + "step": 13228 + }, + { + "epoch": 1.86, + "learning_rate": 4.69052966498222e-05, + "loss": 0.1077, + "step": 13230 + }, + { + "epoch": 1.86, + "learning_rate": 4.6904828747894444e-05, + "loss": 0.0749, + "step": 13232 + }, + { + "epoch": 1.86, + "learning_rate": 4.690436084596669e-05, + "loss": 0.0885, + "step": 13234 + }, + { + "epoch": 1.86, + "learning_rate": 4.690389294403893e-05, + "loss": 0.0927, + "step": 13236 + }, + { + "epoch": 1.86, + "learning_rate": 4.6903425042111175e-05, + "loss": 0.0764, + "step": 13238 + }, + { + "epoch": 1.86, + "learning_rate": 4.6902957140183414e-05, + "loss": 0.0975, + "step": 13240 + }, + { + "epoch": 1.86, + "learning_rate": 4.690248923825567e-05, + "loss": 0.0649, + "step": 13242 + }, + { + "epoch": 1.86, + "learning_rate": 4.6902021336327906e-05, + "loss": 0.0905, + "step": 13244 + }, + { + "epoch": 1.86, + "learning_rate": 4.690155343440015e-05, + "loss": 0.0837, + "step": 13246 + }, + { + "epoch": 1.86, + "learning_rate": 4.690108553247239e-05, + "loss": 0.0855, + "step": 13248 + }, + { + "epoch": 1.86, + "learning_rate": 4.6900617630544644e-05, + "loss": 0.0862, + "step": 13250 + }, + { + "epoch": 1.86, + "learning_rate": 4.690014972861688e-05, + "loss": 0.0878, + "step": 13252 + }, + { + "epoch": 1.86, + "learning_rate": 4.689968182668913e-05, + "loss": 0.0909, + "step": 13254 + }, + { + "epoch": 1.86, + "learning_rate": 4.689921392476137e-05, + "loss": 0.0998, + "step": 13256 + }, + { + "epoch": 1.86, + "learning_rate": 4.6898746022833614e-05, + "loss": 0.0955, + "step": 13258 + }, + { + "epoch": 1.86, + "learning_rate": 4.689827812090586e-05, + "loss": 0.1101, + "step": 13260 + }, + { + "epoch": 1.86, + "learning_rate": 4.6897810218978106e-05, + "loss": 0.0848, + "step": 13262 + }, + { + "epoch": 1.86, + "learning_rate": 4.6897342317050345e-05, + "loss": 0.095, + "step": 13264 + }, + { + "epoch": 1.86, + "learning_rate": 4.689687441512259e-05, + "loss": 0.086, + "step": 13266 + }, + { + "epoch": 1.86, + "learning_rate": 4.689640651319484e-05, + "loss": 0.096, + "step": 13268 + }, + { + "epoch": 1.86, + "learning_rate": 4.689593861126708e-05, + "loss": 0.0887, + "step": 13270 + }, + { + "epoch": 1.86, + "learning_rate": 4.689547070933932e-05, + "loss": 0.0871, + "step": 13272 + }, + { + "epoch": 1.86, + "learning_rate": 4.689500280741157e-05, + "loss": 0.1057, + "step": 13274 + }, + { + "epoch": 1.86, + "learning_rate": 4.6894534905483814e-05, + "loss": 0.0844, + "step": 13276 + }, + { + "epoch": 1.86, + "learning_rate": 4.689406700355606e-05, + "loss": 0.0959, + "step": 13278 + }, + { + "epoch": 1.86, + "learning_rate": 4.68935991016283e-05, + "loss": 0.0793, + "step": 13280 + }, + { + "epoch": 1.86, + "learning_rate": 4.6893131199700545e-05, + "loss": 0.0894, + "step": 13282 + }, + { + "epoch": 1.86, + "learning_rate": 4.689266329777279e-05, + "loss": 0.1122, + "step": 13284 + }, + { + "epoch": 1.86, + "learning_rate": 4.6892195395845037e-05, + "loss": 0.0787, + "step": 13286 + }, + { + "epoch": 1.87, + "learning_rate": 4.6891727493917276e-05, + "loss": 0.0738, + "step": 13288 + }, + { + "epoch": 1.87, + "learning_rate": 4.689125959198952e-05, + "loss": 0.1069, + "step": 13290 + }, + { + "epoch": 1.87, + "learning_rate": 4.689079169006176e-05, + "loss": 0.0706, + "step": 13292 + }, + { + "epoch": 1.87, + "learning_rate": 4.6890323788134013e-05, + "loss": 0.0879, + "step": 13294 + }, + { + "epoch": 1.87, + "learning_rate": 4.688985588620625e-05, + "loss": 0.0904, + "step": 13296 + }, + { + "epoch": 1.87, + "learning_rate": 4.68893879842785e-05, + "loss": 0.1037, + "step": 13298 + }, + { + "epoch": 1.87, + "learning_rate": 4.688892008235074e-05, + "loss": 0.0918, + "step": 13300 + }, + { + "epoch": 1.87, + "learning_rate": 4.688845218042299e-05, + "loss": 0.1026, + "step": 13302 + }, + { + "epoch": 1.87, + "learning_rate": 4.688798427849523e-05, + "loss": 0.0706, + "step": 13304 + }, + { + "epoch": 1.87, + "learning_rate": 4.6887516376567475e-05, + "loss": 0.0817, + "step": 13306 + }, + { + "epoch": 1.87, + "learning_rate": 4.6887048474639714e-05, + "loss": 0.0742, + "step": 13308 + }, + { + "epoch": 1.87, + "learning_rate": 4.688658057271196e-05, + "loss": 0.0702, + "step": 13310 + }, + { + "epoch": 1.87, + "learning_rate": 4.6886112670784206e-05, + "loss": 0.1123, + "step": 13312 + }, + { + "epoch": 1.87, + "learning_rate": 4.688564476885645e-05, + "loss": 0.1076, + "step": 13314 + }, + { + "epoch": 1.87, + "learning_rate": 4.688517686692869e-05, + "loss": 0.108, + "step": 13316 + }, + { + "epoch": 1.87, + "learning_rate": 4.688470896500094e-05, + "loss": 0.1129, + "step": 13318 + }, + { + "epoch": 1.87, + "learning_rate": 4.688424106307318e-05, + "loss": 0.0735, + "step": 13320 + }, + { + "epoch": 1.87, + "learning_rate": 4.688377316114543e-05, + "loss": 0.0797, + "step": 13322 + }, + { + "epoch": 1.87, + "learning_rate": 4.688330525921767e-05, + "loss": 0.0802, + "step": 13324 + }, + { + "epoch": 1.87, + "learning_rate": 4.6882837357289914e-05, + "loss": 0.0914, + "step": 13326 + }, + { + "epoch": 1.87, + "learning_rate": 4.688236945536216e-05, + "loss": 0.0757, + "step": 13328 + }, + { + "epoch": 1.87, + "learning_rate": 4.6881901553434406e-05, + "loss": 0.0616, + "step": 13330 + }, + { + "epoch": 1.87, + "learning_rate": 4.6881433651506645e-05, + "loss": 0.0916, + "step": 13332 + }, + { + "epoch": 1.87, + "learning_rate": 4.688096574957889e-05, + "loss": 0.0731, + "step": 13334 + }, + { + "epoch": 1.87, + "learning_rate": 4.688049784765114e-05, + "loss": 0.0945, + "step": 13336 + }, + { + "epoch": 1.87, + "learning_rate": 4.688002994572338e-05, + "loss": 0.1139, + "step": 13338 + }, + { + "epoch": 1.87, + "learning_rate": 4.687956204379562e-05, + "loss": 0.0869, + "step": 13340 + }, + { + "epoch": 1.87, + "learning_rate": 4.687909414186787e-05, + "loss": 0.0807, + "step": 13342 + }, + { + "epoch": 1.87, + "learning_rate": 4.687862623994011e-05, + "loss": 0.0698, + "step": 13344 + }, + { + "epoch": 1.87, + "learning_rate": 4.687815833801236e-05, + "loss": 0.1004, + "step": 13346 + }, + { + "epoch": 1.87, + "learning_rate": 4.68776904360846e-05, + "loss": 0.0944, + "step": 13348 + }, + { + "epoch": 1.87, + "learning_rate": 4.6877222534156845e-05, + "loss": 0.0938, + "step": 13350 + }, + { + "epoch": 1.87, + "learning_rate": 4.6876754632229084e-05, + "loss": 0.0711, + "step": 13352 + }, + { + "epoch": 1.87, + "learning_rate": 4.687628673030133e-05, + "loss": 0.0859, + "step": 13354 + }, + { + "epoch": 1.87, + "learning_rate": 4.6875818828373576e-05, + "loss": 0.0783, + "step": 13356 + }, + { + "epoch": 1.88, + "learning_rate": 4.687535092644582e-05, + "loss": 0.076, + "step": 13358 + }, + { + "epoch": 1.88, + "learning_rate": 4.687488302451806e-05, + "loss": 0.0679, + "step": 13360 + }, + { + "epoch": 1.88, + "learning_rate": 4.687441512259031e-05, + "loss": 0.0665, + "step": 13362 + }, + { + "epoch": 1.88, + "learning_rate": 4.687394722066255e-05, + "loss": 0.0869, + "step": 13364 + }, + { + "epoch": 1.88, + "learning_rate": 4.68734793187348e-05, + "loss": 0.0705, + "step": 13366 + }, + { + "epoch": 1.88, + "learning_rate": 4.687301141680704e-05, + "loss": 0.1087, + "step": 13368 + }, + { + "epoch": 1.88, + "learning_rate": 4.6872543514879284e-05, + "loss": 0.0877, + "step": 13370 + }, + { + "epoch": 1.88, + "learning_rate": 4.687207561295153e-05, + "loss": 0.0842, + "step": 13372 + }, + { + "epoch": 1.88, + "learning_rate": 4.6871607711023775e-05, + "loss": 0.0827, + "step": 13374 + }, + { + "epoch": 1.88, + "learning_rate": 4.6871139809096015e-05, + "loss": 0.0682, + "step": 13376 + }, + { + "epoch": 1.88, + "learning_rate": 4.687067190716826e-05, + "loss": 0.0734, + "step": 13378 + }, + { + "epoch": 1.88, + "learning_rate": 4.6870204005240506e-05, + "loss": 0.1011, + "step": 13380 + }, + { + "epoch": 1.88, + "learning_rate": 4.686973610331275e-05, + "loss": 0.0867, + "step": 13382 + }, + { + "epoch": 1.88, + "learning_rate": 4.686926820138499e-05, + "loss": 0.0709, + "step": 13384 + }, + { + "epoch": 1.88, + "learning_rate": 4.686880029945724e-05, + "loss": 0.0679, + "step": 13386 + }, + { + "epoch": 1.88, + "learning_rate": 4.6868332397529476e-05, + "loss": 0.0864, + "step": 13388 + }, + { + "epoch": 1.88, + "learning_rate": 4.686786449560173e-05, + "loss": 0.0858, + "step": 13390 + }, + { + "epoch": 1.88, + "learning_rate": 4.686739659367397e-05, + "loss": 0.0901, + "step": 13392 + }, + { + "epoch": 1.88, + "learning_rate": 4.6866928691746214e-05, + "loss": 0.0751, + "step": 13394 + }, + { + "epoch": 1.88, + "learning_rate": 4.686646078981845e-05, + "loss": 0.0931, + "step": 13396 + }, + { + "epoch": 1.88, + "learning_rate": 4.6865992887890706e-05, + "loss": 0.1245, + "step": 13398 + }, + { + "epoch": 1.88, + "learning_rate": 4.6865524985962945e-05, + "loss": 0.0741, + "step": 13400 + }, + { + "epoch": 1.88, + "learning_rate": 4.686505708403519e-05, + "loss": 0.0923, + "step": 13402 + }, + { + "epoch": 1.88, + "learning_rate": 4.686458918210743e-05, + "loss": 0.0938, + "step": 13404 + }, + { + "epoch": 1.88, + "learning_rate": 4.6864121280179676e-05, + "loss": 0.0802, + "step": 13406 + }, + { + "epoch": 1.88, + "learning_rate": 4.686365337825192e-05, + "loss": 0.096, + "step": 13408 + }, + { + "epoch": 1.88, + "learning_rate": 4.686318547632417e-05, + "loss": 0.1033, + "step": 13410 + }, + { + "epoch": 1.88, + "learning_rate": 4.686271757439641e-05, + "loss": 0.0859, + "step": 13412 + }, + { + "epoch": 1.88, + "learning_rate": 4.686224967246865e-05, + "loss": 0.1024, + "step": 13414 + }, + { + "epoch": 1.88, + "learning_rate": 4.68617817705409e-05, + "loss": 0.0806, + "step": 13416 + }, + { + "epoch": 1.88, + "learning_rate": 4.6861313868613145e-05, + "loss": 0.1089, + "step": 13418 + }, + { + "epoch": 1.88, + "learning_rate": 4.6860845966685384e-05, + "loss": 0.1144, + "step": 13420 + }, + { + "epoch": 1.88, + "learning_rate": 4.686037806475763e-05, + "loss": 0.089, + "step": 13422 + }, + { + "epoch": 1.88, + "learning_rate": 4.6859910162829876e-05, + "loss": 0.1023, + "step": 13424 + }, + { + "epoch": 1.88, + "learning_rate": 4.6859442260902115e-05, + "loss": 0.0765, + "step": 13426 + }, + { + "epoch": 1.88, + "learning_rate": 4.685897435897436e-05, + "loss": 0.0894, + "step": 13428 + }, + { + "epoch": 1.89, + "learning_rate": 4.68585064570466e-05, + "loss": 0.0809, + "step": 13430 + }, + { + "epoch": 1.89, + "learning_rate": 4.685803855511885e-05, + "loss": 0.0723, + "step": 13432 + }, + { + "epoch": 1.89, + "learning_rate": 4.685757065319109e-05, + "loss": 0.0896, + "step": 13434 + }, + { + "epoch": 1.89, + "learning_rate": 4.685710275126334e-05, + "loss": 0.0995, + "step": 13436 + }, + { + "epoch": 1.89, + "learning_rate": 4.685663484933558e-05, + "loss": 0.0846, + "step": 13438 + }, + { + "epoch": 1.89, + "learning_rate": 4.685616694740782e-05, + "loss": 0.074, + "step": 13440 + }, + { + "epoch": 1.89, + "learning_rate": 4.685569904548007e-05, + "loss": 0.1063, + "step": 13442 + }, + { + "epoch": 1.89, + "learning_rate": 4.6855231143552315e-05, + "loss": 0.0902, + "step": 13444 + }, + { + "epoch": 1.89, + "learning_rate": 4.6854763241624554e-05, + "loss": 0.0906, + "step": 13446 + }, + { + "epoch": 1.89, + "learning_rate": 4.68542953396968e-05, + "loss": 0.0996, + "step": 13448 + }, + { + "epoch": 1.89, + "learning_rate": 4.6853827437769045e-05, + "loss": 0.1113, + "step": 13450 + }, + { + "epoch": 1.89, + "learning_rate": 4.685335953584129e-05, + "loss": 0.0751, + "step": 13452 + }, + { + "epoch": 1.89, + "learning_rate": 4.685289163391353e-05, + "loss": 0.0791, + "step": 13454 + }, + { + "epoch": 1.89, + "learning_rate": 4.6852423731985776e-05, + "loss": 0.0825, + "step": 13456 + }, + { + "epoch": 1.89, + "learning_rate": 4.685195583005802e-05, + "loss": 0.0831, + "step": 13458 + }, + { + "epoch": 1.89, + "learning_rate": 4.685148792813027e-05, + "loss": 0.0823, + "step": 13460 + }, + { + "epoch": 1.89, + "learning_rate": 4.685102002620251e-05, + "loss": 0.0809, + "step": 13462 + }, + { + "epoch": 1.89, + "learning_rate": 4.685055212427475e-05, + "loss": 0.0869, + "step": 13464 + }, + { + "epoch": 1.89, + "learning_rate": 4.6850084222347e-05, + "loss": 0.0849, + "step": 13466 + }, + { + "epoch": 1.89, + "learning_rate": 4.6849616320419245e-05, + "loss": 0.079, + "step": 13468 + }, + { + "epoch": 1.89, + "learning_rate": 4.6849148418491484e-05, + "loss": 0.0844, + "step": 13470 + }, + { + "epoch": 1.89, + "learning_rate": 4.684868051656373e-05, + "loss": 0.0985, + "step": 13472 + }, + { + "epoch": 1.89, + "learning_rate": 4.684821261463597e-05, + "loss": 0.0731, + "step": 13474 + }, + { + "epoch": 1.89, + "learning_rate": 4.684774471270822e-05, + "loss": 0.0709, + "step": 13476 + }, + { + "epoch": 1.89, + "learning_rate": 4.684727681078046e-05, + "loss": 0.0901, + "step": 13478 + }, + { + "epoch": 1.89, + "learning_rate": 4.684680890885271e-05, + "loss": 0.0801, + "step": 13480 + }, + { + "epoch": 1.89, + "learning_rate": 4.6846341006924946e-05, + "loss": 0.0704, + "step": 13482 + }, + { + "epoch": 1.89, + "learning_rate": 4.68458731049972e-05, + "loss": 0.0871, + "step": 13484 + }, + { + "epoch": 1.89, + "learning_rate": 4.684540520306944e-05, + "loss": 0.1005, + "step": 13486 + }, + { + "epoch": 1.89, + "learning_rate": 4.6844937301141684e-05, + "loss": 0.0727, + "step": 13488 + }, + { + "epoch": 1.89, + "learning_rate": 4.684446939921392e-05, + "loss": 0.0639, + "step": 13490 + }, + { + "epoch": 1.89, + "learning_rate": 4.684400149728617e-05, + "loss": 0.0865, + "step": 13492 + }, + { + "epoch": 1.89, + "learning_rate": 4.6843533595358415e-05, + "loss": 0.069, + "step": 13494 + }, + { + "epoch": 1.89, + "learning_rate": 4.684306569343066e-05, + "loss": 0.0564, + "step": 13496 + }, + { + "epoch": 1.89, + "learning_rate": 4.68425977915029e-05, + "loss": 0.0919, + "step": 13498 + }, + { + "epoch": 1.9, + "learning_rate": 4.6842129889575146e-05, + "loss": 0.0692, + "step": 13500 + }, + { + "epoch": 1.9, + "learning_rate": 4.684166198764739e-05, + "loss": 0.0836, + "step": 13502 + }, + { + "epoch": 1.9, + "learning_rate": 4.684119408571964e-05, + "loss": 0.0906, + "step": 13504 + }, + { + "epoch": 1.9, + "learning_rate": 4.684072618379188e-05, + "loss": 0.0736, + "step": 13506 + }, + { + "epoch": 1.9, + "learning_rate": 4.684025828186412e-05, + "loss": 0.0942, + "step": 13508 + }, + { + "epoch": 1.9, + "learning_rate": 4.683979037993637e-05, + "loss": 0.0831, + "step": 13510 + }, + { + "epoch": 1.9, + "learning_rate": 4.6839322478008615e-05, + "loss": 0.0843, + "step": 13512 + }, + { + "epoch": 1.9, + "learning_rate": 4.6838854576080854e-05, + "loss": 0.0816, + "step": 13514 + }, + { + "epoch": 1.9, + "learning_rate": 4.68383866741531e-05, + "loss": 0.0848, + "step": 13516 + }, + { + "epoch": 1.9, + "learning_rate": 4.683791877222534e-05, + "loss": 0.0976, + "step": 13518 + }, + { + "epoch": 1.9, + "learning_rate": 4.683745087029759e-05, + "loss": 0.0681, + "step": 13520 + }, + { + "epoch": 1.9, + "learning_rate": 4.683698296836983e-05, + "loss": 0.0999, + "step": 13522 + }, + { + "epoch": 1.9, + "learning_rate": 4.6836515066442076e-05, + "loss": 0.0753, + "step": 13524 + }, + { + "epoch": 1.9, + "learning_rate": 4.6836047164514316e-05, + "loss": 0.0841, + "step": 13526 + }, + { + "epoch": 1.9, + "learning_rate": 4.683557926258657e-05, + "loss": 0.0818, + "step": 13528 + }, + { + "epoch": 1.9, + "learning_rate": 4.683511136065881e-05, + "loss": 0.0804, + "step": 13530 + }, + { + "epoch": 1.9, + "learning_rate": 4.683464345873105e-05, + "loss": 0.0679, + "step": 13532 + }, + { + "epoch": 1.9, + "learning_rate": 4.683417555680329e-05, + "loss": 0.091, + "step": 13534 + }, + { + "epoch": 1.9, + "learning_rate": 4.683370765487554e-05, + "loss": 0.0876, + "step": 13536 + }, + { + "epoch": 1.9, + "learning_rate": 4.6833239752947784e-05, + "loss": 0.0914, + "step": 13538 + }, + { + "epoch": 1.9, + "learning_rate": 4.683277185102003e-05, + "loss": 0.1029, + "step": 13540 + }, + { + "epoch": 1.9, + "learning_rate": 4.683230394909227e-05, + "loss": 0.0933, + "step": 13542 + }, + { + "epoch": 1.9, + "learning_rate": 4.6831836047164515e-05, + "loss": 0.0913, + "step": 13544 + }, + { + "epoch": 1.9, + "learning_rate": 4.683136814523676e-05, + "loss": 0.1064, + "step": 13546 + }, + { + "epoch": 1.9, + "learning_rate": 4.683090024330901e-05, + "loss": 0.0847, + "step": 13548 + }, + { + "epoch": 1.9, + "learning_rate": 4.6830432341381246e-05, + "loss": 0.101, + "step": 13550 + }, + { + "epoch": 1.9, + "learning_rate": 4.682996443945349e-05, + "loss": 0.0917, + "step": 13552 + }, + { + "epoch": 1.9, + "learning_rate": 4.682949653752574e-05, + "loss": 0.0954, + "step": 13554 + }, + { + "epoch": 1.9, + "learning_rate": 4.6829028635597984e-05, + "loss": 0.0735, + "step": 13556 + }, + { + "epoch": 1.9, + "learning_rate": 4.682856073367022e-05, + "loss": 0.0715, + "step": 13558 + }, + { + "epoch": 1.9, + "learning_rate": 4.682809283174247e-05, + "loss": 0.0936, + "step": 13560 + }, + { + "epoch": 1.9, + "learning_rate": 4.6827624929814715e-05, + "loss": 0.0892, + "step": 13562 + }, + { + "epoch": 1.9, + "learning_rate": 4.682715702788696e-05, + "loss": 0.085, + "step": 13564 + }, + { + "epoch": 1.9, + "learning_rate": 4.68266891259592e-05, + "loss": 0.0854, + "step": 13566 + }, + { + "epoch": 1.9, + "learning_rate": 4.6826221224031446e-05, + "loss": 0.0997, + "step": 13568 + }, + { + "epoch": 1.9, + "learning_rate": 4.6825753322103685e-05, + "loss": 0.0778, + "step": 13570 + }, + { + "epoch": 1.91, + "learning_rate": 4.682528542017594e-05, + "loss": 0.0742, + "step": 13572 + }, + { + "epoch": 1.91, + "learning_rate": 4.682481751824818e-05, + "loss": 0.0959, + "step": 13574 + }, + { + "epoch": 1.91, + "learning_rate": 4.682434961632042e-05, + "loss": 0.0921, + "step": 13576 + }, + { + "epoch": 1.91, + "learning_rate": 4.682388171439266e-05, + "loss": 0.0991, + "step": 13578 + }, + { + "epoch": 1.91, + "learning_rate": 4.6823413812464915e-05, + "loss": 0.0825, + "step": 13580 + }, + { + "epoch": 1.91, + "learning_rate": 4.6822945910537154e-05, + "loss": 0.0795, + "step": 13582 + }, + { + "epoch": 1.91, + "learning_rate": 4.68224780086094e-05, + "loss": 0.0881, + "step": 13584 + }, + { + "epoch": 1.91, + "learning_rate": 4.682201010668164e-05, + "loss": 0.0804, + "step": 13586 + }, + { + "epoch": 1.91, + "learning_rate": 4.6821542204753885e-05, + "loss": 0.0766, + "step": 13588 + }, + { + "epoch": 1.91, + "learning_rate": 4.682107430282613e-05, + "loss": 0.0833, + "step": 13590 + }, + { + "epoch": 1.91, + "learning_rate": 4.6820606400898376e-05, + "loss": 0.0696, + "step": 13592 + }, + { + "epoch": 1.91, + "learning_rate": 4.6820138498970616e-05, + "loss": 0.0769, + "step": 13594 + }, + { + "epoch": 1.91, + "learning_rate": 4.681967059704286e-05, + "loss": 0.0775, + "step": 13596 + }, + { + "epoch": 1.91, + "learning_rate": 4.681920269511511e-05, + "loss": 0.0818, + "step": 13598 + }, + { + "epoch": 1.91, + "learning_rate": 4.681873479318735e-05, + "loss": 0.0834, + "step": 13600 + }, + { + "epoch": 1.91, + "learning_rate": 4.681826689125959e-05, + "loss": 0.1017, + "step": 13602 + }, + { + "epoch": 1.91, + "learning_rate": 4.681779898933184e-05, + "loss": 0.077, + "step": 13604 + }, + { + "epoch": 1.91, + "learning_rate": 4.6817331087404084e-05, + "loss": 0.0925, + "step": 13606 + }, + { + "epoch": 1.91, + "learning_rate": 4.681686318547633e-05, + "loss": 0.0859, + "step": 13608 + }, + { + "epoch": 1.91, + "learning_rate": 4.681639528354857e-05, + "loss": 0.0888, + "step": 13610 + }, + { + "epoch": 1.91, + "learning_rate": 4.6815927381620815e-05, + "loss": 0.1129, + "step": 13612 + }, + { + "epoch": 1.91, + "learning_rate": 4.681545947969306e-05, + "loss": 0.0808, + "step": 13614 + }, + { + "epoch": 1.91, + "learning_rate": 4.681499157776531e-05, + "loss": 0.0833, + "step": 13616 + }, + { + "epoch": 1.91, + "learning_rate": 4.6814523675837546e-05, + "loss": 0.0898, + "step": 13618 + }, + { + "epoch": 1.91, + "learning_rate": 4.681405577390979e-05, + "loss": 0.0736, + "step": 13620 + }, + { + "epoch": 1.91, + "learning_rate": 4.681358787198203e-05, + "loss": 0.0834, + "step": 13622 + }, + { + "epoch": 1.91, + "learning_rate": 4.6813119970054284e-05, + "loss": 0.0705, + "step": 13624 + }, + { + "epoch": 1.91, + "learning_rate": 4.681265206812652e-05, + "loss": 0.0742, + "step": 13626 + }, + { + "epoch": 1.91, + "learning_rate": 4.681241811716264e-05, + "loss": 0.0926, + "step": 13628 + }, + { + "epoch": 1.91, + "learning_rate": 4.681195021523489e-05, + "loss": 0.0784, + "step": 13630 + }, + { + "epoch": 1.91, + "learning_rate": 4.6811482313307134e-05, + "loss": 0.0995, + "step": 13632 + }, + { + "epoch": 1.91, + "learning_rate": 4.681101441137938e-05, + "loss": 0.0833, + "step": 13634 + }, + { + "epoch": 1.91, + "learning_rate": 4.681054650945162e-05, + "loss": 0.0907, + "step": 13636 + }, + { + "epoch": 1.91, + "learning_rate": 4.6810078607523865e-05, + "loss": 0.0694, + "step": 13638 + }, + { + "epoch": 1.91, + "learning_rate": 4.6809610705596105e-05, + "loss": 0.07, + "step": 13640 + }, + { + "epoch": 1.91, + "learning_rate": 4.680914280366836e-05, + "loss": 0.0885, + "step": 13642 + }, + { + "epoch": 1.92, + "learning_rate": 4.6808674901740596e-05, + "loss": 0.0748, + "step": 13644 + }, + { + "epoch": 1.92, + "learning_rate": 4.680820699981284e-05, + "loss": 0.07, + "step": 13646 + }, + { + "epoch": 1.92, + "learning_rate": 4.680773909788508e-05, + "loss": 0.0625, + "step": 13648 + }, + { + "epoch": 1.92, + "learning_rate": 4.680727119595733e-05, + "loss": 0.0632, + "step": 13650 + }, + { + "epoch": 1.92, + "learning_rate": 4.680680329402957e-05, + "loss": 0.0726, + "step": 13652 + }, + { + "epoch": 1.92, + "learning_rate": 4.680633539210182e-05, + "loss": 0.0738, + "step": 13654 + }, + { + "epoch": 1.92, + "learning_rate": 4.680586749017406e-05, + "loss": 0.0956, + "step": 13656 + }, + { + "epoch": 1.92, + "learning_rate": 4.6805399588246304e-05, + "loss": 0.0864, + "step": 13658 + }, + { + "epoch": 1.92, + "learning_rate": 4.680493168631855e-05, + "loss": 0.0802, + "step": 13660 + }, + { + "epoch": 1.92, + "learning_rate": 4.6804463784390796e-05, + "loss": 0.0783, + "step": 13662 + }, + { + "epoch": 1.92, + "learning_rate": 4.6803995882463035e-05, + "loss": 0.0808, + "step": 13664 + }, + { + "epoch": 1.92, + "learning_rate": 4.680352798053528e-05, + "loss": 0.083, + "step": 13666 + }, + { + "epoch": 1.92, + "learning_rate": 4.680306007860753e-05, + "loss": 0.0921, + "step": 13668 + }, + { + "epoch": 1.92, + "learning_rate": 4.680259217667977e-05, + "loss": 0.0798, + "step": 13670 + }, + { + "epoch": 1.92, + "learning_rate": 4.680212427475201e-05, + "loss": 0.0709, + "step": 13672 + }, + { + "epoch": 1.92, + "learning_rate": 4.680165637282426e-05, + "loss": 0.0752, + "step": 13674 + }, + { + "epoch": 1.92, + "learning_rate": 4.6801188470896504e-05, + "loss": 0.0701, + "step": 13676 + }, + { + "epoch": 1.92, + "learning_rate": 4.680072056896875e-05, + "loss": 0.1258, + "step": 13678 + }, + { + "epoch": 1.92, + "learning_rate": 4.680025266704099e-05, + "loss": 0.0913, + "step": 13680 + }, + { + "epoch": 1.92, + "learning_rate": 4.6799784765113235e-05, + "loss": 0.0848, + "step": 13682 + }, + { + "epoch": 1.92, + "learning_rate": 4.6799316863185474e-05, + "loss": 0.0862, + "step": 13684 + }, + { + "epoch": 1.92, + "learning_rate": 4.679884896125773e-05, + "loss": 0.1008, + "step": 13686 + }, + { + "epoch": 1.92, + "learning_rate": 4.6798381059329966e-05, + "loss": 0.0804, + "step": 13688 + }, + { + "epoch": 1.92, + "learning_rate": 4.679791315740221e-05, + "loss": 0.0801, + "step": 13690 + }, + { + "epoch": 1.92, + "learning_rate": 4.679744525547445e-05, + "loss": 0.0761, + "step": 13692 + }, + { + "epoch": 1.92, + "learning_rate": 4.6796977353546704e-05, + "loss": 0.1032, + "step": 13694 + }, + { + "epoch": 1.92, + "learning_rate": 4.679650945161894e-05, + "loss": 0.078, + "step": 13696 + }, + { + "epoch": 1.92, + "learning_rate": 4.679604154969119e-05, + "loss": 0.0774, + "step": 13698 + }, + { + "epoch": 1.92, + "learning_rate": 4.679557364776343e-05, + "loss": 0.0831, + "step": 13700 + }, + { + "epoch": 1.92, + "learning_rate": 4.6795105745835674e-05, + "loss": 0.0748, + "step": 13702 + }, + { + "epoch": 1.92, + "learning_rate": 4.679463784390792e-05, + "loss": 0.0989, + "step": 13704 + }, + { + "epoch": 1.92, + "learning_rate": 4.6794169941980165e-05, + "loss": 0.0964, + "step": 13706 + }, + { + "epoch": 1.92, + "learning_rate": 4.6793702040052405e-05, + "loss": 0.0724, + "step": 13708 + }, + { + "epoch": 1.92, + "learning_rate": 4.679323413812465e-05, + "loss": 0.0925, + "step": 13710 + }, + { + "epoch": 1.92, + "learning_rate": 4.6792766236196896e-05, + "loss": 0.0866, + "step": 13712 + }, + { + "epoch": 1.93, + "learning_rate": 4.679229833426914e-05, + "loss": 0.0824, + "step": 13714 + }, + { + "epoch": 1.93, + "learning_rate": 4.679183043234138e-05, + "loss": 0.0734, + "step": 13716 + }, + { + "epoch": 1.93, + "learning_rate": 4.679136253041363e-05, + "loss": 0.0826, + "step": 13718 + }, + { + "epoch": 1.93, + "learning_rate": 4.679089462848587e-05, + "loss": 0.0684, + "step": 13720 + }, + { + "epoch": 1.93, + "learning_rate": 4.679042672655812e-05, + "loss": 0.0783, + "step": 13722 + }, + { + "epoch": 1.93, + "learning_rate": 4.678995882463036e-05, + "loss": 0.0761, + "step": 13724 + }, + { + "epoch": 1.93, + "learning_rate": 4.6789490922702604e-05, + "loss": 0.0723, + "step": 13726 + }, + { + "epoch": 1.93, + "learning_rate": 4.678902302077485e-05, + "loss": 0.093, + "step": 13728 + }, + { + "epoch": 1.93, + "learning_rate": 4.6788555118847096e-05, + "loss": 0.0994, + "step": 13730 + }, + { + "epoch": 1.93, + "learning_rate": 4.6788087216919335e-05, + "loss": 0.0745, + "step": 13732 + }, + { + "epoch": 1.93, + "learning_rate": 4.678761931499158e-05, + "loss": 0.0751, + "step": 13734 + }, + { + "epoch": 1.93, + "learning_rate": 4.678715141306382e-05, + "loss": 0.0986, + "step": 13736 + }, + { + "epoch": 1.93, + "learning_rate": 4.678668351113607e-05, + "loss": 0.0797, + "step": 13738 + }, + { + "epoch": 1.93, + "learning_rate": 4.678621560920831e-05, + "loss": 0.0839, + "step": 13740 + }, + { + "epoch": 1.93, + "learning_rate": 4.678574770728056e-05, + "loss": 0.0674, + "step": 13742 + }, + { + "epoch": 1.93, + "learning_rate": 4.67852798053528e-05, + "loss": 0.0608, + "step": 13744 + }, + { + "epoch": 1.93, + "learning_rate": 4.678481190342505e-05, + "loss": 0.0822, + "step": 13746 + }, + { + "epoch": 1.93, + "learning_rate": 4.678434400149729e-05, + "loss": 0.0805, + "step": 13748 + }, + { + "epoch": 1.93, + "learning_rate": 4.6783876099569535e-05, + "loss": 0.129, + "step": 13750 + }, + { + "epoch": 1.93, + "learning_rate": 4.6783408197641774e-05, + "loss": 0.089, + "step": 13752 + }, + { + "epoch": 1.93, + "learning_rate": 4.678294029571402e-05, + "loss": 0.0838, + "step": 13754 + }, + { + "epoch": 1.93, + "learning_rate": 4.6782472393786266e-05, + "loss": 0.0747, + "step": 13756 + }, + { + "epoch": 1.93, + "learning_rate": 4.678200449185851e-05, + "loss": 0.082, + "step": 13758 + }, + { + "epoch": 1.93, + "learning_rate": 4.678153658993075e-05, + "loss": 0.0776, + "step": 13760 + }, + { + "epoch": 1.93, + "learning_rate": 4.6781068688003e-05, + "loss": 0.0814, + "step": 13762 + }, + { + "epoch": 1.93, + "learning_rate": 4.678060078607524e-05, + "loss": 0.0779, + "step": 13764 + }, + { + "epoch": 1.93, + "learning_rate": 4.678013288414749e-05, + "loss": 0.0998, + "step": 13766 + }, + { + "epoch": 1.93, + "learning_rate": 4.677966498221973e-05, + "loss": 0.0682, + "step": 13768 + }, + { + "epoch": 1.93, + "learning_rate": 4.6779197080291974e-05, + "loss": 0.0751, + "step": 13770 + }, + { + "epoch": 1.93, + "learning_rate": 4.677872917836422e-05, + "loss": 0.0841, + "step": 13772 + }, + { + "epoch": 1.93, + "learning_rate": 4.6778261276436465e-05, + "loss": 0.0675, + "step": 13774 + }, + { + "epoch": 1.93, + "learning_rate": 4.6777793374508705e-05, + "loss": 0.0807, + "step": 13776 + }, + { + "epoch": 1.93, + "learning_rate": 4.677732547258095e-05, + "loss": 0.0729, + "step": 13778 + }, + { + "epoch": 1.93, + "learning_rate": 4.6776857570653196e-05, + "loss": 0.0751, + "step": 13780 + }, + { + "epoch": 1.93, + "learning_rate": 4.677638966872544e-05, + "loss": 0.0849, + "step": 13782 + }, + { + "epoch": 1.93, + "learning_rate": 4.677592176679768e-05, + "loss": 0.099, + "step": 13784 + }, + { + "epoch": 1.94, + "learning_rate": 4.677545386486993e-05, + "loss": 0.0918, + "step": 13786 + }, + { + "epoch": 1.94, + "learning_rate": 4.6774985962942167e-05, + "loss": 0.1082, + "step": 13788 + }, + { + "epoch": 1.94, + "learning_rate": 4.677451806101442e-05, + "loss": 0.071, + "step": 13790 + }, + { + "epoch": 1.94, + "learning_rate": 4.677405015908666e-05, + "loss": 0.1029, + "step": 13792 + }, + { + "epoch": 1.94, + "learning_rate": 4.6773582257158904e-05, + "loss": 0.1171, + "step": 13794 + }, + { + "epoch": 1.94, + "learning_rate": 4.6773114355231143e-05, + "loss": 0.0791, + "step": 13796 + }, + { + "epoch": 1.94, + "learning_rate": 4.677264645330339e-05, + "loss": 0.0978, + "step": 13798 + }, + { + "epoch": 1.94, + "learning_rate": 4.6772178551375635e-05, + "loss": 0.0806, + "step": 13800 + }, + { + "epoch": 1.94, + "learning_rate": 4.677171064944788e-05, + "loss": 0.0735, + "step": 13802 + }, + { + "epoch": 1.94, + "learning_rate": 4.677124274752012e-05, + "loss": 0.0793, + "step": 13804 + }, + { + "epoch": 1.94, + "learning_rate": 4.6770774845592366e-05, + "loss": 0.0637, + "step": 13806 + }, + { + "epoch": 1.94, + "learning_rate": 4.677030694366461e-05, + "loss": 0.0803, + "step": 13808 + }, + { + "epoch": 1.94, + "learning_rate": 4.676983904173685e-05, + "loss": 0.0876, + "step": 13810 + }, + { + "epoch": 1.94, + "learning_rate": 4.67693711398091e-05, + "loss": 0.081, + "step": 13812 + }, + { + "epoch": 1.94, + "learning_rate": 4.6768903237881336e-05, + "loss": 0.0778, + "step": 13814 + }, + { + "epoch": 1.94, + "learning_rate": 4.676843533595359e-05, + "loss": 0.0915, + "step": 13816 + }, + { + "epoch": 1.94, + "learning_rate": 4.676796743402583e-05, + "loss": 0.1146, + "step": 13818 + }, + { + "epoch": 1.94, + "learning_rate": 4.6767499532098074e-05, + "loss": 0.0943, + "step": 13820 + }, + { + "epoch": 1.94, + "learning_rate": 4.676703163017031e-05, + "loss": 0.0681, + "step": 13822 + }, + { + "epoch": 1.94, + "learning_rate": 4.6766563728242566e-05, + "loss": 0.0717, + "step": 13824 + }, + { + "epoch": 1.94, + "learning_rate": 4.6766095826314805e-05, + "loss": 0.0849, + "step": 13826 + }, + { + "epoch": 1.94, + "learning_rate": 4.676562792438705e-05, + "loss": 0.0759, + "step": 13828 + }, + { + "epoch": 1.94, + "learning_rate": 4.676516002245929e-05, + "loss": 0.0588, + "step": 13830 + }, + { + "epoch": 1.94, + "learning_rate": 4.6764692120531536e-05, + "loss": 0.0837, + "step": 13832 + }, + { + "epoch": 1.94, + "learning_rate": 4.676422421860378e-05, + "loss": 0.0741, + "step": 13834 + }, + { + "epoch": 1.94, + "learning_rate": 4.676375631667603e-05, + "loss": 0.0802, + "step": 13836 + }, + { + "epoch": 1.94, + "learning_rate": 4.676328841474827e-05, + "loss": 0.0932, + "step": 13838 + }, + { + "epoch": 1.94, + "learning_rate": 4.676282051282051e-05, + "loss": 0.0816, + "step": 13840 + }, + { + "epoch": 1.94, + "learning_rate": 4.676235261089276e-05, + "loss": 0.0683, + "step": 13842 + }, + { + "epoch": 1.94, + "learning_rate": 4.6761884708965005e-05, + "loss": 0.0657, + "step": 13844 + }, + { + "epoch": 1.94, + "learning_rate": 4.6761416807037244e-05, + "loss": 0.0708, + "step": 13846 + }, + { + "epoch": 1.94, + "learning_rate": 4.676094890510949e-05, + "loss": 0.0979, + "step": 13848 + }, + { + "epoch": 1.94, + "learning_rate": 4.6760481003181736e-05, + "loss": 0.075, + "step": 13850 + }, + { + "epoch": 1.94, + "learning_rate": 4.676001310125398e-05, + "loss": 0.0686, + "step": 13852 + }, + { + "epoch": 1.94, + "learning_rate": 4.675954519932622e-05, + "loss": 0.0878, + "step": 13854 + }, + { + "epoch": 1.94, + "learning_rate": 4.6759077297398467e-05, + "loss": 0.0896, + "step": 13856 + }, + { + "epoch": 1.95, + "learning_rate": 4.675860939547071e-05, + "loss": 0.0891, + "step": 13858 + }, + { + "epoch": 1.95, + "learning_rate": 4.675814149354296e-05, + "loss": 0.0731, + "step": 13860 + }, + { + "epoch": 1.95, + "learning_rate": 4.67576735916152e-05, + "loss": 0.0842, + "step": 13862 + }, + { + "epoch": 1.95, + "learning_rate": 4.6757205689687443e-05, + "loss": 0.0897, + "step": 13864 + }, + { + "epoch": 1.95, + "learning_rate": 4.675673778775968e-05, + "loss": 0.116, + "step": 13866 + }, + { + "epoch": 1.95, + "learning_rate": 4.6756269885831935e-05, + "loss": 0.0838, + "step": 13868 + }, + { + "epoch": 1.95, + "learning_rate": 4.6755801983904174e-05, + "loss": 0.0742, + "step": 13870 + }, + { + "epoch": 1.95, + "learning_rate": 4.675533408197642e-05, + "loss": 0.0804, + "step": 13872 + }, + { + "epoch": 1.95, + "learning_rate": 4.675486618004866e-05, + "loss": 0.0811, + "step": 13874 + }, + { + "epoch": 1.95, + "learning_rate": 4.675439827812091e-05, + "loss": 0.0847, + "step": 13876 + }, + { + "epoch": 1.95, + "learning_rate": 4.675393037619315e-05, + "loss": 0.1092, + "step": 13878 + }, + { + "epoch": 1.95, + "learning_rate": 4.67534624742654e-05, + "loss": 0.0723, + "step": 13880 + }, + { + "epoch": 1.95, + "learning_rate": 4.6752994572337636e-05, + "loss": 0.0644, + "step": 13882 + }, + { + "epoch": 1.95, + "learning_rate": 4.675252667040988e-05, + "loss": 0.0887, + "step": 13884 + }, + { + "epoch": 1.95, + "learning_rate": 4.675205876848213e-05, + "loss": 0.1023, + "step": 13886 + }, + { + "epoch": 1.95, + "learning_rate": 4.6751590866554374e-05, + "loss": 0.075, + "step": 13888 + }, + { + "epoch": 1.95, + "learning_rate": 4.675112296462661e-05, + "loss": 0.0983, + "step": 13890 + }, + { + "epoch": 1.95, + "learning_rate": 4.675065506269886e-05, + "loss": 0.1074, + "step": 13892 + }, + { + "epoch": 1.95, + "learning_rate": 4.6750187160771105e-05, + "loss": 0.0783, + "step": 13894 + }, + { + "epoch": 1.95, + "learning_rate": 4.674971925884335e-05, + "loss": 0.0913, + "step": 13896 + }, + { + "epoch": 1.95, + "learning_rate": 4.674925135691559e-05, + "loss": 0.0891, + "step": 13898 + }, + { + "epoch": 1.95, + "learning_rate": 4.6748783454987836e-05, + "loss": 0.087, + "step": 13900 + }, + { + "epoch": 1.95, + "learning_rate": 4.674831555306008e-05, + "loss": 0.0637, + "step": 13902 + }, + { + "epoch": 1.95, + "learning_rate": 4.674784765113233e-05, + "loss": 0.0705, + "step": 13904 + }, + { + "epoch": 1.95, + "learning_rate": 4.674737974920457e-05, + "loss": 0.0833, + "step": 13906 + }, + { + "epoch": 1.95, + "learning_rate": 4.674691184727681e-05, + "loss": 0.0829, + "step": 13908 + }, + { + "epoch": 1.95, + "learning_rate": 4.674644394534906e-05, + "loss": 0.0869, + "step": 13910 + }, + { + "epoch": 1.95, + "learning_rate": 4.6745976043421305e-05, + "loss": 0.0823, + "step": 13912 + }, + { + "epoch": 1.95, + "learning_rate": 4.6745508141493544e-05, + "loss": 0.0874, + "step": 13914 + }, + { + "epoch": 1.95, + "learning_rate": 4.674504023956579e-05, + "loss": 0.0743, + "step": 13916 + }, + { + "epoch": 1.95, + "learning_rate": 4.674457233763803e-05, + "loss": 0.1075, + "step": 13918 + }, + { + "epoch": 1.95, + "learning_rate": 4.674410443571028e-05, + "loss": 0.0799, + "step": 13920 + }, + { + "epoch": 1.95, + "learning_rate": 4.674363653378252e-05, + "loss": 0.1071, + "step": 13922 + }, + { + "epoch": 1.95, + "learning_rate": 4.6743168631854767e-05, + "loss": 0.0778, + "step": 13924 + }, + { + "epoch": 1.95, + "learning_rate": 4.6742700729927006e-05, + "loss": 0.0834, + "step": 13926 + }, + { + "epoch": 1.96, + "learning_rate": 4.674223282799925e-05, + "loss": 0.0907, + "step": 13928 + }, + { + "epoch": 1.96, + "learning_rate": 4.67417649260715e-05, + "loss": 0.0761, + "step": 13930 + }, + { + "epoch": 1.96, + "learning_rate": 4.6741297024143743e-05, + "loss": 0.1059, + "step": 13932 + }, + { + "epoch": 1.96, + "learning_rate": 4.674082912221598e-05, + "loss": 0.0793, + "step": 13934 + }, + { + "epoch": 1.96, + "learning_rate": 4.674036122028823e-05, + "loss": 0.0738, + "step": 13936 + }, + { + "epoch": 1.96, + "learning_rate": 4.6739893318360474e-05, + "loss": 0.1102, + "step": 13938 + }, + { + "epoch": 1.96, + "learning_rate": 4.673942541643272e-05, + "loss": 0.0918, + "step": 13940 + }, + { + "epoch": 1.96, + "learning_rate": 4.673895751450496e-05, + "loss": 0.0736, + "step": 13942 + }, + { + "epoch": 1.96, + "learning_rate": 4.6738489612577205e-05, + "loss": 0.0583, + "step": 13944 + }, + { + "epoch": 1.96, + "learning_rate": 4.673802171064945e-05, + "loss": 0.114, + "step": 13946 + }, + { + "epoch": 1.96, + "learning_rate": 4.67375538087217e-05, + "loss": 0.1063, + "step": 13948 + }, + { + "epoch": 1.96, + "learning_rate": 4.6737085906793936e-05, + "loss": 0.0842, + "step": 13950 + }, + { + "epoch": 1.96, + "learning_rate": 4.673661800486618e-05, + "loss": 0.074, + "step": 13952 + }, + { + "epoch": 1.96, + "learning_rate": 4.673615010293843e-05, + "loss": 0.0677, + "step": 13954 + }, + { + "epoch": 1.96, + "learning_rate": 4.6735682201010674e-05, + "loss": 0.0717, + "step": 13956 + }, + { + "epoch": 1.96, + "learning_rate": 4.673521429908291e-05, + "loss": 0.0879, + "step": 13958 + }, + { + "epoch": 1.96, + "learning_rate": 4.673474639715516e-05, + "loss": 0.069, + "step": 13960 + }, + { + "epoch": 1.96, + "learning_rate": 4.67342784952274e-05, + "loss": 0.0787, + "step": 13962 + }, + { + "epoch": 1.96, + "learning_rate": 4.673381059329965e-05, + "loss": 0.0953, + "step": 13964 + }, + { + "epoch": 1.96, + "learning_rate": 4.673334269137189e-05, + "loss": 0.0925, + "step": 13966 + }, + { + "epoch": 1.96, + "learning_rate": 4.6732874789444136e-05, + "loss": 0.0702, + "step": 13968 + }, + { + "epoch": 1.96, + "learning_rate": 4.6732406887516375e-05, + "loss": 0.1242, + "step": 13970 + }, + { + "epoch": 1.96, + "learning_rate": 4.673193898558863e-05, + "loss": 0.0616, + "step": 13972 + }, + { + "epoch": 1.96, + "learning_rate": 4.673147108366087e-05, + "loss": 0.1254, + "step": 13974 + }, + { + "epoch": 1.96, + "learning_rate": 4.673100318173311e-05, + "loss": 0.1014, + "step": 13976 + }, + { + "epoch": 1.96, + "learning_rate": 4.673053527980535e-05, + "loss": 0.0924, + "step": 13978 + }, + { + "epoch": 1.96, + "learning_rate": 4.67300673778776e-05, + "loss": 0.0804, + "step": 13980 + }, + { + "epoch": 1.96, + "learning_rate": 4.6729599475949844e-05, + "loss": 0.0965, + "step": 13982 + }, + { + "epoch": 1.96, + "learning_rate": 4.672913157402209e-05, + "loss": 0.0666, + "step": 13984 + }, + { + "epoch": 1.96, + "learning_rate": 4.672866367209433e-05, + "loss": 0.0786, + "step": 13986 + }, + { + "epoch": 1.96, + "learning_rate": 4.6728195770166575e-05, + "loss": 0.0736, + "step": 13988 + }, + { + "epoch": 1.96, + "learning_rate": 4.672772786823882e-05, + "loss": 0.0724, + "step": 13990 + }, + { + "epoch": 1.96, + "learning_rate": 4.6727259966311067e-05, + "loss": 0.069, + "step": 13992 + }, + { + "epoch": 1.96, + "learning_rate": 4.6726792064383306e-05, + "loss": 0.0954, + "step": 13994 + }, + { + "epoch": 1.96, + "learning_rate": 4.672632416245555e-05, + "loss": 0.0802, + "step": 13996 + }, + { + "epoch": 1.96, + "learning_rate": 4.67258562605278e-05, + "loss": 0.0789, + "step": 13998 + }, + { + "epoch": 1.97, + "learning_rate": 4.6725388358600043e-05, + "loss": 0.1104, + "step": 14000 + }, + { + "epoch": 1.97, + "eval_gen_len": 28.2931, + "eval_loss": 1.0474748611450195, + "eval_meteor": 0.048, + "eval_runtime": 13.517, + "eval_samples_per_second": 4.291, + "eval_steps_per_second": 0.592, + "step": 14000 + }, + { + "epoch": 1.97, + "learning_rate": 4.672492045667228e-05, + "loss": 0.119, + "step": 14002 + }, + { + "epoch": 1.97, + "learning_rate": 4.672445255474453e-05, + "loss": 0.0843, + "step": 14004 + }, + { + "epoch": 1.97, + "learning_rate": 4.6723984652816774e-05, + "loss": 0.0611, + "step": 14006 + }, + { + "epoch": 1.97, + "learning_rate": 4.672351675088902e-05, + "loss": 0.0763, + "step": 14008 + }, + { + "epoch": 1.97, + "learning_rate": 4.672304884896126e-05, + "loss": 0.0992, + "step": 14010 + }, + { + "epoch": 1.97, + "learning_rate": 4.6722580947033505e-05, + "loss": 0.1185, + "step": 14012 + }, + { + "epoch": 1.97, + "learning_rate": 4.6722113045105745e-05, + "loss": 0.0689, + "step": 14014 + }, + { + "epoch": 1.97, + "learning_rate": 4.6721645143178e-05, + "loss": 0.0783, + "step": 14016 + }, + { + "epoch": 1.97, + "learning_rate": 4.6721177241250236e-05, + "loss": 0.0736, + "step": 14018 + }, + { + "epoch": 1.97, + "learning_rate": 4.672070933932248e-05, + "loss": 0.0734, + "step": 14020 + }, + { + "epoch": 1.97, + "learning_rate": 4.672024143739472e-05, + "loss": 0.08, + "step": 14022 + }, + { + "epoch": 1.97, + "learning_rate": 4.6719773535466974e-05, + "loss": 0.0764, + "step": 14024 + }, + { + "epoch": 1.97, + "learning_rate": 4.671930563353921e-05, + "loss": 0.0828, + "step": 14026 + }, + { + "epoch": 1.97, + "learning_rate": 4.671883773161146e-05, + "loss": 0.0766, + "step": 14028 + }, + { + "epoch": 1.97, + "learning_rate": 4.67183698296837e-05, + "loss": 0.0948, + "step": 14030 + }, + { + "epoch": 1.97, + "learning_rate": 4.6717901927755944e-05, + "loss": 0.1003, + "step": 14032 + }, + { + "epoch": 1.97, + "learning_rate": 4.671743402582819e-05, + "loss": 0.0863, + "step": 14034 + }, + { + "epoch": 1.97, + "learning_rate": 4.6716966123900436e-05, + "loss": 0.0965, + "step": 14036 + }, + { + "epoch": 1.97, + "learning_rate": 4.6716498221972675e-05, + "loss": 0.0851, + "step": 14038 + }, + { + "epoch": 1.97, + "learning_rate": 4.671603032004492e-05, + "loss": 0.1, + "step": 14040 + }, + { + "epoch": 1.97, + "learning_rate": 4.671556241811717e-05, + "loss": 0.096, + "step": 14042 + }, + { + "epoch": 1.97, + "learning_rate": 4.671509451618941e-05, + "loss": 0.0889, + "step": 14044 + }, + { + "epoch": 1.97, + "learning_rate": 4.671462661426165e-05, + "loss": 0.0792, + "step": 14046 + }, + { + "epoch": 1.97, + "learning_rate": 4.67141587123339e-05, + "loss": 0.082, + "step": 14048 + }, + { + "epoch": 1.97, + "learning_rate": 4.6713690810406144e-05, + "loss": 0.0832, + "step": 14050 + }, + { + "epoch": 1.97, + "learning_rate": 4.671322290847839e-05, + "loss": 0.0758, + "step": 14052 + }, + { + "epoch": 1.97, + "learning_rate": 4.671275500655063e-05, + "loss": 0.0826, + "step": 14054 + }, + { + "epoch": 1.97, + "learning_rate": 4.6712287104622875e-05, + "loss": 0.0842, + "step": 14056 + }, + { + "epoch": 1.97, + "learning_rate": 4.671181920269512e-05, + "loss": 0.099, + "step": 14058 + }, + { + "epoch": 1.97, + "learning_rate": 4.671135130076736e-05, + "loss": 0.0892, + "step": 14060 + }, + { + "epoch": 1.97, + "learning_rate": 4.6710883398839606e-05, + "loss": 0.1079, + "step": 14062 + }, + { + "epoch": 1.97, + "learning_rate": 4.6710415496911845e-05, + "loss": 0.0814, + "step": 14064 + }, + { + "epoch": 1.97, + "learning_rate": 4.670994759498409e-05, + "loss": 0.1106, + "step": 14066 + }, + { + "epoch": 1.97, + "learning_rate": 4.670947969305634e-05, + "loss": 0.0782, + "step": 14068 + }, + { + "epoch": 1.98, + "learning_rate": 4.670901179112858e-05, + "loss": 0.1078, + "step": 14070 + }, + { + "epoch": 1.98, + "learning_rate": 4.670854388920082e-05, + "loss": 0.0754, + "step": 14072 + }, + { + "epoch": 1.98, + "learning_rate": 4.670807598727307e-05, + "loss": 0.0854, + "step": 14074 + }, + { + "epoch": 1.98, + "learning_rate": 4.6707608085345314e-05, + "loss": 0.0843, + "step": 14076 + }, + { + "epoch": 1.98, + "learning_rate": 4.670714018341756e-05, + "loss": 0.0886, + "step": 14078 + }, + { + "epoch": 1.98, + "learning_rate": 4.67066722814898e-05, + "loss": 0.0833, + "step": 14080 + }, + { + "epoch": 1.98, + "learning_rate": 4.6706204379562045e-05, + "loss": 0.0828, + "step": 14082 + }, + { + "epoch": 1.98, + "learning_rate": 4.670573647763429e-05, + "loss": 0.079, + "step": 14084 + }, + { + "epoch": 1.98, + "learning_rate": 4.6705268575706536e-05, + "loss": 0.1032, + "step": 14086 + }, + { + "epoch": 1.98, + "learning_rate": 4.6704800673778775e-05, + "loss": 0.0608, + "step": 14088 + }, + { + "epoch": 1.98, + "learning_rate": 4.670433277185102e-05, + "loss": 0.0956, + "step": 14090 + }, + { + "epoch": 1.98, + "learning_rate": 4.670386486992327e-05, + "loss": 0.1089, + "step": 14092 + }, + { + "epoch": 1.98, + "learning_rate": 4.670339696799551e-05, + "loss": 0.0756, + "step": 14094 + }, + { + "epoch": 1.98, + "learning_rate": 4.670292906606775e-05, + "loss": 0.0905, + "step": 14096 + }, + { + "epoch": 1.98, + "learning_rate": 4.670246116414e-05, + "loss": 0.1066, + "step": 14098 + }, + { + "epoch": 1.98, + "learning_rate": 4.670199326221224e-05, + "loss": 0.096, + "step": 14100 + }, + { + "epoch": 1.98, + "learning_rate": 4.670152536028449e-05, + "loss": 0.0998, + "step": 14102 + }, + { + "epoch": 1.98, + "learning_rate": 4.670105745835673e-05, + "loss": 0.1179, + "step": 14104 + }, + { + "epoch": 1.98, + "learning_rate": 4.6700589556428975e-05, + "loss": 0.1025, + "step": 14106 + }, + { + "epoch": 1.98, + "learning_rate": 4.6700121654501214e-05, + "loss": 0.0868, + "step": 14108 + }, + { + "epoch": 1.98, + "learning_rate": 4.669965375257346e-05, + "loss": 0.071, + "step": 14110 + }, + { + "epoch": 1.98, + "learning_rate": 4.6699185850645706e-05, + "loss": 0.1024, + "step": 14112 + }, + { + "epoch": 1.98, + "learning_rate": 4.669871794871795e-05, + "loss": 0.0889, + "step": 14114 + }, + { + "epoch": 1.98, + "learning_rate": 4.669825004679019e-05, + "loss": 0.084, + "step": 14116 + }, + { + "epoch": 1.98, + "learning_rate": 4.669778214486244e-05, + "loss": 0.1139, + "step": 14118 + }, + { + "epoch": 1.98, + "learning_rate": 4.669731424293468e-05, + "loss": 0.0789, + "step": 14120 + }, + { + "epoch": 1.98, + "learning_rate": 4.669684634100693e-05, + "loss": 0.1202, + "step": 14122 + }, + { + "epoch": 1.98, + "learning_rate": 4.669637843907917e-05, + "loss": 0.0674, + "step": 14124 + }, + { + "epoch": 1.98, + "learning_rate": 4.6695910537151414e-05, + "loss": 0.0751, + "step": 14126 + }, + { + "epoch": 1.98, + "learning_rate": 4.669544263522366e-05, + "loss": 0.0863, + "step": 14128 + }, + { + "epoch": 1.98, + "learning_rate": 4.6694974733295906e-05, + "loss": 0.0698, + "step": 14130 + }, + { + "epoch": 1.98, + "learning_rate": 4.6694506831368145e-05, + "loss": 0.0945, + "step": 14132 + }, + { + "epoch": 1.98, + "learning_rate": 4.669403892944039e-05, + "loss": 0.1053, + "step": 14134 + }, + { + "epoch": 1.98, + "learning_rate": 4.669357102751264e-05, + "loss": 0.1102, + "step": 14136 + }, + { + "epoch": 1.98, + "learning_rate": 4.669310312558488e-05, + "loss": 0.0887, + "step": 14138 + }, + { + "epoch": 1.98, + "learning_rate": 4.669263522365712e-05, + "loss": 0.096, + "step": 14140 + }, + { + "epoch": 1.99, + "learning_rate": 4.669216732172937e-05, + "loss": 0.0923, + "step": 14142 + }, + { + "epoch": 1.99, + "learning_rate": 4.669169941980161e-05, + "loss": 0.0653, + "step": 14144 + }, + { + "epoch": 1.99, + "learning_rate": 4.669123151787386e-05, + "loss": 0.0936, + "step": 14146 + }, + { + "epoch": 1.99, + "learning_rate": 4.66907636159461e-05, + "loss": 0.1005, + "step": 14148 + }, + { + "epoch": 1.99, + "learning_rate": 4.6690295714018345e-05, + "loss": 0.0783, + "step": 14150 + }, + { + "epoch": 1.99, + "learning_rate": 4.6689827812090584e-05, + "loss": 0.0802, + "step": 14152 + }, + { + "epoch": 1.99, + "learning_rate": 4.6689359910162836e-05, + "loss": 0.0849, + "step": 14154 + }, + { + "epoch": 1.99, + "learning_rate": 4.6688892008235076e-05, + "loss": 0.0584, + "step": 14156 + }, + { + "epoch": 1.99, + "learning_rate": 4.668842410630732e-05, + "loss": 0.0892, + "step": 14158 + }, + { + "epoch": 1.99, + "learning_rate": 4.668795620437956e-05, + "loss": 0.0815, + "step": 14160 + }, + { + "epoch": 1.99, + "learning_rate": 4.6687488302451806e-05, + "loss": 0.0966, + "step": 14162 + }, + { + "epoch": 1.99, + "learning_rate": 4.668702040052405e-05, + "loss": 0.0969, + "step": 14164 + }, + { + "epoch": 1.99, + "learning_rate": 4.66865524985963e-05, + "loss": 0.0947, + "step": 14166 + }, + { + "epoch": 1.99, + "learning_rate": 4.668608459666854e-05, + "loss": 0.0803, + "step": 14168 + }, + { + "epoch": 1.99, + "learning_rate": 4.668561669474078e-05, + "loss": 0.0776, + "step": 14170 + }, + { + "epoch": 1.99, + "learning_rate": 4.668514879281303e-05, + "loss": 0.0828, + "step": 14172 + }, + { + "epoch": 1.99, + "learning_rate": 4.6684680890885275e-05, + "loss": 0.0849, + "step": 14174 + }, + { + "epoch": 1.99, + "learning_rate": 4.6684212988957514e-05, + "loss": 0.0927, + "step": 14176 + }, + { + "epoch": 1.99, + "learning_rate": 4.668374508702976e-05, + "loss": 0.1052, + "step": 14178 + }, + { + "epoch": 1.99, + "learning_rate": 4.6683277185102006e-05, + "loss": 0.1121, + "step": 14180 + }, + { + "epoch": 1.99, + "learning_rate": 4.668280928317425e-05, + "loss": 0.0738, + "step": 14182 + }, + { + "epoch": 1.99, + "learning_rate": 4.668234138124649e-05, + "loss": 0.0724, + "step": 14184 + }, + { + "epoch": 1.99, + "learning_rate": 4.668187347931874e-05, + "loss": 0.0853, + "step": 14186 + }, + { + "epoch": 1.99, + "learning_rate": 4.668140557739098e-05, + "loss": 0.086, + "step": 14188 + }, + { + "epoch": 1.99, + "learning_rate": 4.668093767546323e-05, + "loss": 0.0838, + "step": 14190 + }, + { + "epoch": 1.99, + "learning_rate": 4.668046977353547e-05, + "loss": 0.064, + "step": 14192 + }, + { + "epoch": 1.99, + "learning_rate": 4.6680001871607714e-05, + "loss": 0.0782, + "step": 14194 + }, + { + "epoch": 1.99, + "learning_rate": 4.667953396967995e-05, + "loss": 0.0698, + "step": 14196 + }, + { + "epoch": 1.99, + "learning_rate": 4.6679066067752206e-05, + "loss": 0.099, + "step": 14198 + }, + { + "epoch": 1.99, + "learning_rate": 4.6678598165824445e-05, + "loss": 0.089, + "step": 14200 + }, + { + "epoch": 1.99, + "learning_rate": 4.667813026389669e-05, + "loss": 0.0976, + "step": 14202 + }, + { + "epoch": 1.99, + "learning_rate": 4.667766236196893e-05, + "loss": 0.0739, + "step": 14204 + }, + { + "epoch": 1.99, + "learning_rate": 4.6677194460041176e-05, + "loss": 0.0998, + "step": 14206 + }, + { + "epoch": 1.99, + "learning_rate": 4.667672655811342e-05, + "loss": 0.0815, + "step": 14208 + }, + { + "epoch": 1.99, + "learning_rate": 4.667625865618567e-05, + "loss": 0.0717, + "step": 14210 + }, + { + "epoch": 1.99, + "learning_rate": 4.667579075425791e-05, + "loss": 0.0927, + "step": 14212 + }, + { + "epoch": 2.0, + "learning_rate": 4.667532285233015e-05, + "loss": 0.0788, + "step": 14214 + }, + { + "epoch": 2.0, + "learning_rate": 4.66748549504024e-05, + "loss": 0.0717, + "step": 14216 + }, + { + "epoch": 2.0, + "learning_rate": 4.6674387048474645e-05, + "loss": 0.0824, + "step": 14218 + }, + { + "epoch": 2.0, + "learning_rate": 4.6673919146546884e-05, + "loss": 0.0815, + "step": 14220 + }, + { + "epoch": 2.0, + "learning_rate": 4.667345124461913e-05, + "loss": 0.0733, + "step": 14222 + }, + { + "epoch": 2.0, + "learning_rate": 4.6672983342691376e-05, + "loss": 0.068, + "step": 14224 + }, + { + "epoch": 2.0, + "learning_rate": 4.667251544076362e-05, + "loss": 0.0959, + "step": 14226 + }, + { + "epoch": 2.0, + "learning_rate": 4.667204753883586e-05, + "loss": 0.0813, + "step": 14228 + }, + { + "epoch": 2.0, + "learning_rate": 4.6671579636908106e-05, + "loss": 0.1064, + "step": 14230 + }, + { + "epoch": 2.0, + "learning_rate": 4.667111173498035e-05, + "loss": 0.0949, + "step": 14232 + }, + { + "epoch": 2.0, + "learning_rate": 4.66706438330526e-05, + "loss": 0.104, + "step": 14234 + }, + { + "epoch": 2.0, + "learning_rate": 4.667017593112484e-05, + "loss": 0.0782, + "step": 14236 + }, + { + "epoch": 2.0, + "learning_rate": 4.666970802919708e-05, + "loss": 0.0857, + "step": 14238 + }, + { + "epoch": 2.0, + "learning_rate": 4.666924012726932e-05, + "loss": 0.0854, + "step": 14240 + }, + { + "epoch": 2.0, + "learning_rate": 4.6668772225341575e-05, + "loss": 0.1081, + "step": 14242 + }, + { + "epoch": 2.0, + "learning_rate": 4.6668304323413814e-05, + "loss": 0.0633, + "step": 14244 + }, + { + "epoch": 2.0, + "learning_rate": 4.666783642148606e-05, + "loss": 0.0887, + "step": 14246 + }, + { + "epoch": 2.0, + "learning_rate": 4.666760247052218e-05, + "loss": 0.1334, + "step": 14248 + }, + { + "epoch": 2.0, + "learning_rate": 4.6667134568594426e-05, + "loss": 0.0486, + "step": 14250 + }, + { + "epoch": 2.0, + "learning_rate": 4.666666666666667e-05, + "loss": 0.0616, + "step": 14252 + }, + { + "epoch": 2.0, + "learning_rate": 4.666619876473891e-05, + "loss": 0.0566, + "step": 14254 + }, + { + "epoch": 2.0, + "learning_rate": 4.666573086281116e-05, + "loss": 0.0476, + "step": 14256 + }, + { + "epoch": 2.0, + "learning_rate": 4.6665262960883396e-05, + "loss": 0.0517, + "step": 14258 + }, + { + "epoch": 2.0, + "learning_rate": 4.666479505895565e-05, + "loss": 0.0507, + "step": 14260 + }, + { + "epoch": 2.0, + "learning_rate": 4.666432715702789e-05, + "loss": 0.0484, + "step": 14262 + }, + { + "epoch": 2.0, + "learning_rate": 4.6663859255100134e-05, + "loss": 0.0614, + "step": 14264 + }, + { + "epoch": 2.0, + "learning_rate": 4.666339135317237e-05, + "loss": 0.0486, + "step": 14266 + }, + { + "epoch": 2.0, + "learning_rate": 4.6662923451244625e-05, + "loss": 0.0598, + "step": 14268 + }, + { + "epoch": 2.0, + "learning_rate": 4.6662455549316864e-05, + "loss": 0.0537, + "step": 14270 + }, + { + "epoch": 2.0, + "learning_rate": 4.666198764738911e-05, + "loss": 0.0677, + "step": 14272 + }, + { + "epoch": 2.0, + "learning_rate": 4.666151974546135e-05, + "loss": 0.0623, + "step": 14274 + }, + { + "epoch": 2.0, + "learning_rate": 4.6661051843533595e-05, + "loss": 0.0255, + "step": 14276 + }, + { + "epoch": 2.0, + "learning_rate": 4.666058394160584e-05, + "loss": 0.0463, + "step": 14278 + }, + { + "epoch": 2.0, + "learning_rate": 4.666011603967809e-05, + "loss": 0.0572, + "step": 14280 + }, + { + "epoch": 2.0, + "learning_rate": 4.6659648137750326e-05, + "loss": 0.0471, + "step": 14282 + }, + { + "epoch": 2.01, + "learning_rate": 4.665918023582257e-05, + "loss": 0.0744, + "step": 14284 + }, + { + "epoch": 2.01, + "learning_rate": 4.665871233389482e-05, + "loss": 0.0569, + "step": 14286 + }, + { + "epoch": 2.01, + "learning_rate": 4.6658244431967064e-05, + "loss": 0.0458, + "step": 14288 + }, + { + "epoch": 2.01, + "learning_rate": 4.66577765300393e-05, + "loss": 0.0442, + "step": 14290 + }, + { + "epoch": 2.01, + "learning_rate": 4.665730862811155e-05, + "loss": 0.0391, + "step": 14292 + }, + { + "epoch": 2.01, + "learning_rate": 4.6656840726183795e-05, + "loss": 0.0603, + "step": 14294 + }, + { + "epoch": 2.01, + "learning_rate": 4.665637282425604e-05, + "loss": 0.0438, + "step": 14296 + }, + { + "epoch": 2.01, + "learning_rate": 4.665590492232828e-05, + "loss": 0.0394, + "step": 14298 + }, + { + "epoch": 2.01, + "learning_rate": 4.6655437020400526e-05, + "loss": 0.0503, + "step": 14300 + }, + { + "epoch": 2.01, + "learning_rate": 4.665496911847277e-05, + "loss": 0.0505, + "step": 14302 + }, + { + "epoch": 2.01, + "learning_rate": 4.665450121654502e-05, + "loss": 0.0406, + "step": 14304 + }, + { + "epoch": 2.01, + "learning_rate": 4.665403331461726e-05, + "loss": 0.0445, + "step": 14306 + }, + { + "epoch": 2.01, + "learning_rate": 4.66535654126895e-05, + "loss": 0.0477, + "step": 14308 + }, + { + "epoch": 2.01, + "learning_rate": 4.665309751076174e-05, + "loss": 0.0571, + "step": 14310 + }, + { + "epoch": 2.01, + "learning_rate": 4.6652629608833995e-05, + "loss": 0.0366, + "step": 14312 + }, + { + "epoch": 2.01, + "learning_rate": 4.6652161706906234e-05, + "loss": 0.0402, + "step": 14314 + }, + { + "epoch": 2.01, + "learning_rate": 4.665169380497848e-05, + "loss": 0.0381, + "step": 14316 + }, + { + "epoch": 2.01, + "learning_rate": 4.665122590305072e-05, + "loss": 0.0498, + "step": 14318 + }, + { + "epoch": 2.01, + "learning_rate": 4.665075800112297e-05, + "loss": 0.0558, + "step": 14320 + }, + { + "epoch": 2.01, + "learning_rate": 4.665029009919521e-05, + "loss": 0.0462, + "step": 14322 + }, + { + "epoch": 2.01, + "learning_rate": 4.664982219726746e-05, + "loss": 0.0783, + "step": 14324 + }, + { + "epoch": 2.01, + "learning_rate": 4.6649354295339696e-05, + "loss": 0.061, + "step": 14326 + }, + { + "epoch": 2.01, + "learning_rate": 4.664888639341194e-05, + "loss": 0.0652, + "step": 14328 + }, + { + "epoch": 2.01, + "learning_rate": 4.664841849148419e-05, + "loss": 0.0561, + "step": 14330 + }, + { + "epoch": 2.01, + "learning_rate": 4.6647950589556434e-05, + "loss": 0.0597, + "step": 14332 + }, + { + "epoch": 2.01, + "learning_rate": 4.664748268762867e-05, + "loss": 0.0651, + "step": 14334 + }, + { + "epoch": 2.01, + "learning_rate": 4.664701478570092e-05, + "loss": 0.0486, + "step": 14336 + }, + { + "epoch": 2.01, + "learning_rate": 4.6646546883773165e-05, + "loss": 0.0443, + "step": 14338 + }, + { + "epoch": 2.01, + "learning_rate": 4.664607898184541e-05, + "loss": 0.0463, + "step": 14340 + }, + { + "epoch": 2.01, + "learning_rate": 4.664561107991765e-05, + "loss": 0.0457, + "step": 14342 + }, + { + "epoch": 2.01, + "learning_rate": 4.6645143177989895e-05, + "loss": 0.0446, + "step": 14344 + }, + { + "epoch": 2.01, + "learning_rate": 4.664467527606214e-05, + "loss": 0.0688, + "step": 14346 + }, + { + "epoch": 2.01, + "learning_rate": 4.664420737413439e-05, + "loss": 0.04, + "step": 14348 + }, + { + "epoch": 2.01, + "learning_rate": 4.6643739472206626e-05, + "loss": 0.048, + "step": 14350 + }, + { + "epoch": 2.01, + "learning_rate": 4.664327157027887e-05, + "loss": 0.0438, + "step": 14352 + }, + { + "epoch": 2.01, + "learning_rate": 4.664280366835112e-05, + "loss": 0.0598, + "step": 14354 + }, + { + "epoch": 2.02, + "learning_rate": 4.6642335766423364e-05, + "loss": 0.051, + "step": 14356 + }, + { + "epoch": 2.02, + "learning_rate": 4.66418678644956e-05, + "loss": 0.0522, + "step": 14358 + }, + { + "epoch": 2.02, + "learning_rate": 4.664139996256785e-05, + "loss": 0.0475, + "step": 14360 + }, + { + "epoch": 2.02, + "learning_rate": 4.664093206064009e-05, + "loss": 0.0378, + "step": 14362 + }, + { + "epoch": 2.02, + "learning_rate": 4.664046415871234e-05, + "loss": 0.0474, + "step": 14364 + }, + { + "epoch": 2.02, + "learning_rate": 4.663999625678458e-05, + "loss": 0.0579, + "step": 14366 + }, + { + "epoch": 2.02, + "learning_rate": 4.6639528354856826e-05, + "loss": 0.0463, + "step": 14368 + }, + { + "epoch": 2.02, + "learning_rate": 4.6639060452929065e-05, + "loss": 0.0532, + "step": 14370 + }, + { + "epoch": 2.02, + "learning_rate": 4.663859255100131e-05, + "loss": 0.0422, + "step": 14372 + }, + { + "epoch": 2.02, + "learning_rate": 4.663812464907356e-05, + "loss": 0.0569, + "step": 14374 + }, + { + "epoch": 2.02, + "learning_rate": 4.66376567471458e-05, + "loss": 0.0525, + "step": 14376 + }, + { + "epoch": 2.02, + "learning_rate": 4.663718884521804e-05, + "loss": 0.0692, + "step": 14378 + }, + { + "epoch": 2.02, + "learning_rate": 4.663672094329029e-05, + "loss": 0.0638, + "step": 14380 + }, + { + "epoch": 2.02, + "learning_rate": 4.6636253041362534e-05, + "loss": 0.0563, + "step": 14382 + }, + { + "epoch": 2.02, + "learning_rate": 4.663578513943478e-05, + "loss": 0.0384, + "step": 14384 + }, + { + "epoch": 2.02, + "learning_rate": 4.663531723750702e-05, + "loss": 0.0473, + "step": 14386 + }, + { + "epoch": 2.02, + "learning_rate": 4.6634849335579265e-05, + "loss": 0.0254, + "step": 14388 + }, + { + "epoch": 2.02, + "learning_rate": 4.663438143365151e-05, + "loss": 0.0498, + "step": 14390 + }, + { + "epoch": 2.02, + "learning_rate": 4.663391353172376e-05, + "loss": 0.0498, + "step": 14392 + }, + { + "epoch": 2.02, + "learning_rate": 4.6633445629795996e-05, + "loss": 0.0393, + "step": 14394 + }, + { + "epoch": 2.02, + "learning_rate": 4.663297772786824e-05, + "loss": 0.0416, + "step": 14396 + }, + { + "epoch": 2.02, + "learning_rate": 4.663250982594049e-05, + "loss": 0.0513, + "step": 14398 + }, + { + "epoch": 2.02, + "learning_rate": 4.6632041924012734e-05, + "loss": 0.0553, + "step": 14400 + }, + { + "epoch": 2.02, + "learning_rate": 4.663157402208497e-05, + "loss": 0.0621, + "step": 14402 + }, + { + "epoch": 2.02, + "learning_rate": 4.663110612015722e-05, + "loss": 0.0484, + "step": 14404 + }, + { + "epoch": 2.02, + "learning_rate": 4.663063821822946e-05, + "loss": 0.0625, + "step": 14406 + }, + { + "epoch": 2.02, + "learning_rate": 4.663017031630171e-05, + "loss": 0.0455, + "step": 14408 + }, + { + "epoch": 2.02, + "learning_rate": 4.662970241437395e-05, + "loss": 0.0517, + "step": 14410 + }, + { + "epoch": 2.02, + "learning_rate": 4.6629234512446195e-05, + "loss": 0.053, + "step": 14412 + }, + { + "epoch": 2.02, + "learning_rate": 4.6628766610518435e-05, + "loss": 0.0519, + "step": 14414 + }, + { + "epoch": 2.02, + "learning_rate": 4.662829870859069e-05, + "loss": 0.0597, + "step": 14416 + }, + { + "epoch": 2.02, + "learning_rate": 4.6627830806662926e-05, + "loss": 0.0446, + "step": 14418 + }, + { + "epoch": 2.02, + "learning_rate": 4.662736290473517e-05, + "loss": 0.036, + "step": 14420 + }, + { + "epoch": 2.02, + "learning_rate": 4.662689500280741e-05, + "loss": 0.0692, + "step": 14422 + }, + { + "epoch": 2.02, + "learning_rate": 4.662642710087966e-05, + "loss": 0.0571, + "step": 14424 + }, + { + "epoch": 2.02, + "learning_rate": 4.66259591989519e-05, + "loss": 0.064, + "step": 14426 + }, + { + "epoch": 2.03, + "learning_rate": 4.662549129702415e-05, + "loss": 0.0612, + "step": 14428 + }, + { + "epoch": 2.03, + "learning_rate": 4.662502339509639e-05, + "loss": 0.0411, + "step": 14430 + }, + { + "epoch": 2.03, + "learning_rate": 4.6624555493168634e-05, + "loss": 0.0502, + "step": 14432 + }, + { + "epoch": 2.03, + "learning_rate": 4.662408759124088e-05, + "loss": 0.0466, + "step": 14434 + }, + { + "epoch": 2.03, + "learning_rate": 4.6623619689313126e-05, + "loss": 0.0592, + "step": 14436 + }, + { + "epoch": 2.03, + "learning_rate": 4.6623151787385365e-05, + "loss": 0.0625, + "step": 14438 + }, + { + "epoch": 2.03, + "learning_rate": 4.6622683885457604e-05, + "loss": 0.0395, + "step": 14440 + }, + { + "epoch": 2.03, + "learning_rate": 4.662221598352986e-05, + "loss": 0.0376, + "step": 14442 + }, + { + "epoch": 2.03, + "learning_rate": 4.6621748081602096e-05, + "loss": 0.0531, + "step": 14444 + }, + { + "epoch": 2.03, + "learning_rate": 4.662128017967434e-05, + "loss": 0.0522, + "step": 14446 + }, + { + "epoch": 2.03, + "learning_rate": 4.662081227774658e-05, + "loss": 0.0555, + "step": 14448 + }, + { + "epoch": 2.03, + "learning_rate": 4.6620344375818834e-05, + "loss": 0.0436, + "step": 14450 + }, + { + "epoch": 2.03, + "learning_rate": 4.661987647389107e-05, + "loss": 0.036, + "step": 14452 + }, + { + "epoch": 2.03, + "learning_rate": 4.661940857196332e-05, + "loss": 0.0492, + "step": 14454 + }, + { + "epoch": 2.03, + "learning_rate": 4.661894067003556e-05, + "loss": 0.0557, + "step": 14456 + }, + { + "epoch": 2.03, + "learning_rate": 4.6618472768107804e-05, + "loss": 0.0332, + "step": 14458 + }, + { + "epoch": 2.03, + "learning_rate": 4.661800486618005e-05, + "loss": 0.0464, + "step": 14460 + }, + { + "epoch": 2.03, + "learning_rate": 4.6617536964252296e-05, + "loss": 0.0492, + "step": 14462 + }, + { + "epoch": 2.03, + "learning_rate": 4.6617069062324535e-05, + "loss": 0.0456, + "step": 14464 + }, + { + "epoch": 2.03, + "learning_rate": 4.661660116039678e-05, + "loss": 0.0545, + "step": 14466 + }, + { + "epoch": 2.03, + "learning_rate": 4.661613325846903e-05, + "loss": 0.0428, + "step": 14468 + }, + { + "epoch": 2.03, + "learning_rate": 4.661566535654127e-05, + "loss": 0.0435, + "step": 14470 + }, + { + "epoch": 2.03, + "learning_rate": 4.661519745461351e-05, + "loss": 0.0409, + "step": 14472 + }, + { + "epoch": 2.03, + "learning_rate": 4.661472955268576e-05, + "loss": 0.0642, + "step": 14474 + }, + { + "epoch": 2.03, + "learning_rate": 4.6614261650758004e-05, + "loss": 0.066, + "step": 14476 + }, + { + "epoch": 2.03, + "learning_rate": 4.661379374883025e-05, + "loss": 0.064, + "step": 14478 + }, + { + "epoch": 2.03, + "learning_rate": 4.661332584690249e-05, + "loss": 0.0546, + "step": 14480 + }, + { + "epoch": 2.03, + "learning_rate": 4.6612857944974735e-05, + "loss": 0.0944, + "step": 14482 + }, + { + "epoch": 2.03, + "learning_rate": 4.661239004304698e-05, + "loss": 0.0726, + "step": 14484 + }, + { + "epoch": 2.03, + "learning_rate": 4.6611922141119226e-05, + "loss": 0.0468, + "step": 14486 + }, + { + "epoch": 2.03, + "learning_rate": 4.6611454239191466e-05, + "loss": 0.0404, + "step": 14488 + }, + { + "epoch": 2.03, + "learning_rate": 4.661098633726371e-05, + "loss": 0.056, + "step": 14490 + }, + { + "epoch": 2.03, + "learning_rate": 4.661051843533595e-05, + "loss": 0.0486, + "step": 14492 + }, + { + "epoch": 2.03, + "learning_rate": 4.66100505334082e-05, + "loss": 0.0692, + "step": 14494 + }, + { + "epoch": 2.03, + "learning_rate": 4.660958263148044e-05, + "loss": 0.0455, + "step": 14496 + }, + { + "epoch": 2.04, + "learning_rate": 4.660911472955269e-05, + "loss": 0.042, + "step": 14498 + }, + { + "epoch": 2.04, + "learning_rate": 4.660864682762493e-05, + "loss": 0.0303, + "step": 14500 + }, + { + "epoch": 2.04, + "learning_rate": 4.6608178925697173e-05, + "loss": 0.0518, + "step": 14502 + }, + { + "epoch": 2.04, + "learning_rate": 4.660771102376942e-05, + "loss": 0.0586, + "step": 14504 + }, + { + "epoch": 2.04, + "learning_rate": 4.6607243121841665e-05, + "loss": 0.0516, + "step": 14506 + }, + { + "epoch": 2.04, + "learning_rate": 4.6606775219913904e-05, + "loss": 0.0457, + "step": 14508 + }, + { + "epoch": 2.04, + "learning_rate": 4.660630731798615e-05, + "loss": 0.07, + "step": 14510 + }, + { + "epoch": 2.04, + "learning_rate": 4.6605839416058396e-05, + "loss": 0.0537, + "step": 14512 + }, + { + "epoch": 2.04, + "learning_rate": 4.660537151413064e-05, + "loss": 0.041, + "step": 14514 + }, + { + "epoch": 2.04, + "learning_rate": 4.660490361220288e-05, + "loss": 0.0478, + "step": 14516 + }, + { + "epoch": 2.04, + "learning_rate": 4.660443571027513e-05, + "loss": 0.0361, + "step": 14518 + }, + { + "epoch": 2.04, + "learning_rate": 4.660396780834737e-05, + "loss": 0.0689, + "step": 14520 + }, + { + "epoch": 2.04, + "learning_rate": 4.660349990641962e-05, + "loss": 0.0567, + "step": 14522 + }, + { + "epoch": 2.04, + "learning_rate": 4.660303200449186e-05, + "loss": 0.0406, + "step": 14524 + }, + { + "epoch": 2.04, + "learning_rate": 4.6602564102564104e-05, + "loss": 0.0564, + "step": 14526 + }, + { + "epoch": 2.04, + "learning_rate": 4.660209620063635e-05, + "loss": 0.0621, + "step": 14528 + }, + { + "epoch": 2.04, + "learning_rate": 4.6601628298708596e-05, + "loss": 0.063, + "step": 14530 + }, + { + "epoch": 2.04, + "learning_rate": 4.6601160396780835e-05, + "loss": 0.058, + "step": 14532 + }, + { + "epoch": 2.04, + "learning_rate": 4.660069249485308e-05, + "loss": 0.0404, + "step": 14534 + }, + { + "epoch": 2.04, + "learning_rate": 4.660022459292532e-05, + "loss": 0.0621, + "step": 14536 + }, + { + "epoch": 2.04, + "learning_rate": 4.659975669099757e-05, + "loss": 0.0534, + "step": 14538 + }, + { + "epoch": 2.04, + "learning_rate": 4.659928878906981e-05, + "loss": 0.07, + "step": 14540 + }, + { + "epoch": 2.04, + "learning_rate": 4.659882088714206e-05, + "loss": 0.043, + "step": 14542 + }, + { + "epoch": 2.04, + "learning_rate": 4.65983529852143e-05, + "loss": 0.0656, + "step": 14544 + }, + { + "epoch": 2.04, + "learning_rate": 4.659788508328655e-05, + "loss": 0.05, + "step": 14546 + }, + { + "epoch": 2.04, + "learning_rate": 4.659741718135879e-05, + "loss": 0.063, + "step": 14548 + }, + { + "epoch": 2.04, + "learning_rate": 4.6596949279431035e-05, + "loss": 0.0544, + "step": 14550 + }, + { + "epoch": 2.04, + "learning_rate": 4.6596481377503274e-05, + "loss": 0.0418, + "step": 14552 + }, + { + "epoch": 2.04, + "learning_rate": 4.659601347557552e-05, + "loss": 0.0418, + "step": 14554 + }, + { + "epoch": 2.04, + "learning_rate": 4.6595545573647766e-05, + "loss": 0.0735, + "step": 14556 + }, + { + "epoch": 2.04, + "learning_rate": 4.659507767172001e-05, + "loss": 0.0715, + "step": 14558 + }, + { + "epoch": 2.04, + "learning_rate": 4.659460976979225e-05, + "loss": 0.0477, + "step": 14560 + }, + { + "epoch": 2.04, + "learning_rate": 4.6594141867864497e-05, + "loss": 0.0454, + "step": 14562 + }, + { + "epoch": 2.04, + "learning_rate": 4.659367396593674e-05, + "loss": 0.0515, + "step": 14564 + }, + { + "epoch": 2.04, + "learning_rate": 4.659320606400899e-05, + "loss": 0.0504, + "step": 14566 + }, + { + "epoch": 2.04, + "learning_rate": 4.659273816208123e-05, + "loss": 0.0641, + "step": 14568 + }, + { + "epoch": 2.05, + "learning_rate": 4.6592270260153473e-05, + "loss": 0.0577, + "step": 14570 + }, + { + "epoch": 2.05, + "learning_rate": 4.659180235822572e-05, + "loss": 0.0744, + "step": 14572 + }, + { + "epoch": 2.05, + "learning_rate": 4.6591334456297965e-05, + "loss": 0.0584, + "step": 14574 + }, + { + "epoch": 2.05, + "learning_rate": 4.6590866554370204e-05, + "loss": 0.0404, + "step": 14576 + }, + { + "epoch": 2.05, + "learning_rate": 4.659039865244245e-05, + "loss": 0.0616, + "step": 14578 + }, + { + "epoch": 2.05, + "learning_rate": 4.6589930750514696e-05, + "loss": 0.0401, + "step": 14580 + }, + { + "epoch": 2.05, + "learning_rate": 4.658946284858694e-05, + "loss": 0.0437, + "step": 14582 + }, + { + "epoch": 2.05, + "learning_rate": 4.658899494665918e-05, + "loss": 0.0641, + "step": 14584 + }, + { + "epoch": 2.05, + "learning_rate": 4.658852704473143e-05, + "loss": 0.0548, + "step": 14586 + }, + { + "epoch": 2.05, + "learning_rate": 4.6588059142803666e-05, + "loss": 0.0503, + "step": 14588 + }, + { + "epoch": 2.05, + "learning_rate": 4.658759124087592e-05, + "loss": 0.0467, + "step": 14590 + }, + { + "epoch": 2.05, + "learning_rate": 4.658712333894816e-05, + "loss": 0.0517, + "step": 14592 + }, + { + "epoch": 2.05, + "learning_rate": 4.6586655437020404e-05, + "loss": 0.0374, + "step": 14594 + }, + { + "epoch": 2.05, + "learning_rate": 4.658618753509264e-05, + "loss": 0.0573, + "step": 14596 + }, + { + "epoch": 2.05, + "learning_rate": 4.6585719633164896e-05, + "loss": 0.0446, + "step": 14598 + }, + { + "epoch": 2.05, + "learning_rate": 4.6585251731237135e-05, + "loss": 0.0477, + "step": 14600 + }, + { + "epoch": 2.05, + "learning_rate": 4.658478382930938e-05, + "loss": 0.056, + "step": 14602 + }, + { + "epoch": 2.05, + "learning_rate": 4.658431592738162e-05, + "loss": 0.0613, + "step": 14604 + }, + { + "epoch": 2.05, + "learning_rate": 4.6583848025453866e-05, + "loss": 0.0651, + "step": 14606 + }, + { + "epoch": 2.05, + "learning_rate": 4.658338012352611e-05, + "loss": 0.0466, + "step": 14608 + }, + { + "epoch": 2.05, + "learning_rate": 4.658291222159836e-05, + "loss": 0.0451, + "step": 14610 + }, + { + "epoch": 2.05, + "learning_rate": 4.65824443196706e-05, + "loss": 0.0697, + "step": 14612 + }, + { + "epoch": 2.05, + "learning_rate": 4.658197641774284e-05, + "loss": 0.0467, + "step": 14614 + }, + { + "epoch": 2.05, + "learning_rate": 4.658150851581509e-05, + "loss": 0.0579, + "step": 14616 + }, + { + "epoch": 2.05, + "learning_rate": 4.6581040613887335e-05, + "loss": 0.0528, + "step": 14618 + }, + { + "epoch": 2.05, + "learning_rate": 4.6580572711959574e-05, + "loss": 0.0398, + "step": 14620 + }, + { + "epoch": 2.05, + "learning_rate": 4.658010481003182e-05, + "loss": 0.0467, + "step": 14622 + }, + { + "epoch": 2.05, + "learning_rate": 4.6579636908104066e-05, + "loss": 0.0417, + "step": 14624 + }, + { + "epoch": 2.05, + "learning_rate": 4.657916900617631e-05, + "loss": 0.054, + "step": 14626 + }, + { + "epoch": 2.05, + "learning_rate": 4.657870110424855e-05, + "loss": 0.0376, + "step": 14628 + }, + { + "epoch": 2.05, + "learning_rate": 4.6578233202320797e-05, + "loss": 0.0616, + "step": 14630 + }, + { + "epoch": 2.05, + "learning_rate": 4.657776530039304e-05, + "loss": 0.0564, + "step": 14632 + }, + { + "epoch": 2.05, + "learning_rate": 4.657729739846529e-05, + "loss": 0.06, + "step": 14634 + }, + { + "epoch": 2.05, + "learning_rate": 4.657682949653753e-05, + "loss": 0.0604, + "step": 14636 + }, + { + "epoch": 2.05, + "learning_rate": 4.6576361594609773e-05, + "loss": 0.0494, + "step": 14638 + }, + { + "epoch": 2.06, + "learning_rate": 4.657589369268201e-05, + "loss": 0.0466, + "step": 14640 + }, + { + "epoch": 2.06, + "learning_rate": 4.6575425790754265e-05, + "loss": 0.0515, + "step": 14642 + }, + { + "epoch": 2.06, + "learning_rate": 4.6574957888826504e-05, + "loss": 0.0423, + "step": 14644 + }, + { + "epoch": 2.06, + "learning_rate": 4.657448998689875e-05, + "loss": 0.0596, + "step": 14646 + }, + { + "epoch": 2.06, + "learning_rate": 4.657402208497099e-05, + "loss": 0.0368, + "step": 14648 + }, + { + "epoch": 2.06, + "learning_rate": 4.6573554183043235e-05, + "loss": 0.0494, + "step": 14650 + }, + { + "epoch": 2.06, + "learning_rate": 4.657308628111548e-05, + "loss": 0.0445, + "step": 14652 + }, + { + "epoch": 2.06, + "learning_rate": 4.657261837918773e-05, + "loss": 0.0517, + "step": 14654 + }, + { + "epoch": 2.06, + "learning_rate": 4.6572150477259966e-05, + "loss": 0.0482, + "step": 14656 + }, + { + "epoch": 2.06, + "learning_rate": 4.657168257533221e-05, + "loss": 0.0476, + "step": 14658 + }, + { + "epoch": 2.06, + "learning_rate": 4.657121467340446e-05, + "loss": 0.0343, + "step": 14660 + }, + { + "epoch": 2.06, + "learning_rate": 4.6570746771476704e-05, + "loss": 0.0455, + "step": 14662 + }, + { + "epoch": 2.06, + "learning_rate": 4.657027886954894e-05, + "loss": 0.0417, + "step": 14664 + }, + { + "epoch": 2.06, + "learning_rate": 4.656981096762119e-05, + "loss": 0.0496, + "step": 14666 + }, + { + "epoch": 2.06, + "learning_rate": 4.6569343065693435e-05, + "loss": 0.0366, + "step": 14668 + }, + { + "epoch": 2.06, + "learning_rate": 4.656887516376568e-05, + "loss": 0.0536, + "step": 14670 + }, + { + "epoch": 2.06, + "learning_rate": 4.656840726183792e-05, + "loss": 0.0497, + "step": 14672 + }, + { + "epoch": 2.06, + "learning_rate": 4.6567939359910166e-05, + "loss": 0.0493, + "step": 14674 + }, + { + "epoch": 2.06, + "learning_rate": 4.656747145798241e-05, + "loss": 0.0546, + "step": 14676 + }, + { + "epoch": 2.06, + "learning_rate": 4.656700355605466e-05, + "loss": 0.0519, + "step": 14678 + }, + { + "epoch": 2.06, + "learning_rate": 4.65665356541269e-05, + "loss": 0.0568, + "step": 14680 + }, + { + "epoch": 2.06, + "learning_rate": 4.656606775219914e-05, + "loss": 0.0414, + "step": 14682 + }, + { + "epoch": 2.06, + "learning_rate": 4.656559985027138e-05, + "loss": 0.0476, + "step": 14684 + }, + { + "epoch": 2.06, + "learning_rate": 4.6565131948343635e-05, + "loss": 0.0621, + "step": 14686 + }, + { + "epoch": 2.06, + "learning_rate": 4.6564664046415874e-05, + "loss": 0.0392, + "step": 14688 + }, + { + "epoch": 2.06, + "learning_rate": 4.656419614448812e-05, + "loss": 0.0544, + "step": 14690 + }, + { + "epoch": 2.06, + "learning_rate": 4.656372824256036e-05, + "loss": 0.0524, + "step": 14692 + }, + { + "epoch": 2.06, + "learning_rate": 4.6563260340632605e-05, + "loss": 0.0526, + "step": 14694 + }, + { + "epoch": 2.06, + "learning_rate": 4.656279243870485e-05, + "loss": 0.0513, + "step": 14696 + }, + { + "epoch": 2.06, + "learning_rate": 4.656232453677709e-05, + "loss": 0.0559, + "step": 14698 + }, + { + "epoch": 2.06, + "learning_rate": 4.6561856634849336e-05, + "loss": 0.0438, + "step": 14700 + }, + { + "epoch": 2.06, + "learning_rate": 4.656138873292158e-05, + "loss": 0.055, + "step": 14702 + }, + { + "epoch": 2.06, + "learning_rate": 4.656092083099383e-05, + "loss": 0.0487, + "step": 14704 + }, + { + "epoch": 2.06, + "learning_rate": 4.656045292906607e-05, + "loss": 0.0491, + "step": 14706 + }, + { + "epoch": 2.06, + "learning_rate": 4.655998502713831e-05, + "loss": 0.0372, + "step": 14708 + }, + { + "epoch": 2.06, + "learning_rate": 4.655951712521056e-05, + "loss": 0.056, + "step": 14710 + }, + { + "epoch": 2.07, + "learning_rate": 4.6559049223282804e-05, + "loss": 0.054, + "step": 14712 + }, + { + "epoch": 2.07, + "learning_rate": 4.6558581321355044e-05, + "loss": 0.0396, + "step": 14714 + }, + { + "epoch": 2.07, + "learning_rate": 4.655811341942729e-05, + "loss": 0.0446, + "step": 14716 + }, + { + "epoch": 2.07, + "learning_rate": 4.655764551749953e-05, + "loss": 0.05, + "step": 14718 + }, + { + "epoch": 2.07, + "learning_rate": 4.655717761557178e-05, + "loss": 0.0512, + "step": 14720 + }, + { + "epoch": 2.07, + "learning_rate": 4.655670971364402e-05, + "loss": 0.0376, + "step": 14722 + }, + { + "epoch": 2.07, + "learning_rate": 4.6556241811716266e-05, + "loss": 0.0605, + "step": 14724 + }, + { + "epoch": 2.07, + "learning_rate": 4.6555773909788505e-05, + "loss": 0.0378, + "step": 14726 + }, + { + "epoch": 2.07, + "learning_rate": 4.655530600786076e-05, + "loss": 0.0732, + "step": 14728 + }, + { + "epoch": 2.07, + "learning_rate": 4.6554838105933e-05, + "loss": 0.0635, + "step": 14730 + }, + { + "epoch": 2.07, + "learning_rate": 4.655437020400524e-05, + "loss": 0.0476, + "step": 14732 + }, + { + "epoch": 2.07, + "learning_rate": 4.655390230207748e-05, + "loss": 0.0382, + "step": 14734 + }, + { + "epoch": 2.07, + "learning_rate": 4.655343440014973e-05, + "loss": 0.0371, + "step": 14736 + }, + { + "epoch": 2.07, + "learning_rate": 4.6552966498221974e-05, + "loss": 0.0559, + "step": 14738 + }, + { + "epoch": 2.07, + "learning_rate": 4.655249859629422e-05, + "loss": 0.0363, + "step": 14740 + }, + { + "epoch": 2.07, + "learning_rate": 4.655203069436646e-05, + "loss": 0.0508, + "step": 14742 + }, + { + "epoch": 2.07, + "learning_rate": 4.6551562792438705e-05, + "loss": 0.0466, + "step": 14744 + }, + { + "epoch": 2.07, + "learning_rate": 4.655109489051095e-05, + "loss": 0.0489, + "step": 14746 + }, + { + "epoch": 2.07, + "learning_rate": 4.65506269885832e-05, + "loss": 0.066, + "step": 14748 + }, + { + "epoch": 2.07, + "learning_rate": 4.6550159086655436e-05, + "loss": 0.0509, + "step": 14750 + }, + { + "epoch": 2.07, + "learning_rate": 4.654969118472768e-05, + "loss": 0.0368, + "step": 14752 + }, + { + "epoch": 2.07, + "learning_rate": 4.654922328279993e-05, + "loss": 0.0315, + "step": 14754 + }, + { + "epoch": 2.07, + "learning_rate": 4.6548755380872174e-05, + "loss": 0.0598, + "step": 14756 + }, + { + "epoch": 2.07, + "learning_rate": 4.654828747894441e-05, + "loss": 0.0678, + "step": 14758 + }, + { + "epoch": 2.07, + "learning_rate": 4.654781957701666e-05, + "loss": 0.0628, + "step": 14760 + }, + { + "epoch": 2.07, + "learning_rate": 4.6547351675088905e-05, + "loss": 0.0318, + "step": 14762 + }, + { + "epoch": 2.07, + "learning_rate": 4.654688377316115e-05, + "loss": 0.0581, + "step": 14764 + }, + { + "epoch": 2.07, + "learning_rate": 4.654641587123339e-05, + "loss": 0.0387, + "step": 14766 + }, + { + "epoch": 2.07, + "learning_rate": 4.6545947969305636e-05, + "loss": 0.0604, + "step": 14768 + }, + { + "epoch": 2.07, + "learning_rate": 4.6545480067377875e-05, + "loss": 0.0608, + "step": 14770 + }, + { + "epoch": 2.07, + "learning_rate": 4.654501216545013e-05, + "loss": 0.0671, + "step": 14772 + }, + { + "epoch": 2.07, + "learning_rate": 4.654454426352237e-05, + "loss": 0.0544, + "step": 14774 + }, + { + "epoch": 2.07, + "learning_rate": 4.654407636159461e-05, + "loss": 0.0495, + "step": 14776 + }, + { + "epoch": 2.07, + "learning_rate": 4.654360845966685e-05, + "loss": 0.0484, + "step": 14778 + }, + { + "epoch": 2.07, + "learning_rate": 4.6543140557739104e-05, + "loss": 0.0563, + "step": 14780 + }, + { + "epoch": 2.07, + "learning_rate": 4.6542672655811344e-05, + "loss": 0.0578, + "step": 14782 + }, + { + "epoch": 2.08, + "learning_rate": 4.654220475388359e-05, + "loss": 0.0636, + "step": 14784 + }, + { + "epoch": 2.08, + "learning_rate": 4.654173685195583e-05, + "loss": 0.0592, + "step": 14786 + }, + { + "epoch": 2.08, + "learning_rate": 4.6541268950028075e-05, + "loss": 0.0511, + "step": 14788 + }, + { + "epoch": 2.08, + "learning_rate": 4.654080104810032e-05, + "loss": 0.0665, + "step": 14790 + }, + { + "epoch": 2.08, + "learning_rate": 4.6540333146172566e-05, + "loss": 0.0539, + "step": 14792 + }, + { + "epoch": 2.08, + "learning_rate": 4.6539865244244806e-05, + "loss": 0.059, + "step": 14794 + }, + { + "epoch": 2.08, + "learning_rate": 4.653939734231705e-05, + "loss": 0.0649, + "step": 14796 + }, + { + "epoch": 2.08, + "learning_rate": 4.65389294403893e-05, + "loss": 0.0448, + "step": 14798 + }, + { + "epoch": 2.08, + "learning_rate": 4.653846153846154e-05, + "loss": 0.0811, + "step": 14800 + }, + { + "epoch": 2.08, + "learning_rate": 4.653799363653378e-05, + "loss": 0.0506, + "step": 14802 + }, + { + "epoch": 2.08, + "learning_rate": 4.653752573460603e-05, + "loss": 0.0577, + "step": 14804 + }, + { + "epoch": 2.08, + "learning_rate": 4.6537057832678274e-05, + "loss": 0.0445, + "step": 14806 + }, + { + "epoch": 2.08, + "learning_rate": 4.653658993075052e-05, + "loss": 0.04, + "step": 14808 + }, + { + "epoch": 2.08, + "learning_rate": 4.653612202882276e-05, + "loss": 0.0642, + "step": 14810 + }, + { + "epoch": 2.08, + "learning_rate": 4.6535654126895005e-05, + "loss": 0.0449, + "step": 14812 + }, + { + "epoch": 2.08, + "learning_rate": 4.6535186224967244e-05, + "loss": 0.0496, + "step": 14814 + }, + { + "epoch": 2.08, + "learning_rate": 4.65347183230395e-05, + "loss": 0.0708, + "step": 14816 + }, + { + "epoch": 2.08, + "learning_rate": 4.6534250421111736e-05, + "loss": 0.0607, + "step": 14818 + }, + { + "epoch": 2.08, + "learning_rate": 4.653378251918398e-05, + "loss": 0.0564, + "step": 14820 + }, + { + "epoch": 2.08, + "learning_rate": 4.653331461725622e-05, + "loss": 0.0459, + "step": 14822 + }, + { + "epoch": 2.08, + "learning_rate": 4.6532846715328474e-05, + "loss": 0.0708, + "step": 14824 + }, + { + "epoch": 2.08, + "learning_rate": 4.653237881340071e-05, + "loss": 0.0561, + "step": 14826 + }, + { + "epoch": 2.08, + "learning_rate": 4.653191091147296e-05, + "loss": 0.0717, + "step": 14828 + }, + { + "epoch": 2.08, + "learning_rate": 4.65314430095452e-05, + "loss": 0.0797, + "step": 14830 + }, + { + "epoch": 2.08, + "learning_rate": 4.6530975107617444e-05, + "loss": 0.0392, + "step": 14832 + }, + { + "epoch": 2.08, + "learning_rate": 4.653050720568969e-05, + "loss": 0.0628, + "step": 14834 + }, + { + "epoch": 2.08, + "learning_rate": 4.6530039303761936e-05, + "loss": 0.0623, + "step": 14836 + }, + { + "epoch": 2.08, + "learning_rate": 4.6529571401834175e-05, + "loss": 0.0539, + "step": 14838 + }, + { + "epoch": 2.08, + "learning_rate": 4.652910349990642e-05, + "loss": 0.0444, + "step": 14840 + }, + { + "epoch": 2.08, + "learning_rate": 4.652863559797867e-05, + "loss": 0.0597, + "step": 14842 + }, + { + "epoch": 2.08, + "learning_rate": 4.652816769605091e-05, + "loss": 0.046, + "step": 14844 + }, + { + "epoch": 2.08, + "learning_rate": 4.652769979412315e-05, + "loss": 0.0736, + "step": 14846 + }, + { + "epoch": 2.08, + "learning_rate": 4.65272318921954e-05, + "loss": 0.0652, + "step": 14848 + }, + { + "epoch": 2.08, + "learning_rate": 4.6526763990267644e-05, + "loss": 0.0419, + "step": 14850 + }, + { + "epoch": 2.08, + "learning_rate": 4.652629608833989e-05, + "loss": 0.0417, + "step": 14852 + }, + { + "epoch": 2.09, + "learning_rate": 4.652582818641213e-05, + "loss": 0.0475, + "step": 14854 + }, + { + "epoch": 2.09, + "learning_rate": 4.6525360284484375e-05, + "loss": 0.0626, + "step": 14856 + }, + { + "epoch": 2.09, + "learning_rate": 4.652489238255662e-05, + "loss": 0.0506, + "step": 14858 + }, + { + "epoch": 2.09, + "learning_rate": 4.6524424480628866e-05, + "loss": 0.0565, + "step": 14860 + }, + { + "epoch": 2.09, + "learning_rate": 4.6523956578701106e-05, + "loss": 0.0566, + "step": 14862 + }, + { + "epoch": 2.09, + "learning_rate": 4.652348867677335e-05, + "loss": 0.0614, + "step": 14864 + }, + { + "epoch": 2.09, + "learning_rate": 4.652302077484559e-05, + "loss": 0.049, + "step": 14866 + }, + { + "epoch": 2.09, + "learning_rate": 4.652255287291784e-05, + "loss": 0.0626, + "step": 14868 + }, + { + "epoch": 2.09, + "learning_rate": 4.652208497099008e-05, + "loss": 0.0448, + "step": 14870 + }, + { + "epoch": 2.09, + "learning_rate": 4.652161706906233e-05, + "loss": 0.0537, + "step": 14872 + }, + { + "epoch": 2.09, + "learning_rate": 4.652114916713457e-05, + "loss": 0.0431, + "step": 14874 + }, + { + "epoch": 2.09, + "learning_rate": 4.652068126520682e-05, + "loss": 0.047, + "step": 14876 + }, + { + "epoch": 2.09, + "learning_rate": 4.652021336327906e-05, + "loss": 0.0668, + "step": 14878 + }, + { + "epoch": 2.09, + "learning_rate": 4.6519745461351305e-05, + "loss": 0.0505, + "step": 14880 + }, + { + "epoch": 2.09, + "learning_rate": 4.6519277559423544e-05, + "loss": 0.0652, + "step": 14882 + }, + { + "epoch": 2.09, + "learning_rate": 4.651880965749579e-05, + "loss": 0.0646, + "step": 14884 + }, + { + "epoch": 2.09, + "learning_rate": 4.6518341755568036e-05, + "loss": 0.0597, + "step": 14886 + }, + { + "epoch": 2.09, + "learning_rate": 4.651787385364028e-05, + "loss": 0.0604, + "step": 14888 + }, + { + "epoch": 2.09, + "learning_rate": 4.651740595171252e-05, + "loss": 0.0598, + "step": 14890 + }, + { + "epoch": 2.09, + "learning_rate": 4.651693804978477e-05, + "loss": 0.0547, + "step": 14892 + }, + { + "epoch": 2.09, + "learning_rate": 4.651647014785701e-05, + "loss": 0.0596, + "step": 14894 + }, + { + "epoch": 2.09, + "learning_rate": 4.651600224592926e-05, + "loss": 0.0751, + "step": 14896 + }, + { + "epoch": 2.09, + "learning_rate": 4.65155343440015e-05, + "loss": 0.0511, + "step": 14898 + }, + { + "epoch": 2.09, + "learning_rate": 4.6515066442073744e-05, + "loss": 0.0748, + "step": 14900 + }, + { + "epoch": 2.09, + "learning_rate": 4.651459854014599e-05, + "loss": 0.0514, + "step": 14902 + }, + { + "epoch": 2.09, + "learning_rate": 4.6514130638218236e-05, + "loss": 0.0444, + "step": 14904 + }, + { + "epoch": 2.09, + "learning_rate": 4.6513662736290475e-05, + "loss": 0.0424, + "step": 14906 + }, + { + "epoch": 2.09, + "learning_rate": 4.651319483436272e-05, + "loss": 0.0598, + "step": 14908 + }, + { + "epoch": 2.09, + "learning_rate": 4.651272693243497e-05, + "loss": 0.0619, + "step": 14910 + }, + { + "epoch": 2.09, + "learning_rate": 4.651225903050721e-05, + "loss": 0.0885, + "step": 14912 + }, + { + "epoch": 2.09, + "learning_rate": 4.651179112857945e-05, + "loss": 0.0641, + "step": 14914 + }, + { + "epoch": 2.09, + "learning_rate": 4.65113232266517e-05, + "loss": 0.0525, + "step": 14916 + }, + { + "epoch": 2.09, + "learning_rate": 4.651085532472394e-05, + "loss": 0.0531, + "step": 14918 + }, + { + "epoch": 2.09, + "learning_rate": 4.651038742279619e-05, + "loss": 0.0573, + "step": 14920 + }, + { + "epoch": 2.09, + "learning_rate": 4.650991952086843e-05, + "loss": 0.0528, + "step": 14922 + }, + { + "epoch": 2.09, + "learning_rate": 4.6509451618940675e-05, + "loss": 0.0504, + "step": 14924 + }, + { + "epoch": 2.1, + "learning_rate": 4.6508983717012914e-05, + "loss": 0.0463, + "step": 14926 + }, + { + "epoch": 2.1, + "learning_rate": 4.650851581508516e-05, + "loss": 0.0617, + "step": 14928 + }, + { + "epoch": 2.1, + "learning_rate": 4.6508047913157406e-05, + "loss": 0.0479, + "step": 14930 + }, + { + "epoch": 2.1, + "learning_rate": 4.650758001122965e-05, + "loss": 0.0539, + "step": 14932 + }, + { + "epoch": 2.1, + "learning_rate": 4.650711210930189e-05, + "loss": 0.0714, + "step": 14934 + }, + { + "epoch": 2.1, + "learning_rate": 4.6506644207374137e-05, + "loss": 0.0437, + "step": 14936 + }, + { + "epoch": 2.1, + "learning_rate": 4.650617630544638e-05, + "loss": 0.0589, + "step": 14938 + }, + { + "epoch": 2.1, + "learning_rate": 4.650570840351863e-05, + "loss": 0.0713, + "step": 14940 + }, + { + "epoch": 2.1, + "learning_rate": 4.650524050159087e-05, + "loss": 0.055, + "step": 14942 + }, + { + "epoch": 2.1, + "learning_rate": 4.650477259966311e-05, + "loss": 0.053, + "step": 14944 + }, + { + "epoch": 2.1, + "learning_rate": 4.650430469773536e-05, + "loss": 0.0679, + "step": 14946 + }, + { + "epoch": 2.1, + "learning_rate": 4.65038367958076e-05, + "loss": 0.066, + "step": 14948 + }, + { + "epoch": 2.1, + "learning_rate": 4.6503368893879844e-05, + "loss": 0.0706, + "step": 14950 + }, + { + "epoch": 2.1, + "learning_rate": 4.6502900991952083e-05, + "loss": 0.0536, + "step": 14952 + }, + { + "epoch": 2.1, + "learning_rate": 4.6502433090024336e-05, + "loss": 0.0565, + "step": 14954 + }, + { + "epoch": 2.1, + "learning_rate": 4.6501965188096575e-05, + "loss": 0.0618, + "step": 14956 + }, + { + "epoch": 2.1, + "learning_rate": 4.650149728616882e-05, + "loss": 0.0537, + "step": 14958 + }, + { + "epoch": 2.1, + "learning_rate": 4.650102938424106e-05, + "loss": 0.0347, + "step": 14960 + }, + { + "epoch": 2.1, + "learning_rate": 4.6500561482313306e-05, + "loss": 0.0448, + "step": 14962 + }, + { + "epoch": 2.1, + "learning_rate": 4.650009358038555e-05, + "loss": 0.0494, + "step": 14964 + }, + { + "epoch": 2.1, + "learning_rate": 4.64996256784578e-05, + "loss": 0.0313, + "step": 14966 + }, + { + "epoch": 2.1, + "learning_rate": 4.649915777653004e-05, + "loss": 0.0549, + "step": 14968 + }, + { + "epoch": 2.1, + "learning_rate": 4.649868987460228e-05, + "loss": 0.0697, + "step": 14970 + }, + { + "epoch": 2.1, + "learning_rate": 4.649822197267453e-05, + "loss": 0.0573, + "step": 14972 + }, + { + "epoch": 2.1, + "learning_rate": 4.6497754070746775e-05, + "loss": 0.057, + "step": 14974 + }, + { + "epoch": 2.1, + "learning_rate": 4.6497286168819014e-05, + "loss": 0.0701, + "step": 14976 + }, + { + "epoch": 2.1, + "learning_rate": 4.649681826689126e-05, + "loss": 0.0424, + "step": 14978 + }, + { + "epoch": 2.1, + "learning_rate": 4.6496350364963506e-05, + "loss": 0.0565, + "step": 14980 + }, + { + "epoch": 2.1, + "learning_rate": 4.649588246303575e-05, + "loss": 0.0474, + "step": 14982 + }, + { + "epoch": 2.1, + "learning_rate": 4.649541456110799e-05, + "loss": 0.0531, + "step": 14984 + }, + { + "epoch": 2.1, + "learning_rate": 4.649494665918024e-05, + "loss": 0.0607, + "step": 14986 + }, + { + "epoch": 2.1, + "learning_rate": 4.649447875725248e-05, + "loss": 0.0822, + "step": 14988 + }, + { + "epoch": 2.1, + "learning_rate": 4.649401085532473e-05, + "loss": 0.0507, + "step": 14990 + }, + { + "epoch": 2.1, + "learning_rate": 4.649354295339697e-05, + "loss": 0.0385, + "step": 14992 + }, + { + "epoch": 2.1, + "learning_rate": 4.6493075051469214e-05, + "loss": 0.0401, + "step": 14994 + }, + { + "epoch": 2.1, + "learning_rate": 4.649260714954145e-05, + "loss": 0.0452, + "step": 14996 + }, + { + "epoch": 2.11, + "learning_rate": 4.6492139247613706e-05, + "loss": 0.0527, + "step": 14998 + }, + { + "epoch": 2.11, + "learning_rate": 4.6491671345685945e-05, + "loss": 0.0805, + "step": 15000 + }, + { + "epoch": 2.11, + "eval_gen_len": 30.6379, + "eval_loss": 1.070984959602356, + "eval_meteor": 0.0449, + "eval_runtime": 14.582, + "eval_samples_per_second": 3.978, + "eval_steps_per_second": 0.549, + "step": 15000 + }, + { + "epoch": 2.11, + "learning_rate": 4.649120344375819e-05, + "loss": 0.0477, + "step": 15002 + }, + { + "epoch": 2.11, + "learning_rate": 4.649073554183043e-05, + "loss": 0.0534, + "step": 15004 + }, + { + "epoch": 2.11, + "learning_rate": 4.649026763990268e-05, + "loss": 0.0488, + "step": 15006 + }, + { + "epoch": 2.11, + "learning_rate": 4.648979973797492e-05, + "loss": 0.0484, + "step": 15008 + }, + { + "epoch": 2.11, + "learning_rate": 4.648933183604717e-05, + "loss": 0.0495, + "step": 15010 + }, + { + "epoch": 2.11, + "learning_rate": 4.648886393411941e-05, + "loss": 0.0635, + "step": 15012 + }, + { + "epoch": 2.11, + "learning_rate": 4.648839603219165e-05, + "loss": 0.0501, + "step": 15014 + }, + { + "epoch": 2.11, + "learning_rate": 4.64879281302639e-05, + "loss": 0.0803, + "step": 15016 + }, + { + "epoch": 2.11, + "learning_rate": 4.6487460228336144e-05, + "loss": 0.0396, + "step": 15018 + }, + { + "epoch": 2.11, + "learning_rate": 4.6486992326408383e-05, + "loss": 0.0516, + "step": 15020 + }, + { + "epoch": 2.11, + "learning_rate": 4.648652442448063e-05, + "loss": 0.0522, + "step": 15022 + }, + { + "epoch": 2.11, + "learning_rate": 4.6486056522552875e-05, + "loss": 0.0452, + "step": 15024 + }, + { + "epoch": 2.11, + "learning_rate": 4.648558862062512e-05, + "loss": 0.0603, + "step": 15026 + }, + { + "epoch": 2.11, + "learning_rate": 4.648512071869736e-05, + "loss": 0.0586, + "step": 15028 + }, + { + "epoch": 2.11, + "learning_rate": 4.6484652816769606e-05, + "loss": 0.0389, + "step": 15030 + }, + { + "epoch": 2.11, + "learning_rate": 4.648418491484185e-05, + "loss": 0.0535, + "step": 15032 + }, + { + "epoch": 2.11, + "learning_rate": 4.64837170129141e-05, + "loss": 0.0582, + "step": 15034 + }, + { + "epoch": 2.11, + "learning_rate": 4.648324911098634e-05, + "loss": 0.0479, + "step": 15036 + }, + { + "epoch": 2.11, + "learning_rate": 4.648278120905858e-05, + "loss": 0.057, + "step": 15038 + }, + { + "epoch": 2.11, + "learning_rate": 4.648231330713083e-05, + "loss": 0.0658, + "step": 15040 + }, + { + "epoch": 2.11, + "learning_rate": 4.6481845405203075e-05, + "loss": 0.0594, + "step": 15042 + }, + { + "epoch": 2.11, + "learning_rate": 4.6481377503275314e-05, + "loss": 0.0462, + "step": 15044 + }, + { + "epoch": 2.11, + "learning_rate": 4.648090960134756e-05, + "loss": 0.0506, + "step": 15046 + }, + { + "epoch": 2.11, + "learning_rate": 4.64804416994198e-05, + "loss": 0.0614, + "step": 15048 + }, + { + "epoch": 2.11, + "learning_rate": 4.647997379749205e-05, + "loss": 0.0835, + "step": 15050 + }, + { + "epoch": 2.11, + "learning_rate": 4.647950589556429e-05, + "loss": 0.0583, + "step": 15052 + }, + { + "epoch": 2.11, + "learning_rate": 4.647903799363654e-05, + "loss": 0.0607, + "step": 15054 + }, + { + "epoch": 2.11, + "learning_rate": 4.6478570091708776e-05, + "loss": 0.0574, + "step": 15056 + }, + { + "epoch": 2.11, + "learning_rate": 4.647810218978103e-05, + "loss": 0.0558, + "step": 15058 + }, + { + "epoch": 2.11, + "learning_rate": 4.647763428785327e-05, + "loss": 0.0432, + "step": 15060 + }, + { + "epoch": 2.11, + "learning_rate": 4.6477166385925514e-05, + "loss": 0.0598, + "step": 15062 + }, + { + "epoch": 2.11, + "learning_rate": 4.647669848399775e-05, + "loss": 0.0699, + "step": 15064 + }, + { + "epoch": 2.11, + "learning_rate": 4.647623058207e-05, + "loss": 0.051, + "step": 15066 + }, + { + "epoch": 2.12, + "learning_rate": 4.6475762680142245e-05, + "loss": 0.0436, + "step": 15068 + }, + { + "epoch": 2.12, + "learning_rate": 4.647529477821449e-05, + "loss": 0.0566, + "step": 15070 + }, + { + "epoch": 2.12, + "learning_rate": 4.647482687628673e-05, + "loss": 0.0457, + "step": 15072 + }, + { + "epoch": 2.12, + "learning_rate": 4.6474358974358976e-05, + "loss": 0.0543, + "step": 15074 + }, + { + "epoch": 2.12, + "learning_rate": 4.647389107243122e-05, + "loss": 0.0551, + "step": 15076 + }, + { + "epoch": 2.12, + "learning_rate": 4.647342317050347e-05, + "loss": 0.0436, + "step": 15078 + }, + { + "epoch": 2.12, + "learning_rate": 4.647295526857571e-05, + "loss": 0.0405, + "step": 15080 + }, + { + "epoch": 2.12, + "learning_rate": 4.647248736664795e-05, + "loss": 0.0448, + "step": 15082 + }, + { + "epoch": 2.12, + "learning_rate": 4.64720194647202e-05, + "loss": 0.0623, + "step": 15084 + }, + { + "epoch": 2.12, + "learning_rate": 4.6471551562792444e-05, + "loss": 0.0591, + "step": 15086 + }, + { + "epoch": 2.12, + "learning_rate": 4.6471083660864684e-05, + "loss": 0.0362, + "step": 15088 + }, + { + "epoch": 2.12, + "learning_rate": 4.647061575893693e-05, + "loss": 0.0531, + "step": 15090 + }, + { + "epoch": 2.12, + "learning_rate": 4.6470147857009175e-05, + "loss": 0.0579, + "step": 15092 + }, + { + "epoch": 2.12, + "learning_rate": 4.646967995508142e-05, + "loss": 0.045, + "step": 15094 + }, + { + "epoch": 2.12, + "learning_rate": 4.646921205315366e-05, + "loss": 0.0718, + "step": 15096 + }, + { + "epoch": 2.12, + "learning_rate": 4.6468744151225906e-05, + "loss": 0.0662, + "step": 15098 + }, + { + "epoch": 2.12, + "learning_rate": 4.6468276249298145e-05, + "loss": 0.0498, + "step": 15100 + }, + { + "epoch": 2.12, + "learning_rate": 4.64678083473704e-05, + "loss": 0.0631, + "step": 15102 + }, + { + "epoch": 2.12, + "learning_rate": 4.646734044544264e-05, + "loss": 0.0572, + "step": 15104 + }, + { + "epoch": 2.12, + "learning_rate": 4.646687254351488e-05, + "loss": 0.0628, + "step": 15106 + }, + { + "epoch": 2.12, + "learning_rate": 4.646640464158712e-05, + "loss": 0.045, + "step": 15108 + }, + { + "epoch": 2.12, + "learning_rate": 4.646593673965937e-05, + "loss": 0.0361, + "step": 15110 + }, + { + "epoch": 2.12, + "learning_rate": 4.6465468837731614e-05, + "loss": 0.0522, + "step": 15112 + }, + { + "epoch": 2.12, + "learning_rate": 4.646500093580386e-05, + "loss": 0.0517, + "step": 15114 + }, + { + "epoch": 2.12, + "learning_rate": 4.64645330338761e-05, + "loss": 0.0408, + "step": 15116 + }, + { + "epoch": 2.12, + "learning_rate": 4.6464065131948345e-05, + "loss": 0.0477, + "step": 15118 + }, + { + "epoch": 2.12, + "learning_rate": 4.646359723002059e-05, + "loss": 0.0487, + "step": 15120 + }, + { + "epoch": 2.12, + "learning_rate": 4.646312932809284e-05, + "loss": 0.0528, + "step": 15122 + }, + { + "epoch": 2.12, + "learning_rate": 4.6462661426165076e-05, + "loss": 0.0603, + "step": 15124 + }, + { + "epoch": 2.12, + "learning_rate": 4.646219352423732e-05, + "loss": 0.0583, + "step": 15126 + }, + { + "epoch": 2.12, + "learning_rate": 4.646172562230957e-05, + "loss": 0.0488, + "step": 15128 + }, + { + "epoch": 2.12, + "learning_rate": 4.6461257720381814e-05, + "loss": 0.06, + "step": 15130 + }, + { + "epoch": 2.12, + "learning_rate": 4.646078981845405e-05, + "loss": 0.0546, + "step": 15132 + }, + { + "epoch": 2.12, + "learning_rate": 4.64603219165263e-05, + "loss": 0.0397, + "step": 15134 + }, + { + "epoch": 2.12, + "learning_rate": 4.6459854014598545e-05, + "loss": 0.063, + "step": 15136 + }, + { + "epoch": 2.12, + "learning_rate": 4.645938611267079e-05, + "loss": 0.0425, + "step": 15138 + }, + { + "epoch": 2.13, + "learning_rate": 4.645891821074303e-05, + "loss": 0.0743, + "step": 15140 + }, + { + "epoch": 2.13, + "learning_rate": 4.6458450308815276e-05, + "loss": 0.0632, + "step": 15142 + }, + { + "epoch": 2.13, + "learning_rate": 4.6457982406887515e-05, + "loss": 0.054, + "step": 15144 + }, + { + "epoch": 2.13, + "learning_rate": 4.645751450495977e-05, + "loss": 0.0592, + "step": 15146 + }, + { + "epoch": 2.13, + "learning_rate": 4.645704660303201e-05, + "loss": 0.0652, + "step": 15148 + }, + { + "epoch": 2.13, + "learning_rate": 4.645657870110425e-05, + "loss": 0.0531, + "step": 15150 + }, + { + "epoch": 2.13, + "learning_rate": 4.645611079917649e-05, + "loss": 0.0572, + "step": 15152 + }, + { + "epoch": 2.13, + "learning_rate": 4.6455642897248744e-05, + "loss": 0.0478, + "step": 15154 + }, + { + "epoch": 2.13, + "learning_rate": 4.6455174995320984e-05, + "loss": 0.0781, + "step": 15156 + }, + { + "epoch": 2.13, + "learning_rate": 4.645470709339323e-05, + "loss": 0.0587, + "step": 15158 + }, + { + "epoch": 2.13, + "learning_rate": 4.645423919146547e-05, + "loss": 0.0478, + "step": 15160 + }, + { + "epoch": 2.13, + "learning_rate": 4.6453771289537714e-05, + "loss": 0.0492, + "step": 15162 + }, + { + "epoch": 2.13, + "learning_rate": 4.645330338760996e-05, + "loss": 0.0515, + "step": 15164 + }, + { + "epoch": 2.13, + "learning_rate": 4.6452835485682206e-05, + "loss": 0.0323, + "step": 15166 + }, + { + "epoch": 2.13, + "learning_rate": 4.6452367583754445e-05, + "loss": 0.0474, + "step": 15168 + }, + { + "epoch": 2.13, + "learning_rate": 4.645189968182669e-05, + "loss": 0.0621, + "step": 15170 + }, + { + "epoch": 2.13, + "learning_rate": 4.645143177989894e-05, + "loss": 0.0607, + "step": 15172 + }, + { + "epoch": 2.13, + "learning_rate": 4.645096387797118e-05, + "loss": 0.062, + "step": 15174 + }, + { + "epoch": 2.13, + "learning_rate": 4.645049597604342e-05, + "loss": 0.0483, + "step": 15176 + }, + { + "epoch": 2.13, + "learning_rate": 4.645002807411567e-05, + "loss": 0.0559, + "step": 15178 + }, + { + "epoch": 2.13, + "learning_rate": 4.6449560172187914e-05, + "loss": 0.0626, + "step": 15180 + }, + { + "epoch": 2.13, + "learning_rate": 4.644909227026016e-05, + "loss": 0.0545, + "step": 15182 + }, + { + "epoch": 2.13, + "learning_rate": 4.64486243683324e-05, + "loss": 0.0888, + "step": 15184 + }, + { + "epoch": 2.13, + "learning_rate": 4.6448156466404645e-05, + "loss": 0.1247, + "step": 15186 + }, + { + "epoch": 2.13, + "learning_rate": 4.644768856447689e-05, + "loss": 0.046, + "step": 15188 + }, + { + "epoch": 2.13, + "learning_rate": 4.644722066254914e-05, + "loss": 0.0655, + "step": 15190 + }, + { + "epoch": 2.13, + "learning_rate": 4.6446752760621376e-05, + "loss": 0.062, + "step": 15192 + }, + { + "epoch": 2.13, + "learning_rate": 4.644628485869362e-05, + "loss": 0.047, + "step": 15194 + }, + { + "epoch": 2.13, + "learning_rate": 4.644581695676586e-05, + "loss": 0.0581, + "step": 15196 + }, + { + "epoch": 2.13, + "learning_rate": 4.644534905483811e-05, + "loss": 0.058, + "step": 15198 + }, + { + "epoch": 2.13, + "learning_rate": 4.644488115291035e-05, + "loss": 0.0498, + "step": 15200 + }, + { + "epoch": 2.13, + "learning_rate": 4.644441325098259e-05, + "loss": 0.0443, + "step": 15202 + }, + { + "epoch": 2.13, + "learning_rate": 4.644394534905484e-05, + "loss": 0.0454, + "step": 15204 + }, + { + "epoch": 2.13, + "learning_rate": 4.6443477447127084e-05, + "loss": 0.0568, + "step": 15206 + }, + { + "epoch": 2.13, + "learning_rate": 4.644300954519933e-05, + "loss": 0.053, + "step": 15208 + }, + { + "epoch": 2.14, + "learning_rate": 4.644254164327157e-05, + "loss": 0.059, + "step": 15210 + }, + { + "epoch": 2.14, + "learning_rate": 4.6442073741343815e-05, + "loss": 0.062, + "step": 15212 + }, + { + "epoch": 2.14, + "learning_rate": 4.644160583941606e-05, + "loss": 0.0483, + "step": 15214 + }, + { + "epoch": 2.14, + "learning_rate": 4.644113793748831e-05, + "loss": 0.0564, + "step": 15216 + }, + { + "epoch": 2.14, + "learning_rate": 4.6440670035560546e-05, + "loss": 0.0625, + "step": 15218 + }, + { + "epoch": 2.14, + "learning_rate": 4.644020213363279e-05, + "loss": 0.0437, + "step": 15220 + }, + { + "epoch": 2.14, + "learning_rate": 4.643973423170504e-05, + "loss": 0.0526, + "step": 15222 + }, + { + "epoch": 2.14, + "learning_rate": 4.6439266329777284e-05, + "loss": 0.0499, + "step": 15224 + }, + { + "epoch": 2.14, + "learning_rate": 4.643879842784952e-05, + "loss": 0.0658, + "step": 15226 + }, + { + "epoch": 2.14, + "learning_rate": 4.643833052592177e-05, + "loss": 0.0556, + "step": 15228 + }, + { + "epoch": 2.14, + "learning_rate": 4.643786262399401e-05, + "loss": 0.0673, + "step": 15230 + }, + { + "epoch": 2.14, + "learning_rate": 4.643739472206626e-05, + "loss": 0.0553, + "step": 15232 + }, + { + "epoch": 2.14, + "learning_rate": 4.64369268201385e-05, + "loss": 0.0614, + "step": 15234 + }, + { + "epoch": 2.14, + "learning_rate": 4.6436458918210745e-05, + "loss": 0.0884, + "step": 15236 + }, + { + "epoch": 2.14, + "learning_rate": 4.6435991016282985e-05, + "loss": 0.0455, + "step": 15238 + }, + { + "epoch": 2.14, + "learning_rate": 4.643552311435523e-05, + "loss": 0.0463, + "step": 15240 + }, + { + "epoch": 2.14, + "learning_rate": 4.6435055212427476e-05, + "loss": 0.0636, + "step": 15242 + }, + { + "epoch": 2.14, + "learning_rate": 4.643458731049972e-05, + "loss": 0.0556, + "step": 15244 + }, + { + "epoch": 2.14, + "learning_rate": 4.643411940857196e-05, + "loss": 0.0569, + "step": 15246 + }, + { + "epoch": 2.14, + "learning_rate": 4.643365150664421e-05, + "loss": 0.0563, + "step": 15248 + }, + { + "epoch": 2.14, + "learning_rate": 4.643318360471645e-05, + "loss": 0.059, + "step": 15250 + }, + { + "epoch": 2.14, + "learning_rate": 4.64327157027887e-05, + "loss": 0.0442, + "step": 15252 + }, + { + "epoch": 2.14, + "learning_rate": 4.643224780086094e-05, + "loss": 0.0612, + "step": 15254 + }, + { + "epoch": 2.14, + "learning_rate": 4.6431779898933184e-05, + "loss": 0.0571, + "step": 15256 + }, + { + "epoch": 2.14, + "learning_rate": 4.643131199700543e-05, + "loss": 0.0552, + "step": 15258 + }, + { + "epoch": 2.14, + "learning_rate": 4.6430844095077676e-05, + "loss": 0.0851, + "step": 15260 + }, + { + "epoch": 2.14, + "learning_rate": 4.6430376193149915e-05, + "loss": 0.0524, + "step": 15262 + }, + { + "epoch": 2.14, + "learning_rate": 4.642990829122216e-05, + "loss": 0.0518, + "step": 15264 + }, + { + "epoch": 2.14, + "learning_rate": 4.642944038929441e-05, + "loss": 0.0576, + "step": 15266 + }, + { + "epoch": 2.14, + "learning_rate": 4.642897248736665e-05, + "loss": 0.039, + "step": 15268 + }, + { + "epoch": 2.14, + "learning_rate": 4.642850458543889e-05, + "loss": 0.046, + "step": 15270 + }, + { + "epoch": 2.14, + "learning_rate": 4.642803668351114e-05, + "loss": 0.0452, + "step": 15272 + }, + { + "epoch": 2.14, + "learning_rate": 4.642756878158338e-05, + "loss": 0.0541, + "step": 15274 + }, + { + "epoch": 2.14, + "learning_rate": 4.642710087965563e-05, + "loss": 0.094, + "step": 15276 + }, + { + "epoch": 2.14, + "learning_rate": 4.642663297772787e-05, + "loss": 0.054, + "step": 15278 + }, + { + "epoch": 2.14, + "learning_rate": 4.6426165075800115e-05, + "loss": 0.0606, + "step": 15280 + }, + { + "epoch": 2.15, + "learning_rate": 4.6425697173872354e-05, + "loss": 0.0469, + "step": 15282 + }, + { + "epoch": 2.15, + "learning_rate": 4.642522927194461e-05, + "loss": 0.0518, + "step": 15284 + }, + { + "epoch": 2.15, + "learning_rate": 4.6424761370016846e-05, + "loss": 0.0563, + "step": 15286 + }, + { + "epoch": 2.15, + "learning_rate": 4.642429346808909e-05, + "loss": 0.0486, + "step": 15288 + }, + { + "epoch": 2.15, + "learning_rate": 4.642382556616133e-05, + "loss": 0.0513, + "step": 15290 + }, + { + "epoch": 2.15, + "learning_rate": 4.642335766423358e-05, + "loss": 0.0571, + "step": 15292 + }, + { + "epoch": 2.15, + "learning_rate": 4.642288976230582e-05, + "loss": 0.0572, + "step": 15294 + }, + { + "epoch": 2.15, + "learning_rate": 4.642242186037807e-05, + "loss": 0.0496, + "step": 15296 + }, + { + "epoch": 2.15, + "learning_rate": 4.642195395845031e-05, + "loss": 0.0754, + "step": 15298 + }, + { + "epoch": 2.15, + "learning_rate": 4.6421486056522554e-05, + "loss": 0.0485, + "step": 15300 + }, + { + "epoch": 2.15, + "learning_rate": 4.64210181545948e-05, + "loss": 0.0543, + "step": 15302 + }, + { + "epoch": 2.15, + "learning_rate": 4.6420550252667045e-05, + "loss": 0.0426, + "step": 15304 + }, + { + "epoch": 2.15, + "learning_rate": 4.6420082350739285e-05, + "loss": 0.066, + "step": 15306 + }, + { + "epoch": 2.15, + "learning_rate": 4.641961444881153e-05, + "loss": 0.0548, + "step": 15308 + }, + { + "epoch": 2.15, + "learning_rate": 4.6419146546883776e-05, + "loss": 0.0523, + "step": 15310 + }, + { + "epoch": 2.15, + "learning_rate": 4.641867864495602e-05, + "loss": 0.0636, + "step": 15312 + }, + { + "epoch": 2.15, + "learning_rate": 4.641821074302826e-05, + "loss": 0.058, + "step": 15314 + }, + { + "epoch": 2.15, + "learning_rate": 4.641774284110051e-05, + "loss": 0.0528, + "step": 15316 + }, + { + "epoch": 2.15, + "learning_rate": 4.641727493917275e-05, + "loss": 0.0574, + "step": 15318 + }, + { + "epoch": 2.15, + "learning_rate": 4.6416807037245e-05, + "loss": 0.0475, + "step": 15320 + }, + { + "epoch": 2.15, + "learning_rate": 4.641633913531724e-05, + "loss": 0.0413, + "step": 15322 + }, + { + "epoch": 2.15, + "learning_rate": 4.6415871233389484e-05, + "loss": 0.0584, + "step": 15324 + }, + { + "epoch": 2.15, + "learning_rate": 4.6415403331461723e-05, + "loss": 0.0588, + "step": 15326 + }, + { + "epoch": 2.15, + "learning_rate": 4.6414935429533976e-05, + "loss": 0.05, + "step": 15328 + }, + { + "epoch": 2.15, + "learning_rate": 4.6414467527606215e-05, + "loss": 0.0552, + "step": 15330 + }, + { + "epoch": 2.15, + "learning_rate": 4.641399962567846e-05, + "loss": 0.0602, + "step": 15332 + }, + { + "epoch": 2.15, + "learning_rate": 4.64135317237507e-05, + "loss": 0.0541, + "step": 15334 + }, + { + "epoch": 2.15, + "learning_rate": 4.641306382182295e-05, + "loss": 0.0666, + "step": 15336 + }, + { + "epoch": 2.15, + "learning_rate": 4.641259591989519e-05, + "loss": 0.0538, + "step": 15338 + }, + { + "epoch": 2.15, + "learning_rate": 4.641212801796744e-05, + "loss": 0.0529, + "step": 15340 + }, + { + "epoch": 2.15, + "learning_rate": 4.641166011603968e-05, + "loss": 0.0535, + "step": 15342 + }, + { + "epoch": 2.15, + "learning_rate": 4.641119221411192e-05, + "loss": 0.0572, + "step": 15344 + }, + { + "epoch": 2.15, + "learning_rate": 4.641072431218417e-05, + "loss": 0.0448, + "step": 15346 + }, + { + "epoch": 2.15, + "learning_rate": 4.6410256410256415e-05, + "loss": 0.0659, + "step": 15348 + }, + { + "epoch": 2.15, + "learning_rate": 4.6409788508328654e-05, + "loss": 0.0585, + "step": 15350 + }, + { + "epoch": 2.15, + "learning_rate": 4.64093206064009e-05, + "loss": 0.0567, + "step": 15352 + }, + { + "epoch": 2.16, + "learning_rate": 4.6408852704473146e-05, + "loss": 0.0545, + "step": 15354 + }, + { + "epoch": 2.16, + "learning_rate": 4.640838480254539e-05, + "loss": 0.0571, + "step": 15356 + }, + { + "epoch": 2.16, + "learning_rate": 4.640791690061763e-05, + "loss": 0.0446, + "step": 15358 + }, + { + "epoch": 2.16, + "learning_rate": 4.640744899868988e-05, + "loss": 0.0552, + "step": 15360 + }, + { + "epoch": 2.16, + "learning_rate": 4.640698109676212e-05, + "loss": 0.0586, + "step": 15362 + }, + { + "epoch": 2.16, + "learning_rate": 4.640651319483437e-05, + "loss": 0.0646, + "step": 15364 + }, + { + "epoch": 2.16, + "learning_rate": 4.640604529290661e-05, + "loss": 0.0577, + "step": 15366 + }, + { + "epoch": 2.16, + "learning_rate": 4.6405577390978854e-05, + "loss": 0.0524, + "step": 15368 + }, + { + "epoch": 2.16, + "learning_rate": 4.64051094890511e-05, + "loss": 0.0343, + "step": 15370 + }, + { + "epoch": 2.16, + "learning_rate": 4.6404641587123345e-05, + "loss": 0.0588, + "step": 15372 + }, + { + "epoch": 2.16, + "learning_rate": 4.6404173685195585e-05, + "loss": 0.0514, + "step": 15374 + }, + { + "epoch": 2.16, + "learning_rate": 4.640370578326783e-05, + "loss": 0.0534, + "step": 15376 + }, + { + "epoch": 2.16, + "learning_rate": 4.640323788134007e-05, + "loss": 0.0449, + "step": 15378 + }, + { + "epoch": 2.16, + "learning_rate": 4.640276997941232e-05, + "loss": 0.0473, + "step": 15380 + }, + { + "epoch": 2.16, + "learning_rate": 4.640230207748456e-05, + "loss": 0.0454, + "step": 15382 + }, + { + "epoch": 2.16, + "learning_rate": 4.640183417555681e-05, + "loss": 0.0465, + "step": 15384 + }, + { + "epoch": 2.16, + "learning_rate": 4.6401366273629047e-05, + "loss": 0.0691, + "step": 15386 + }, + { + "epoch": 2.16, + "learning_rate": 4.640089837170129e-05, + "loss": 0.0444, + "step": 15388 + }, + { + "epoch": 2.16, + "learning_rate": 4.640043046977354e-05, + "loss": 0.0527, + "step": 15390 + }, + { + "epoch": 2.16, + "learning_rate": 4.6399962567845784e-05, + "loss": 0.047, + "step": 15392 + }, + { + "epoch": 2.16, + "learning_rate": 4.6399494665918023e-05, + "loss": 0.0557, + "step": 15394 + }, + { + "epoch": 2.16, + "learning_rate": 4.639902676399027e-05, + "loss": 0.0504, + "step": 15396 + }, + { + "epoch": 2.16, + "learning_rate": 4.6398558862062515e-05, + "loss": 0.0716, + "step": 15398 + }, + { + "epoch": 2.16, + "learning_rate": 4.639809096013476e-05, + "loss": 0.0585, + "step": 15400 + }, + { + "epoch": 2.16, + "learning_rate": 4.6397623058207e-05, + "loss": 0.0546, + "step": 15402 + }, + { + "epoch": 2.16, + "learning_rate": 4.6397155156279246e-05, + "loss": 0.0537, + "step": 15404 + }, + { + "epoch": 2.16, + "learning_rate": 4.639668725435149e-05, + "loss": 0.0489, + "step": 15406 + }, + { + "epoch": 2.16, + "learning_rate": 4.639621935242374e-05, + "loss": 0.0468, + "step": 15408 + }, + { + "epoch": 2.16, + "learning_rate": 4.639575145049598e-05, + "loss": 0.0536, + "step": 15410 + }, + { + "epoch": 2.16, + "learning_rate": 4.639528354856822e-05, + "loss": 0.0616, + "step": 15412 + }, + { + "epoch": 2.16, + "learning_rate": 4.639481564664047e-05, + "loss": 0.0497, + "step": 15414 + }, + { + "epoch": 2.16, + "learning_rate": 4.6394347744712715e-05, + "loss": 0.0816, + "step": 15416 + }, + { + "epoch": 2.16, + "learning_rate": 4.6393879842784954e-05, + "loss": 0.0655, + "step": 15418 + }, + { + "epoch": 2.16, + "learning_rate": 4.63934119408572e-05, + "loss": 0.0619, + "step": 15420 + }, + { + "epoch": 2.16, + "learning_rate": 4.639294403892944e-05, + "loss": 0.0545, + "step": 15422 + }, + { + "epoch": 2.17, + "learning_rate": 4.639247613700169e-05, + "loss": 0.0582, + "step": 15424 + }, + { + "epoch": 2.17, + "learning_rate": 4.639200823507393e-05, + "loss": 0.0548, + "step": 15426 + }, + { + "epoch": 2.17, + "learning_rate": 4.639154033314618e-05, + "loss": 0.057, + "step": 15428 + }, + { + "epoch": 2.17, + "learning_rate": 4.6391072431218416e-05, + "loss": 0.0554, + "step": 15430 + }, + { + "epoch": 2.17, + "learning_rate": 4.639060452929067e-05, + "loss": 0.0669, + "step": 15432 + }, + { + "epoch": 2.17, + "learning_rate": 4.639013662736291e-05, + "loss": 0.0555, + "step": 15434 + }, + { + "epoch": 2.17, + "learning_rate": 4.6389668725435154e-05, + "loss": 0.0639, + "step": 15436 + }, + { + "epoch": 2.17, + "learning_rate": 4.638920082350739e-05, + "loss": 0.0716, + "step": 15438 + }, + { + "epoch": 2.17, + "learning_rate": 4.638873292157964e-05, + "loss": 0.0643, + "step": 15440 + }, + { + "epoch": 2.17, + "learning_rate": 4.6388265019651885e-05, + "loss": 0.0448, + "step": 15442 + }, + { + "epoch": 2.17, + "learning_rate": 4.638779711772413e-05, + "loss": 0.0432, + "step": 15444 + }, + { + "epoch": 2.17, + "learning_rate": 4.638732921579637e-05, + "loss": 0.0594, + "step": 15446 + }, + { + "epoch": 2.17, + "learning_rate": 4.6386861313868616e-05, + "loss": 0.0545, + "step": 15448 + }, + { + "epoch": 2.17, + "learning_rate": 4.638639341194086e-05, + "loss": 0.0535, + "step": 15450 + }, + { + "epoch": 2.17, + "learning_rate": 4.63859255100131e-05, + "loss": 0.0547, + "step": 15452 + }, + { + "epoch": 2.17, + "learning_rate": 4.6385457608085347e-05, + "loss": 0.0533, + "step": 15454 + }, + { + "epoch": 2.17, + "learning_rate": 4.6384989706157586e-05, + "loss": 0.0608, + "step": 15456 + }, + { + "epoch": 2.17, + "learning_rate": 4.638452180422984e-05, + "loss": 0.0604, + "step": 15458 + }, + { + "epoch": 2.17, + "learning_rate": 4.638405390230208e-05, + "loss": 0.0497, + "step": 15460 + }, + { + "epoch": 2.17, + "learning_rate": 4.6383586000374323e-05, + "loss": 0.0541, + "step": 15462 + }, + { + "epoch": 2.17, + "learning_rate": 4.638311809844656e-05, + "loss": 0.0518, + "step": 15464 + }, + { + "epoch": 2.17, + "learning_rate": 4.6382650196518815e-05, + "loss": 0.05, + "step": 15466 + }, + { + "epoch": 2.17, + "learning_rate": 4.6382182294591054e-05, + "loss": 0.0651, + "step": 15468 + }, + { + "epoch": 2.17, + "learning_rate": 4.63817143926633e-05, + "loss": 0.0633, + "step": 15470 + }, + { + "epoch": 2.17, + "learning_rate": 4.638124649073554e-05, + "loss": 0.0395, + "step": 15472 + }, + { + "epoch": 2.17, + "learning_rate": 4.6380778588807785e-05, + "loss": 0.0576, + "step": 15474 + }, + { + "epoch": 2.17, + "learning_rate": 4.638031068688003e-05, + "loss": 0.0611, + "step": 15476 + }, + { + "epoch": 2.17, + "learning_rate": 4.637984278495228e-05, + "loss": 0.0513, + "step": 15478 + }, + { + "epoch": 2.17, + "learning_rate": 4.6379374883024516e-05, + "loss": 0.0625, + "step": 15480 + }, + { + "epoch": 2.17, + "learning_rate": 4.637890698109676e-05, + "loss": 0.0531, + "step": 15482 + }, + { + "epoch": 2.17, + "learning_rate": 4.637843907916901e-05, + "loss": 0.063, + "step": 15484 + }, + { + "epoch": 2.17, + "learning_rate": 4.6377971177241254e-05, + "loss": 0.0545, + "step": 15486 + }, + { + "epoch": 2.17, + "learning_rate": 4.637750327531349e-05, + "loss": 0.0325, + "step": 15488 + }, + { + "epoch": 2.17, + "learning_rate": 4.637703537338574e-05, + "loss": 0.056, + "step": 15490 + }, + { + "epoch": 2.17, + "learning_rate": 4.6376567471457985e-05, + "loss": 0.04, + "step": 15492 + }, + { + "epoch": 2.17, + "learning_rate": 4.637609956953023e-05, + "loss": 0.0766, + "step": 15494 + }, + { + "epoch": 2.18, + "learning_rate": 4.637563166760247e-05, + "loss": 0.0583, + "step": 15496 + }, + { + "epoch": 2.18, + "learning_rate": 4.6375163765674716e-05, + "loss": 0.0625, + "step": 15498 + }, + { + "epoch": 2.18, + "learning_rate": 4.637469586374696e-05, + "loss": 0.0655, + "step": 15500 + }, + { + "epoch": 2.18, + "learning_rate": 4.637422796181921e-05, + "loss": 0.039, + "step": 15502 + }, + { + "epoch": 2.18, + "learning_rate": 4.637376005989145e-05, + "loss": 0.0538, + "step": 15504 + }, + { + "epoch": 2.18, + "learning_rate": 4.637329215796369e-05, + "loss": 0.0585, + "step": 15506 + }, + { + "epoch": 2.18, + "learning_rate": 4.637282425603593e-05, + "loss": 0.0564, + "step": 15508 + }, + { + "epoch": 2.18, + "learning_rate": 4.6372356354108185e-05, + "loss": 0.0359, + "step": 15510 + }, + { + "epoch": 2.18, + "learning_rate": 4.6371888452180424e-05, + "loss": 0.0466, + "step": 15512 + }, + { + "epoch": 2.18, + "learning_rate": 4.637142055025267e-05, + "loss": 0.0471, + "step": 15514 + }, + { + "epoch": 2.18, + "learning_rate": 4.637095264832491e-05, + "loss": 0.0497, + "step": 15516 + }, + { + "epoch": 2.18, + "learning_rate": 4.6370484746397155e-05, + "loss": 0.0497, + "step": 15518 + }, + { + "epoch": 2.18, + "learning_rate": 4.63700168444694e-05, + "loss": 0.0579, + "step": 15520 + }, + { + "epoch": 2.18, + "learning_rate": 4.6369548942541647e-05, + "loss": 0.0632, + "step": 15522 + }, + { + "epoch": 2.18, + "learning_rate": 4.6369081040613886e-05, + "loss": 0.0546, + "step": 15524 + }, + { + "epoch": 2.18, + "learning_rate": 4.636861313868613e-05, + "loss": 0.0536, + "step": 15526 + }, + { + "epoch": 2.18, + "learning_rate": 4.636814523675838e-05, + "loss": 0.0689, + "step": 15528 + }, + { + "epoch": 2.18, + "learning_rate": 4.6367677334830623e-05, + "loss": 0.0513, + "step": 15530 + }, + { + "epoch": 2.18, + "learning_rate": 4.636720943290286e-05, + "loss": 0.0614, + "step": 15532 + }, + { + "epoch": 2.18, + "learning_rate": 4.636674153097511e-05, + "loss": 0.0648, + "step": 15534 + }, + { + "epoch": 2.18, + "learning_rate": 4.6366273629047354e-05, + "loss": 0.0535, + "step": 15536 + }, + { + "epoch": 2.18, + "learning_rate": 4.63658057271196e-05, + "loss": 0.0775, + "step": 15538 + }, + { + "epoch": 2.18, + "learning_rate": 4.636533782519184e-05, + "loss": 0.0444, + "step": 15540 + }, + { + "epoch": 2.18, + "learning_rate": 4.6364869923264085e-05, + "loss": 0.0726, + "step": 15542 + }, + { + "epoch": 2.18, + "learning_rate": 4.636440202133633e-05, + "loss": 0.0636, + "step": 15544 + }, + { + "epoch": 2.18, + "learning_rate": 4.636393411940858e-05, + "loss": 0.0536, + "step": 15546 + }, + { + "epoch": 2.18, + "learning_rate": 4.6363466217480816e-05, + "loss": 0.0899, + "step": 15548 + }, + { + "epoch": 2.18, + "learning_rate": 4.636299831555306e-05, + "loss": 0.0589, + "step": 15550 + }, + { + "epoch": 2.18, + "learning_rate": 4.63625304136253e-05, + "loss": 0.0591, + "step": 15552 + }, + { + "epoch": 2.18, + "learning_rate": 4.6362062511697554e-05, + "loss": 0.0833, + "step": 15554 + }, + { + "epoch": 2.18, + "learning_rate": 4.636159460976979e-05, + "loss": 0.056, + "step": 15556 + }, + { + "epoch": 2.18, + "learning_rate": 4.636112670784204e-05, + "loss": 0.0591, + "step": 15558 + }, + { + "epoch": 2.18, + "learning_rate": 4.636065880591428e-05, + "loss": 0.0595, + "step": 15560 + }, + { + "epoch": 2.18, + "learning_rate": 4.636019090398653e-05, + "loss": 0.0524, + "step": 15562 + }, + { + "epoch": 2.18, + "learning_rate": 4.635972300205877e-05, + "loss": 0.0499, + "step": 15564 + }, + { + "epoch": 2.19, + "learning_rate": 4.6359255100131016e-05, + "loss": 0.0536, + "step": 15566 + }, + { + "epoch": 2.19, + "learning_rate": 4.6358787198203255e-05, + "loss": 0.0428, + "step": 15568 + }, + { + "epoch": 2.19, + "learning_rate": 4.63583192962755e-05, + "loss": 0.0692, + "step": 15570 + }, + { + "epoch": 2.19, + "learning_rate": 4.635785139434775e-05, + "loss": 0.0556, + "step": 15572 + }, + { + "epoch": 2.19, + "learning_rate": 4.635738349241999e-05, + "loss": 0.0433, + "step": 15574 + }, + { + "epoch": 2.19, + "learning_rate": 4.635691559049223e-05, + "loss": 0.0503, + "step": 15576 + }, + { + "epoch": 2.19, + "learning_rate": 4.635644768856448e-05, + "loss": 0.049, + "step": 15578 + }, + { + "epoch": 2.19, + "learning_rate": 4.6355979786636724e-05, + "loss": 0.0619, + "step": 15580 + }, + { + "epoch": 2.19, + "learning_rate": 4.635551188470897e-05, + "loss": 0.0455, + "step": 15582 + }, + { + "epoch": 2.19, + "learning_rate": 4.635504398278121e-05, + "loss": 0.0665, + "step": 15584 + }, + { + "epoch": 2.19, + "learning_rate": 4.6354576080853455e-05, + "loss": 0.0644, + "step": 15586 + }, + { + "epoch": 2.19, + "learning_rate": 4.63541081789257e-05, + "loss": 0.0636, + "step": 15588 + }, + { + "epoch": 2.19, + "learning_rate": 4.6353640276997947e-05, + "loss": 0.0647, + "step": 15590 + }, + { + "epoch": 2.19, + "learning_rate": 4.6353172375070186e-05, + "loss": 0.063, + "step": 15592 + }, + { + "epoch": 2.19, + "learning_rate": 4.635270447314243e-05, + "loss": 0.1058, + "step": 15594 + }, + { + "epoch": 2.19, + "learning_rate": 4.635223657121468e-05, + "loss": 0.0463, + "step": 15596 + }, + { + "epoch": 2.19, + "learning_rate": 4.6351768669286923e-05, + "loss": 0.0686, + "step": 15598 + }, + { + "epoch": 2.19, + "learning_rate": 4.635130076735916e-05, + "loss": 0.0621, + "step": 15600 + }, + { + "epoch": 2.19, + "learning_rate": 4.635083286543141e-05, + "loss": 0.0481, + "step": 15602 + }, + { + "epoch": 2.19, + "learning_rate": 4.635036496350365e-05, + "loss": 0.0616, + "step": 15604 + }, + { + "epoch": 2.19, + "learning_rate": 4.63498970615759e-05, + "loss": 0.0559, + "step": 15606 + }, + { + "epoch": 2.19, + "learning_rate": 4.634942915964814e-05, + "loss": 0.0528, + "step": 15608 + }, + { + "epoch": 2.19, + "learning_rate": 4.6348961257720385e-05, + "loss": 0.0511, + "step": 15610 + }, + { + "epoch": 2.19, + "learning_rate": 4.6348493355792625e-05, + "loss": 0.0385, + "step": 15612 + }, + { + "epoch": 2.19, + "learning_rate": 4.634802545386488e-05, + "loss": 0.0599, + "step": 15614 + }, + { + "epoch": 2.19, + "learning_rate": 4.6347557551937116e-05, + "loss": 0.0457, + "step": 15616 + }, + { + "epoch": 2.19, + "learning_rate": 4.634708965000936e-05, + "loss": 0.0578, + "step": 15618 + }, + { + "epoch": 2.19, + "learning_rate": 4.63466217480816e-05, + "loss": 0.05, + "step": 15620 + }, + { + "epoch": 2.19, + "learning_rate": 4.634615384615385e-05, + "loss": 0.0419, + "step": 15622 + }, + { + "epoch": 2.19, + "learning_rate": 4.634568594422609e-05, + "loss": 0.082, + "step": 15624 + }, + { + "epoch": 2.19, + "learning_rate": 4.634521804229834e-05, + "loss": 0.0633, + "step": 15626 + }, + { + "epoch": 2.19, + "learning_rate": 4.634475014037058e-05, + "loss": 0.0507, + "step": 15628 + }, + { + "epoch": 2.19, + "learning_rate": 4.6344282238442824e-05, + "loss": 0.0467, + "step": 15630 + }, + { + "epoch": 2.19, + "learning_rate": 4.634381433651507e-05, + "loss": 0.0654, + "step": 15632 + }, + { + "epoch": 2.19, + "learning_rate": 4.6343346434587316e-05, + "loss": 0.0531, + "step": 15634 + }, + { + "epoch": 2.19, + "learning_rate": 4.6342878532659555e-05, + "loss": 0.0542, + "step": 15636 + }, + { + "epoch": 2.2, + "learning_rate": 4.63424106307318e-05, + "loss": 0.0747, + "step": 15638 + }, + { + "epoch": 2.2, + "learning_rate": 4.634194272880405e-05, + "loss": 0.0491, + "step": 15640 + }, + { + "epoch": 2.2, + "learning_rate": 4.634147482687629e-05, + "loss": 0.0477, + "step": 15642 + }, + { + "epoch": 2.2, + "learning_rate": 4.634100692494853e-05, + "loss": 0.0781, + "step": 15644 + }, + { + "epoch": 2.2, + "learning_rate": 4.634053902302078e-05, + "loss": 0.0875, + "step": 15646 + }, + { + "epoch": 2.2, + "learning_rate": 4.6340071121093024e-05, + "loss": 0.0636, + "step": 15648 + }, + { + "epoch": 2.2, + "learning_rate": 4.633960321916527e-05, + "loss": 0.0407, + "step": 15650 + }, + { + "epoch": 2.2, + "learning_rate": 4.633913531723751e-05, + "loss": 0.0512, + "step": 15652 + }, + { + "epoch": 2.2, + "learning_rate": 4.6338667415309755e-05, + "loss": 0.0644, + "step": 15654 + }, + { + "epoch": 2.2, + "learning_rate": 4.6338199513381994e-05, + "loss": 0.0408, + "step": 15656 + }, + { + "epoch": 2.2, + "learning_rate": 4.633773161145425e-05, + "loss": 0.0575, + "step": 15658 + }, + { + "epoch": 2.2, + "learning_rate": 4.6337263709526486e-05, + "loss": 0.0611, + "step": 15660 + }, + { + "epoch": 2.2, + "learning_rate": 4.633679580759873e-05, + "loss": 0.0643, + "step": 15662 + }, + { + "epoch": 2.2, + "learning_rate": 4.633632790567097e-05, + "loss": 0.1125, + "step": 15664 + }, + { + "epoch": 2.2, + "learning_rate": 4.633586000374322e-05, + "loss": 0.0515, + "step": 15666 + }, + { + "epoch": 2.2, + "learning_rate": 4.633539210181546e-05, + "loss": 0.0465, + "step": 15668 + }, + { + "epoch": 2.2, + "learning_rate": 4.633492419988771e-05, + "loss": 0.073, + "step": 15670 + }, + { + "epoch": 2.2, + "learning_rate": 4.633445629795995e-05, + "loss": 0.0571, + "step": 15672 + }, + { + "epoch": 2.2, + "learning_rate": 4.6333988396032194e-05, + "loss": 0.0672, + "step": 15674 + }, + { + "epoch": 2.2, + "learning_rate": 4.633352049410444e-05, + "loss": 0.0739, + "step": 15676 + }, + { + "epoch": 2.2, + "learning_rate": 4.6333052592176685e-05, + "loss": 0.0513, + "step": 15678 + }, + { + "epoch": 2.2, + "learning_rate": 4.6332584690248925e-05, + "loss": 0.0375, + "step": 15680 + }, + { + "epoch": 2.2, + "learning_rate": 4.633211678832117e-05, + "loss": 0.0816, + "step": 15682 + }, + { + "epoch": 2.2, + "learning_rate": 4.6331648886393416e-05, + "loss": 0.0483, + "step": 15684 + }, + { + "epoch": 2.2, + "learning_rate": 4.633118098446566e-05, + "loss": 0.0672, + "step": 15686 + }, + { + "epoch": 2.2, + "learning_rate": 4.63307130825379e-05, + "loss": 0.0518, + "step": 15688 + }, + { + "epoch": 2.2, + "learning_rate": 4.633024518061015e-05, + "loss": 0.0551, + "step": 15690 + }, + { + "epoch": 2.2, + "learning_rate": 4.632977727868239e-05, + "loss": 0.053, + "step": 15692 + }, + { + "epoch": 2.2, + "learning_rate": 4.632930937675464e-05, + "loss": 0.0707, + "step": 15694 + }, + { + "epoch": 2.2, + "learning_rate": 4.632884147482688e-05, + "loss": 0.0538, + "step": 15696 + }, + { + "epoch": 2.2, + "learning_rate": 4.6328373572899124e-05, + "loss": 0.0524, + "step": 15698 + }, + { + "epoch": 2.2, + "learning_rate": 4.632790567097136e-05, + "loss": 0.0526, + "step": 15700 + }, + { + "epoch": 2.2, + "learning_rate": 4.6327437769043616e-05, + "loss": 0.0603, + "step": 15702 + }, + { + "epoch": 2.2, + "learning_rate": 4.6326969867115855e-05, + "loss": 0.0508, + "step": 15704 + }, + { + "epoch": 2.2, + "learning_rate": 4.6326501965188094e-05, + "loss": 0.0504, + "step": 15706 + }, + { + "epoch": 2.2, + "learning_rate": 4.632603406326034e-05, + "loss": 0.0759, + "step": 15708 + }, + { + "epoch": 2.21, + "learning_rate": 4.6325566161332586e-05, + "loss": 0.0701, + "step": 15710 + }, + { + "epoch": 2.21, + "learning_rate": 4.632509825940483e-05, + "loss": 0.0525, + "step": 15712 + }, + { + "epoch": 2.21, + "learning_rate": 4.632463035747707e-05, + "loss": 0.0752, + "step": 15714 + }, + { + "epoch": 2.21, + "learning_rate": 4.632416245554932e-05, + "loss": 0.0466, + "step": 15716 + }, + { + "epoch": 2.21, + "learning_rate": 4.632369455362156e-05, + "loss": 0.0748, + "step": 15718 + }, + { + "epoch": 2.21, + "learning_rate": 4.632322665169381e-05, + "loss": 0.0522, + "step": 15720 + }, + { + "epoch": 2.21, + "learning_rate": 4.632275874976605e-05, + "loss": 0.0591, + "step": 15722 + }, + { + "epoch": 2.21, + "learning_rate": 4.6322290847838294e-05, + "loss": 0.0554, + "step": 15724 + }, + { + "epoch": 2.21, + "learning_rate": 4.632182294591054e-05, + "loss": 0.069, + "step": 15726 + }, + { + "epoch": 2.21, + "learning_rate": 4.6321355043982786e-05, + "loss": 0.043, + "step": 15728 + }, + { + "epoch": 2.21, + "learning_rate": 4.6320887142055025e-05, + "loss": 0.0566, + "step": 15730 + }, + { + "epoch": 2.21, + "learning_rate": 4.632041924012727e-05, + "loss": 0.0616, + "step": 15732 + }, + { + "epoch": 2.21, + "learning_rate": 4.631995133819951e-05, + "loss": 0.0504, + "step": 15734 + }, + { + "epoch": 2.21, + "learning_rate": 4.631948343627176e-05, + "loss": 0.038, + "step": 15736 + }, + { + "epoch": 2.21, + "learning_rate": 4.6319015534344e-05, + "loss": 0.0668, + "step": 15738 + }, + { + "epoch": 2.21, + "learning_rate": 4.631854763241625e-05, + "loss": 0.0513, + "step": 15740 + }, + { + "epoch": 2.21, + "learning_rate": 4.631807973048849e-05, + "loss": 0.0499, + "step": 15742 + }, + { + "epoch": 2.21, + "learning_rate": 4.631761182856074e-05, + "loss": 0.0645, + "step": 15744 + }, + { + "epoch": 2.21, + "learning_rate": 4.631714392663298e-05, + "loss": 0.0597, + "step": 15746 + }, + { + "epoch": 2.21, + "learning_rate": 4.6316676024705225e-05, + "loss": 0.0603, + "step": 15748 + }, + { + "epoch": 2.21, + "learning_rate": 4.6316208122777464e-05, + "loss": 0.0607, + "step": 15750 + }, + { + "epoch": 2.21, + "learning_rate": 4.631574022084971e-05, + "loss": 0.072, + "step": 15752 + }, + { + "epoch": 2.21, + "learning_rate": 4.6315272318921956e-05, + "loss": 0.0732, + "step": 15754 + }, + { + "epoch": 2.21, + "learning_rate": 4.63148044169942e-05, + "loss": 0.0526, + "step": 15756 + }, + { + "epoch": 2.21, + "learning_rate": 4.631433651506644e-05, + "loss": 0.06, + "step": 15758 + }, + { + "epoch": 2.21, + "learning_rate": 4.6313868613138686e-05, + "loss": 0.0551, + "step": 15760 + }, + { + "epoch": 2.21, + "learning_rate": 4.631340071121093e-05, + "loss": 0.0413, + "step": 15762 + }, + { + "epoch": 2.21, + "learning_rate": 4.631293280928318e-05, + "loss": 0.0723, + "step": 15764 + }, + { + "epoch": 2.21, + "learning_rate": 4.631246490735542e-05, + "loss": 0.0541, + "step": 15766 + }, + { + "epoch": 2.21, + "learning_rate": 4.631199700542766e-05, + "loss": 0.0587, + "step": 15768 + }, + { + "epoch": 2.21, + "learning_rate": 4.631152910349991e-05, + "loss": 0.0835, + "step": 15770 + }, + { + "epoch": 2.21, + "learning_rate": 4.6311061201572155e-05, + "loss": 0.0397, + "step": 15772 + }, + { + "epoch": 2.21, + "learning_rate": 4.6310593299644394e-05, + "loss": 0.073, + "step": 15774 + }, + { + "epoch": 2.21, + "learning_rate": 4.631012539771664e-05, + "loss": 0.066, + "step": 15776 + }, + { + "epoch": 2.21, + "learning_rate": 4.6309657495788886e-05, + "loss": 0.0558, + "step": 15778 + }, + { + "epoch": 2.22, + "learning_rate": 4.630918959386113e-05, + "loss": 0.0481, + "step": 15780 + }, + { + "epoch": 2.22, + "learning_rate": 4.630872169193337e-05, + "loss": 0.0479, + "step": 15782 + }, + { + "epoch": 2.22, + "learning_rate": 4.630825379000562e-05, + "loss": 0.0566, + "step": 15784 + }, + { + "epoch": 2.22, + "learning_rate": 4.6307785888077856e-05, + "loss": 0.0748, + "step": 15786 + }, + { + "epoch": 2.22, + "learning_rate": 4.630731798615011e-05, + "loss": 0.0521, + "step": 15788 + }, + { + "epoch": 2.22, + "learning_rate": 4.630685008422235e-05, + "loss": 0.07, + "step": 15790 + }, + { + "epoch": 2.22, + "learning_rate": 4.6306382182294594e-05, + "loss": 0.0689, + "step": 15792 + }, + { + "epoch": 2.22, + "learning_rate": 4.630591428036683e-05, + "loss": 0.0412, + "step": 15794 + }, + { + "epoch": 2.22, + "learning_rate": 4.6305446378439086e-05, + "loss": 0.0472, + "step": 15796 + }, + { + "epoch": 2.22, + "learning_rate": 4.6304978476511325e-05, + "loss": 0.0605, + "step": 15798 + }, + { + "epoch": 2.22, + "learning_rate": 4.630451057458357e-05, + "loss": 0.0562, + "step": 15800 + }, + { + "epoch": 2.22, + "learning_rate": 4.630404267265581e-05, + "loss": 0.0456, + "step": 15802 + }, + { + "epoch": 2.22, + "learning_rate": 4.6303574770728056e-05, + "loss": 0.0515, + "step": 15804 + }, + { + "epoch": 2.22, + "learning_rate": 4.63031068688003e-05, + "loss": 0.0508, + "step": 15806 + }, + { + "epoch": 2.22, + "learning_rate": 4.630263896687255e-05, + "loss": 0.0717, + "step": 15808 + }, + { + "epoch": 2.22, + "learning_rate": 4.630217106494479e-05, + "loss": 0.0532, + "step": 15810 + }, + { + "epoch": 2.22, + "learning_rate": 4.630170316301703e-05, + "loss": 0.0498, + "step": 15812 + }, + { + "epoch": 2.22, + "learning_rate": 4.630123526108928e-05, + "loss": 0.0589, + "step": 15814 + }, + { + "epoch": 2.22, + "learning_rate": 4.6300767359161525e-05, + "loss": 0.0525, + "step": 15816 + }, + { + "epoch": 2.22, + "learning_rate": 4.6300299457233764e-05, + "loss": 0.0576, + "step": 15818 + }, + { + "epoch": 2.22, + "learning_rate": 4.629983155530601e-05, + "loss": 0.0634, + "step": 15820 + }, + { + "epoch": 2.22, + "learning_rate": 4.6299363653378256e-05, + "loss": 0.0351, + "step": 15822 + }, + { + "epoch": 2.22, + "learning_rate": 4.62988957514505e-05, + "loss": 0.0523, + "step": 15824 + }, + { + "epoch": 2.22, + "learning_rate": 4.629842784952274e-05, + "loss": 0.0544, + "step": 15826 + }, + { + "epoch": 2.22, + "learning_rate": 4.6297959947594986e-05, + "loss": 0.0492, + "step": 15828 + }, + { + "epoch": 2.22, + "learning_rate": 4.6297492045667226e-05, + "loss": 0.0475, + "step": 15830 + }, + { + "epoch": 2.22, + "learning_rate": 4.629702414373948e-05, + "loss": 0.043, + "step": 15832 + }, + { + "epoch": 2.22, + "learning_rate": 4.629655624181172e-05, + "loss": 0.0636, + "step": 15834 + }, + { + "epoch": 2.22, + "learning_rate": 4.629608833988396e-05, + "loss": 0.0614, + "step": 15836 + }, + { + "epoch": 2.22, + "learning_rate": 4.62956204379562e-05, + "loss": 0.0643, + "step": 15838 + }, + { + "epoch": 2.22, + "learning_rate": 4.6295152536028455e-05, + "loss": 0.0549, + "step": 15840 + }, + { + "epoch": 2.22, + "learning_rate": 4.6294684634100694e-05, + "loss": 0.0648, + "step": 15842 + }, + { + "epoch": 2.22, + "learning_rate": 4.629421673217294e-05, + "loss": 0.0555, + "step": 15844 + }, + { + "epoch": 2.22, + "learning_rate": 4.629374883024518e-05, + "loss": 0.047, + "step": 15846 + }, + { + "epoch": 2.22, + "learning_rate": 4.6293280928317425e-05, + "loss": 0.0622, + "step": 15848 + }, + { + "epoch": 2.22, + "learning_rate": 4.629281302638967e-05, + "loss": 0.0447, + "step": 15850 + }, + { + "epoch": 2.23, + "learning_rate": 4.629234512446192e-05, + "loss": 0.0678, + "step": 15852 + }, + { + "epoch": 2.23, + "learning_rate": 4.6291877222534156e-05, + "loss": 0.0686, + "step": 15854 + }, + { + "epoch": 2.23, + "learning_rate": 4.62914093206064e-05, + "loss": 0.0524, + "step": 15856 + }, + { + "epoch": 2.23, + "learning_rate": 4.629094141867865e-05, + "loss": 0.0439, + "step": 15858 + }, + { + "epoch": 2.23, + "learning_rate": 4.6290473516750894e-05, + "loss": 0.0538, + "step": 15860 + }, + { + "epoch": 2.23, + "learning_rate": 4.629000561482313e-05, + "loss": 0.0602, + "step": 15862 + }, + { + "epoch": 2.23, + "learning_rate": 4.628953771289538e-05, + "loss": 0.05, + "step": 15864 + }, + { + "epoch": 2.23, + "learning_rate": 4.6289069810967625e-05, + "loss": 0.0514, + "step": 15866 + }, + { + "epoch": 2.23, + "learning_rate": 4.628860190903987e-05, + "loss": 0.0504, + "step": 15868 + }, + { + "epoch": 2.23, + "learning_rate": 4.628813400711211e-05, + "loss": 0.0541, + "step": 15870 + }, + { + "epoch": 2.23, + "learning_rate": 4.6287666105184356e-05, + "loss": 0.0529, + "step": 15872 + }, + { + "epoch": 2.23, + "learning_rate": 4.62871982032566e-05, + "loss": 0.0441, + "step": 15874 + }, + { + "epoch": 2.23, + "learning_rate": 4.628673030132885e-05, + "loss": 0.0595, + "step": 15876 + }, + { + "epoch": 2.23, + "learning_rate": 4.628626239940109e-05, + "loss": 0.0677, + "step": 15878 + }, + { + "epoch": 2.23, + "learning_rate": 4.628579449747333e-05, + "loss": 0.0418, + "step": 15880 + }, + { + "epoch": 2.23, + "learning_rate": 4.628532659554557e-05, + "loss": 0.0485, + "step": 15882 + }, + { + "epoch": 2.23, + "learning_rate": 4.6284858693617825e-05, + "loss": 0.0765, + "step": 15884 + }, + { + "epoch": 2.23, + "learning_rate": 4.6284390791690064e-05, + "loss": 0.0502, + "step": 15886 + }, + { + "epoch": 2.23, + "learning_rate": 4.628392288976231e-05, + "loss": 0.0586, + "step": 15888 + }, + { + "epoch": 2.23, + "learning_rate": 4.628345498783455e-05, + "loss": 0.0559, + "step": 15890 + }, + { + "epoch": 2.23, + "learning_rate": 4.62829870859068e-05, + "loss": 0.0506, + "step": 15892 + }, + { + "epoch": 2.23, + "learning_rate": 4.628251918397904e-05, + "loss": 0.0451, + "step": 15894 + }, + { + "epoch": 2.23, + "learning_rate": 4.6282051282051287e-05, + "loss": 0.0502, + "step": 15896 + }, + { + "epoch": 2.23, + "learning_rate": 4.6281583380123526e-05, + "loss": 0.0514, + "step": 15898 + }, + { + "epoch": 2.23, + "learning_rate": 4.628111547819577e-05, + "loss": 0.0448, + "step": 15900 + }, + { + "epoch": 2.23, + "learning_rate": 4.628064757626802e-05, + "loss": 0.0621, + "step": 15902 + }, + { + "epoch": 2.23, + "learning_rate": 4.628017967434026e-05, + "loss": 0.0698, + "step": 15904 + }, + { + "epoch": 2.23, + "learning_rate": 4.62797117724125e-05, + "loss": 0.0967, + "step": 15906 + }, + { + "epoch": 2.23, + "learning_rate": 4.627924387048475e-05, + "loss": 0.0529, + "step": 15908 + }, + { + "epoch": 2.23, + "learning_rate": 4.6278775968556994e-05, + "loss": 0.0491, + "step": 15910 + }, + { + "epoch": 2.23, + "learning_rate": 4.627830806662924e-05, + "loss": 0.0508, + "step": 15912 + }, + { + "epoch": 2.23, + "learning_rate": 4.627784016470148e-05, + "loss": 0.0655, + "step": 15914 + }, + { + "epoch": 2.23, + "learning_rate": 4.6277372262773725e-05, + "loss": 0.0729, + "step": 15916 + }, + { + "epoch": 2.23, + "learning_rate": 4.627690436084597e-05, + "loss": 0.0731, + "step": 15918 + }, + { + "epoch": 2.23, + "learning_rate": 4.627643645891822e-05, + "loss": 0.0545, + "step": 15920 + }, + { + "epoch": 2.23, + "learning_rate": 4.6275968556990456e-05, + "loss": 0.0717, + "step": 15922 + }, + { + "epoch": 2.24, + "learning_rate": 4.62755006550627e-05, + "loss": 0.0521, + "step": 15924 + }, + { + "epoch": 2.24, + "learning_rate": 4.627503275313495e-05, + "loss": 0.0446, + "step": 15926 + }, + { + "epoch": 2.24, + "learning_rate": 4.6274564851207194e-05, + "loss": 0.0545, + "step": 15928 + }, + { + "epoch": 2.24, + "learning_rate": 4.627409694927943e-05, + "loss": 0.0442, + "step": 15930 + }, + { + "epoch": 2.24, + "learning_rate": 4.627362904735168e-05, + "loss": 0.0688, + "step": 15932 + }, + { + "epoch": 2.24, + "learning_rate": 4.627316114542392e-05, + "loss": 0.0469, + "step": 15934 + }, + { + "epoch": 2.24, + "learning_rate": 4.627269324349617e-05, + "loss": 0.0658, + "step": 15936 + }, + { + "epoch": 2.24, + "learning_rate": 4.627222534156841e-05, + "loss": 0.0646, + "step": 15938 + }, + { + "epoch": 2.24, + "learning_rate": 4.6271757439640656e-05, + "loss": 0.0568, + "step": 15940 + }, + { + "epoch": 2.24, + "learning_rate": 4.6271289537712895e-05, + "loss": 0.0526, + "step": 15942 + }, + { + "epoch": 2.24, + "learning_rate": 4.627082163578514e-05, + "loss": 0.0592, + "step": 15944 + }, + { + "epoch": 2.24, + "learning_rate": 4.627035373385739e-05, + "loss": 0.0532, + "step": 15946 + }, + { + "epoch": 2.24, + "learning_rate": 4.626988583192963e-05, + "loss": 0.0567, + "step": 15948 + }, + { + "epoch": 2.24, + "learning_rate": 4.626941793000187e-05, + "loss": 0.0497, + "step": 15950 + }, + { + "epoch": 2.24, + "learning_rate": 4.626895002807412e-05, + "loss": 0.0551, + "step": 15952 + }, + { + "epoch": 2.24, + "learning_rate": 4.6268482126146364e-05, + "loss": 0.061, + "step": 15954 + }, + { + "epoch": 2.24, + "learning_rate": 4.626801422421861e-05, + "loss": 0.0532, + "step": 15956 + }, + { + "epoch": 2.24, + "learning_rate": 4.626754632229085e-05, + "loss": 0.0593, + "step": 15958 + }, + { + "epoch": 2.24, + "learning_rate": 4.6267078420363095e-05, + "loss": 0.0664, + "step": 15960 + }, + { + "epoch": 2.24, + "learning_rate": 4.626661051843534e-05, + "loss": 0.0697, + "step": 15962 + }, + { + "epoch": 2.24, + "learning_rate": 4.626614261650758e-05, + "loss": 0.0644, + "step": 15964 + }, + { + "epoch": 2.24, + "learning_rate": 4.6265674714579826e-05, + "loss": 0.0635, + "step": 15966 + }, + { + "epoch": 2.24, + "learning_rate": 4.6265206812652065e-05, + "loss": 0.0528, + "step": 15968 + }, + { + "epoch": 2.24, + "learning_rate": 4.626473891072432e-05, + "loss": 0.0927, + "step": 15970 + }, + { + "epoch": 2.24, + "learning_rate": 4.626427100879656e-05, + "loss": 0.0519, + "step": 15972 + }, + { + "epoch": 2.24, + "learning_rate": 4.62638031068688e-05, + "loss": 0.0556, + "step": 15974 + }, + { + "epoch": 2.24, + "learning_rate": 4.626333520494104e-05, + "loss": 0.0586, + "step": 15976 + }, + { + "epoch": 2.24, + "learning_rate": 4.626286730301329e-05, + "loss": 0.063, + "step": 15978 + }, + { + "epoch": 2.24, + "learning_rate": 4.6262399401085533e-05, + "loss": 0.0455, + "step": 15980 + }, + { + "epoch": 2.24, + "learning_rate": 4.626193149915778e-05, + "loss": 0.0621, + "step": 15982 + }, + { + "epoch": 2.24, + "learning_rate": 4.626146359723002e-05, + "loss": 0.0717, + "step": 15984 + }, + { + "epoch": 2.24, + "learning_rate": 4.6260995695302264e-05, + "loss": 0.0523, + "step": 15986 + }, + { + "epoch": 2.24, + "learning_rate": 4.626052779337451e-05, + "loss": 0.0531, + "step": 15988 + }, + { + "epoch": 2.24, + "learning_rate": 4.6260059891446756e-05, + "loss": 0.0528, + "step": 15990 + }, + { + "epoch": 2.24, + "learning_rate": 4.6259591989518995e-05, + "loss": 0.0591, + "step": 15992 + }, + { + "epoch": 2.25, + "learning_rate": 4.625912408759124e-05, + "loss": 0.049, + "step": 15994 + }, + { + "epoch": 2.25, + "learning_rate": 4.625865618566349e-05, + "loss": 0.0585, + "step": 15996 + }, + { + "epoch": 2.25, + "learning_rate": 4.625818828373573e-05, + "loss": 0.0465, + "step": 15998 + }, + { + "epoch": 2.25, + "learning_rate": 4.625772038180797e-05, + "loss": 0.0579, + "step": 16000 + }, + { + "epoch": 2.25, + "eval_gen_len": 31.2759, + "eval_loss": 1.0764782428741455, + "eval_meteor": 0.0444, + "eval_runtime": 16.106, + "eval_samples_per_second": 3.601, + "eval_steps_per_second": 0.497, + "step": 16000 + }, + { + "epoch": 2.25, + "learning_rate": 4.625725247988022e-05, + "loss": 0.0719, + "step": 16002 + }, + { + "epoch": 2.25, + "learning_rate": 4.6256784577952464e-05, + "loss": 0.0528, + "step": 16004 + }, + { + "epoch": 2.25, + "learning_rate": 4.625631667602471e-05, + "loss": 0.0561, + "step": 16006 + }, + { + "epoch": 2.25, + "learning_rate": 4.625584877409695e-05, + "loss": 0.076, + "step": 16008 + }, + { + "epoch": 2.25, + "learning_rate": 4.6255380872169195e-05, + "loss": 0.078, + "step": 16010 + }, + { + "epoch": 2.25, + "learning_rate": 4.6254912970241434e-05, + "loss": 0.059, + "step": 16012 + }, + { + "epoch": 2.25, + "learning_rate": 4.625444506831369e-05, + "loss": 0.0592, + "step": 16014 + }, + { + "epoch": 2.25, + "learning_rate": 4.6253977166385926e-05, + "loss": 0.0463, + "step": 16016 + }, + { + "epoch": 2.25, + "learning_rate": 4.625350926445817e-05, + "loss": 0.0571, + "step": 16018 + }, + { + "epoch": 2.25, + "learning_rate": 4.625304136253041e-05, + "loss": 0.0479, + "step": 16020 + }, + { + "epoch": 2.25, + "learning_rate": 4.6252573460602664e-05, + "loss": 0.0529, + "step": 16022 + }, + { + "epoch": 2.25, + "learning_rate": 4.62521055586749e-05, + "loss": 0.0784, + "step": 16024 + }, + { + "epoch": 2.25, + "learning_rate": 4.625163765674715e-05, + "loss": 0.0507, + "step": 16026 + }, + { + "epoch": 2.25, + "learning_rate": 4.625116975481939e-05, + "loss": 0.0442, + "step": 16028 + }, + { + "epoch": 2.25, + "learning_rate": 4.6250701852891634e-05, + "loss": 0.0547, + "step": 16030 + }, + { + "epoch": 2.25, + "learning_rate": 4.625023395096388e-05, + "loss": 0.0469, + "step": 16032 + }, + { + "epoch": 2.25, + "learning_rate": 4.6249766049036126e-05, + "loss": 0.0338, + "step": 16034 + }, + { + "epoch": 2.25, + "learning_rate": 4.6249298147108365e-05, + "loss": 0.0775, + "step": 16036 + }, + { + "epoch": 2.25, + "learning_rate": 4.624883024518061e-05, + "loss": 0.0377, + "step": 16038 + }, + { + "epoch": 2.25, + "learning_rate": 4.624836234325286e-05, + "loss": 0.062, + "step": 16040 + }, + { + "epoch": 2.25, + "learning_rate": 4.62478944413251e-05, + "loss": 0.0591, + "step": 16042 + }, + { + "epoch": 2.25, + "learning_rate": 4.624742653939734e-05, + "loss": 0.0792, + "step": 16044 + }, + { + "epoch": 2.25, + "learning_rate": 4.624695863746959e-05, + "loss": 0.0659, + "step": 16046 + }, + { + "epoch": 2.25, + "learning_rate": 4.6246490735541834e-05, + "loss": 0.0681, + "step": 16048 + }, + { + "epoch": 2.25, + "learning_rate": 4.624602283361408e-05, + "loss": 0.0465, + "step": 16050 + }, + { + "epoch": 2.25, + "learning_rate": 4.624555493168632e-05, + "loss": 0.0468, + "step": 16052 + }, + { + "epoch": 2.25, + "learning_rate": 4.6245087029758564e-05, + "loss": 0.0424, + "step": 16054 + }, + { + "epoch": 2.25, + "learning_rate": 4.624461912783081e-05, + "loss": 0.0625, + "step": 16056 + }, + { + "epoch": 2.25, + "learning_rate": 4.6244151225903056e-05, + "loss": 0.0727, + "step": 16058 + }, + { + "epoch": 2.25, + "learning_rate": 4.6243683323975295e-05, + "loss": 0.0527, + "step": 16060 + }, + { + "epoch": 2.25, + "learning_rate": 4.624321542204754e-05, + "loss": 0.0547, + "step": 16062 + }, + { + "epoch": 2.25, + "learning_rate": 4.624274752011978e-05, + "loss": 0.0608, + "step": 16064 + }, + { + "epoch": 2.26, + "learning_rate": 4.624227961819203e-05, + "loss": 0.0504, + "step": 16066 + }, + { + "epoch": 2.26, + "learning_rate": 4.624181171626427e-05, + "loss": 0.0585, + "step": 16068 + }, + { + "epoch": 2.26, + "learning_rate": 4.624134381433652e-05, + "loss": 0.0524, + "step": 16070 + }, + { + "epoch": 2.26, + "learning_rate": 4.624087591240876e-05, + "loss": 0.0604, + "step": 16072 + }, + { + "epoch": 2.26, + "learning_rate": 4.624040801048101e-05, + "loss": 0.0621, + "step": 16074 + }, + { + "epoch": 2.26, + "learning_rate": 4.623994010855325e-05, + "loss": 0.0873, + "step": 16076 + }, + { + "epoch": 2.26, + "learning_rate": 4.6239472206625495e-05, + "loss": 0.0553, + "step": 16078 + }, + { + "epoch": 2.26, + "learning_rate": 4.6239004304697734e-05, + "loss": 0.0554, + "step": 16080 + }, + { + "epoch": 2.26, + "learning_rate": 4.623853640276998e-05, + "loss": 0.0522, + "step": 16082 + }, + { + "epoch": 2.26, + "learning_rate": 4.6238068500842226e-05, + "loss": 0.0606, + "step": 16084 + }, + { + "epoch": 2.26, + "learning_rate": 4.623760059891447e-05, + "loss": 0.0513, + "step": 16086 + }, + { + "epoch": 2.26, + "learning_rate": 4.623713269698671e-05, + "loss": 0.0511, + "step": 16088 + }, + { + "epoch": 2.26, + "learning_rate": 4.623666479505896e-05, + "loss": 0.0557, + "step": 16090 + }, + { + "epoch": 2.26, + "learning_rate": 4.62361968931312e-05, + "loss": 0.0704, + "step": 16092 + }, + { + "epoch": 2.26, + "learning_rate": 4.623572899120345e-05, + "loss": 0.049, + "step": 16094 + }, + { + "epoch": 2.26, + "learning_rate": 4.623526108927569e-05, + "loss": 0.0575, + "step": 16096 + }, + { + "epoch": 2.26, + "learning_rate": 4.6234793187347934e-05, + "loss": 0.0567, + "step": 16098 + }, + { + "epoch": 2.26, + "learning_rate": 4.623432528542018e-05, + "loss": 0.0604, + "step": 16100 + }, + { + "epoch": 2.26, + "learning_rate": 4.6233857383492426e-05, + "loss": 0.0472, + "step": 16102 + }, + { + "epoch": 2.26, + "learning_rate": 4.6233389481564665e-05, + "loss": 0.0522, + "step": 16104 + }, + { + "epoch": 2.26, + "learning_rate": 4.623292157963691e-05, + "loss": 0.063, + "step": 16106 + }, + { + "epoch": 2.26, + "learning_rate": 4.623245367770915e-05, + "loss": 0.0545, + "step": 16108 + }, + { + "epoch": 2.26, + "learning_rate": 4.62319857757814e-05, + "loss": 0.0519, + "step": 16110 + }, + { + "epoch": 2.26, + "learning_rate": 4.623151787385364e-05, + "loss": 0.0685, + "step": 16112 + }, + { + "epoch": 2.26, + "learning_rate": 4.623104997192589e-05, + "loss": 0.0856, + "step": 16114 + }, + { + "epoch": 2.26, + "learning_rate": 4.623058206999813e-05, + "loss": 0.069, + "step": 16116 + }, + { + "epoch": 2.26, + "learning_rate": 4.623011416807038e-05, + "loss": 0.0548, + "step": 16118 + }, + { + "epoch": 2.26, + "learning_rate": 4.622964626614262e-05, + "loss": 0.0451, + "step": 16120 + }, + { + "epoch": 2.26, + "learning_rate": 4.6229178364214864e-05, + "loss": 0.0634, + "step": 16122 + }, + { + "epoch": 2.26, + "learning_rate": 4.6228710462287104e-05, + "loss": 0.0577, + "step": 16124 + }, + { + "epoch": 2.26, + "learning_rate": 4.622824256035935e-05, + "loss": 0.0692, + "step": 16126 + }, + { + "epoch": 2.26, + "learning_rate": 4.6227774658431595e-05, + "loss": 0.0533, + "step": 16128 + }, + { + "epoch": 2.26, + "learning_rate": 4.622730675650384e-05, + "loss": 0.0734, + "step": 16130 + }, + { + "epoch": 2.26, + "learning_rate": 4.622683885457608e-05, + "loss": 0.0567, + "step": 16132 + }, + { + "epoch": 2.26, + "learning_rate": 4.6226370952648326e-05, + "loss": 0.0465, + "step": 16134 + }, + { + "epoch": 2.27, + "learning_rate": 4.622590305072057e-05, + "loss": 0.0433, + "step": 16136 + }, + { + "epoch": 2.27, + "learning_rate": 4.622543514879282e-05, + "loss": 0.0711, + "step": 16138 + }, + { + "epoch": 2.27, + "learning_rate": 4.622496724686506e-05, + "loss": 0.0676, + "step": 16140 + }, + { + "epoch": 2.27, + "learning_rate": 4.62244993449373e-05, + "loss": 0.0466, + "step": 16142 + }, + { + "epoch": 2.27, + "learning_rate": 4.622403144300955e-05, + "loss": 0.0457, + "step": 16144 + }, + { + "epoch": 2.27, + "learning_rate": 4.6223563541081795e-05, + "loss": 0.076, + "step": 16146 + }, + { + "epoch": 2.27, + "learning_rate": 4.6223095639154034e-05, + "loss": 0.049, + "step": 16148 + }, + { + "epoch": 2.27, + "learning_rate": 4.622262773722628e-05, + "loss": 0.066, + "step": 16150 + }, + { + "epoch": 2.27, + "learning_rate": 4.6222159835298526e-05, + "loss": 0.0499, + "step": 16152 + }, + { + "epoch": 2.27, + "learning_rate": 4.622169193337077e-05, + "loss": 0.0615, + "step": 16154 + }, + { + "epoch": 2.27, + "learning_rate": 4.622122403144301e-05, + "loss": 0.0845, + "step": 16156 + }, + { + "epoch": 2.27, + "learning_rate": 4.622075612951526e-05, + "loss": 0.0564, + "step": 16158 + }, + { + "epoch": 2.27, + "learning_rate": 4.6220288227587496e-05, + "loss": 0.0744, + "step": 16160 + }, + { + "epoch": 2.27, + "learning_rate": 4.621982032565975e-05, + "loss": 0.0741, + "step": 16162 + }, + { + "epoch": 2.27, + "learning_rate": 4.621935242373199e-05, + "loss": 0.0589, + "step": 16164 + }, + { + "epoch": 2.27, + "learning_rate": 4.6218884521804234e-05, + "loss": 0.048, + "step": 16166 + }, + { + "epoch": 2.27, + "learning_rate": 4.621841661987647e-05, + "loss": 0.0606, + "step": 16168 + }, + { + "epoch": 2.27, + "learning_rate": 4.6217948717948726e-05, + "loss": 0.0448, + "step": 16170 + }, + { + "epoch": 2.27, + "learning_rate": 4.6217480816020965e-05, + "loss": 0.0572, + "step": 16172 + }, + { + "epoch": 2.27, + "learning_rate": 4.621701291409321e-05, + "loss": 0.0815, + "step": 16174 + }, + { + "epoch": 2.27, + "learning_rate": 4.621654501216545e-05, + "loss": 0.055, + "step": 16176 + }, + { + "epoch": 2.27, + "learning_rate": 4.6216077110237696e-05, + "loss": 0.0532, + "step": 16178 + }, + { + "epoch": 2.27, + "learning_rate": 4.621560920830994e-05, + "loss": 0.0919, + "step": 16180 + }, + { + "epoch": 2.27, + "learning_rate": 4.621514130638219e-05, + "loss": 0.0571, + "step": 16182 + }, + { + "epoch": 2.27, + "learning_rate": 4.621467340445443e-05, + "loss": 0.0619, + "step": 16184 + }, + { + "epoch": 2.27, + "learning_rate": 4.621420550252667e-05, + "loss": 0.0578, + "step": 16186 + }, + { + "epoch": 2.27, + "learning_rate": 4.621373760059892e-05, + "loss": 0.0743, + "step": 16188 + }, + { + "epoch": 2.27, + "learning_rate": 4.6213269698671165e-05, + "loss": 0.0552, + "step": 16190 + }, + { + "epoch": 2.27, + "learning_rate": 4.6212801796743404e-05, + "loss": 0.0691, + "step": 16192 + }, + { + "epoch": 2.27, + "learning_rate": 4.621233389481565e-05, + "loss": 0.0478, + "step": 16194 + }, + { + "epoch": 2.27, + "learning_rate": 4.6211865992887895e-05, + "loss": 0.0586, + "step": 16196 + }, + { + "epoch": 2.27, + "learning_rate": 4.621139809096014e-05, + "loss": 0.076, + "step": 16198 + }, + { + "epoch": 2.27, + "learning_rate": 4.621093018903238e-05, + "loss": 0.0608, + "step": 16200 + }, + { + "epoch": 2.27, + "learning_rate": 4.6210462287104626e-05, + "loss": 0.0567, + "step": 16202 + }, + { + "epoch": 2.27, + "learning_rate": 4.620999438517687e-05, + "loss": 0.0479, + "step": 16204 + }, + { + "epoch": 2.27, + "learning_rate": 4.620952648324912e-05, + "loss": 0.0485, + "step": 16206 + }, + { + "epoch": 2.28, + "learning_rate": 4.620905858132136e-05, + "loss": 0.0612, + "step": 16208 + }, + { + "epoch": 2.28, + "learning_rate": 4.62085906793936e-05, + "loss": 0.0601, + "step": 16210 + }, + { + "epoch": 2.28, + "learning_rate": 4.620812277746584e-05, + "loss": 0.0503, + "step": 16212 + }, + { + "epoch": 2.28, + "learning_rate": 4.620765487553809e-05, + "loss": 0.0633, + "step": 16214 + }, + { + "epoch": 2.28, + "learning_rate": 4.6207186973610334e-05, + "loss": 0.0617, + "step": 16216 + }, + { + "epoch": 2.28, + "learning_rate": 4.6206719071682573e-05, + "loss": 0.0533, + "step": 16218 + }, + { + "epoch": 2.28, + "learning_rate": 4.620625116975482e-05, + "loss": 0.0569, + "step": 16220 + }, + { + "epoch": 2.28, + "learning_rate": 4.6205783267827065e-05, + "loss": 0.0582, + "step": 16222 + }, + { + "epoch": 2.28, + "learning_rate": 4.620531536589931e-05, + "loss": 0.0469, + "step": 16224 + }, + { + "epoch": 2.28, + "learning_rate": 4.620484746397155e-05, + "loss": 0.1163, + "step": 16226 + }, + { + "epoch": 2.28, + "learning_rate": 4.6204379562043796e-05, + "loss": 0.0874, + "step": 16228 + }, + { + "epoch": 2.28, + "learning_rate": 4.620391166011604e-05, + "loss": 0.0459, + "step": 16230 + }, + { + "epoch": 2.28, + "learning_rate": 4.620344375818829e-05, + "loss": 0.0586, + "step": 16232 + }, + { + "epoch": 2.28, + "learning_rate": 4.620297585626053e-05, + "loss": 0.0801, + "step": 16234 + }, + { + "epoch": 2.28, + "learning_rate": 4.620250795433277e-05, + "loss": 0.0812, + "step": 16236 + }, + { + "epoch": 2.28, + "learning_rate": 4.620204005240502e-05, + "loss": 0.0583, + "step": 16238 + }, + { + "epoch": 2.28, + "learning_rate": 4.6201572150477265e-05, + "loss": 0.0785, + "step": 16240 + }, + { + "epoch": 2.28, + "learning_rate": 4.6201104248549504e-05, + "loss": 0.0425, + "step": 16242 + }, + { + "epoch": 2.28, + "learning_rate": 4.620063634662175e-05, + "loss": 0.0945, + "step": 16244 + }, + { + "epoch": 2.28, + "learning_rate": 4.620016844469399e-05, + "loss": 0.0518, + "step": 16246 + }, + { + "epoch": 2.28, + "learning_rate": 4.619970054276624e-05, + "loss": 0.0438, + "step": 16248 + }, + { + "epoch": 2.28, + "learning_rate": 4.619923264083848e-05, + "loss": 0.0681, + "step": 16250 + }, + { + "epoch": 2.28, + "learning_rate": 4.619876473891073e-05, + "loss": 0.0466, + "step": 16252 + }, + { + "epoch": 2.28, + "learning_rate": 4.6198296836982966e-05, + "loss": 0.0776, + "step": 16254 + }, + { + "epoch": 2.28, + "learning_rate": 4.619782893505521e-05, + "loss": 0.0648, + "step": 16256 + }, + { + "epoch": 2.28, + "learning_rate": 4.619736103312746e-05, + "loss": 0.0503, + "step": 16258 + }, + { + "epoch": 2.28, + "learning_rate": 4.6196893131199704e-05, + "loss": 0.0909, + "step": 16260 + }, + { + "epoch": 2.28, + "learning_rate": 4.619642522927194e-05, + "loss": 0.0644, + "step": 16262 + }, + { + "epoch": 2.28, + "learning_rate": 4.619595732734419e-05, + "loss": 0.0481, + "step": 16264 + }, + { + "epoch": 2.28, + "learning_rate": 4.6195489425416435e-05, + "loss": 0.0545, + "step": 16266 + }, + { + "epoch": 2.28, + "learning_rate": 4.619502152348868e-05, + "loss": 0.0611, + "step": 16268 + }, + { + "epoch": 2.28, + "learning_rate": 4.619455362156092e-05, + "loss": 0.0729, + "step": 16270 + }, + { + "epoch": 2.28, + "learning_rate": 4.6194085719633166e-05, + "loss": 0.0636, + "step": 16272 + }, + { + "epoch": 2.28, + "learning_rate": 4.619361781770541e-05, + "loss": 0.0756, + "step": 16274 + }, + { + "epoch": 2.28, + "learning_rate": 4.619314991577766e-05, + "loss": 0.0516, + "step": 16276 + }, + { + "epoch": 2.28, + "learning_rate": 4.6192682013849897e-05, + "loss": 0.0693, + "step": 16278 + }, + { + "epoch": 2.29, + "learning_rate": 4.619221411192214e-05, + "loss": 0.0627, + "step": 16280 + }, + { + "epoch": 2.29, + "learning_rate": 4.619174620999439e-05, + "loss": 0.062, + "step": 16282 + }, + { + "epoch": 2.29, + "learning_rate": 4.6191278308066634e-05, + "loss": 0.0543, + "step": 16284 + }, + { + "epoch": 2.29, + "learning_rate": 4.6190810406138873e-05, + "loss": 0.0464, + "step": 16286 + }, + { + "epoch": 2.29, + "learning_rate": 4.619034250421112e-05, + "loss": 0.0653, + "step": 16288 + }, + { + "epoch": 2.29, + "learning_rate": 4.618987460228336e-05, + "loss": 0.0498, + "step": 16290 + }, + { + "epoch": 2.29, + "learning_rate": 4.618940670035561e-05, + "loss": 0.062, + "step": 16292 + }, + { + "epoch": 2.29, + "learning_rate": 4.618893879842785e-05, + "loss": 0.0462, + "step": 16294 + }, + { + "epoch": 2.29, + "learning_rate": 4.6188470896500096e-05, + "loss": 0.0664, + "step": 16296 + }, + { + "epoch": 2.29, + "learning_rate": 4.6188002994572335e-05, + "loss": 0.0627, + "step": 16298 + }, + { + "epoch": 2.29, + "learning_rate": 4.618753509264459e-05, + "loss": 0.0553, + "step": 16300 + }, + { + "epoch": 2.29, + "learning_rate": 4.618706719071683e-05, + "loss": 0.0562, + "step": 16302 + }, + { + "epoch": 2.29, + "learning_rate": 4.618659928878907e-05, + "loss": 0.0546, + "step": 16304 + }, + { + "epoch": 2.29, + "learning_rate": 4.618613138686131e-05, + "loss": 0.0595, + "step": 16306 + }, + { + "epoch": 2.29, + "learning_rate": 4.618566348493356e-05, + "loss": 0.0518, + "step": 16308 + }, + { + "epoch": 2.29, + "learning_rate": 4.6185195583005804e-05, + "loss": 0.0709, + "step": 16310 + }, + { + "epoch": 2.29, + "learning_rate": 4.618472768107805e-05, + "loss": 0.055, + "step": 16312 + }, + { + "epoch": 2.29, + "learning_rate": 4.618425977915029e-05, + "loss": 0.0624, + "step": 16314 + }, + { + "epoch": 2.29, + "learning_rate": 4.6183791877222535e-05, + "loss": 0.0775, + "step": 16316 + }, + { + "epoch": 2.29, + "learning_rate": 4.618332397529478e-05, + "loss": 0.0514, + "step": 16318 + }, + { + "epoch": 2.29, + "learning_rate": 4.618285607336703e-05, + "loss": 0.0788, + "step": 16320 + }, + { + "epoch": 2.29, + "learning_rate": 4.6182388171439266e-05, + "loss": 0.0521, + "step": 16322 + }, + { + "epoch": 2.29, + "learning_rate": 4.618192026951151e-05, + "loss": 0.0568, + "step": 16324 + }, + { + "epoch": 2.29, + "learning_rate": 4.618145236758376e-05, + "loss": 0.0472, + "step": 16326 + }, + { + "epoch": 2.29, + "learning_rate": 4.6180984465656004e-05, + "loss": 0.0519, + "step": 16328 + }, + { + "epoch": 2.29, + "learning_rate": 4.618051656372824e-05, + "loss": 0.0589, + "step": 16330 + }, + { + "epoch": 2.29, + "learning_rate": 4.618004866180049e-05, + "loss": 0.0736, + "step": 16332 + }, + { + "epoch": 2.29, + "learning_rate": 4.6179580759872735e-05, + "loss": 0.0629, + "step": 16334 + }, + { + "epoch": 2.29, + "learning_rate": 4.617911285794498e-05, + "loss": 0.0486, + "step": 16336 + }, + { + "epoch": 2.29, + "learning_rate": 4.617864495601722e-05, + "loss": 0.0601, + "step": 16338 + }, + { + "epoch": 2.29, + "learning_rate": 4.6178177054089466e-05, + "loss": 0.0581, + "step": 16340 + }, + { + "epoch": 2.29, + "learning_rate": 4.6177709152161705e-05, + "loss": 0.0396, + "step": 16342 + }, + { + "epoch": 2.29, + "learning_rate": 4.617724125023396e-05, + "loss": 0.0938, + "step": 16344 + }, + { + "epoch": 2.29, + "learning_rate": 4.6176773348306197e-05, + "loss": 0.0623, + "step": 16346 + }, + { + "epoch": 2.29, + "learning_rate": 4.617630544637844e-05, + "loss": 0.0791, + "step": 16348 + }, + { + "epoch": 2.3, + "learning_rate": 4.617583754445068e-05, + "loss": 0.057, + "step": 16350 + }, + { + "epoch": 2.3, + "learning_rate": 4.6175369642522934e-05, + "loss": 0.0463, + "step": 16352 + }, + { + "epoch": 2.3, + "learning_rate": 4.6174901740595173e-05, + "loss": 0.046, + "step": 16354 + }, + { + "epoch": 2.3, + "learning_rate": 4.617443383866742e-05, + "loss": 0.0841, + "step": 16356 + }, + { + "epoch": 2.3, + "learning_rate": 4.617396593673966e-05, + "loss": 0.0467, + "step": 16358 + }, + { + "epoch": 2.3, + "learning_rate": 4.6173498034811904e-05, + "loss": 0.0535, + "step": 16360 + }, + { + "epoch": 2.3, + "learning_rate": 4.617303013288415e-05, + "loss": 0.047, + "step": 16362 + }, + { + "epoch": 2.3, + "learning_rate": 4.6172562230956396e-05, + "loss": 0.1096, + "step": 16364 + }, + { + "epoch": 2.3, + "learning_rate": 4.6172094329028635e-05, + "loss": 0.068, + "step": 16366 + }, + { + "epoch": 2.3, + "learning_rate": 4.617162642710088e-05, + "loss": 0.0582, + "step": 16368 + }, + { + "epoch": 2.3, + "learning_rate": 4.617115852517313e-05, + "loss": 0.0677, + "step": 16370 + }, + { + "epoch": 2.3, + "learning_rate": 4.617069062324537e-05, + "loss": 0.0565, + "step": 16372 + }, + { + "epoch": 2.3, + "learning_rate": 4.617022272131761e-05, + "loss": 0.07, + "step": 16374 + }, + { + "epoch": 2.3, + "learning_rate": 4.616975481938986e-05, + "loss": 0.0644, + "step": 16376 + }, + { + "epoch": 2.3, + "learning_rate": 4.6169286917462104e-05, + "loss": 0.0604, + "step": 16378 + }, + { + "epoch": 2.3, + "learning_rate": 4.616881901553435e-05, + "loss": 0.0556, + "step": 16380 + }, + { + "epoch": 2.3, + "learning_rate": 4.616835111360659e-05, + "loss": 0.0456, + "step": 16382 + }, + { + "epoch": 2.3, + "learning_rate": 4.6167883211678835e-05, + "loss": 0.0548, + "step": 16384 + }, + { + "epoch": 2.3, + "learning_rate": 4.616741530975108e-05, + "loss": 0.0578, + "step": 16386 + }, + { + "epoch": 2.3, + "learning_rate": 4.616694740782333e-05, + "loss": 0.0583, + "step": 16388 + }, + { + "epoch": 2.3, + "learning_rate": 4.6166479505895566e-05, + "loss": 0.0528, + "step": 16390 + }, + { + "epoch": 2.3, + "learning_rate": 4.616601160396781e-05, + "loss": 0.064, + "step": 16392 + }, + { + "epoch": 2.3, + "learning_rate": 4.616554370204005e-05, + "loss": 0.058, + "step": 16394 + }, + { + "epoch": 2.3, + "learning_rate": 4.6165075800112304e-05, + "loss": 0.0615, + "step": 16396 + }, + { + "epoch": 2.3, + "learning_rate": 4.616460789818454e-05, + "loss": 0.0498, + "step": 16398 + }, + { + "epoch": 2.3, + "learning_rate": 4.616413999625679e-05, + "loss": 0.0625, + "step": 16400 + }, + { + "epoch": 2.3, + "learning_rate": 4.616367209432903e-05, + "loss": 0.0654, + "step": 16402 + }, + { + "epoch": 2.3, + "learning_rate": 4.6163204192401274e-05, + "loss": 0.0408, + "step": 16404 + }, + { + "epoch": 2.3, + "learning_rate": 4.616273629047352e-05, + "loss": 0.057, + "step": 16406 + }, + { + "epoch": 2.3, + "learning_rate": 4.6162268388545766e-05, + "loss": 0.0513, + "step": 16408 + }, + { + "epoch": 2.3, + "learning_rate": 4.6161800486618005e-05, + "loss": 0.0735, + "step": 16410 + }, + { + "epoch": 2.3, + "learning_rate": 4.616133258469025e-05, + "loss": 0.0664, + "step": 16412 + }, + { + "epoch": 2.3, + "learning_rate": 4.6160864682762497e-05, + "loss": 0.0707, + "step": 16414 + }, + { + "epoch": 2.3, + "learning_rate": 4.616039678083474e-05, + "loss": 0.0516, + "step": 16416 + }, + { + "epoch": 2.3, + "learning_rate": 4.615992887890698e-05, + "loss": 0.0514, + "step": 16418 + }, + { + "epoch": 2.3, + "learning_rate": 4.615946097697923e-05, + "loss": 0.0582, + "step": 16420 + }, + { + "epoch": 2.31, + "learning_rate": 4.6158993075051473e-05, + "loss": 0.0472, + "step": 16422 + }, + { + "epoch": 2.31, + "learning_rate": 4.615852517312372e-05, + "loss": 0.0581, + "step": 16424 + }, + { + "epoch": 2.31, + "learning_rate": 4.615805727119596e-05, + "loss": 0.0539, + "step": 16426 + }, + { + "epoch": 2.31, + "learning_rate": 4.6157589369268204e-05, + "loss": 0.0751, + "step": 16428 + }, + { + "epoch": 2.31, + "learning_rate": 4.615712146734045e-05, + "loss": 0.0493, + "step": 16430 + }, + { + "epoch": 2.31, + "learning_rate": 4.6156653565412696e-05, + "loss": 0.0724, + "step": 16432 + }, + { + "epoch": 2.31, + "learning_rate": 4.6156185663484935e-05, + "loss": 0.0493, + "step": 16434 + }, + { + "epoch": 2.31, + "learning_rate": 4.615571776155718e-05, + "loss": 0.0519, + "step": 16436 + }, + { + "epoch": 2.31, + "learning_rate": 4.615524985962942e-05, + "loss": 0.0593, + "step": 16438 + }, + { + "epoch": 2.31, + "learning_rate": 4.615478195770167e-05, + "loss": 0.048, + "step": 16440 + }, + { + "epoch": 2.31, + "learning_rate": 4.615431405577391e-05, + "loss": 0.0508, + "step": 16442 + }, + { + "epoch": 2.31, + "learning_rate": 4.615384615384616e-05, + "loss": 0.046, + "step": 16444 + }, + { + "epoch": 2.31, + "learning_rate": 4.61533782519184e-05, + "loss": 0.0685, + "step": 16446 + }, + { + "epoch": 2.31, + "learning_rate": 4.615291034999065e-05, + "loss": 0.0537, + "step": 16448 + }, + { + "epoch": 2.31, + "learning_rate": 4.615244244806289e-05, + "loss": 0.0448, + "step": 16450 + }, + { + "epoch": 2.31, + "learning_rate": 4.6151974546135135e-05, + "loss": 0.0671, + "step": 16452 + }, + { + "epoch": 2.31, + "learning_rate": 4.6151506644207374e-05, + "loss": 0.0448, + "step": 16454 + }, + { + "epoch": 2.31, + "learning_rate": 4.615103874227962e-05, + "loss": 0.0539, + "step": 16456 + }, + { + "epoch": 2.31, + "learning_rate": 4.6150570840351866e-05, + "loss": 0.0512, + "step": 16458 + }, + { + "epoch": 2.31, + "learning_rate": 4.615010293842411e-05, + "loss": 0.0685, + "step": 16460 + }, + { + "epoch": 2.31, + "learning_rate": 4.614963503649635e-05, + "loss": 0.0577, + "step": 16462 + }, + { + "epoch": 2.31, + "learning_rate": 4.61491671345686e-05, + "loss": 0.0531, + "step": 16464 + }, + { + "epoch": 2.31, + "learning_rate": 4.614869923264084e-05, + "loss": 0.0841, + "step": 16466 + }, + { + "epoch": 2.31, + "learning_rate": 4.614823133071308e-05, + "loss": 0.0471, + "step": 16468 + }, + { + "epoch": 2.31, + "learning_rate": 4.614776342878533e-05, + "loss": 0.0877, + "step": 16470 + }, + { + "epoch": 2.31, + "learning_rate": 4.614729552685757e-05, + "loss": 0.07, + "step": 16472 + }, + { + "epoch": 2.31, + "learning_rate": 4.614682762492982e-05, + "loss": 0.0508, + "step": 16474 + }, + { + "epoch": 2.31, + "learning_rate": 4.614635972300206e-05, + "loss": 0.0608, + "step": 16476 + }, + { + "epoch": 2.31, + "learning_rate": 4.6145891821074305e-05, + "loss": 0.0594, + "step": 16478 + }, + { + "epoch": 2.31, + "learning_rate": 4.6145423919146544e-05, + "loss": 0.0865, + "step": 16480 + }, + { + "epoch": 2.31, + "learning_rate": 4.6144956017218797e-05, + "loss": 0.0721, + "step": 16482 + }, + { + "epoch": 2.31, + "learning_rate": 4.6144488115291036e-05, + "loss": 0.0509, + "step": 16484 + }, + { + "epoch": 2.31, + "learning_rate": 4.614402021336328e-05, + "loss": 0.0556, + "step": 16486 + }, + { + "epoch": 2.31, + "learning_rate": 4.614355231143552e-05, + "loss": 0.0462, + "step": 16488 + }, + { + "epoch": 2.31, + "learning_rate": 4.614308440950777e-05, + "loss": 0.0593, + "step": 16490 + }, + { + "epoch": 2.31, + "learning_rate": 4.614261650758001e-05, + "loss": 0.0748, + "step": 16492 + }, + { + "epoch": 2.32, + "learning_rate": 4.614214860565226e-05, + "loss": 0.0551, + "step": 16494 + }, + { + "epoch": 2.32, + "learning_rate": 4.61416807037245e-05, + "loss": 0.0499, + "step": 16496 + }, + { + "epoch": 2.32, + "learning_rate": 4.6141212801796744e-05, + "loss": 0.0581, + "step": 16498 + }, + { + "epoch": 2.32, + "learning_rate": 4.614074489986899e-05, + "loss": 0.0746, + "step": 16500 + }, + { + "epoch": 2.32, + "learning_rate": 4.6140276997941235e-05, + "loss": 0.0484, + "step": 16502 + }, + { + "epoch": 2.32, + "learning_rate": 4.6139809096013475e-05, + "loss": 0.0661, + "step": 16504 + }, + { + "epoch": 2.32, + "learning_rate": 4.613934119408572e-05, + "loss": 0.0562, + "step": 16506 + }, + { + "epoch": 2.32, + "learning_rate": 4.6138873292157966e-05, + "loss": 0.0686, + "step": 16508 + }, + { + "epoch": 2.32, + "learning_rate": 4.613840539023021e-05, + "loss": 0.0752, + "step": 16510 + }, + { + "epoch": 2.32, + "learning_rate": 4.613793748830245e-05, + "loss": 0.0491, + "step": 16512 + }, + { + "epoch": 2.32, + "learning_rate": 4.61374695863747e-05, + "loss": 0.0613, + "step": 16514 + }, + { + "epoch": 2.32, + "learning_rate": 4.613700168444694e-05, + "loss": 0.0764, + "step": 16516 + }, + { + "epoch": 2.32, + "learning_rate": 4.613653378251919e-05, + "loss": 0.0659, + "step": 16518 + }, + { + "epoch": 2.32, + "learning_rate": 4.613606588059143e-05, + "loss": 0.0626, + "step": 16520 + }, + { + "epoch": 2.32, + "learning_rate": 4.6135597978663674e-05, + "loss": 0.0582, + "step": 16522 + }, + { + "epoch": 2.32, + "learning_rate": 4.613513007673591e-05, + "loss": 0.0557, + "step": 16524 + }, + { + "epoch": 2.32, + "learning_rate": 4.6134662174808166e-05, + "loss": 0.0657, + "step": 16526 + }, + { + "epoch": 2.32, + "learning_rate": 4.6134194272880405e-05, + "loss": 0.0618, + "step": 16528 + }, + { + "epoch": 2.32, + "learning_rate": 4.613372637095265e-05, + "loss": 0.072, + "step": 16530 + }, + { + "epoch": 2.32, + "learning_rate": 4.613325846902489e-05, + "loss": 0.062, + "step": 16532 + }, + { + "epoch": 2.32, + "learning_rate": 4.6132790567097136e-05, + "loss": 0.0533, + "step": 16534 + }, + { + "epoch": 2.32, + "learning_rate": 4.613232266516938e-05, + "loss": 0.0631, + "step": 16536 + }, + { + "epoch": 2.32, + "learning_rate": 4.613185476324163e-05, + "loss": 0.0842, + "step": 16538 + }, + { + "epoch": 2.32, + "learning_rate": 4.613138686131387e-05, + "loss": 0.0708, + "step": 16540 + }, + { + "epoch": 2.32, + "learning_rate": 4.613091895938611e-05, + "loss": 0.0484, + "step": 16542 + }, + { + "epoch": 2.32, + "learning_rate": 4.613045105745836e-05, + "loss": 0.0605, + "step": 16544 + }, + { + "epoch": 2.32, + "learning_rate": 4.6129983155530605e-05, + "loss": 0.0594, + "step": 16546 + }, + { + "epoch": 2.32, + "learning_rate": 4.6129515253602844e-05, + "loss": 0.0636, + "step": 16548 + }, + { + "epoch": 2.32, + "learning_rate": 4.612904735167509e-05, + "loss": 0.0584, + "step": 16550 + }, + { + "epoch": 2.32, + "learning_rate": 4.6128579449747336e-05, + "loss": 0.0532, + "step": 16552 + }, + { + "epoch": 2.32, + "learning_rate": 4.612811154781958e-05, + "loss": 0.073, + "step": 16554 + }, + { + "epoch": 2.32, + "learning_rate": 4.612764364589182e-05, + "loss": 0.0531, + "step": 16556 + }, + { + "epoch": 2.32, + "learning_rate": 4.612717574396407e-05, + "loss": 0.0558, + "step": 16558 + }, + { + "epoch": 2.32, + "learning_rate": 4.612670784203631e-05, + "loss": 0.0779, + "step": 16560 + }, + { + "epoch": 2.32, + "learning_rate": 4.612623994010856e-05, + "loss": 0.0613, + "step": 16562 + }, + { + "epoch": 2.33, + "learning_rate": 4.61257720381808e-05, + "loss": 0.0509, + "step": 16564 + }, + { + "epoch": 2.33, + "learning_rate": 4.6125304136253044e-05, + "loss": 0.0541, + "step": 16566 + }, + { + "epoch": 2.33, + "learning_rate": 4.612483623432528e-05, + "loss": 0.0821, + "step": 16568 + }, + { + "epoch": 2.33, + "learning_rate": 4.6124368332397535e-05, + "loss": 0.0678, + "step": 16570 + }, + { + "epoch": 2.33, + "learning_rate": 4.6123900430469775e-05, + "loss": 0.0801, + "step": 16572 + }, + { + "epoch": 2.33, + "learning_rate": 4.612343252854202e-05, + "loss": 0.0473, + "step": 16574 + }, + { + "epoch": 2.33, + "learning_rate": 4.612296462661426e-05, + "loss": 0.0618, + "step": 16576 + }, + { + "epoch": 2.33, + "learning_rate": 4.612249672468651e-05, + "loss": 0.0674, + "step": 16578 + }, + { + "epoch": 2.33, + "learning_rate": 4.612202882275875e-05, + "loss": 0.0641, + "step": 16580 + }, + { + "epoch": 2.33, + "learning_rate": 4.6121560920831e-05, + "loss": 0.0386, + "step": 16582 + }, + { + "epoch": 2.33, + "learning_rate": 4.6121093018903236e-05, + "loss": 0.0587, + "step": 16584 + }, + { + "epoch": 2.33, + "learning_rate": 4.612062511697548e-05, + "loss": 0.0815, + "step": 16586 + }, + { + "epoch": 2.33, + "learning_rate": 4.612015721504773e-05, + "loss": 0.0677, + "step": 16588 + }, + { + "epoch": 2.33, + "learning_rate": 4.6119689313119974e-05, + "loss": 0.0587, + "step": 16590 + }, + { + "epoch": 2.33, + "learning_rate": 4.611922141119221e-05, + "loss": 0.0756, + "step": 16592 + }, + { + "epoch": 2.33, + "learning_rate": 4.611875350926446e-05, + "loss": 0.052, + "step": 16594 + }, + { + "epoch": 2.33, + "learning_rate": 4.6118285607336705e-05, + "loss": 0.0793, + "step": 16596 + }, + { + "epoch": 2.33, + "learning_rate": 4.611781770540895e-05, + "loss": 0.0521, + "step": 16598 + }, + { + "epoch": 2.33, + "learning_rate": 4.611734980348119e-05, + "loss": 0.0568, + "step": 16600 + }, + { + "epoch": 2.33, + "learning_rate": 4.6116881901553436e-05, + "loss": 0.0561, + "step": 16602 + }, + { + "epoch": 2.33, + "learning_rate": 4.611641399962568e-05, + "loss": 0.0714, + "step": 16604 + }, + { + "epoch": 2.33, + "learning_rate": 4.611594609769793e-05, + "loss": 0.0629, + "step": 16606 + }, + { + "epoch": 2.33, + "learning_rate": 4.611547819577017e-05, + "loss": 0.0693, + "step": 16608 + }, + { + "epoch": 2.33, + "learning_rate": 4.611501029384241e-05, + "loss": 0.0908, + "step": 16610 + }, + { + "epoch": 2.33, + "learning_rate": 4.611454239191466e-05, + "loss": 0.0742, + "step": 16612 + }, + { + "epoch": 2.33, + "learning_rate": 4.6114074489986905e-05, + "loss": 0.0683, + "step": 16614 + }, + { + "epoch": 2.33, + "learning_rate": 4.6113606588059144e-05, + "loss": 0.0545, + "step": 16616 + }, + { + "epoch": 2.33, + "learning_rate": 4.611313868613139e-05, + "loss": 0.0542, + "step": 16618 + }, + { + "epoch": 2.33, + "learning_rate": 4.611267078420363e-05, + "loss": 0.074, + "step": 16620 + }, + { + "epoch": 2.33, + "learning_rate": 4.611220288227588e-05, + "loss": 0.051, + "step": 16622 + }, + { + "epoch": 2.33, + "learning_rate": 4.611173498034812e-05, + "loss": 0.0808, + "step": 16624 + }, + { + "epoch": 2.33, + "learning_rate": 4.611126707842037e-05, + "loss": 0.0537, + "step": 16626 + }, + { + "epoch": 2.33, + "learning_rate": 4.6110799176492606e-05, + "loss": 0.0457, + "step": 16628 + }, + { + "epoch": 2.33, + "learning_rate": 4.611033127456486e-05, + "loss": 0.087, + "step": 16630 + }, + { + "epoch": 2.33, + "learning_rate": 4.61098633726371e-05, + "loss": 0.0687, + "step": 16632 + }, + { + "epoch": 2.33, + "learning_rate": 4.6109395470709344e-05, + "loss": 0.056, + "step": 16634 + }, + { + "epoch": 2.34, + "learning_rate": 4.610892756878158e-05, + "loss": 0.0518, + "step": 16636 + }, + { + "epoch": 2.34, + "learning_rate": 4.610845966685383e-05, + "loss": 0.0531, + "step": 16638 + }, + { + "epoch": 2.34, + "learning_rate": 4.6107991764926075e-05, + "loss": 0.0789, + "step": 16640 + }, + { + "epoch": 2.34, + "learning_rate": 4.610752386299832e-05, + "loss": 0.0628, + "step": 16642 + }, + { + "epoch": 2.34, + "learning_rate": 4.610705596107056e-05, + "loss": 0.0588, + "step": 16644 + }, + { + "epoch": 2.34, + "learning_rate": 4.6106588059142806e-05, + "loss": 0.0638, + "step": 16646 + }, + { + "epoch": 2.34, + "learning_rate": 4.610612015721505e-05, + "loss": 0.061, + "step": 16648 + }, + { + "epoch": 2.34, + "learning_rate": 4.61056522552873e-05, + "loss": 0.0556, + "step": 16650 + }, + { + "epoch": 2.34, + "learning_rate": 4.6105184353359536e-05, + "loss": 0.0462, + "step": 16652 + }, + { + "epoch": 2.34, + "learning_rate": 4.610471645143178e-05, + "loss": 0.0714, + "step": 16654 + }, + { + "epoch": 2.34, + "learning_rate": 4.610424854950403e-05, + "loss": 0.0501, + "step": 16656 + }, + { + "epoch": 2.34, + "learning_rate": 4.6103780647576274e-05, + "loss": 0.0516, + "step": 16658 + }, + { + "epoch": 2.34, + "learning_rate": 4.610331274564851e-05, + "loss": 0.0599, + "step": 16660 + }, + { + "epoch": 2.34, + "learning_rate": 4.610284484372076e-05, + "loss": 0.0821, + "step": 16662 + }, + { + "epoch": 2.34, + "learning_rate": 4.6102376941793005e-05, + "loss": 0.0811, + "step": 16664 + }, + { + "epoch": 2.34, + "learning_rate": 4.610190903986525e-05, + "loss": 0.0708, + "step": 16666 + }, + { + "epoch": 2.34, + "learning_rate": 4.610144113793749e-05, + "loss": 0.0741, + "step": 16668 + }, + { + "epoch": 2.34, + "learning_rate": 4.6100973236009736e-05, + "loss": 0.0713, + "step": 16670 + }, + { + "epoch": 2.34, + "learning_rate": 4.6100505334081975e-05, + "loss": 0.0618, + "step": 16672 + }, + { + "epoch": 2.34, + "learning_rate": 4.610003743215423e-05, + "loss": 0.0671, + "step": 16674 + }, + { + "epoch": 2.34, + "learning_rate": 4.609956953022647e-05, + "loss": 0.0601, + "step": 16676 + }, + { + "epoch": 2.34, + "learning_rate": 4.609910162829871e-05, + "loss": 0.0528, + "step": 16678 + }, + { + "epoch": 2.34, + "learning_rate": 4.609863372637095e-05, + "loss": 0.0526, + "step": 16680 + }, + { + "epoch": 2.34, + "learning_rate": 4.60981658244432e-05, + "loss": 0.0576, + "step": 16682 + }, + { + "epoch": 2.34, + "learning_rate": 4.6097697922515444e-05, + "loss": 0.0426, + "step": 16684 + }, + { + "epoch": 2.34, + "learning_rate": 4.609723002058769e-05, + "loss": 0.0726, + "step": 16686 + }, + { + "epoch": 2.34, + "learning_rate": 4.609676211865993e-05, + "loss": 0.0705, + "step": 16688 + }, + { + "epoch": 2.34, + "learning_rate": 4.6096294216732175e-05, + "loss": 0.0621, + "step": 16690 + }, + { + "epoch": 2.34, + "learning_rate": 4.609582631480442e-05, + "loss": 0.0509, + "step": 16692 + }, + { + "epoch": 2.34, + "learning_rate": 4.609535841287667e-05, + "loss": 0.0782, + "step": 16694 + }, + { + "epoch": 2.34, + "learning_rate": 4.6094890510948906e-05, + "loss": 0.0638, + "step": 16696 + }, + { + "epoch": 2.34, + "learning_rate": 4.609442260902115e-05, + "loss": 0.0521, + "step": 16698 + }, + { + "epoch": 2.34, + "learning_rate": 4.60939547070934e-05, + "loss": 0.0451, + "step": 16700 + }, + { + "epoch": 2.34, + "learning_rate": 4.6093486805165644e-05, + "loss": 0.0591, + "step": 16702 + }, + { + "epoch": 2.34, + "learning_rate": 4.609301890323788e-05, + "loss": 0.0667, + "step": 16704 + }, + { + "epoch": 2.35, + "learning_rate": 4.609255100131013e-05, + "loss": 0.0561, + "step": 16706 + }, + { + "epoch": 2.35, + "learning_rate": 4.6092083099382375e-05, + "loss": 0.0688, + "step": 16708 + }, + { + "epoch": 2.35, + "learning_rate": 4.609161519745462e-05, + "loss": 0.0598, + "step": 16710 + }, + { + "epoch": 2.35, + "learning_rate": 4.609114729552686e-05, + "loss": 0.0753, + "step": 16712 + }, + { + "epoch": 2.35, + "learning_rate": 4.6090679393599106e-05, + "loss": 0.0469, + "step": 16714 + }, + { + "epoch": 2.35, + "learning_rate": 4.6090211491671345e-05, + "loss": 0.0704, + "step": 16716 + }, + { + "epoch": 2.35, + "learning_rate": 4.608974358974359e-05, + "loss": 0.077, + "step": 16718 + }, + { + "epoch": 2.35, + "learning_rate": 4.6089275687815836e-05, + "loss": 0.0525, + "step": 16720 + }, + { + "epoch": 2.35, + "learning_rate": 4.6088807785888076e-05, + "loss": 0.0562, + "step": 16722 + }, + { + "epoch": 2.35, + "learning_rate": 4.608833988396032e-05, + "loss": 0.052, + "step": 16724 + }, + { + "epoch": 2.35, + "learning_rate": 4.608787198203257e-05, + "loss": 0.0595, + "step": 16726 + }, + { + "epoch": 2.35, + "learning_rate": 4.608740408010481e-05, + "loss": 0.0414, + "step": 16728 + }, + { + "epoch": 2.35, + "learning_rate": 4.608693617817705e-05, + "loss": 0.0572, + "step": 16730 + }, + { + "epoch": 2.35, + "learning_rate": 4.60864682762493e-05, + "loss": 0.0627, + "step": 16732 + }, + { + "epoch": 2.35, + "learning_rate": 4.6086000374321544e-05, + "loss": 0.0548, + "step": 16734 + }, + { + "epoch": 2.35, + "learning_rate": 4.608553247239379e-05, + "loss": 0.0753, + "step": 16736 + }, + { + "epoch": 2.35, + "learning_rate": 4.608506457046603e-05, + "loss": 0.0451, + "step": 16738 + }, + { + "epoch": 2.35, + "learning_rate": 4.6084596668538275e-05, + "loss": 0.0617, + "step": 16740 + }, + { + "epoch": 2.35, + "learning_rate": 4.608412876661052e-05, + "loss": 0.053, + "step": 16742 + }, + { + "epoch": 2.35, + "learning_rate": 4.608366086468277e-05, + "loss": 0.0546, + "step": 16744 + }, + { + "epoch": 2.35, + "learning_rate": 4.6083192962755006e-05, + "loss": 0.0679, + "step": 16746 + }, + { + "epoch": 2.35, + "learning_rate": 4.608272506082725e-05, + "loss": 0.0787, + "step": 16748 + }, + { + "epoch": 2.35, + "learning_rate": 4.608225715889949e-05, + "loss": 0.0687, + "step": 16750 + }, + { + "epoch": 2.35, + "learning_rate": 4.6081789256971744e-05, + "loss": 0.0536, + "step": 16752 + }, + { + "epoch": 2.35, + "learning_rate": 4.608132135504398e-05, + "loss": 0.0475, + "step": 16754 + }, + { + "epoch": 2.35, + "learning_rate": 4.608085345311623e-05, + "loss": 0.0521, + "step": 16756 + }, + { + "epoch": 2.35, + "learning_rate": 4.608038555118847e-05, + "loss": 0.0566, + "step": 16758 + }, + { + "epoch": 2.35, + "learning_rate": 4.607991764926072e-05, + "loss": 0.0481, + "step": 16760 + }, + { + "epoch": 2.35, + "learning_rate": 4.607944974733296e-05, + "loss": 0.0376, + "step": 16762 + }, + { + "epoch": 2.35, + "learning_rate": 4.6078981845405206e-05, + "loss": 0.0459, + "step": 16764 + }, + { + "epoch": 2.35, + "learning_rate": 4.6078513943477445e-05, + "loss": 0.0415, + "step": 16766 + }, + { + "epoch": 2.35, + "learning_rate": 4.607804604154969e-05, + "loss": 0.0703, + "step": 16768 + }, + { + "epoch": 2.35, + "learning_rate": 4.607757813962194e-05, + "loss": 0.0706, + "step": 16770 + }, + { + "epoch": 2.35, + "learning_rate": 4.607711023769418e-05, + "loss": 0.064, + "step": 16772 + }, + { + "epoch": 2.35, + "learning_rate": 4.607664233576642e-05, + "loss": 0.0482, + "step": 16774 + }, + { + "epoch": 2.35, + "learning_rate": 4.607617443383867e-05, + "loss": 0.0616, + "step": 16776 + }, + { + "epoch": 2.36, + "learning_rate": 4.6075706531910914e-05, + "loss": 0.0534, + "step": 16778 + }, + { + "epoch": 2.36, + "learning_rate": 4.607523862998316e-05, + "loss": 0.0702, + "step": 16780 + }, + { + "epoch": 2.36, + "learning_rate": 4.60747707280554e-05, + "loss": 0.0701, + "step": 16782 + }, + { + "epoch": 2.36, + "learning_rate": 4.6074302826127645e-05, + "loss": 0.0727, + "step": 16784 + }, + { + "epoch": 2.36, + "learning_rate": 4.607383492419989e-05, + "loss": 0.0622, + "step": 16786 + }, + { + "epoch": 2.36, + "learning_rate": 4.6073367022272137e-05, + "loss": 0.0635, + "step": 16788 + }, + { + "epoch": 2.36, + "learning_rate": 4.6072899120344376e-05, + "loss": 0.0579, + "step": 16790 + }, + { + "epoch": 2.36, + "learning_rate": 4.607243121841662e-05, + "loss": 0.0479, + "step": 16792 + }, + { + "epoch": 2.36, + "learning_rate": 4.607196331648887e-05, + "loss": 0.0494, + "step": 16794 + }, + { + "epoch": 2.36, + "learning_rate": 4.607149541456111e-05, + "loss": 0.0831, + "step": 16796 + }, + { + "epoch": 2.36, + "learning_rate": 4.607102751263335e-05, + "loss": 0.0533, + "step": 16798 + }, + { + "epoch": 2.36, + "learning_rate": 4.60705596107056e-05, + "loss": 0.0537, + "step": 16800 + }, + { + "epoch": 2.36, + "learning_rate": 4.607009170877784e-05, + "loss": 0.0819, + "step": 16802 + }, + { + "epoch": 2.36, + "learning_rate": 4.606962380685009e-05, + "loss": 0.0561, + "step": 16804 + }, + { + "epoch": 2.36, + "learning_rate": 4.606915590492233e-05, + "loss": 0.06, + "step": 16806 + }, + { + "epoch": 2.36, + "learning_rate": 4.6068688002994575e-05, + "loss": 0.0597, + "step": 16808 + }, + { + "epoch": 2.36, + "learning_rate": 4.6068220101066814e-05, + "loss": 0.0701, + "step": 16810 + }, + { + "epoch": 2.36, + "learning_rate": 4.606775219913906e-05, + "loss": 0.0679, + "step": 16812 + }, + { + "epoch": 2.36, + "learning_rate": 4.6067284297211306e-05, + "loss": 0.0591, + "step": 16814 + }, + { + "epoch": 2.36, + "learning_rate": 4.606681639528355e-05, + "loss": 0.0587, + "step": 16816 + }, + { + "epoch": 2.36, + "learning_rate": 4.606634849335579e-05, + "loss": 0.0487, + "step": 16818 + }, + { + "epoch": 2.36, + "learning_rate": 4.606588059142804e-05, + "loss": 0.0621, + "step": 16820 + }, + { + "epoch": 2.36, + "learning_rate": 4.606541268950028e-05, + "loss": 0.0735, + "step": 16822 + }, + { + "epoch": 2.36, + "learning_rate": 4.606494478757253e-05, + "loss": 0.0654, + "step": 16824 + }, + { + "epoch": 2.36, + "learning_rate": 4.606447688564477e-05, + "loss": 0.0682, + "step": 16826 + }, + { + "epoch": 2.36, + "learning_rate": 4.6064008983717014e-05, + "loss": 0.0346, + "step": 16828 + }, + { + "epoch": 2.36, + "learning_rate": 4.606354108178926e-05, + "loss": 0.0667, + "step": 16830 + }, + { + "epoch": 2.36, + "learning_rate": 4.6063073179861506e-05, + "loss": 0.0453, + "step": 16832 + }, + { + "epoch": 2.36, + "learning_rate": 4.6062605277933745e-05, + "loss": 0.0623, + "step": 16834 + }, + { + "epoch": 2.36, + "learning_rate": 4.606213737600599e-05, + "loss": 0.0471, + "step": 16836 + }, + { + "epoch": 2.36, + "learning_rate": 4.606166947407824e-05, + "loss": 0.0709, + "step": 16838 + }, + { + "epoch": 2.36, + "learning_rate": 4.606120157215048e-05, + "loss": 0.0435, + "step": 16840 + }, + { + "epoch": 2.36, + "learning_rate": 4.606073367022272e-05, + "loss": 0.0719, + "step": 16842 + }, + { + "epoch": 2.36, + "learning_rate": 4.606026576829497e-05, + "loss": 0.0587, + "step": 16844 + }, + { + "epoch": 2.36, + "learning_rate": 4.605979786636721e-05, + "loss": 0.0595, + "step": 16846 + }, + { + "epoch": 2.36, + "learning_rate": 4.605932996443946e-05, + "loss": 0.0608, + "step": 16848 + }, + { + "epoch": 2.37, + "learning_rate": 4.60588620625117e-05, + "loss": 0.0538, + "step": 16850 + }, + { + "epoch": 2.37, + "learning_rate": 4.6058394160583945e-05, + "loss": 0.0636, + "step": 16852 + }, + { + "epoch": 2.37, + "learning_rate": 4.6057926258656184e-05, + "loss": 0.0599, + "step": 16854 + }, + { + "epoch": 2.37, + "learning_rate": 4.6057458356728437e-05, + "loss": 0.0517, + "step": 16856 + }, + { + "epoch": 2.37, + "learning_rate": 4.6056990454800676e-05, + "loss": 0.0577, + "step": 16858 + }, + { + "epoch": 2.37, + "learning_rate": 4.605652255287292e-05, + "loss": 0.0617, + "step": 16860 + }, + { + "epoch": 2.37, + "learning_rate": 4.605605465094516e-05, + "loss": 0.0459, + "step": 16862 + }, + { + "epoch": 2.37, + "learning_rate": 4.605558674901741e-05, + "loss": 0.0593, + "step": 16864 + }, + { + "epoch": 2.37, + "learning_rate": 4.605511884708965e-05, + "loss": 0.0528, + "step": 16866 + }, + { + "epoch": 2.37, + "learning_rate": 4.60546509451619e-05, + "loss": 0.069, + "step": 16868 + }, + { + "epoch": 2.37, + "learning_rate": 4.605418304323414e-05, + "loss": 0.0485, + "step": 16870 + }, + { + "epoch": 2.37, + "learning_rate": 4.6053715141306383e-05, + "loss": 0.0633, + "step": 16872 + }, + { + "epoch": 2.37, + "learning_rate": 4.605324723937863e-05, + "loss": 0.0693, + "step": 16874 + }, + { + "epoch": 2.37, + "learning_rate": 4.6052779337450875e-05, + "loss": 0.0531, + "step": 16876 + }, + { + "epoch": 2.37, + "learning_rate": 4.6052311435523114e-05, + "loss": 0.0652, + "step": 16878 + }, + { + "epoch": 2.37, + "learning_rate": 4.605184353359536e-05, + "loss": 0.0575, + "step": 16880 + }, + { + "epoch": 2.37, + "learning_rate": 4.6051375631667606e-05, + "loss": 0.0489, + "step": 16882 + }, + { + "epoch": 2.37, + "learning_rate": 4.605090772973985e-05, + "loss": 0.0633, + "step": 16884 + }, + { + "epoch": 2.37, + "learning_rate": 4.605043982781209e-05, + "loss": 0.0673, + "step": 16886 + }, + { + "epoch": 2.37, + "learning_rate": 4.604997192588434e-05, + "loss": 0.0693, + "step": 16888 + }, + { + "epoch": 2.37, + "learning_rate": 4.604950402395658e-05, + "loss": 0.0557, + "step": 16890 + }, + { + "epoch": 2.37, + "learning_rate": 4.604903612202883e-05, + "loss": 0.07, + "step": 16892 + }, + { + "epoch": 2.37, + "learning_rate": 4.604856822010107e-05, + "loss": 0.0546, + "step": 16894 + }, + { + "epoch": 2.37, + "learning_rate": 4.6048100318173314e-05, + "loss": 0.0549, + "step": 16896 + }, + { + "epoch": 2.37, + "learning_rate": 4.604763241624555e-05, + "loss": 0.0663, + "step": 16898 + }, + { + "epoch": 2.37, + "learning_rate": 4.6047164514317806e-05, + "loss": 0.0624, + "step": 16900 + }, + { + "epoch": 2.37, + "learning_rate": 4.6046696612390045e-05, + "loss": 0.043, + "step": 16902 + }, + { + "epoch": 2.37, + "learning_rate": 4.604622871046229e-05, + "loss": 0.0709, + "step": 16904 + }, + { + "epoch": 2.37, + "learning_rate": 4.604576080853453e-05, + "loss": 0.0474, + "step": 16906 + }, + { + "epoch": 2.37, + "learning_rate": 4.604529290660678e-05, + "loss": 0.0657, + "step": 16908 + }, + { + "epoch": 2.37, + "learning_rate": 4.604482500467902e-05, + "loss": 0.0666, + "step": 16910 + }, + { + "epoch": 2.37, + "learning_rate": 4.604435710275127e-05, + "loss": 0.0794, + "step": 16912 + }, + { + "epoch": 2.37, + "learning_rate": 4.604388920082351e-05, + "loss": 0.0522, + "step": 16914 + }, + { + "epoch": 2.37, + "learning_rate": 4.604342129889575e-05, + "loss": 0.0546, + "step": 16916 + }, + { + "epoch": 2.37, + "learning_rate": 4.6042953396968e-05, + "loss": 0.0876, + "step": 16918 + }, + { + "epoch": 2.38, + "learning_rate": 4.6042485495040245e-05, + "loss": 0.0542, + "step": 16920 + }, + { + "epoch": 2.38, + "learning_rate": 4.6042017593112484e-05, + "loss": 0.0445, + "step": 16922 + }, + { + "epoch": 2.38, + "learning_rate": 4.604154969118473e-05, + "loss": 0.0845, + "step": 16924 + }, + { + "epoch": 2.38, + "learning_rate": 4.6041081789256976e-05, + "loss": 0.0591, + "step": 16926 + }, + { + "epoch": 2.38, + "learning_rate": 4.604061388732922e-05, + "loss": 0.0515, + "step": 16928 + }, + { + "epoch": 2.38, + "learning_rate": 4.604014598540146e-05, + "loss": 0.0716, + "step": 16930 + }, + { + "epoch": 2.38, + "learning_rate": 4.603967808347371e-05, + "loss": 0.045, + "step": 16932 + }, + { + "epoch": 2.38, + "learning_rate": 4.603921018154595e-05, + "loss": 0.0721, + "step": 16934 + }, + { + "epoch": 2.38, + "learning_rate": 4.60387422796182e-05, + "loss": 0.0594, + "step": 16936 + }, + { + "epoch": 2.38, + "learning_rate": 4.603827437769044e-05, + "loss": 0.0614, + "step": 16938 + }, + { + "epoch": 2.38, + "learning_rate": 4.6037806475762684e-05, + "loss": 0.0675, + "step": 16940 + }, + { + "epoch": 2.38, + "learning_rate": 4.603733857383493e-05, + "loss": 0.0688, + "step": 16942 + }, + { + "epoch": 2.38, + "learning_rate": 4.6036870671907175e-05, + "loss": 0.0542, + "step": 16944 + }, + { + "epoch": 2.38, + "learning_rate": 4.6036402769979414e-05, + "loss": 0.0424, + "step": 16946 + }, + { + "epoch": 2.38, + "learning_rate": 4.603593486805166e-05, + "loss": 0.0735, + "step": 16948 + }, + { + "epoch": 2.38, + "learning_rate": 4.60354669661239e-05, + "loss": 0.0488, + "step": 16950 + }, + { + "epoch": 2.38, + "learning_rate": 4.603499906419615e-05, + "loss": 0.0603, + "step": 16952 + }, + { + "epoch": 2.38, + "learning_rate": 4.603453116226839e-05, + "loss": 0.0782, + "step": 16954 + }, + { + "epoch": 2.38, + "learning_rate": 4.603406326034064e-05, + "loss": 0.0582, + "step": 16956 + }, + { + "epoch": 2.38, + "learning_rate": 4.6033595358412876e-05, + "loss": 0.064, + "step": 16958 + }, + { + "epoch": 2.38, + "learning_rate": 4.603312745648512e-05, + "loss": 0.0792, + "step": 16960 + }, + { + "epoch": 2.38, + "learning_rate": 4.603265955455737e-05, + "loss": 0.0692, + "step": 16962 + }, + { + "epoch": 2.38, + "learning_rate": 4.6032191652629614e-05, + "loss": 0.0634, + "step": 16964 + }, + { + "epoch": 2.38, + "learning_rate": 4.603172375070185e-05, + "loss": 0.0441, + "step": 16966 + }, + { + "epoch": 2.38, + "learning_rate": 4.60312558487741e-05, + "loss": 0.0729, + "step": 16968 + }, + { + "epoch": 2.38, + "learning_rate": 4.6030787946846345e-05, + "loss": 0.0599, + "step": 16970 + }, + { + "epoch": 2.38, + "learning_rate": 4.6030320044918584e-05, + "loss": 0.0608, + "step": 16972 + }, + { + "epoch": 2.38, + "learning_rate": 4.602985214299083e-05, + "loss": 0.0811, + "step": 16974 + }, + { + "epoch": 2.38, + "learning_rate": 4.6029384241063076e-05, + "loss": 0.0585, + "step": 16976 + }, + { + "epoch": 2.38, + "learning_rate": 4.602891633913532e-05, + "loss": 0.0783, + "step": 16978 + }, + { + "epoch": 2.38, + "learning_rate": 4.602844843720756e-05, + "loss": 0.066, + "step": 16980 + }, + { + "epoch": 2.38, + "learning_rate": 4.602798053527981e-05, + "loss": 0.0548, + "step": 16982 + }, + { + "epoch": 2.38, + "learning_rate": 4.6027512633352046e-05, + "loss": 0.0649, + "step": 16984 + }, + { + "epoch": 2.38, + "learning_rate": 4.60270447314243e-05, + "loss": 0.0574, + "step": 16986 + }, + { + "epoch": 2.38, + "learning_rate": 4.602657682949654e-05, + "loss": 0.0691, + "step": 16988 + }, + { + "epoch": 2.38, + "learning_rate": 4.6026108927568784e-05, + "loss": 0.0485, + "step": 16990 + }, + { + "epoch": 2.39, + "learning_rate": 4.602564102564102e-05, + "loss": 0.0437, + "step": 16992 + }, + { + "epoch": 2.39, + "learning_rate": 4.602517312371327e-05, + "loss": 0.0648, + "step": 16994 + }, + { + "epoch": 2.39, + "learning_rate": 4.6024705221785515e-05, + "loss": 0.062, + "step": 16996 + }, + { + "epoch": 2.39, + "learning_rate": 4.602423731985776e-05, + "loss": 0.0586, + "step": 16998 + }, + { + "epoch": 2.39, + "learning_rate": 4.602376941793e-05, + "loss": 0.0713, + "step": 17000 + }, + { + "epoch": 2.39, + "eval_gen_len": 29.3103, + "eval_loss": 1.077703595161438, + "eval_meteor": 0.0453, + "eval_runtime": 16.122, + "eval_samples_per_second": 3.598, + "eval_steps_per_second": 0.496, + "step": 17000 + }, + { + "epoch": 2.39, + "learning_rate": 4.6023301516002246e-05, + "loss": 0.0617, + "step": 17002 + }, + { + "epoch": 2.39, + "learning_rate": 4.602283361407449e-05, + "loss": 0.0574, + "step": 17004 + }, + { + "epoch": 2.39, + "learning_rate": 4.602236571214674e-05, + "loss": 0.0454, + "step": 17006 + }, + { + "epoch": 2.39, + "learning_rate": 4.602189781021898e-05, + "loss": 0.0754, + "step": 17008 + }, + { + "epoch": 2.39, + "learning_rate": 4.602142990829122e-05, + "loss": 0.076, + "step": 17010 + }, + { + "epoch": 2.39, + "learning_rate": 4.602096200636347e-05, + "loss": 0.0908, + "step": 17012 + }, + { + "epoch": 2.39, + "learning_rate": 4.6020494104435714e-05, + "loss": 0.0524, + "step": 17014 + }, + { + "epoch": 2.39, + "learning_rate": 4.6020026202507954e-05, + "loss": 0.0693, + "step": 17016 + }, + { + "epoch": 2.39, + "learning_rate": 4.60195583005802e-05, + "loss": 0.0546, + "step": 17018 + }, + { + "epoch": 2.39, + "learning_rate": 4.6019090398652445e-05, + "loss": 0.0899, + "step": 17020 + }, + { + "epoch": 2.39, + "learning_rate": 4.601862249672469e-05, + "loss": 0.0628, + "step": 17022 + }, + { + "epoch": 2.39, + "learning_rate": 4.601815459479693e-05, + "loss": 0.0701, + "step": 17024 + }, + { + "epoch": 2.39, + "learning_rate": 4.6017686692869176e-05, + "loss": 0.0752, + "step": 17026 + }, + { + "epoch": 2.39, + "learning_rate": 4.6017218790941416e-05, + "loss": 0.0449, + "step": 17028 + }, + { + "epoch": 2.39, + "learning_rate": 4.601675088901367e-05, + "loss": 0.066, + "step": 17030 + }, + { + "epoch": 2.39, + "learning_rate": 4.601628298708591e-05, + "loss": 0.0833, + "step": 17032 + }, + { + "epoch": 2.39, + "learning_rate": 4.601581508515815e-05, + "loss": 0.0474, + "step": 17034 + }, + { + "epoch": 2.39, + "learning_rate": 4.601534718323039e-05, + "loss": 0.0568, + "step": 17036 + }, + { + "epoch": 2.39, + "learning_rate": 4.6014879281302645e-05, + "loss": 0.0506, + "step": 17038 + }, + { + "epoch": 2.39, + "learning_rate": 4.6014411379374884e-05, + "loss": 0.061, + "step": 17040 + }, + { + "epoch": 2.39, + "learning_rate": 4.601394347744713e-05, + "loss": 0.0601, + "step": 17042 + }, + { + "epoch": 2.39, + "learning_rate": 4.601347557551937e-05, + "loss": 0.0619, + "step": 17044 + }, + { + "epoch": 2.39, + "learning_rate": 4.6013007673591615e-05, + "loss": 0.0704, + "step": 17046 + }, + { + "epoch": 2.39, + "learning_rate": 4.601253977166386e-05, + "loss": 0.0464, + "step": 17048 + }, + { + "epoch": 2.39, + "learning_rate": 4.601207186973611e-05, + "loss": 0.065, + "step": 17050 + }, + { + "epoch": 2.39, + "learning_rate": 4.6011603967808346e-05, + "loss": 0.0587, + "step": 17052 + }, + { + "epoch": 2.39, + "learning_rate": 4.601113606588059e-05, + "loss": 0.0758, + "step": 17054 + }, + { + "epoch": 2.39, + "learning_rate": 4.601066816395284e-05, + "loss": 0.0538, + "step": 17056 + }, + { + "epoch": 2.39, + "learning_rate": 4.6010200262025084e-05, + "loss": 0.0675, + "step": 17058 + }, + { + "epoch": 2.39, + "learning_rate": 4.600973236009732e-05, + "loss": 0.0759, + "step": 17060 + }, + { + "epoch": 2.4, + "learning_rate": 4.600926445816957e-05, + "loss": 0.0533, + "step": 17062 + }, + { + "epoch": 2.4, + "learning_rate": 4.6008796556241815e-05, + "loss": 0.0525, + "step": 17064 + }, + { + "epoch": 2.4, + "learning_rate": 4.600832865431406e-05, + "loss": 0.0541, + "step": 17066 + }, + { + "epoch": 2.4, + "learning_rate": 4.60078607523863e-05, + "loss": 0.0637, + "step": 17068 + }, + { + "epoch": 2.4, + "learning_rate": 4.6007392850458546e-05, + "loss": 0.0586, + "step": 17070 + }, + { + "epoch": 2.4, + "learning_rate": 4.600692494853079e-05, + "loss": 0.072, + "step": 17072 + }, + { + "epoch": 2.4, + "learning_rate": 4.600645704660304e-05, + "loss": 0.0754, + "step": 17074 + }, + { + "epoch": 2.4, + "learning_rate": 4.600598914467528e-05, + "loss": 0.0752, + "step": 17076 + }, + { + "epoch": 2.4, + "learning_rate": 4.600552124274752e-05, + "loss": 0.0705, + "step": 17078 + }, + { + "epoch": 2.4, + "learning_rate": 4.600505334081976e-05, + "loss": 0.0503, + "step": 17080 + }, + { + "epoch": 2.4, + "learning_rate": 4.6004585438892014e-05, + "loss": 0.0637, + "step": 17082 + }, + { + "epoch": 2.4, + "learning_rate": 4.6004117536964254e-05, + "loss": 0.0715, + "step": 17084 + }, + { + "epoch": 2.4, + "learning_rate": 4.60036496350365e-05, + "loss": 0.057, + "step": 17086 + }, + { + "epoch": 2.4, + "learning_rate": 4.600318173310874e-05, + "loss": 0.0567, + "step": 17088 + }, + { + "epoch": 2.4, + "learning_rate": 4.600271383118099e-05, + "loss": 0.0731, + "step": 17090 + }, + { + "epoch": 2.4, + "learning_rate": 4.600224592925323e-05, + "loss": 0.0605, + "step": 17092 + }, + { + "epoch": 2.4, + "learning_rate": 4.6001778027325476e-05, + "loss": 0.0607, + "step": 17094 + }, + { + "epoch": 2.4, + "learning_rate": 4.6001310125397716e-05, + "loss": 0.0753, + "step": 17096 + }, + { + "epoch": 2.4, + "learning_rate": 4.600084222346996e-05, + "loss": 0.0527, + "step": 17098 + }, + { + "epoch": 2.4, + "learning_rate": 4.600037432154221e-05, + "loss": 0.0481, + "step": 17100 + }, + { + "epoch": 2.4, + "learning_rate": 4.599990641961445e-05, + "loss": 0.0617, + "step": 17102 + }, + { + "epoch": 2.4, + "learning_rate": 4.599943851768669e-05, + "loss": 0.0761, + "step": 17104 + }, + { + "epoch": 2.4, + "learning_rate": 4.599897061575894e-05, + "loss": 0.0574, + "step": 17106 + }, + { + "epoch": 2.4, + "learning_rate": 4.5998502713831184e-05, + "loss": 0.0524, + "step": 17108 + }, + { + "epoch": 2.4, + "learning_rate": 4.599803481190343e-05, + "loss": 0.0573, + "step": 17110 + }, + { + "epoch": 2.4, + "learning_rate": 4.599756690997567e-05, + "loss": 0.0556, + "step": 17112 + }, + { + "epoch": 2.4, + "learning_rate": 4.5997099008047915e-05, + "loss": 0.0389, + "step": 17114 + }, + { + "epoch": 2.4, + "learning_rate": 4.599663110612016e-05, + "loss": 0.0475, + "step": 17116 + }, + { + "epoch": 2.4, + "learning_rate": 4.599616320419241e-05, + "loss": 0.0517, + "step": 17118 + }, + { + "epoch": 2.4, + "learning_rate": 4.5995695302264646e-05, + "loss": 0.0611, + "step": 17120 + }, + { + "epoch": 2.4, + "learning_rate": 4.599522740033689e-05, + "loss": 0.054, + "step": 17122 + }, + { + "epoch": 2.4, + "learning_rate": 4.599475949840913e-05, + "loss": 0.0561, + "step": 17124 + }, + { + "epoch": 2.4, + "learning_rate": 4.5994291596481384e-05, + "loss": 0.072, + "step": 17126 + }, + { + "epoch": 2.4, + "learning_rate": 4.599382369455362e-05, + "loss": 0.054, + "step": 17128 + }, + { + "epoch": 2.4, + "learning_rate": 4.599335579262587e-05, + "loss": 0.0766, + "step": 17130 + }, + { + "epoch": 2.4, + "learning_rate": 4.599288789069811e-05, + "loss": 0.0794, + "step": 17132 + }, + { + "epoch": 2.41, + "learning_rate": 4.599241998877036e-05, + "loss": 0.0664, + "step": 17134 + }, + { + "epoch": 2.41, + "learning_rate": 4.59919520868426e-05, + "loss": 0.067, + "step": 17136 + }, + { + "epoch": 2.41, + "learning_rate": 4.5991484184914846e-05, + "loss": 0.0645, + "step": 17138 + }, + { + "epoch": 2.41, + "learning_rate": 4.5991016282987085e-05, + "loss": 0.0591, + "step": 17140 + }, + { + "epoch": 2.41, + "learning_rate": 4.599054838105933e-05, + "loss": 0.0638, + "step": 17142 + }, + { + "epoch": 2.41, + "learning_rate": 4.599008047913158e-05, + "loss": 0.0713, + "step": 17144 + }, + { + "epoch": 2.41, + "learning_rate": 4.598961257720382e-05, + "loss": 0.061, + "step": 17146 + }, + { + "epoch": 2.41, + "learning_rate": 4.598914467527606e-05, + "loss": 0.0592, + "step": 17148 + }, + { + "epoch": 2.41, + "learning_rate": 4.598867677334831e-05, + "loss": 0.0666, + "step": 17150 + }, + { + "epoch": 2.41, + "learning_rate": 4.5988208871420554e-05, + "loss": 0.0607, + "step": 17152 + }, + { + "epoch": 2.41, + "learning_rate": 4.59877409694928e-05, + "loss": 0.0546, + "step": 17154 + }, + { + "epoch": 2.41, + "learning_rate": 4.598727306756504e-05, + "loss": 0.0609, + "step": 17156 + }, + { + "epoch": 2.41, + "learning_rate": 4.5986805165637285e-05, + "loss": 0.0678, + "step": 17158 + }, + { + "epoch": 2.41, + "learning_rate": 4.598633726370953e-05, + "loss": 0.0629, + "step": 17160 + }, + { + "epoch": 2.41, + "learning_rate": 4.5985869361781776e-05, + "loss": 0.0774, + "step": 17162 + }, + { + "epoch": 2.41, + "learning_rate": 4.5985401459854016e-05, + "loss": 0.0653, + "step": 17164 + }, + { + "epoch": 2.41, + "learning_rate": 4.598493355792626e-05, + "loss": 0.0508, + "step": 17166 + }, + { + "epoch": 2.41, + "learning_rate": 4.598446565599851e-05, + "loss": 0.0537, + "step": 17168 + }, + { + "epoch": 2.41, + "learning_rate": 4.598399775407075e-05, + "loss": 0.0452, + "step": 17170 + }, + { + "epoch": 2.41, + "learning_rate": 4.598352985214299e-05, + "loss": 0.0739, + "step": 17172 + }, + { + "epoch": 2.41, + "learning_rate": 4.598306195021524e-05, + "loss": 0.0569, + "step": 17174 + }, + { + "epoch": 2.41, + "learning_rate": 4.598259404828748e-05, + "loss": 0.0488, + "step": 17176 + }, + { + "epoch": 2.41, + "learning_rate": 4.598212614635973e-05, + "loss": 0.0352, + "step": 17178 + }, + { + "epoch": 2.41, + "learning_rate": 4.598165824443197e-05, + "loss": 0.0631, + "step": 17180 + }, + { + "epoch": 2.41, + "learning_rate": 4.5981190342504215e-05, + "loss": 0.0624, + "step": 17182 + }, + { + "epoch": 2.41, + "learning_rate": 4.5980722440576454e-05, + "loss": 0.0418, + "step": 17184 + }, + { + "epoch": 2.41, + "learning_rate": 4.598025453864871e-05, + "loss": 0.0597, + "step": 17186 + }, + { + "epoch": 2.41, + "learning_rate": 4.5979786636720946e-05, + "loss": 0.0612, + "step": 17188 + }, + { + "epoch": 2.41, + "learning_rate": 4.597931873479319e-05, + "loss": 0.0543, + "step": 17190 + }, + { + "epoch": 2.41, + "learning_rate": 4.597885083286543e-05, + "loss": 0.0512, + "step": 17192 + }, + { + "epoch": 2.41, + "learning_rate": 4.597838293093768e-05, + "loss": 0.0516, + "step": 17194 + }, + { + "epoch": 2.41, + "learning_rate": 4.597791502900992e-05, + "loss": 0.0556, + "step": 17196 + }, + { + "epoch": 2.41, + "learning_rate": 4.597744712708217e-05, + "loss": 0.0564, + "step": 17198 + }, + { + "epoch": 2.41, + "learning_rate": 4.597697922515441e-05, + "loss": 0.044, + "step": 17200 + }, + { + "epoch": 2.41, + "learning_rate": 4.5976511323226654e-05, + "loss": 0.078, + "step": 17202 + }, + { + "epoch": 2.41, + "learning_rate": 4.59760434212989e-05, + "loss": 0.0606, + "step": 17204 + }, + { + "epoch": 2.42, + "learning_rate": 4.5975575519371146e-05, + "loss": 0.0552, + "step": 17206 + }, + { + "epoch": 2.42, + "learning_rate": 4.5975107617443385e-05, + "loss": 0.0616, + "step": 17208 + }, + { + "epoch": 2.42, + "learning_rate": 4.597463971551563e-05, + "loss": 0.0567, + "step": 17210 + }, + { + "epoch": 2.42, + "learning_rate": 4.597417181358788e-05, + "loss": 0.0598, + "step": 17212 + }, + { + "epoch": 2.42, + "learning_rate": 4.597370391166012e-05, + "loss": 0.0723, + "step": 17214 + }, + { + "epoch": 2.42, + "learning_rate": 4.597323600973236e-05, + "loss": 0.0473, + "step": 17216 + }, + { + "epoch": 2.42, + "learning_rate": 4.597276810780461e-05, + "loss": 0.0779, + "step": 17218 + }, + { + "epoch": 2.42, + "learning_rate": 4.5972300205876854e-05, + "loss": 0.0689, + "step": 17220 + }, + { + "epoch": 2.42, + "learning_rate": 4.59718323039491e-05, + "loss": 0.0804, + "step": 17222 + }, + { + "epoch": 2.42, + "learning_rate": 4.597136440202134e-05, + "loss": 0.0717, + "step": 17224 + }, + { + "epoch": 2.42, + "learning_rate": 4.597089650009358e-05, + "loss": 0.0595, + "step": 17226 + }, + { + "epoch": 2.42, + "learning_rate": 4.5970428598165824e-05, + "loss": 0.055, + "step": 17228 + }, + { + "epoch": 2.42, + "learning_rate": 4.596996069623807e-05, + "loss": 0.0618, + "step": 17230 + }, + { + "epoch": 2.42, + "learning_rate": 4.5969492794310316e-05, + "loss": 0.0858, + "step": 17232 + }, + { + "epoch": 2.42, + "learning_rate": 4.5969024892382555e-05, + "loss": 0.0511, + "step": 17234 + }, + { + "epoch": 2.42, + "learning_rate": 4.59685569904548e-05, + "loss": 0.0913, + "step": 17236 + }, + { + "epoch": 2.42, + "learning_rate": 4.5968089088527047e-05, + "loss": 0.0728, + "step": 17238 + }, + { + "epoch": 2.42, + "learning_rate": 4.596762118659929e-05, + "loss": 0.0665, + "step": 17240 + }, + { + "epoch": 2.42, + "learning_rate": 4.596715328467153e-05, + "loss": 0.0573, + "step": 17242 + }, + { + "epoch": 2.42, + "learning_rate": 4.596668538274378e-05, + "loss": 0.0545, + "step": 17244 + }, + { + "epoch": 2.42, + "learning_rate": 4.5966217480816023e-05, + "loss": 0.0544, + "step": 17246 + }, + { + "epoch": 2.42, + "learning_rate": 4.596574957888827e-05, + "loss": 0.062, + "step": 17248 + }, + { + "epoch": 2.42, + "learning_rate": 4.596528167696051e-05, + "loss": 0.0514, + "step": 17250 + }, + { + "epoch": 2.42, + "learning_rate": 4.5964813775032754e-05, + "loss": 0.061, + "step": 17252 + }, + { + "epoch": 2.42, + "learning_rate": 4.5964345873105e-05, + "loss": 0.0646, + "step": 17254 + }, + { + "epoch": 2.42, + "learning_rate": 4.5963877971177246e-05, + "loss": 0.0504, + "step": 17256 + }, + { + "epoch": 2.42, + "learning_rate": 4.5963410069249485e-05, + "loss": 0.0726, + "step": 17258 + }, + { + "epoch": 2.42, + "learning_rate": 4.596294216732173e-05, + "loss": 0.054, + "step": 17260 + }, + { + "epoch": 2.42, + "learning_rate": 4.596247426539397e-05, + "loss": 0.0761, + "step": 17262 + }, + { + "epoch": 2.42, + "learning_rate": 4.596200636346622e-05, + "loss": 0.0594, + "step": 17264 + }, + { + "epoch": 2.42, + "learning_rate": 4.596153846153846e-05, + "loss": 0.0682, + "step": 17266 + }, + { + "epoch": 2.42, + "learning_rate": 4.596107055961071e-05, + "loss": 0.0634, + "step": 17268 + }, + { + "epoch": 2.42, + "learning_rate": 4.596060265768295e-05, + "loss": 0.0604, + "step": 17270 + }, + { + "epoch": 2.42, + "learning_rate": 4.596013475575519e-05, + "loss": 0.0801, + "step": 17272 + }, + { + "epoch": 2.42, + "learning_rate": 4.595966685382744e-05, + "loss": 0.0758, + "step": 17274 + }, + { + "epoch": 2.43, + "learning_rate": 4.5959198951899685e-05, + "loss": 0.0713, + "step": 17276 + }, + { + "epoch": 2.43, + "learning_rate": 4.5958731049971924e-05, + "loss": 0.0481, + "step": 17278 + }, + { + "epoch": 2.43, + "learning_rate": 4.595826314804417e-05, + "loss": 0.0451, + "step": 17280 + }, + { + "epoch": 2.43, + "learning_rate": 4.5957795246116416e-05, + "loss": 0.0611, + "step": 17282 + }, + { + "epoch": 2.43, + "learning_rate": 4.595732734418866e-05, + "loss": 0.0627, + "step": 17284 + }, + { + "epoch": 2.43, + "learning_rate": 4.59568594422609e-05, + "loss": 0.0542, + "step": 17286 + }, + { + "epoch": 2.43, + "learning_rate": 4.595639154033315e-05, + "loss": 0.054, + "step": 17288 + }, + { + "epoch": 2.43, + "learning_rate": 4.595592363840539e-05, + "loss": 0.0746, + "step": 17290 + }, + { + "epoch": 2.43, + "learning_rate": 4.595545573647764e-05, + "loss": 0.0646, + "step": 17292 + }, + { + "epoch": 2.43, + "learning_rate": 4.595498783454988e-05, + "loss": 0.0576, + "step": 17294 + }, + { + "epoch": 2.43, + "learning_rate": 4.5954519932622124e-05, + "loss": 0.0434, + "step": 17296 + }, + { + "epoch": 2.43, + "learning_rate": 4.595405203069437e-05, + "loss": 0.0592, + "step": 17298 + }, + { + "epoch": 2.43, + "learning_rate": 4.5953584128766616e-05, + "loss": 0.0621, + "step": 17300 + }, + { + "epoch": 2.43, + "learning_rate": 4.5953116226838855e-05, + "loss": 0.0599, + "step": 17302 + }, + { + "epoch": 2.43, + "learning_rate": 4.59526483249111e-05, + "loss": 0.0931, + "step": 17304 + }, + { + "epoch": 2.43, + "learning_rate": 4.595218042298334e-05, + "loss": 0.0755, + "step": 17306 + }, + { + "epoch": 2.43, + "learning_rate": 4.595171252105559e-05, + "loss": 0.0653, + "step": 17308 + }, + { + "epoch": 2.43, + "learning_rate": 4.595124461912783e-05, + "loss": 0.0642, + "step": 17310 + }, + { + "epoch": 2.43, + "learning_rate": 4.595077671720008e-05, + "loss": 0.0703, + "step": 17312 + }, + { + "epoch": 2.43, + "learning_rate": 4.595030881527232e-05, + "loss": 0.0717, + "step": 17314 + }, + { + "epoch": 2.43, + "learning_rate": 4.594984091334457e-05, + "loss": 0.0565, + "step": 17316 + }, + { + "epoch": 2.43, + "learning_rate": 4.594937301141681e-05, + "loss": 0.066, + "step": 17318 + }, + { + "epoch": 2.43, + "learning_rate": 4.5948905109489054e-05, + "loss": 0.0815, + "step": 17320 + }, + { + "epoch": 2.43, + "learning_rate": 4.5948437207561294e-05, + "loss": 0.0583, + "step": 17322 + }, + { + "epoch": 2.43, + "learning_rate": 4.594796930563354e-05, + "loss": 0.0635, + "step": 17324 + }, + { + "epoch": 2.43, + "learning_rate": 4.5947501403705785e-05, + "loss": 0.0643, + "step": 17326 + }, + { + "epoch": 2.43, + "learning_rate": 4.594703350177803e-05, + "loss": 0.0534, + "step": 17328 + }, + { + "epoch": 2.43, + "learning_rate": 4.594656559985027e-05, + "loss": 0.0778, + "step": 17330 + }, + { + "epoch": 2.43, + "learning_rate": 4.5946097697922516e-05, + "loss": 0.0661, + "step": 17332 + }, + { + "epoch": 2.43, + "learning_rate": 4.594562979599476e-05, + "loss": 0.0731, + "step": 17334 + }, + { + "epoch": 2.43, + "learning_rate": 4.594516189406701e-05, + "loss": 0.0527, + "step": 17336 + }, + { + "epoch": 2.43, + "learning_rate": 4.594469399213925e-05, + "loss": 0.0644, + "step": 17338 + }, + { + "epoch": 2.43, + "learning_rate": 4.594422609021149e-05, + "loss": 0.0626, + "step": 17340 + }, + { + "epoch": 2.43, + "learning_rate": 4.594375818828374e-05, + "loss": 0.0721, + "step": 17342 + }, + { + "epoch": 2.43, + "learning_rate": 4.5943290286355985e-05, + "loss": 0.0726, + "step": 17344 + }, + { + "epoch": 2.43, + "learning_rate": 4.5942822384428224e-05, + "loss": 0.0613, + "step": 17346 + }, + { + "epoch": 2.44, + "learning_rate": 4.594235448250047e-05, + "loss": 0.0407, + "step": 17348 + }, + { + "epoch": 2.44, + "learning_rate": 4.5941886580572716e-05, + "loss": 0.0511, + "step": 17350 + }, + { + "epoch": 2.44, + "learning_rate": 4.594141867864496e-05, + "loss": 0.0644, + "step": 17352 + }, + { + "epoch": 2.44, + "learning_rate": 4.59409507767172e-05, + "loss": 0.0679, + "step": 17354 + }, + { + "epoch": 2.44, + "learning_rate": 4.594048287478945e-05, + "loss": 0.0597, + "step": 17356 + }, + { + "epoch": 2.44, + "learning_rate": 4.5940014972861686e-05, + "loss": 0.0555, + "step": 17358 + }, + { + "epoch": 2.44, + "learning_rate": 4.593954707093394e-05, + "loss": 0.0446, + "step": 17360 + }, + { + "epoch": 2.44, + "learning_rate": 4.593907916900618e-05, + "loss": 0.0539, + "step": 17362 + }, + { + "epoch": 2.44, + "learning_rate": 4.5938611267078424e-05, + "loss": 0.0805, + "step": 17364 + }, + { + "epoch": 2.44, + "learning_rate": 4.593814336515066e-05, + "loss": 0.0582, + "step": 17366 + }, + { + "epoch": 2.44, + "learning_rate": 4.5937675463222916e-05, + "loss": 0.0748, + "step": 17368 + }, + { + "epoch": 2.44, + "learning_rate": 4.5937207561295155e-05, + "loss": 0.0731, + "step": 17370 + }, + { + "epoch": 2.44, + "learning_rate": 4.59367396593674e-05, + "loss": 0.0769, + "step": 17372 + }, + { + "epoch": 2.44, + "learning_rate": 4.593627175743964e-05, + "loss": 0.0725, + "step": 17374 + }, + { + "epoch": 2.44, + "learning_rate": 4.5935803855511886e-05, + "loss": 0.0608, + "step": 17376 + }, + { + "epoch": 2.44, + "learning_rate": 4.593533595358413e-05, + "loss": 0.0682, + "step": 17378 + }, + { + "epoch": 2.44, + "learning_rate": 4.593486805165638e-05, + "loss": 0.0711, + "step": 17380 + }, + { + "epoch": 2.44, + "learning_rate": 4.593440014972862e-05, + "loss": 0.05, + "step": 17382 + }, + { + "epoch": 2.44, + "learning_rate": 4.593393224780086e-05, + "loss": 0.0613, + "step": 17384 + }, + { + "epoch": 2.44, + "learning_rate": 4.593346434587311e-05, + "loss": 0.0728, + "step": 17386 + }, + { + "epoch": 2.44, + "learning_rate": 4.5932996443945354e-05, + "loss": 0.0447, + "step": 17388 + }, + { + "epoch": 2.44, + "learning_rate": 4.5932528542017594e-05, + "loss": 0.0402, + "step": 17390 + }, + { + "epoch": 2.44, + "learning_rate": 4.593206064008984e-05, + "loss": 0.0498, + "step": 17392 + }, + { + "epoch": 2.44, + "learning_rate": 4.5931592738162085e-05, + "loss": 0.0553, + "step": 17394 + }, + { + "epoch": 2.44, + "learning_rate": 4.593112483623433e-05, + "loss": 0.0556, + "step": 17396 + }, + { + "epoch": 2.44, + "learning_rate": 4.593065693430657e-05, + "loss": 0.0628, + "step": 17398 + }, + { + "epoch": 2.44, + "learning_rate": 4.5930189032378816e-05, + "loss": 0.0647, + "step": 17400 + }, + { + "epoch": 2.44, + "learning_rate": 4.5929721130451055e-05, + "loss": 0.0459, + "step": 17402 + }, + { + "epoch": 2.44, + "learning_rate": 4.592925322852331e-05, + "loss": 0.0442, + "step": 17404 + }, + { + "epoch": 2.44, + "learning_rate": 4.592878532659555e-05, + "loss": 0.0455, + "step": 17406 + }, + { + "epoch": 2.44, + "learning_rate": 4.592831742466779e-05, + "loss": 0.0435, + "step": 17408 + }, + { + "epoch": 2.44, + "learning_rate": 4.592784952274003e-05, + "loss": 0.0642, + "step": 17410 + }, + { + "epoch": 2.44, + "learning_rate": 4.5927381620812285e-05, + "loss": 0.0517, + "step": 17412 + }, + { + "epoch": 2.44, + "learning_rate": 4.5926913718884524e-05, + "loss": 0.0332, + "step": 17414 + }, + { + "epoch": 2.44, + "learning_rate": 4.592644581695677e-05, + "loss": 0.0744, + "step": 17416 + }, + { + "epoch": 2.44, + "learning_rate": 4.592597791502901e-05, + "loss": 0.0637, + "step": 17418 + }, + { + "epoch": 2.45, + "learning_rate": 4.5925510013101255e-05, + "loss": 0.0749, + "step": 17420 + }, + { + "epoch": 2.45, + "learning_rate": 4.59250421111735e-05, + "loss": 0.0784, + "step": 17422 + }, + { + "epoch": 2.45, + "learning_rate": 4.592457420924575e-05, + "loss": 0.0613, + "step": 17424 + }, + { + "epoch": 2.45, + "learning_rate": 4.5924106307317986e-05, + "loss": 0.0633, + "step": 17426 + }, + { + "epoch": 2.45, + "learning_rate": 4.592363840539023e-05, + "loss": 0.0738, + "step": 17428 + }, + { + "epoch": 2.45, + "learning_rate": 4.592317050346248e-05, + "loss": 0.0553, + "step": 17430 + }, + { + "epoch": 2.45, + "learning_rate": 4.5922702601534724e-05, + "loss": 0.0489, + "step": 17432 + }, + { + "epoch": 2.45, + "learning_rate": 4.592223469960696e-05, + "loss": 0.0674, + "step": 17434 + }, + { + "epoch": 2.45, + "learning_rate": 4.592176679767921e-05, + "loss": 0.0708, + "step": 17436 + }, + { + "epoch": 2.45, + "learning_rate": 4.5921298895751455e-05, + "loss": 0.0416, + "step": 17438 + }, + { + "epoch": 2.45, + "learning_rate": 4.59208309938237e-05, + "loss": 0.0772, + "step": 17440 + }, + { + "epoch": 2.45, + "learning_rate": 4.592036309189594e-05, + "loss": 0.0666, + "step": 17442 + }, + { + "epoch": 2.45, + "learning_rate": 4.5919895189968186e-05, + "loss": 0.0476, + "step": 17444 + }, + { + "epoch": 2.45, + "learning_rate": 4.591942728804043e-05, + "loss": 0.0762, + "step": 17446 + }, + { + "epoch": 2.45, + "learning_rate": 4.591895938611268e-05, + "loss": 0.0708, + "step": 17448 + }, + { + "epoch": 2.45, + "learning_rate": 4.591849148418492e-05, + "loss": 0.0836, + "step": 17450 + }, + { + "epoch": 2.45, + "learning_rate": 4.591802358225716e-05, + "loss": 0.0539, + "step": 17452 + }, + { + "epoch": 2.45, + "learning_rate": 4.59175556803294e-05, + "loss": 0.0663, + "step": 17454 + }, + { + "epoch": 2.45, + "learning_rate": 4.5917087778401654e-05, + "loss": 0.0752, + "step": 17456 + }, + { + "epoch": 2.45, + "learning_rate": 4.5916619876473894e-05, + "loss": 0.0744, + "step": 17458 + }, + { + "epoch": 2.45, + "learning_rate": 4.591615197454614e-05, + "loss": 0.0574, + "step": 17460 + }, + { + "epoch": 2.45, + "learning_rate": 4.591568407261838e-05, + "loss": 0.0496, + "step": 17462 + }, + { + "epoch": 2.45, + "learning_rate": 4.591521617069063e-05, + "loss": 0.0725, + "step": 17464 + }, + { + "epoch": 2.45, + "learning_rate": 4.591474826876287e-05, + "loss": 0.0516, + "step": 17466 + }, + { + "epoch": 2.45, + "learning_rate": 4.5914280366835116e-05, + "loss": 0.0559, + "step": 17468 + }, + { + "epoch": 2.45, + "learning_rate": 4.5913812464907355e-05, + "loss": 0.0705, + "step": 17470 + }, + { + "epoch": 2.45, + "learning_rate": 4.59133445629796e-05, + "loss": 0.0542, + "step": 17472 + }, + { + "epoch": 2.45, + "learning_rate": 4.591287666105185e-05, + "loss": 0.088, + "step": 17474 + }, + { + "epoch": 2.45, + "learning_rate": 4.591240875912409e-05, + "loss": 0.0651, + "step": 17476 + }, + { + "epoch": 2.45, + "learning_rate": 4.591194085719633e-05, + "loss": 0.0428, + "step": 17478 + }, + { + "epoch": 2.45, + "learning_rate": 4.591147295526858e-05, + "loss": 0.0498, + "step": 17480 + }, + { + "epoch": 2.45, + "learning_rate": 4.5911005053340824e-05, + "loss": 0.0568, + "step": 17482 + }, + { + "epoch": 2.45, + "learning_rate": 4.591053715141306e-05, + "loss": 0.0718, + "step": 17484 + }, + { + "epoch": 2.45, + "learning_rate": 4.591006924948531e-05, + "loss": 0.0672, + "step": 17486 + }, + { + "epoch": 2.45, + "learning_rate": 4.590960134755755e-05, + "loss": 0.0563, + "step": 17488 + }, + { + "epoch": 2.46, + "learning_rate": 4.59091334456298e-05, + "loss": 0.0661, + "step": 17490 + }, + { + "epoch": 2.46, + "learning_rate": 4.590866554370204e-05, + "loss": 0.0686, + "step": 17492 + }, + { + "epoch": 2.46, + "learning_rate": 4.5908197641774286e-05, + "loss": 0.0484, + "step": 17494 + }, + { + "epoch": 2.46, + "learning_rate": 4.5907729739846525e-05, + "loss": 0.0558, + "step": 17496 + }, + { + "epoch": 2.46, + "learning_rate": 4.590726183791878e-05, + "loss": 0.0628, + "step": 17498 + }, + { + "epoch": 2.46, + "learning_rate": 4.590679393599102e-05, + "loss": 0.0566, + "step": 17500 + }, + { + "epoch": 2.46, + "learning_rate": 4.590632603406326e-05, + "loss": 0.0517, + "step": 17502 + }, + { + "epoch": 2.46, + "learning_rate": 4.59058581321355e-05, + "loss": 0.0612, + "step": 17504 + }, + { + "epoch": 2.46, + "learning_rate": 4.590539023020775e-05, + "loss": 0.0634, + "step": 17506 + }, + { + "epoch": 2.46, + "learning_rate": 4.5904922328279994e-05, + "loss": 0.049, + "step": 17508 + }, + { + "epoch": 2.46, + "learning_rate": 4.590445442635224e-05, + "loss": 0.0839, + "step": 17510 + }, + { + "epoch": 2.46, + "learning_rate": 4.590398652442448e-05, + "loss": 0.0552, + "step": 17512 + }, + { + "epoch": 2.46, + "learning_rate": 4.5903518622496725e-05, + "loss": 0.0704, + "step": 17514 + }, + { + "epoch": 2.46, + "learning_rate": 4.590305072056897e-05, + "loss": 0.0565, + "step": 17516 + }, + { + "epoch": 2.46, + "learning_rate": 4.590258281864122e-05, + "loss": 0.0716, + "step": 17518 + }, + { + "epoch": 2.46, + "learning_rate": 4.5902114916713456e-05, + "loss": 0.0609, + "step": 17520 + }, + { + "epoch": 2.46, + "learning_rate": 4.59016470147857e-05, + "loss": 0.0788, + "step": 17522 + }, + { + "epoch": 2.46, + "learning_rate": 4.590117911285795e-05, + "loss": 0.058, + "step": 17524 + }, + { + "epoch": 2.46, + "learning_rate": 4.5900711210930194e-05, + "loss": 0.0572, + "step": 17526 + }, + { + "epoch": 2.46, + "learning_rate": 4.590024330900243e-05, + "loss": 0.0633, + "step": 17528 + }, + { + "epoch": 2.46, + "learning_rate": 4.589977540707468e-05, + "loss": 0.086, + "step": 17530 + }, + { + "epoch": 2.46, + "learning_rate": 4.5899307505146925e-05, + "loss": 0.0699, + "step": 17532 + }, + { + "epoch": 2.46, + "learning_rate": 4.589883960321917e-05, + "loss": 0.1082, + "step": 17534 + }, + { + "epoch": 2.46, + "learning_rate": 4.589837170129141e-05, + "loss": 0.0465, + "step": 17536 + }, + { + "epoch": 2.46, + "learning_rate": 4.5897903799363655e-05, + "loss": 0.0599, + "step": 17538 + }, + { + "epoch": 2.46, + "learning_rate": 4.5897435897435895e-05, + "loss": 0.0774, + "step": 17540 + }, + { + "epoch": 2.46, + "learning_rate": 4.589696799550815e-05, + "loss": 0.0616, + "step": 17542 + }, + { + "epoch": 2.46, + "learning_rate": 4.5896500093580386e-05, + "loss": 0.0889, + "step": 17544 + }, + { + "epoch": 2.46, + "learning_rate": 4.589603219165263e-05, + "loss": 0.0605, + "step": 17546 + }, + { + "epoch": 2.46, + "learning_rate": 4.589556428972487e-05, + "loss": 0.0632, + "step": 17548 + }, + { + "epoch": 2.46, + "learning_rate": 4.589509638779712e-05, + "loss": 0.0577, + "step": 17550 + }, + { + "epoch": 2.46, + "learning_rate": 4.589462848586936e-05, + "loss": 0.0548, + "step": 17552 + }, + { + "epoch": 2.46, + "learning_rate": 4.589416058394161e-05, + "loss": 0.0549, + "step": 17554 + }, + { + "epoch": 2.46, + "learning_rate": 4.589369268201385e-05, + "loss": 0.0402, + "step": 17556 + }, + { + "epoch": 2.46, + "learning_rate": 4.5893224780086094e-05, + "loss": 0.0613, + "step": 17558 + }, + { + "epoch": 2.46, + "learning_rate": 4.589275687815834e-05, + "loss": 0.076, + "step": 17560 + }, + { + "epoch": 2.47, + "learning_rate": 4.5892288976230586e-05, + "loss": 0.0689, + "step": 17562 + }, + { + "epoch": 2.47, + "learning_rate": 4.5891821074302825e-05, + "loss": 0.0634, + "step": 17564 + }, + { + "epoch": 2.47, + "learning_rate": 4.589135317237507e-05, + "loss": 0.0766, + "step": 17566 + }, + { + "epoch": 2.47, + "learning_rate": 4.589088527044732e-05, + "loss": 0.0475, + "step": 17568 + }, + { + "epoch": 2.47, + "learning_rate": 4.589041736851956e-05, + "loss": 0.0632, + "step": 17570 + }, + { + "epoch": 2.47, + "learning_rate": 4.58899494665918e-05, + "loss": 0.0821, + "step": 17572 + }, + { + "epoch": 2.47, + "learning_rate": 4.588948156466405e-05, + "loss": 0.053, + "step": 17574 + }, + { + "epoch": 2.47, + "learning_rate": 4.5889013662736294e-05, + "loss": 0.0832, + "step": 17576 + }, + { + "epoch": 2.47, + "learning_rate": 4.588854576080854e-05, + "loss": 0.0941, + "step": 17578 + }, + { + "epoch": 2.47, + "learning_rate": 4.588807785888078e-05, + "loss": 0.0644, + "step": 17580 + }, + { + "epoch": 2.47, + "learning_rate": 4.5887609956953025e-05, + "loss": 0.0533, + "step": 17582 + }, + { + "epoch": 2.47, + "learning_rate": 4.5887142055025264e-05, + "loss": 0.0699, + "step": 17584 + }, + { + "epoch": 2.47, + "learning_rate": 4.588667415309752e-05, + "loss": 0.064, + "step": 17586 + }, + { + "epoch": 2.47, + "learning_rate": 4.5886206251169756e-05, + "loss": 0.0677, + "step": 17588 + }, + { + "epoch": 2.47, + "learning_rate": 4.5885738349242e-05, + "loss": 0.0468, + "step": 17590 + }, + { + "epoch": 2.47, + "learning_rate": 4.588527044731424e-05, + "loss": 0.0669, + "step": 17592 + }, + { + "epoch": 2.47, + "learning_rate": 4.5884802545386494e-05, + "loss": 0.0693, + "step": 17594 + }, + { + "epoch": 2.47, + "learning_rate": 4.588433464345873e-05, + "loss": 0.0475, + "step": 17596 + }, + { + "epoch": 2.47, + "learning_rate": 4.588386674153098e-05, + "loss": 0.0922, + "step": 17598 + }, + { + "epoch": 2.47, + "learning_rate": 4.588339883960322e-05, + "loss": 0.0714, + "step": 17600 + }, + { + "epoch": 2.47, + "learning_rate": 4.5882930937675464e-05, + "loss": 0.0506, + "step": 17602 + }, + { + "epoch": 2.47, + "learning_rate": 4.588246303574771e-05, + "loss": 0.056, + "step": 17604 + }, + { + "epoch": 2.47, + "learning_rate": 4.5881995133819956e-05, + "loss": 0.0527, + "step": 17606 + }, + { + "epoch": 2.47, + "learning_rate": 4.5881527231892195e-05, + "loss": 0.0761, + "step": 17608 + }, + { + "epoch": 2.47, + "learning_rate": 4.588105932996444e-05, + "loss": 0.0497, + "step": 17610 + }, + { + "epoch": 2.47, + "learning_rate": 4.5880591428036686e-05, + "loss": 0.0687, + "step": 17612 + }, + { + "epoch": 2.47, + "learning_rate": 4.588012352610893e-05, + "loss": 0.0524, + "step": 17614 + }, + { + "epoch": 2.47, + "learning_rate": 4.587965562418117e-05, + "loss": 0.0633, + "step": 17616 + }, + { + "epoch": 2.47, + "learning_rate": 4.587918772225342e-05, + "loss": 0.0666, + "step": 17618 + }, + { + "epoch": 2.47, + "learning_rate": 4.587871982032566e-05, + "loss": 0.0597, + "step": 17620 + }, + { + "epoch": 2.47, + "learning_rate": 4.587825191839791e-05, + "loss": 0.0489, + "step": 17622 + }, + { + "epoch": 2.47, + "learning_rate": 4.587778401647015e-05, + "loss": 0.0527, + "step": 17624 + }, + { + "epoch": 2.47, + "learning_rate": 4.5877316114542394e-05, + "loss": 0.0649, + "step": 17626 + }, + { + "epoch": 2.47, + "learning_rate": 4.587684821261464e-05, + "loss": 0.0676, + "step": 17628 + }, + { + "epoch": 2.47, + "learning_rate": 4.5876380310686886e-05, + "loss": 0.0773, + "step": 17630 + }, + { + "epoch": 2.48, + "learning_rate": 4.5875912408759125e-05, + "loss": 0.0742, + "step": 17632 + }, + { + "epoch": 2.48, + "learning_rate": 4.587544450683137e-05, + "loss": 0.0586, + "step": 17634 + }, + { + "epoch": 2.48, + "learning_rate": 4.587497660490361e-05, + "loss": 0.0665, + "step": 17636 + }, + { + "epoch": 2.48, + "learning_rate": 4.587450870297586e-05, + "loss": 0.061, + "step": 17638 + }, + { + "epoch": 2.48, + "learning_rate": 4.58740408010481e-05, + "loss": 0.0697, + "step": 17640 + }, + { + "epoch": 2.48, + "learning_rate": 4.587357289912035e-05, + "loss": 0.0759, + "step": 17642 + }, + { + "epoch": 2.48, + "learning_rate": 4.587310499719259e-05, + "loss": 0.078, + "step": 17644 + }, + { + "epoch": 2.48, + "learning_rate": 4.587263709526484e-05, + "loss": 0.0636, + "step": 17646 + }, + { + "epoch": 2.48, + "learning_rate": 4.587216919333708e-05, + "loss": 0.0553, + "step": 17648 + }, + { + "epoch": 2.48, + "learning_rate": 4.5871701291409325e-05, + "loss": 0.0655, + "step": 17650 + }, + { + "epoch": 2.48, + "learning_rate": 4.5871233389481564e-05, + "loss": 0.0632, + "step": 17652 + }, + { + "epoch": 2.48, + "learning_rate": 4.587076548755381e-05, + "loss": 0.0642, + "step": 17654 + }, + { + "epoch": 2.48, + "learning_rate": 4.5870297585626056e-05, + "loss": 0.0557, + "step": 17656 + }, + { + "epoch": 2.48, + "learning_rate": 4.58698296836983e-05, + "loss": 0.0645, + "step": 17658 + }, + { + "epoch": 2.48, + "learning_rate": 4.586936178177054e-05, + "loss": 0.0706, + "step": 17660 + }, + { + "epoch": 2.48, + "learning_rate": 4.586889387984279e-05, + "loss": 0.0511, + "step": 17662 + }, + { + "epoch": 2.48, + "learning_rate": 4.586842597791503e-05, + "loss": 0.059, + "step": 17664 + }, + { + "epoch": 2.48, + "learning_rate": 4.586795807598728e-05, + "loss": 0.0614, + "step": 17666 + }, + { + "epoch": 2.48, + "learning_rate": 4.586749017405952e-05, + "loss": 0.046, + "step": 17668 + }, + { + "epoch": 2.48, + "learning_rate": 4.5867022272131764e-05, + "loss": 0.0649, + "step": 17670 + }, + { + "epoch": 2.48, + "learning_rate": 4.586655437020401e-05, + "loss": 0.0633, + "step": 17672 + }, + { + "epoch": 2.48, + "learning_rate": 4.5866086468276256e-05, + "loss": 0.0494, + "step": 17674 + }, + { + "epoch": 2.48, + "learning_rate": 4.5865618566348495e-05, + "loss": 0.0504, + "step": 17676 + }, + { + "epoch": 2.48, + "learning_rate": 4.586515066442074e-05, + "loss": 0.0646, + "step": 17678 + }, + { + "epoch": 2.48, + "learning_rate": 4.5864682762492986e-05, + "loss": 0.0582, + "step": 17680 + }, + { + "epoch": 2.48, + "learning_rate": 4.586421486056523e-05, + "loss": 0.0574, + "step": 17682 + }, + { + "epoch": 2.48, + "learning_rate": 4.586374695863747e-05, + "loss": 0.0904, + "step": 17684 + }, + { + "epoch": 2.48, + "learning_rate": 4.586327905670972e-05, + "loss": 0.0734, + "step": 17686 + }, + { + "epoch": 2.48, + "learning_rate": 4.5862811154781957e-05, + "loss": 0.0562, + "step": 17688 + }, + { + "epoch": 2.48, + "learning_rate": 4.586234325285421e-05, + "loss": 0.0643, + "step": 17690 + }, + { + "epoch": 2.48, + "learning_rate": 4.586187535092645e-05, + "loss": 0.0668, + "step": 17692 + }, + { + "epoch": 2.48, + "learning_rate": 4.5861407448998694e-05, + "loss": 0.0445, + "step": 17694 + }, + { + "epoch": 2.48, + "learning_rate": 4.5860939547070933e-05, + "loss": 0.071, + "step": 17696 + }, + { + "epoch": 2.48, + "learning_rate": 4.586047164514318e-05, + "loss": 0.0678, + "step": 17698 + }, + { + "epoch": 2.48, + "learning_rate": 4.5860003743215425e-05, + "loss": 0.0446, + "step": 17700 + }, + { + "epoch": 2.48, + "learning_rate": 4.585953584128767e-05, + "loss": 0.0642, + "step": 17702 + }, + { + "epoch": 2.49, + "learning_rate": 4.585906793935991e-05, + "loss": 0.0502, + "step": 17704 + }, + { + "epoch": 2.49, + "learning_rate": 4.5858600037432156e-05, + "loss": 0.0481, + "step": 17706 + }, + { + "epoch": 2.49, + "learning_rate": 4.58581321355044e-05, + "loss": 0.0736, + "step": 17708 + }, + { + "epoch": 2.49, + "learning_rate": 4.585766423357665e-05, + "loss": 0.0674, + "step": 17710 + }, + { + "epoch": 2.49, + "learning_rate": 4.585719633164889e-05, + "loss": 0.0467, + "step": 17712 + }, + { + "epoch": 2.49, + "learning_rate": 4.585672842972113e-05, + "loss": 0.0636, + "step": 17714 + }, + { + "epoch": 2.49, + "learning_rate": 4.585626052779338e-05, + "loss": 0.0785, + "step": 17716 + }, + { + "epoch": 2.49, + "learning_rate": 4.5855792625865625e-05, + "loss": 0.0584, + "step": 17718 + }, + { + "epoch": 2.49, + "learning_rate": 4.5855324723937864e-05, + "loss": 0.0388, + "step": 17720 + }, + { + "epoch": 2.49, + "learning_rate": 4.585485682201011e-05, + "loss": 0.0692, + "step": 17722 + }, + { + "epoch": 2.49, + "learning_rate": 4.5854388920082356e-05, + "loss": 0.0619, + "step": 17724 + }, + { + "epoch": 2.49, + "learning_rate": 4.58539210181546e-05, + "loss": 0.0624, + "step": 17726 + }, + { + "epoch": 2.49, + "learning_rate": 4.585345311622684e-05, + "loss": 0.049, + "step": 17728 + }, + { + "epoch": 2.49, + "learning_rate": 4.585298521429908e-05, + "loss": 0.0452, + "step": 17730 + }, + { + "epoch": 2.49, + "learning_rate": 4.5852517312371326e-05, + "loss": 0.0644, + "step": 17732 + }, + { + "epoch": 2.49, + "learning_rate": 4.585204941044357e-05, + "loss": 0.0575, + "step": 17734 + }, + { + "epoch": 2.49, + "learning_rate": 4.585158150851582e-05, + "loss": 0.0643, + "step": 17736 + }, + { + "epoch": 2.49, + "learning_rate": 4.585111360658806e-05, + "loss": 0.0657, + "step": 17738 + }, + { + "epoch": 2.49, + "learning_rate": 4.58506457046603e-05, + "loss": 0.0603, + "step": 17740 + }, + { + "epoch": 2.49, + "learning_rate": 4.585017780273255e-05, + "loss": 0.0641, + "step": 17742 + }, + { + "epoch": 2.49, + "learning_rate": 4.5849709900804795e-05, + "loss": 0.0647, + "step": 17744 + }, + { + "epoch": 2.49, + "learning_rate": 4.5849241998877034e-05, + "loss": 0.045, + "step": 17746 + }, + { + "epoch": 2.49, + "learning_rate": 4.584877409694928e-05, + "loss": 0.0628, + "step": 17748 + }, + { + "epoch": 2.49, + "learning_rate": 4.5848306195021526e-05, + "loss": 0.0619, + "step": 17750 + }, + { + "epoch": 2.49, + "learning_rate": 4.584783829309377e-05, + "loss": 0.0657, + "step": 17752 + }, + { + "epoch": 2.49, + "learning_rate": 4.584737039116601e-05, + "loss": 0.0645, + "step": 17754 + }, + { + "epoch": 2.49, + "learning_rate": 4.5846902489238257e-05, + "loss": 0.0772, + "step": 17756 + }, + { + "epoch": 2.49, + "learning_rate": 4.58464345873105e-05, + "loss": 0.0545, + "step": 17758 + }, + { + "epoch": 2.49, + "learning_rate": 4.584596668538275e-05, + "loss": 0.0629, + "step": 17760 + }, + { + "epoch": 2.49, + "learning_rate": 4.584549878345499e-05, + "loss": 0.088, + "step": 17762 + }, + { + "epoch": 2.49, + "learning_rate": 4.5845030881527233e-05, + "loss": 0.0606, + "step": 17764 + }, + { + "epoch": 2.49, + "learning_rate": 4.584456297959947e-05, + "loss": 0.0589, + "step": 17766 + }, + { + "epoch": 2.49, + "learning_rate": 4.5844095077671725e-05, + "loss": 0.057, + "step": 17768 + }, + { + "epoch": 2.49, + "learning_rate": 4.5843627175743964e-05, + "loss": 0.0668, + "step": 17770 + }, + { + "epoch": 2.49, + "learning_rate": 4.584315927381621e-05, + "loss": 0.0618, + "step": 17772 + }, + { + "epoch": 2.49, + "learning_rate": 4.584269137188845e-05, + "loss": 0.0636, + "step": 17774 + }, + { + "epoch": 2.5, + "learning_rate": 4.58422234699607e-05, + "loss": 0.0617, + "step": 17776 + }, + { + "epoch": 2.5, + "learning_rate": 4.584175556803294e-05, + "loss": 0.0615, + "step": 17778 + }, + { + "epoch": 2.5, + "learning_rate": 4.584128766610519e-05, + "loss": 0.0678, + "step": 17780 + }, + { + "epoch": 2.5, + "learning_rate": 4.5840819764177426e-05, + "loss": 0.0594, + "step": 17782 + }, + { + "epoch": 2.5, + "learning_rate": 4.584035186224967e-05, + "loss": 0.051, + "step": 17784 + }, + { + "epoch": 2.5, + "learning_rate": 4.583988396032192e-05, + "loss": 0.0807, + "step": 17786 + }, + { + "epoch": 2.5, + "learning_rate": 4.5839416058394164e-05, + "loss": 0.0593, + "step": 17788 + }, + { + "epoch": 2.5, + "learning_rate": 4.58389481564664e-05, + "loss": 0.0637, + "step": 17790 + }, + { + "epoch": 2.5, + "learning_rate": 4.583848025453865e-05, + "loss": 0.0318, + "step": 17792 + }, + { + "epoch": 2.5, + "learning_rate": 4.5838012352610895e-05, + "loss": 0.0564, + "step": 17794 + }, + { + "epoch": 2.5, + "learning_rate": 4.583754445068314e-05, + "loss": 0.0575, + "step": 17796 + }, + { + "epoch": 2.5, + "learning_rate": 4.583707654875538e-05, + "loss": 0.078, + "step": 17798 + }, + { + "epoch": 2.5, + "learning_rate": 4.5836608646827626e-05, + "loss": 0.0557, + "step": 17800 + }, + { + "epoch": 2.5, + "learning_rate": 4.583614074489987e-05, + "loss": 0.0446, + "step": 17802 + }, + { + "epoch": 2.5, + "learning_rate": 4.583567284297212e-05, + "loss": 0.0582, + "step": 17804 + }, + { + "epoch": 2.5, + "learning_rate": 4.583520494104436e-05, + "loss": 0.0591, + "step": 17806 + }, + { + "epoch": 2.5, + "learning_rate": 4.58347370391166e-05, + "loss": 0.0552, + "step": 17808 + }, + { + "epoch": 2.5, + "learning_rate": 4.583426913718885e-05, + "loss": 0.0943, + "step": 17810 + }, + { + "epoch": 2.5, + "learning_rate": 4.5833801235261095e-05, + "loss": 0.0671, + "step": 17812 + }, + { + "epoch": 2.5, + "learning_rate": 4.5833333333333334e-05, + "loss": 0.0914, + "step": 17814 + }, + { + "epoch": 2.5, + "learning_rate": 4.583286543140558e-05, + "loss": 0.0672, + "step": 17816 + }, + { + "epoch": 2.5, + "learning_rate": 4.583239752947782e-05, + "loss": 0.0605, + "step": 17818 + }, + { + "epoch": 2.5, + "learning_rate": 4.583192962755007e-05, + "loss": 0.0664, + "step": 17820 + }, + { + "epoch": 2.5, + "learning_rate": 4.583146172562231e-05, + "loss": 0.05, + "step": 17822 + }, + { + "epoch": 2.5, + "learning_rate": 4.583099382369456e-05, + "loss": 0.0583, + "step": 17824 + }, + { + "epoch": 2.5, + "learning_rate": 4.5830525921766796e-05, + "loss": 0.0592, + "step": 17826 + }, + { + "epoch": 2.5, + "learning_rate": 4.583005801983904e-05, + "loss": 0.0468, + "step": 17828 + }, + { + "epoch": 2.5, + "learning_rate": 4.582959011791129e-05, + "loss": 0.0662, + "step": 17830 + }, + { + "epoch": 2.5, + "learning_rate": 4.5829122215983533e-05, + "loss": 0.0739, + "step": 17832 + }, + { + "epoch": 2.5, + "learning_rate": 4.582865431405577e-05, + "loss": 0.0741, + "step": 17834 + }, + { + "epoch": 2.5, + "learning_rate": 4.582818641212802e-05, + "loss": 0.0662, + "step": 17836 + }, + { + "epoch": 2.5, + "learning_rate": 4.5827718510200264e-05, + "loss": 0.0741, + "step": 17838 + }, + { + "epoch": 2.5, + "learning_rate": 4.582725060827251e-05, + "loss": 0.085, + "step": 17840 + }, + { + "epoch": 2.5, + "learning_rate": 4.582678270634475e-05, + "loss": 0.0626, + "step": 17842 + }, + { + "epoch": 2.5, + "learning_rate": 4.5826314804416995e-05, + "loss": 0.0599, + "step": 17844 + }, + { + "epoch": 2.51, + "learning_rate": 4.582584690248924e-05, + "loss": 0.0551, + "step": 17846 + }, + { + "epoch": 2.51, + "learning_rate": 4.582537900056149e-05, + "loss": 0.058, + "step": 17848 + }, + { + "epoch": 2.51, + "learning_rate": 4.5824911098633726e-05, + "loss": 0.0534, + "step": 17850 + }, + { + "epoch": 2.51, + "learning_rate": 4.582444319670597e-05, + "loss": 0.05, + "step": 17852 + }, + { + "epoch": 2.51, + "learning_rate": 4.582397529477822e-05, + "loss": 0.0659, + "step": 17854 + }, + { + "epoch": 2.51, + "learning_rate": 4.5823507392850464e-05, + "loss": 0.0593, + "step": 17856 + }, + { + "epoch": 2.51, + "learning_rate": 4.58230394909227e-05, + "loss": 0.0534, + "step": 17858 + }, + { + "epoch": 2.51, + "learning_rate": 4.582257158899495e-05, + "loss": 0.072, + "step": 17860 + }, + { + "epoch": 2.51, + "learning_rate": 4.582210368706719e-05, + "loss": 0.0592, + "step": 17862 + }, + { + "epoch": 2.51, + "learning_rate": 4.582163578513944e-05, + "loss": 0.0706, + "step": 17864 + }, + { + "epoch": 2.51, + "learning_rate": 4.582116788321168e-05, + "loss": 0.0623, + "step": 17866 + }, + { + "epoch": 2.51, + "learning_rate": 4.5820699981283926e-05, + "loss": 0.0639, + "step": 17868 + }, + { + "epoch": 2.51, + "learning_rate": 4.5820232079356165e-05, + "loss": 0.0638, + "step": 17870 + }, + { + "epoch": 2.51, + "learning_rate": 4.581976417742842e-05, + "loss": 0.0713, + "step": 17872 + }, + { + "epoch": 2.51, + "learning_rate": 4.581929627550066e-05, + "loss": 0.0725, + "step": 17874 + }, + { + "epoch": 2.51, + "learning_rate": 4.58188283735729e-05, + "loss": 0.0629, + "step": 17876 + }, + { + "epoch": 2.51, + "learning_rate": 4.581836047164514e-05, + "loss": 0.0673, + "step": 17878 + }, + { + "epoch": 2.51, + "learning_rate": 4.581789256971739e-05, + "loss": 0.063, + "step": 17880 + }, + { + "epoch": 2.51, + "learning_rate": 4.5817424667789634e-05, + "loss": 0.0706, + "step": 17882 + }, + { + "epoch": 2.51, + "learning_rate": 4.581695676586188e-05, + "loss": 0.0729, + "step": 17884 + }, + { + "epoch": 2.51, + "learning_rate": 4.581648886393412e-05, + "loss": 0.0563, + "step": 17886 + }, + { + "epoch": 2.51, + "learning_rate": 4.5816020962006365e-05, + "loss": 0.058, + "step": 17888 + }, + { + "epoch": 2.51, + "learning_rate": 4.581555306007861e-05, + "loss": 0.0682, + "step": 17890 + }, + { + "epoch": 2.51, + "learning_rate": 4.581508515815086e-05, + "loss": 0.0483, + "step": 17892 + }, + { + "epoch": 2.51, + "learning_rate": 4.5814617256223096e-05, + "loss": 0.0651, + "step": 17894 + }, + { + "epoch": 2.51, + "learning_rate": 4.581414935429534e-05, + "loss": 0.0691, + "step": 17896 + }, + { + "epoch": 2.51, + "learning_rate": 4.581368145236759e-05, + "loss": 0.0664, + "step": 17898 + }, + { + "epoch": 2.51, + "learning_rate": 4.5813213550439834e-05, + "loss": 0.0722, + "step": 17900 + }, + { + "epoch": 2.51, + "learning_rate": 4.581274564851207e-05, + "loss": 0.0618, + "step": 17902 + }, + { + "epoch": 2.51, + "learning_rate": 4.581227774658432e-05, + "loss": 0.0641, + "step": 17904 + }, + { + "epoch": 2.51, + "learning_rate": 4.5811809844656564e-05, + "loss": 0.0549, + "step": 17906 + }, + { + "epoch": 2.51, + "learning_rate": 4.581134194272881e-05, + "loss": 0.0581, + "step": 17908 + }, + { + "epoch": 2.51, + "learning_rate": 4.581087404080105e-05, + "loss": 0.0466, + "step": 17910 + }, + { + "epoch": 2.51, + "learning_rate": 4.5810406138873295e-05, + "loss": 0.0632, + "step": 17912 + }, + { + "epoch": 2.51, + "learning_rate": 4.5809938236945535e-05, + "loss": 0.0846, + "step": 17914 + }, + { + "epoch": 2.51, + "learning_rate": 4.580947033501779e-05, + "loss": 0.0557, + "step": 17916 + }, + { + "epoch": 2.52, + "learning_rate": 4.5809002433090026e-05, + "loss": 0.0645, + "step": 17918 + }, + { + "epoch": 2.52, + "learning_rate": 4.580853453116227e-05, + "loss": 0.0547, + "step": 17920 + }, + { + "epoch": 2.52, + "learning_rate": 4.580806662923451e-05, + "loss": 0.083, + "step": 17922 + }, + { + "epoch": 2.52, + "learning_rate": 4.5807598727306764e-05, + "loss": 0.0525, + "step": 17924 + }, + { + "epoch": 2.52, + "learning_rate": 4.5807130825379e-05, + "loss": 0.0499, + "step": 17926 + }, + { + "epoch": 2.52, + "learning_rate": 4.580666292345125e-05, + "loss": 0.0718, + "step": 17928 + }, + { + "epoch": 2.52, + "learning_rate": 4.580619502152349e-05, + "loss": 0.0704, + "step": 17930 + }, + { + "epoch": 2.52, + "learning_rate": 4.5805727119595734e-05, + "loss": 0.055, + "step": 17932 + }, + { + "epoch": 2.52, + "learning_rate": 4.580525921766798e-05, + "loss": 0.1309, + "step": 17934 + }, + { + "epoch": 2.52, + "learning_rate": 4.5804791315740226e-05, + "loss": 0.099, + "step": 17936 + }, + { + "epoch": 2.52, + "learning_rate": 4.5804323413812465e-05, + "loss": 0.0566, + "step": 17938 + }, + { + "epoch": 2.52, + "learning_rate": 4.580385551188471e-05, + "loss": 0.0975, + "step": 17940 + }, + { + "epoch": 2.52, + "learning_rate": 4.580338760995696e-05, + "loss": 0.0441, + "step": 17942 + }, + { + "epoch": 2.52, + "learning_rate": 4.58029197080292e-05, + "loss": 0.0631, + "step": 17944 + }, + { + "epoch": 2.52, + "learning_rate": 4.580245180610144e-05, + "loss": 0.0603, + "step": 17946 + }, + { + "epoch": 2.52, + "learning_rate": 4.580198390417369e-05, + "loss": 0.0821, + "step": 17948 + }, + { + "epoch": 2.52, + "learning_rate": 4.5801516002245934e-05, + "loss": 0.0696, + "step": 17950 + }, + { + "epoch": 2.52, + "learning_rate": 4.580104810031818e-05, + "loss": 0.0596, + "step": 17952 + }, + { + "epoch": 2.52, + "learning_rate": 4.580058019839042e-05, + "loss": 0.0837, + "step": 17954 + }, + { + "epoch": 2.52, + "learning_rate": 4.5800112296462665e-05, + "loss": 0.0728, + "step": 17956 + }, + { + "epoch": 2.52, + "learning_rate": 4.579964439453491e-05, + "loss": 0.0573, + "step": 17958 + }, + { + "epoch": 2.52, + "learning_rate": 4.579917649260716e-05, + "loss": 0.0813, + "step": 17960 + }, + { + "epoch": 2.52, + "learning_rate": 4.5798708590679396e-05, + "loss": 0.0614, + "step": 17962 + }, + { + "epoch": 2.52, + "learning_rate": 4.579824068875164e-05, + "loss": 0.0495, + "step": 17964 + }, + { + "epoch": 2.52, + "learning_rate": 4.579777278682388e-05, + "loss": 0.0763, + "step": 17966 + }, + { + "epoch": 2.52, + "learning_rate": 4.5797304884896134e-05, + "loss": 0.0801, + "step": 17968 + }, + { + "epoch": 2.52, + "learning_rate": 4.579683698296837e-05, + "loss": 0.0909, + "step": 17970 + }, + { + "epoch": 2.52, + "learning_rate": 4.579636908104062e-05, + "loss": 0.0803, + "step": 17972 + }, + { + "epoch": 2.52, + "learning_rate": 4.579590117911286e-05, + "loss": 0.0755, + "step": 17974 + }, + { + "epoch": 2.52, + "learning_rate": 4.5795433277185104e-05, + "loss": 0.0455, + "step": 17976 + }, + { + "epoch": 2.52, + "learning_rate": 4.579496537525735e-05, + "loss": 0.0625, + "step": 17978 + }, + { + "epoch": 2.52, + "learning_rate": 4.5794497473329595e-05, + "loss": 0.0573, + "step": 17980 + }, + { + "epoch": 2.52, + "learning_rate": 4.5794029571401835e-05, + "loss": 0.0725, + "step": 17982 + }, + { + "epoch": 2.52, + "learning_rate": 4.579356166947408e-05, + "loss": 0.0406, + "step": 17984 + }, + { + "epoch": 2.52, + "learning_rate": 4.5793093767546326e-05, + "loss": 0.0562, + "step": 17986 + }, + { + "epoch": 2.52, + "learning_rate": 4.5792625865618566e-05, + "loss": 0.0727, + "step": 17988 + }, + { + "epoch": 2.53, + "learning_rate": 4.579215796369081e-05, + "loss": 0.074, + "step": 17990 + }, + { + "epoch": 2.53, + "learning_rate": 4.579169006176306e-05, + "loss": 0.0805, + "step": 17992 + }, + { + "epoch": 2.53, + "learning_rate": 4.57912221598353e-05, + "loss": 0.0646, + "step": 17994 + }, + { + "epoch": 2.53, + "learning_rate": 4.579075425790754e-05, + "loss": 0.0542, + "step": 17996 + }, + { + "epoch": 2.53, + "learning_rate": 4.579028635597979e-05, + "loss": 0.0552, + "step": 17998 + }, + { + "epoch": 2.53, + "learning_rate": 4.578981845405203e-05, + "loss": 0.0685, + "step": 18000 + }, + { + "epoch": 2.53, + "eval_gen_len": 29.9828, + "eval_loss": 1.0630050897598267, + "eval_meteor": 0.0517, + "eval_runtime": 14.711, + "eval_samples_per_second": 3.943, + "eval_steps_per_second": 0.544, + "step": 18000 + }, + { + "epoch": 2.53, + "learning_rate": 4.578935055212428e-05, + "loss": 0.0574, + "step": 18002 + }, + { + "epoch": 2.53, + "learning_rate": 4.578888265019652e-05, + "loss": 0.0585, + "step": 18004 + }, + { + "epoch": 2.53, + "learning_rate": 4.5788414748268765e-05, + "loss": 0.0534, + "step": 18006 + }, + { + "epoch": 2.53, + "learning_rate": 4.5787946846341004e-05, + "loss": 0.0853, + "step": 18008 + }, + { + "epoch": 2.53, + "learning_rate": 4.578747894441325e-05, + "loss": 0.0485, + "step": 18010 + }, + { + "epoch": 2.53, + "learning_rate": 4.5787011042485496e-05, + "loss": 0.0588, + "step": 18012 + }, + { + "epoch": 2.53, + "learning_rate": 4.578654314055774e-05, + "loss": 0.0653, + "step": 18014 + }, + { + "epoch": 2.53, + "learning_rate": 4.578607523862998e-05, + "loss": 0.0611, + "step": 18016 + }, + { + "epoch": 2.53, + "learning_rate": 4.578560733670223e-05, + "loss": 0.0585, + "step": 18018 + }, + { + "epoch": 2.53, + "learning_rate": 4.578513943477447e-05, + "loss": 0.0494, + "step": 18020 + }, + { + "epoch": 2.53, + "learning_rate": 4.578467153284672e-05, + "loss": 0.0579, + "step": 18022 + }, + { + "epoch": 2.53, + "learning_rate": 4.578420363091896e-05, + "loss": 0.0788, + "step": 18024 + }, + { + "epoch": 2.53, + "learning_rate": 4.5783735728991204e-05, + "loss": 0.0835, + "step": 18026 + }, + { + "epoch": 2.53, + "learning_rate": 4.578326782706345e-05, + "loss": 0.0799, + "step": 18028 + }, + { + "epoch": 2.53, + "learning_rate": 4.5782799925135696e-05, + "loss": 0.0779, + "step": 18030 + }, + { + "epoch": 2.53, + "learning_rate": 4.5782332023207935e-05, + "loss": 0.0508, + "step": 18032 + }, + { + "epoch": 2.53, + "learning_rate": 4.578186412128018e-05, + "loss": 0.0542, + "step": 18034 + }, + { + "epoch": 2.53, + "learning_rate": 4.578139621935243e-05, + "loss": 0.0572, + "step": 18036 + }, + { + "epoch": 2.53, + "learning_rate": 4.578092831742467e-05, + "loss": 0.0846, + "step": 18038 + }, + { + "epoch": 2.53, + "learning_rate": 4.578046041549691e-05, + "loss": 0.1055, + "step": 18040 + }, + { + "epoch": 2.53, + "learning_rate": 4.577999251356916e-05, + "loss": 0.0624, + "step": 18042 + }, + { + "epoch": 2.53, + "learning_rate": 4.57795246116414e-05, + "loss": 0.0574, + "step": 18044 + }, + { + "epoch": 2.53, + "learning_rate": 4.577905670971365e-05, + "loss": 0.0669, + "step": 18046 + }, + { + "epoch": 2.53, + "learning_rate": 4.577858880778589e-05, + "loss": 0.0626, + "step": 18048 + }, + { + "epoch": 2.53, + "learning_rate": 4.5778120905858135e-05, + "loss": 0.0663, + "step": 18050 + }, + { + "epoch": 2.53, + "learning_rate": 4.5777653003930374e-05, + "loss": 0.061, + "step": 18052 + }, + { + "epoch": 2.53, + "learning_rate": 4.5777185102002626e-05, + "loss": 0.0967, + "step": 18054 + }, + { + "epoch": 2.53, + "learning_rate": 4.5776717200074866e-05, + "loss": 0.076, + "step": 18056 + }, + { + "epoch": 2.53, + "learning_rate": 4.577624929814711e-05, + "loss": 0.0503, + "step": 18058 + }, + { + "epoch": 2.54, + "learning_rate": 4.577578139621935e-05, + "loss": 0.0614, + "step": 18060 + }, + { + "epoch": 2.54, + "learning_rate": 4.5775313494291597e-05, + "loss": 0.0655, + "step": 18062 + }, + { + "epoch": 2.54, + "learning_rate": 4.577484559236384e-05, + "loss": 0.0796, + "step": 18064 + }, + { + "epoch": 2.54, + "learning_rate": 4.577437769043609e-05, + "loss": 0.0709, + "step": 18066 + }, + { + "epoch": 2.54, + "learning_rate": 4.577390978850833e-05, + "loss": 0.0553, + "step": 18068 + }, + { + "epoch": 2.54, + "learning_rate": 4.577344188658057e-05, + "loss": 0.064, + "step": 18070 + }, + { + "epoch": 2.54, + "learning_rate": 4.577297398465282e-05, + "loss": 0.0773, + "step": 18072 + }, + { + "epoch": 2.54, + "learning_rate": 4.5772506082725065e-05, + "loss": 0.0511, + "step": 18074 + }, + { + "epoch": 2.54, + "learning_rate": 4.5772038180797304e-05, + "loss": 0.0555, + "step": 18076 + }, + { + "epoch": 2.54, + "learning_rate": 4.577157027886955e-05, + "loss": 0.0628, + "step": 18078 + }, + { + "epoch": 2.54, + "learning_rate": 4.5771102376941796e-05, + "loss": 0.0613, + "step": 18080 + }, + { + "epoch": 2.54, + "learning_rate": 4.577063447501404e-05, + "loss": 0.0681, + "step": 18082 + }, + { + "epoch": 2.54, + "learning_rate": 4.577016657308628e-05, + "loss": 0.0565, + "step": 18084 + }, + { + "epoch": 2.54, + "learning_rate": 4.576969867115853e-05, + "loss": 0.0705, + "step": 18086 + }, + { + "epoch": 2.54, + "learning_rate": 4.576923076923077e-05, + "loss": 0.0486, + "step": 18088 + }, + { + "epoch": 2.54, + "learning_rate": 4.576876286730302e-05, + "loss": 0.0643, + "step": 18090 + }, + { + "epoch": 2.54, + "learning_rate": 4.576829496537526e-05, + "loss": 0.0604, + "step": 18092 + }, + { + "epoch": 2.54, + "learning_rate": 4.5767827063447504e-05, + "loss": 0.0679, + "step": 18094 + }, + { + "epoch": 2.54, + "learning_rate": 4.576735916151974e-05, + "loss": 0.0723, + "step": 18096 + }, + { + "epoch": 2.54, + "learning_rate": 4.5766891259591996e-05, + "loss": 0.0516, + "step": 18098 + }, + { + "epoch": 2.54, + "learning_rate": 4.5766423357664235e-05, + "loss": 0.0815, + "step": 18100 + }, + { + "epoch": 2.54, + "learning_rate": 4.576595545573648e-05, + "loss": 0.0627, + "step": 18102 + }, + { + "epoch": 2.54, + "learning_rate": 4.576548755380872e-05, + "loss": 0.0669, + "step": 18104 + }, + { + "epoch": 2.54, + "learning_rate": 4.5765019651880966e-05, + "loss": 0.0467, + "step": 18106 + }, + { + "epoch": 2.54, + "learning_rate": 4.576455174995321e-05, + "loss": 0.0538, + "step": 18108 + }, + { + "epoch": 2.54, + "learning_rate": 4.576408384802546e-05, + "loss": 0.0598, + "step": 18110 + }, + { + "epoch": 2.54, + "learning_rate": 4.57636159460977e-05, + "loss": 0.0831, + "step": 18112 + }, + { + "epoch": 2.54, + "learning_rate": 4.576314804416994e-05, + "loss": 0.0829, + "step": 18114 + }, + { + "epoch": 2.54, + "learning_rate": 4.576268014224219e-05, + "loss": 0.0795, + "step": 18116 + }, + { + "epoch": 2.54, + "learning_rate": 4.5762212240314435e-05, + "loss": 0.0695, + "step": 18118 + }, + { + "epoch": 2.54, + "learning_rate": 4.5761744338386674e-05, + "loss": 0.0852, + "step": 18120 + }, + { + "epoch": 2.54, + "learning_rate": 4.576127643645892e-05, + "loss": 0.0471, + "step": 18122 + }, + { + "epoch": 2.54, + "learning_rate": 4.5760808534531166e-05, + "loss": 0.0622, + "step": 18124 + }, + { + "epoch": 2.54, + "learning_rate": 4.576034063260341e-05, + "loss": 0.0434, + "step": 18126 + }, + { + "epoch": 2.54, + "learning_rate": 4.575987273067565e-05, + "loss": 0.0489, + "step": 18128 + }, + { + "epoch": 2.54, + "learning_rate": 4.5759404828747897e-05, + "loss": 0.0932, + "step": 18130 + }, + { + "epoch": 2.55, + "learning_rate": 4.575893692682014e-05, + "loss": 0.0676, + "step": 18132 + }, + { + "epoch": 2.55, + "learning_rate": 4.575846902489239e-05, + "loss": 0.0738, + "step": 18134 + }, + { + "epoch": 2.55, + "learning_rate": 4.575800112296463e-05, + "loss": 0.0719, + "step": 18136 + }, + { + "epoch": 2.55, + "learning_rate": 4.5757533221036873e-05, + "loss": 0.0558, + "step": 18138 + }, + { + "epoch": 2.55, + "learning_rate": 4.575706531910911e-05, + "loss": 0.0841, + "step": 18140 + }, + { + "epoch": 2.55, + "learning_rate": 4.5756597417181365e-05, + "loss": 0.0758, + "step": 18142 + }, + { + "epoch": 2.55, + "learning_rate": 4.5756129515253604e-05, + "loss": 0.0637, + "step": 18144 + }, + { + "epoch": 2.55, + "learning_rate": 4.575566161332585e-05, + "loss": 0.0691, + "step": 18146 + }, + { + "epoch": 2.55, + "learning_rate": 4.575519371139809e-05, + "loss": 0.0624, + "step": 18148 + }, + { + "epoch": 2.55, + "learning_rate": 4.575472580947034e-05, + "loss": 0.0743, + "step": 18150 + }, + { + "epoch": 2.55, + "learning_rate": 4.575425790754258e-05, + "loss": 0.0611, + "step": 18152 + }, + { + "epoch": 2.55, + "learning_rate": 4.575379000561483e-05, + "loss": 0.0608, + "step": 18154 + }, + { + "epoch": 2.55, + "learning_rate": 4.5753322103687066e-05, + "loss": 0.0768, + "step": 18156 + }, + { + "epoch": 2.55, + "learning_rate": 4.575285420175931e-05, + "loss": 0.0476, + "step": 18158 + }, + { + "epoch": 2.55, + "learning_rate": 4.575238629983156e-05, + "loss": 0.0837, + "step": 18160 + }, + { + "epoch": 2.55, + "learning_rate": 4.5751918397903804e-05, + "loss": 0.0747, + "step": 18162 + }, + { + "epoch": 2.55, + "learning_rate": 4.575145049597604e-05, + "loss": 0.0707, + "step": 18164 + }, + { + "epoch": 2.55, + "learning_rate": 4.575098259404829e-05, + "loss": 0.0619, + "step": 18166 + }, + { + "epoch": 2.55, + "learning_rate": 4.5750514692120535e-05, + "loss": 0.0645, + "step": 18168 + }, + { + "epoch": 2.55, + "learning_rate": 4.575004679019278e-05, + "loss": 0.0577, + "step": 18170 + }, + { + "epoch": 2.55, + "learning_rate": 4.574957888826502e-05, + "loss": 0.0545, + "step": 18172 + }, + { + "epoch": 2.55, + "learning_rate": 4.5749110986337266e-05, + "loss": 0.0641, + "step": 18174 + }, + { + "epoch": 2.55, + "learning_rate": 4.574864308440951e-05, + "loss": 0.0938, + "step": 18176 + }, + { + "epoch": 2.55, + "learning_rate": 4.574817518248176e-05, + "loss": 0.0725, + "step": 18178 + }, + { + "epoch": 2.55, + "learning_rate": 4.5747707280554e-05, + "loss": 0.075, + "step": 18180 + }, + { + "epoch": 2.55, + "learning_rate": 4.574723937862624e-05, + "loss": 0.0707, + "step": 18182 + }, + { + "epoch": 2.55, + "learning_rate": 4.574677147669849e-05, + "loss": 0.0726, + "step": 18184 + }, + { + "epoch": 2.55, + "learning_rate": 4.5746303574770735e-05, + "loss": 0.0506, + "step": 18186 + }, + { + "epoch": 2.55, + "learning_rate": 4.5745835672842974e-05, + "loss": 0.0632, + "step": 18188 + }, + { + "epoch": 2.55, + "learning_rate": 4.574536777091522e-05, + "loss": 0.0544, + "step": 18190 + }, + { + "epoch": 2.55, + "learning_rate": 4.574489986898746e-05, + "loss": 0.0564, + "step": 18192 + }, + { + "epoch": 2.55, + "learning_rate": 4.574443196705971e-05, + "loss": 0.0785, + "step": 18194 + }, + { + "epoch": 2.55, + "learning_rate": 4.574396406513195e-05, + "loss": 0.074, + "step": 18196 + }, + { + "epoch": 2.55, + "learning_rate": 4.5743496163204197e-05, + "loss": 0.0464, + "step": 18198 + }, + { + "epoch": 2.55, + "learning_rate": 4.5743028261276436e-05, + "loss": 0.0614, + "step": 18200 + }, + { + "epoch": 2.56, + "learning_rate": 4.574256035934869e-05, + "loss": 0.0843, + "step": 18202 + }, + { + "epoch": 2.56, + "learning_rate": 4.574209245742093e-05, + "loss": 0.0725, + "step": 18204 + }, + { + "epoch": 2.56, + "learning_rate": 4.5741624555493173e-05, + "loss": 0.0684, + "step": 18206 + }, + { + "epoch": 2.56, + "learning_rate": 4.574115665356541e-05, + "loss": 0.0989, + "step": 18208 + }, + { + "epoch": 2.56, + "learning_rate": 4.574068875163766e-05, + "loss": 0.0952, + "step": 18210 + }, + { + "epoch": 2.56, + "learning_rate": 4.5740220849709904e-05, + "loss": 0.0607, + "step": 18212 + }, + { + "epoch": 2.56, + "learning_rate": 4.573975294778215e-05, + "loss": 0.0572, + "step": 18214 + }, + { + "epoch": 2.56, + "learning_rate": 4.573928504585439e-05, + "loss": 0.0673, + "step": 18216 + }, + { + "epoch": 2.56, + "learning_rate": 4.5738817143926635e-05, + "loss": 0.0595, + "step": 18218 + }, + { + "epoch": 2.56, + "learning_rate": 4.573834924199888e-05, + "loss": 0.0681, + "step": 18220 + }, + { + "epoch": 2.56, + "learning_rate": 4.573788134007113e-05, + "loss": 0.0632, + "step": 18222 + }, + { + "epoch": 2.56, + "learning_rate": 4.5737413438143366e-05, + "loss": 0.0569, + "step": 18224 + }, + { + "epoch": 2.56, + "learning_rate": 4.573694553621561e-05, + "loss": 0.0609, + "step": 18226 + }, + { + "epoch": 2.56, + "learning_rate": 4.573647763428786e-05, + "loss": 0.052, + "step": 18228 + }, + { + "epoch": 2.56, + "learning_rate": 4.5736009732360104e-05, + "loss": 0.0567, + "step": 18230 + }, + { + "epoch": 2.56, + "learning_rate": 4.573554183043234e-05, + "loss": 0.0725, + "step": 18232 + }, + { + "epoch": 2.56, + "learning_rate": 4.573507392850459e-05, + "loss": 0.0439, + "step": 18234 + }, + { + "epoch": 2.56, + "learning_rate": 4.5734606026576835e-05, + "loss": 0.0471, + "step": 18236 + }, + { + "epoch": 2.56, + "learning_rate": 4.5734138124649074e-05, + "loss": 0.0728, + "step": 18238 + }, + { + "epoch": 2.56, + "learning_rate": 4.573367022272132e-05, + "loss": 0.0785, + "step": 18240 + }, + { + "epoch": 2.56, + "learning_rate": 4.573320232079356e-05, + "loss": 0.0747, + "step": 18242 + }, + { + "epoch": 2.56, + "learning_rate": 4.5732734418865805e-05, + "loss": 0.0579, + "step": 18244 + }, + { + "epoch": 2.56, + "learning_rate": 4.573226651693805e-05, + "loss": 0.0665, + "step": 18246 + }, + { + "epoch": 2.56, + "learning_rate": 4.57317986150103e-05, + "loss": 0.0481, + "step": 18248 + }, + { + "epoch": 2.56, + "learning_rate": 4.5731330713082536e-05, + "loss": 0.0711, + "step": 18250 + }, + { + "epoch": 2.56, + "learning_rate": 4.573086281115478e-05, + "loss": 0.0555, + "step": 18252 + }, + { + "epoch": 2.56, + "learning_rate": 4.573039490922703e-05, + "loss": 0.0842, + "step": 18254 + }, + { + "epoch": 2.56, + "learning_rate": 4.5729927007299274e-05, + "loss": 0.0768, + "step": 18256 + }, + { + "epoch": 2.56, + "learning_rate": 4.572945910537151e-05, + "loss": 0.0584, + "step": 18258 + }, + { + "epoch": 2.56, + "learning_rate": 4.572899120344376e-05, + "loss": 0.0669, + "step": 18260 + }, + { + "epoch": 2.56, + "learning_rate": 4.5728523301516005e-05, + "loss": 0.0967, + "step": 18262 + }, + { + "epoch": 2.56, + "learning_rate": 4.572805539958825e-05, + "loss": 0.0714, + "step": 18264 + }, + { + "epoch": 2.56, + "learning_rate": 4.572758749766049e-05, + "loss": 0.0476, + "step": 18266 + }, + { + "epoch": 2.56, + "learning_rate": 4.5727119595732736e-05, + "loss": 0.0612, + "step": 18268 + }, + { + "epoch": 2.56, + "learning_rate": 4.572665169380498e-05, + "loss": 0.0443, + "step": 18270 + }, + { + "epoch": 2.56, + "learning_rate": 4.572618379187723e-05, + "loss": 0.0566, + "step": 18272 + }, + { + "epoch": 2.57, + "learning_rate": 4.572571588994947e-05, + "loss": 0.0596, + "step": 18274 + }, + { + "epoch": 2.57, + "learning_rate": 4.572524798802171e-05, + "loss": 0.0519, + "step": 18276 + }, + { + "epoch": 2.57, + "learning_rate": 4.572478008609395e-05, + "loss": 0.0782, + "step": 18278 + }, + { + "epoch": 2.57, + "learning_rate": 4.5724312184166204e-05, + "loss": 0.0565, + "step": 18280 + }, + { + "epoch": 2.57, + "learning_rate": 4.5723844282238444e-05, + "loss": 0.0611, + "step": 18282 + }, + { + "epoch": 2.57, + "learning_rate": 4.572337638031069e-05, + "loss": 0.0419, + "step": 18284 + }, + { + "epoch": 2.57, + "learning_rate": 4.572290847838293e-05, + "loss": 0.0639, + "step": 18286 + }, + { + "epoch": 2.57, + "learning_rate": 4.5722440576455174e-05, + "loss": 0.0831, + "step": 18288 + }, + { + "epoch": 2.57, + "learning_rate": 4.572197267452742e-05, + "loss": 0.0537, + "step": 18290 + }, + { + "epoch": 2.57, + "learning_rate": 4.5721504772599666e-05, + "loss": 0.076, + "step": 18292 + }, + { + "epoch": 2.57, + "learning_rate": 4.5721036870671905e-05, + "loss": 0.0811, + "step": 18294 + }, + { + "epoch": 2.57, + "learning_rate": 4.572056896874415e-05, + "loss": 0.0534, + "step": 18296 + }, + { + "epoch": 2.57, + "learning_rate": 4.57201010668164e-05, + "loss": 0.0726, + "step": 18298 + }, + { + "epoch": 2.57, + "learning_rate": 4.571963316488864e-05, + "loss": 0.0908, + "step": 18300 + }, + { + "epoch": 2.57, + "learning_rate": 4.571916526296088e-05, + "loss": 0.0847, + "step": 18302 + }, + { + "epoch": 2.57, + "learning_rate": 4.571869736103313e-05, + "loss": 0.065, + "step": 18304 + }, + { + "epoch": 2.57, + "learning_rate": 4.5718229459105374e-05, + "loss": 0.0583, + "step": 18306 + }, + { + "epoch": 2.57, + "learning_rate": 4.571776155717762e-05, + "loss": 0.059, + "step": 18308 + }, + { + "epoch": 2.57, + "learning_rate": 4.571729365524986e-05, + "loss": 0.0935, + "step": 18310 + }, + { + "epoch": 2.57, + "learning_rate": 4.5716825753322105e-05, + "loss": 0.0507, + "step": 18312 + }, + { + "epoch": 2.57, + "learning_rate": 4.571635785139435e-05, + "loss": 0.0744, + "step": 18314 + }, + { + "epoch": 2.57, + "learning_rate": 4.57158899494666e-05, + "loss": 0.0534, + "step": 18316 + }, + { + "epoch": 2.57, + "learning_rate": 4.5715422047538836e-05, + "loss": 0.0578, + "step": 18318 + }, + { + "epoch": 2.57, + "learning_rate": 4.571495414561108e-05, + "loss": 0.0678, + "step": 18320 + }, + { + "epoch": 2.57, + "learning_rate": 4.571448624368332e-05, + "loss": 0.0631, + "step": 18322 + }, + { + "epoch": 2.57, + "learning_rate": 4.5714018341755574e-05, + "loss": 0.0579, + "step": 18324 + }, + { + "epoch": 2.57, + "learning_rate": 4.571355043982781e-05, + "loss": 0.0568, + "step": 18326 + }, + { + "epoch": 2.57, + "learning_rate": 4.571308253790006e-05, + "loss": 0.0613, + "step": 18328 + }, + { + "epoch": 2.57, + "learning_rate": 4.57126146359723e-05, + "loss": 0.0549, + "step": 18330 + }, + { + "epoch": 2.57, + "learning_rate": 4.571214673404455e-05, + "loss": 0.0559, + "step": 18332 + }, + { + "epoch": 2.57, + "learning_rate": 4.571167883211679e-05, + "loss": 0.0579, + "step": 18334 + }, + { + "epoch": 2.57, + "learning_rate": 4.5711210930189036e-05, + "loss": 0.0581, + "step": 18336 + }, + { + "epoch": 2.57, + "learning_rate": 4.5710743028261275e-05, + "loss": 0.0759, + "step": 18338 + }, + { + "epoch": 2.57, + "learning_rate": 4.571027512633352e-05, + "loss": 0.0619, + "step": 18340 + }, + { + "epoch": 2.57, + "learning_rate": 4.570980722440577e-05, + "loss": 0.0932, + "step": 18342 + }, + { + "epoch": 2.57, + "learning_rate": 4.570933932247801e-05, + "loss": 0.0834, + "step": 18344 + }, + { + "epoch": 2.58, + "learning_rate": 4.570887142055025e-05, + "loss": 0.0916, + "step": 18346 + }, + { + "epoch": 2.58, + "learning_rate": 4.57084035186225e-05, + "loss": 0.0794, + "step": 18348 + }, + { + "epoch": 2.58, + "learning_rate": 4.5707935616694744e-05, + "loss": 0.0707, + "step": 18350 + }, + { + "epoch": 2.58, + "learning_rate": 4.570746771476699e-05, + "loss": 0.0715, + "step": 18352 + }, + { + "epoch": 2.58, + "learning_rate": 4.570699981283923e-05, + "loss": 0.0645, + "step": 18354 + }, + { + "epoch": 2.58, + "learning_rate": 4.5706531910911475e-05, + "loss": 0.0676, + "step": 18356 + }, + { + "epoch": 2.58, + "learning_rate": 4.570606400898372e-05, + "loss": 0.0562, + "step": 18358 + }, + { + "epoch": 2.58, + "learning_rate": 4.5705596107055966e-05, + "loss": 0.0768, + "step": 18360 + }, + { + "epoch": 2.58, + "learning_rate": 4.5705128205128205e-05, + "loss": 0.0724, + "step": 18362 + }, + { + "epoch": 2.58, + "learning_rate": 4.570466030320045e-05, + "loss": 0.0937, + "step": 18364 + }, + { + "epoch": 2.58, + "learning_rate": 4.57041924012727e-05, + "loss": 0.0621, + "step": 18366 + }, + { + "epoch": 2.58, + "learning_rate": 4.570372449934494e-05, + "loss": 0.0561, + "step": 18368 + }, + { + "epoch": 2.58, + "learning_rate": 4.570325659741718e-05, + "loss": 0.0608, + "step": 18370 + }, + { + "epoch": 2.58, + "learning_rate": 4.570278869548943e-05, + "loss": 0.0528, + "step": 18372 + }, + { + "epoch": 2.58, + "learning_rate": 4.570232079356167e-05, + "loss": 0.0754, + "step": 18374 + }, + { + "epoch": 2.58, + "learning_rate": 4.570185289163392e-05, + "loss": 0.0526, + "step": 18376 + }, + { + "epoch": 2.58, + "learning_rate": 4.570138498970616e-05, + "loss": 0.0686, + "step": 18378 + }, + { + "epoch": 2.58, + "learning_rate": 4.5700917087778405e-05, + "loss": 0.0589, + "step": 18380 + }, + { + "epoch": 2.58, + "learning_rate": 4.5700449185850644e-05, + "loss": 0.0836, + "step": 18382 + }, + { + "epoch": 2.58, + "learning_rate": 4.56999812839229e-05, + "loss": 0.0691, + "step": 18384 + }, + { + "epoch": 2.58, + "learning_rate": 4.5699513381995136e-05, + "loss": 0.0577, + "step": 18386 + }, + { + "epoch": 2.58, + "learning_rate": 4.569904548006738e-05, + "loss": 0.0598, + "step": 18388 + }, + { + "epoch": 2.58, + "learning_rate": 4.569857757813962e-05, + "loss": 0.0531, + "step": 18390 + }, + { + "epoch": 2.58, + "learning_rate": 4.569810967621187e-05, + "loss": 0.0687, + "step": 18392 + }, + { + "epoch": 2.58, + "learning_rate": 4.569764177428411e-05, + "loss": 0.0573, + "step": 18394 + }, + { + "epoch": 2.58, + "learning_rate": 4.569717387235636e-05, + "loss": 0.0716, + "step": 18396 + }, + { + "epoch": 2.58, + "learning_rate": 4.56967059704286e-05, + "loss": 0.0508, + "step": 18398 + }, + { + "epoch": 2.58, + "learning_rate": 4.5696238068500844e-05, + "loss": 0.0861, + "step": 18400 + }, + { + "epoch": 2.58, + "learning_rate": 4.569577016657309e-05, + "loss": 0.0575, + "step": 18402 + }, + { + "epoch": 2.58, + "learning_rate": 4.5695302264645336e-05, + "loss": 0.0549, + "step": 18404 + }, + { + "epoch": 2.58, + "learning_rate": 4.5694834362717575e-05, + "loss": 0.0513, + "step": 18406 + }, + { + "epoch": 2.58, + "learning_rate": 4.569436646078982e-05, + "loss": 0.066, + "step": 18408 + }, + { + "epoch": 2.58, + "learning_rate": 4.569389855886207e-05, + "loss": 0.0694, + "step": 18410 + }, + { + "epoch": 2.58, + "learning_rate": 4.569343065693431e-05, + "loss": 0.0617, + "step": 18412 + }, + { + "epoch": 2.58, + "learning_rate": 4.569296275500655e-05, + "loss": 0.046, + "step": 18414 + }, + { + "epoch": 2.59, + "learning_rate": 4.56924948530788e-05, + "loss": 0.0726, + "step": 18416 + }, + { + "epoch": 2.59, + "learning_rate": 4.569202695115104e-05, + "loss": 0.0718, + "step": 18418 + }, + { + "epoch": 2.59, + "learning_rate": 4.569155904922329e-05, + "loss": 0.0521, + "step": 18420 + }, + { + "epoch": 2.59, + "learning_rate": 4.569109114729553e-05, + "loss": 0.07, + "step": 18422 + }, + { + "epoch": 2.59, + "learning_rate": 4.5690623245367775e-05, + "loss": 0.0855, + "step": 18424 + }, + { + "epoch": 2.59, + "learning_rate": 4.5690155343440014e-05, + "loss": 0.0811, + "step": 18426 + }, + { + "epoch": 2.59, + "learning_rate": 4.5689687441512266e-05, + "loss": 0.0491, + "step": 18428 + }, + { + "epoch": 2.59, + "learning_rate": 4.5689219539584505e-05, + "loss": 0.0697, + "step": 18430 + }, + { + "epoch": 2.59, + "learning_rate": 4.568875163765675e-05, + "loss": 0.0671, + "step": 18432 + }, + { + "epoch": 2.59, + "learning_rate": 4.568828373572899e-05, + "loss": 0.0611, + "step": 18434 + }, + { + "epoch": 2.59, + "learning_rate": 4.5687815833801236e-05, + "loss": 0.0682, + "step": 18436 + }, + { + "epoch": 2.59, + "learning_rate": 4.568734793187348e-05, + "loss": 0.0622, + "step": 18438 + }, + { + "epoch": 2.59, + "learning_rate": 4.568688002994573e-05, + "loss": 0.0688, + "step": 18440 + }, + { + "epoch": 2.59, + "learning_rate": 4.568641212801797e-05, + "loss": 0.0682, + "step": 18442 + }, + { + "epoch": 2.59, + "learning_rate": 4.568594422609021e-05, + "loss": 0.0585, + "step": 18444 + }, + { + "epoch": 2.59, + "learning_rate": 4.568547632416246e-05, + "loss": 0.0631, + "step": 18446 + }, + { + "epoch": 2.59, + "learning_rate": 4.5685008422234705e-05, + "loss": 0.0519, + "step": 18448 + }, + { + "epoch": 2.59, + "learning_rate": 4.5684540520306944e-05, + "loss": 0.0781, + "step": 18450 + }, + { + "epoch": 2.59, + "learning_rate": 4.568407261837919e-05, + "loss": 0.0722, + "step": 18452 + }, + { + "epoch": 2.59, + "learning_rate": 4.5683604716451436e-05, + "loss": 0.0735, + "step": 18454 + }, + { + "epoch": 2.59, + "learning_rate": 4.568313681452368e-05, + "loss": 0.0465, + "step": 18456 + }, + { + "epoch": 2.59, + "learning_rate": 4.568266891259592e-05, + "loss": 0.0464, + "step": 18458 + }, + { + "epoch": 2.59, + "learning_rate": 4.568220101066817e-05, + "loss": 0.0834, + "step": 18460 + }, + { + "epoch": 2.59, + "learning_rate": 4.568173310874041e-05, + "loss": 0.0868, + "step": 18462 + }, + { + "epoch": 2.59, + "learning_rate": 4.568126520681266e-05, + "loss": 0.0675, + "step": 18464 + }, + { + "epoch": 2.59, + "learning_rate": 4.56807973048849e-05, + "loss": 0.0623, + "step": 18466 + }, + { + "epoch": 2.59, + "learning_rate": 4.5680329402957144e-05, + "loss": 0.0811, + "step": 18468 + }, + { + "epoch": 2.59, + "learning_rate": 4.567986150102938e-05, + "loss": 0.1039, + "step": 18470 + }, + { + "epoch": 2.59, + "learning_rate": 4.5679393599101636e-05, + "loss": 0.0557, + "step": 18472 + }, + { + "epoch": 2.59, + "learning_rate": 4.5678925697173875e-05, + "loss": 0.048, + "step": 18474 + }, + { + "epoch": 2.59, + "learning_rate": 4.567845779524612e-05, + "loss": 0.0504, + "step": 18476 + }, + { + "epoch": 2.59, + "learning_rate": 4.567798989331836e-05, + "loss": 0.0646, + "step": 18478 + }, + { + "epoch": 2.59, + "learning_rate": 4.567752199139061e-05, + "loss": 0.0745, + "step": 18480 + }, + { + "epoch": 2.59, + "learning_rate": 4.567705408946285e-05, + "loss": 0.0639, + "step": 18482 + }, + { + "epoch": 2.59, + "learning_rate": 4.56765861875351e-05, + "loss": 0.0683, + "step": 18484 + }, + { + "epoch": 2.59, + "learning_rate": 4.567611828560734e-05, + "loss": 0.0596, + "step": 18486 + }, + { + "epoch": 2.6, + "learning_rate": 4.567565038367958e-05, + "loss": 0.0624, + "step": 18488 + }, + { + "epoch": 2.6, + "learning_rate": 4.567518248175183e-05, + "loss": 0.0536, + "step": 18490 + }, + { + "epoch": 2.6, + "learning_rate": 4.567471457982407e-05, + "loss": 0.0704, + "step": 18492 + }, + { + "epoch": 2.6, + "learning_rate": 4.5674246677896314e-05, + "loss": 0.0923, + "step": 18494 + }, + { + "epoch": 2.6, + "learning_rate": 4.567377877596856e-05, + "loss": 0.0783, + "step": 18496 + }, + { + "epoch": 2.6, + "learning_rate": 4.5673310874040806e-05, + "loss": 0.0792, + "step": 18498 + }, + { + "epoch": 2.6, + "learning_rate": 4.5672842972113045e-05, + "loss": 0.045, + "step": 18500 + }, + { + "epoch": 2.6, + "learning_rate": 4.567237507018529e-05, + "loss": 0.0592, + "step": 18502 + }, + { + "epoch": 2.6, + "learning_rate": 4.567190716825753e-05, + "loss": 0.0582, + "step": 18504 + }, + { + "epoch": 2.6, + "learning_rate": 4.567143926632978e-05, + "loss": 0.0808, + "step": 18506 + }, + { + "epoch": 2.6, + "learning_rate": 4.567097136440202e-05, + "loss": 0.0711, + "step": 18508 + }, + { + "epoch": 2.6, + "learning_rate": 4.567050346247427e-05, + "loss": 0.058, + "step": 18510 + }, + { + "epoch": 2.6, + "learning_rate": 4.5670035560546507e-05, + "loss": 0.0584, + "step": 18512 + }, + { + "epoch": 2.6, + "learning_rate": 4.566956765861876e-05, + "loss": 0.0872, + "step": 18514 + }, + { + "epoch": 2.6, + "learning_rate": 4.5669099756691e-05, + "loss": 0.0625, + "step": 18516 + }, + { + "epoch": 2.6, + "learning_rate": 4.5668631854763244e-05, + "loss": 0.0688, + "step": 18518 + }, + { + "epoch": 2.6, + "learning_rate": 4.5668163952835483e-05, + "loss": 0.0666, + "step": 18520 + }, + { + "epoch": 2.6, + "learning_rate": 4.566769605090773e-05, + "loss": 0.0567, + "step": 18522 + }, + { + "epoch": 2.6, + "learning_rate": 4.5667228148979975e-05, + "loss": 0.0653, + "step": 18524 + }, + { + "epoch": 2.6, + "learning_rate": 4.566676024705222e-05, + "loss": 0.0905, + "step": 18526 + }, + { + "epoch": 2.6, + "learning_rate": 4.566629234512446e-05, + "loss": 0.096, + "step": 18528 + }, + { + "epoch": 2.6, + "learning_rate": 4.5665824443196706e-05, + "loss": 0.0797, + "step": 18530 + }, + { + "epoch": 2.6, + "learning_rate": 4.566535654126895e-05, + "loss": 0.0705, + "step": 18532 + }, + { + "epoch": 2.6, + "learning_rate": 4.56648886393412e-05, + "loss": 0.0588, + "step": 18534 + }, + { + "epoch": 2.6, + "learning_rate": 4.566442073741344e-05, + "loss": 0.0743, + "step": 18536 + }, + { + "epoch": 2.6, + "learning_rate": 4.566395283548568e-05, + "loss": 0.0661, + "step": 18538 + }, + { + "epoch": 2.6, + "learning_rate": 4.566348493355793e-05, + "loss": 0.0802, + "step": 18540 + }, + { + "epoch": 2.6, + "learning_rate": 4.5663017031630175e-05, + "loss": 0.0562, + "step": 18542 + }, + { + "epoch": 2.6, + "learning_rate": 4.5662549129702414e-05, + "loss": 0.0646, + "step": 18544 + }, + { + "epoch": 2.6, + "learning_rate": 4.566208122777466e-05, + "loss": 0.0774, + "step": 18546 + }, + { + "epoch": 2.6, + "learning_rate": 4.5661613325846906e-05, + "loss": 0.0669, + "step": 18548 + }, + { + "epoch": 2.6, + "learning_rate": 4.566114542391915e-05, + "loss": 0.0566, + "step": 18550 + }, + { + "epoch": 2.6, + "learning_rate": 4.566067752199139e-05, + "loss": 0.0428, + "step": 18552 + }, + { + "epoch": 2.6, + "learning_rate": 4.566020962006364e-05, + "loss": 0.0669, + "step": 18554 + }, + { + "epoch": 2.6, + "learning_rate": 4.5659741718135876e-05, + "loss": 0.0795, + "step": 18556 + }, + { + "epoch": 2.6, + "learning_rate": 4.565927381620813e-05, + "loss": 0.0556, + "step": 18558 + }, + { + "epoch": 2.61, + "learning_rate": 4.565880591428037e-05, + "loss": 0.0486, + "step": 18560 + }, + { + "epoch": 2.61, + "learning_rate": 4.5658338012352614e-05, + "loss": 0.056, + "step": 18562 + }, + { + "epoch": 2.61, + "learning_rate": 4.565787011042485e-05, + "loss": 0.0688, + "step": 18564 + }, + { + "epoch": 2.61, + "learning_rate": 4.56574022084971e-05, + "loss": 0.0698, + "step": 18566 + }, + { + "epoch": 2.61, + "learning_rate": 4.5656934306569345e-05, + "loss": 0.0619, + "step": 18568 + }, + { + "epoch": 2.61, + "learning_rate": 4.565646640464159e-05, + "loss": 0.0527, + "step": 18570 + }, + { + "epoch": 2.61, + "learning_rate": 4.565599850271383e-05, + "loss": 0.0389, + "step": 18572 + }, + { + "epoch": 2.61, + "learning_rate": 4.5655530600786076e-05, + "loss": 0.0677, + "step": 18574 + }, + { + "epoch": 2.61, + "learning_rate": 4.565506269885832e-05, + "loss": 0.0628, + "step": 18576 + }, + { + "epoch": 2.61, + "learning_rate": 4.565459479693057e-05, + "loss": 0.0577, + "step": 18578 + }, + { + "epoch": 2.61, + "learning_rate": 4.5654126895002807e-05, + "loss": 0.0564, + "step": 18580 + }, + { + "epoch": 2.61, + "learning_rate": 4.565365899307505e-05, + "loss": 0.0572, + "step": 18582 + }, + { + "epoch": 2.61, + "learning_rate": 4.56531910911473e-05, + "loss": 0.0669, + "step": 18584 + }, + { + "epoch": 2.61, + "learning_rate": 4.5652723189219544e-05, + "loss": 0.0569, + "step": 18586 + }, + { + "epoch": 2.61, + "learning_rate": 4.5652255287291783e-05, + "loss": 0.0608, + "step": 18588 + }, + { + "epoch": 2.61, + "learning_rate": 4.565178738536403e-05, + "loss": 0.0616, + "step": 18590 + }, + { + "epoch": 2.61, + "learning_rate": 4.5651319483436275e-05, + "loss": 0.101, + "step": 18592 + }, + { + "epoch": 2.61, + "learning_rate": 4.565085158150852e-05, + "loss": 0.0599, + "step": 18594 + }, + { + "epoch": 2.61, + "learning_rate": 4.565038367958076e-05, + "loss": 0.0807, + "step": 18596 + }, + { + "epoch": 2.61, + "learning_rate": 4.5649915777653006e-05, + "loss": 0.051, + "step": 18598 + }, + { + "epoch": 2.61, + "learning_rate": 4.5649447875725245e-05, + "loss": 0.076, + "step": 18600 + }, + { + "epoch": 2.61, + "learning_rate": 4.56489799737975e-05, + "loss": 0.0665, + "step": 18602 + }, + { + "epoch": 2.61, + "learning_rate": 4.564851207186974e-05, + "loss": 0.0898, + "step": 18604 + }, + { + "epoch": 2.61, + "learning_rate": 4.564804416994198e-05, + "loss": 0.0615, + "step": 18606 + }, + { + "epoch": 2.61, + "learning_rate": 4.564757626801422e-05, + "loss": 0.0567, + "step": 18608 + }, + { + "epoch": 2.61, + "learning_rate": 4.5647108366086475e-05, + "loss": 0.0735, + "step": 18610 + }, + { + "epoch": 2.61, + "learning_rate": 4.5646640464158714e-05, + "loss": 0.0742, + "step": 18612 + }, + { + "epoch": 2.61, + "learning_rate": 4.564617256223096e-05, + "loss": 0.0531, + "step": 18614 + }, + { + "epoch": 2.61, + "learning_rate": 4.56457046603032e-05, + "loss": 0.0808, + "step": 18616 + }, + { + "epoch": 2.61, + "learning_rate": 4.5645236758375445e-05, + "loss": 0.0787, + "step": 18618 + }, + { + "epoch": 2.61, + "learning_rate": 4.564476885644769e-05, + "loss": 0.0624, + "step": 18620 + }, + { + "epoch": 2.61, + "learning_rate": 4.564430095451994e-05, + "loss": 0.0635, + "step": 18622 + }, + { + "epoch": 2.61, + "learning_rate": 4.5643833052592176e-05, + "loss": 0.0672, + "step": 18624 + }, + { + "epoch": 2.61, + "learning_rate": 4.564336515066442e-05, + "loss": 0.0847, + "step": 18626 + }, + { + "epoch": 2.61, + "learning_rate": 4.564289724873667e-05, + "loss": 0.0717, + "step": 18628 + }, + { + "epoch": 2.62, + "learning_rate": 4.5642429346808914e-05, + "loss": 0.0541, + "step": 18630 + }, + { + "epoch": 2.62, + "learning_rate": 4.564196144488115e-05, + "loss": 0.0768, + "step": 18632 + }, + { + "epoch": 2.62, + "learning_rate": 4.56414935429534e-05, + "loss": 0.0713, + "step": 18634 + }, + { + "epoch": 2.62, + "learning_rate": 4.5641025641025645e-05, + "loss": 0.066, + "step": 18636 + }, + { + "epoch": 2.62, + "learning_rate": 4.564055773909789e-05, + "loss": 0.0893, + "step": 18638 + }, + { + "epoch": 2.62, + "learning_rate": 4.564008983717013e-05, + "loss": 0.0778, + "step": 18640 + }, + { + "epoch": 2.62, + "learning_rate": 4.5639621935242376e-05, + "loss": 0.0615, + "step": 18642 + }, + { + "epoch": 2.62, + "learning_rate": 4.563915403331462e-05, + "loss": 0.0626, + "step": 18644 + }, + { + "epoch": 2.62, + "learning_rate": 4.563868613138687e-05, + "loss": 0.0516, + "step": 18646 + }, + { + "epoch": 2.62, + "learning_rate": 4.5638218229459107e-05, + "loss": 0.0506, + "step": 18648 + }, + { + "epoch": 2.62, + "learning_rate": 4.563775032753135e-05, + "loss": 0.0568, + "step": 18650 + }, + { + "epoch": 2.62, + "learning_rate": 4.563728242560359e-05, + "loss": 0.057, + "step": 18652 + }, + { + "epoch": 2.62, + "learning_rate": 4.5636814523675844e-05, + "loss": 0.062, + "step": 18654 + }, + { + "epoch": 2.62, + "learning_rate": 4.5636346621748083e-05, + "loss": 0.0646, + "step": 18656 + }, + { + "epoch": 2.62, + "learning_rate": 4.563587871982033e-05, + "loss": 0.0736, + "step": 18658 + }, + { + "epoch": 2.62, + "learning_rate": 4.563541081789257e-05, + "loss": 0.0544, + "step": 18660 + }, + { + "epoch": 2.62, + "learning_rate": 4.563494291596482e-05, + "loss": 0.072, + "step": 18662 + }, + { + "epoch": 2.62, + "learning_rate": 4.563447501403706e-05, + "loss": 0.038, + "step": 18664 + }, + { + "epoch": 2.62, + "learning_rate": 4.5634007112109306e-05, + "loss": 0.0732, + "step": 18666 + }, + { + "epoch": 2.62, + "learning_rate": 4.5633539210181545e-05, + "loss": 0.0632, + "step": 18668 + }, + { + "epoch": 2.62, + "learning_rate": 4.563307130825379e-05, + "loss": 0.0554, + "step": 18670 + }, + { + "epoch": 2.62, + "learning_rate": 4.563260340632604e-05, + "loss": 0.0718, + "step": 18672 + }, + { + "epoch": 2.62, + "learning_rate": 4.563213550439828e-05, + "loss": 0.0615, + "step": 18674 + }, + { + "epoch": 2.62, + "learning_rate": 4.563166760247052e-05, + "loss": 0.0532, + "step": 18676 + }, + { + "epoch": 2.62, + "learning_rate": 4.563119970054277e-05, + "loss": 0.0494, + "step": 18678 + }, + { + "epoch": 2.62, + "learning_rate": 4.5630731798615014e-05, + "loss": 0.0624, + "step": 18680 + }, + { + "epoch": 2.62, + "learning_rate": 4.563026389668726e-05, + "loss": 0.0544, + "step": 18682 + }, + { + "epoch": 2.62, + "learning_rate": 4.56297959947595e-05, + "loss": 0.0613, + "step": 18684 + }, + { + "epoch": 2.62, + "learning_rate": 4.5629328092831745e-05, + "loss": 0.0713, + "step": 18686 + }, + { + "epoch": 2.62, + "learning_rate": 4.562886019090399e-05, + "loss": 0.0632, + "step": 18688 + }, + { + "epoch": 2.62, + "learning_rate": 4.562839228897624e-05, + "loss": 0.0607, + "step": 18690 + }, + { + "epoch": 2.62, + "learning_rate": 4.5627924387048476e-05, + "loss": 0.068, + "step": 18692 + }, + { + "epoch": 2.62, + "learning_rate": 4.562745648512072e-05, + "loss": 0.0653, + "step": 18694 + }, + { + "epoch": 2.62, + "learning_rate": 4.562698858319297e-05, + "loss": 0.0796, + "step": 18696 + }, + { + "epoch": 2.62, + "learning_rate": 4.5626520681265214e-05, + "loss": 0.0606, + "step": 18698 + }, + { + "epoch": 2.62, + "learning_rate": 4.562605277933745e-05, + "loss": 0.0678, + "step": 18700 + }, + { + "epoch": 2.63, + "learning_rate": 4.56255848774097e-05, + "loss": 0.0536, + "step": 18702 + }, + { + "epoch": 2.63, + "learning_rate": 4.562511697548194e-05, + "loss": 0.0632, + "step": 18704 + }, + { + "epoch": 2.63, + "learning_rate": 4.562464907355419e-05, + "loss": 0.0571, + "step": 18706 + }, + { + "epoch": 2.63, + "learning_rate": 4.562418117162643e-05, + "loss": 0.0728, + "step": 18708 + }, + { + "epoch": 2.63, + "learning_rate": 4.5623713269698676e-05, + "loss": 0.0751, + "step": 18710 + }, + { + "epoch": 2.63, + "learning_rate": 4.5623245367770915e-05, + "loss": 0.073, + "step": 18712 + }, + { + "epoch": 2.63, + "learning_rate": 4.562277746584316e-05, + "loss": 0.0533, + "step": 18714 + }, + { + "epoch": 2.63, + "learning_rate": 4.5622309563915407e-05, + "loss": 0.0686, + "step": 18716 + }, + { + "epoch": 2.63, + "learning_rate": 4.562184166198765e-05, + "loss": 0.0733, + "step": 18718 + }, + { + "epoch": 2.63, + "learning_rate": 4.562137376005989e-05, + "loss": 0.0781, + "step": 18720 + }, + { + "epoch": 2.63, + "learning_rate": 4.562090585813214e-05, + "loss": 0.0726, + "step": 18722 + }, + { + "epoch": 2.63, + "learning_rate": 4.5620437956204383e-05, + "loss": 0.072, + "step": 18724 + }, + { + "epoch": 2.63, + "learning_rate": 4.561997005427663e-05, + "loss": 0.056, + "step": 18726 + }, + { + "epoch": 2.63, + "learning_rate": 4.561950215234887e-05, + "loss": 0.0599, + "step": 18728 + }, + { + "epoch": 2.63, + "learning_rate": 4.5619034250421114e-05, + "loss": 0.0684, + "step": 18730 + }, + { + "epoch": 2.63, + "learning_rate": 4.561856634849336e-05, + "loss": 0.0667, + "step": 18732 + }, + { + "epoch": 2.63, + "learning_rate": 4.5618098446565606e-05, + "loss": 0.083, + "step": 18734 + }, + { + "epoch": 2.63, + "learning_rate": 4.5617630544637845e-05, + "loss": 0.0546, + "step": 18736 + }, + { + "epoch": 2.63, + "learning_rate": 4.561716264271009e-05, + "loss": 0.0572, + "step": 18738 + }, + { + "epoch": 2.63, + "learning_rate": 4.561669474078234e-05, + "loss": 0.0632, + "step": 18740 + }, + { + "epoch": 2.63, + "learning_rate": 4.561622683885458e-05, + "loss": 0.0497, + "step": 18742 + }, + { + "epoch": 2.63, + "learning_rate": 4.561575893692682e-05, + "loss": 0.0471, + "step": 18744 + }, + { + "epoch": 2.63, + "learning_rate": 4.561529103499906e-05, + "loss": 0.062, + "step": 18746 + }, + { + "epoch": 2.63, + "learning_rate": 4.561482313307131e-05, + "loss": 0.0707, + "step": 18748 + }, + { + "epoch": 2.63, + "learning_rate": 4.561435523114355e-05, + "loss": 0.0727, + "step": 18750 + }, + { + "epoch": 2.63, + "learning_rate": 4.56138873292158e-05, + "loss": 0.0546, + "step": 18752 + }, + { + "epoch": 2.63, + "learning_rate": 4.561341942728804e-05, + "loss": 0.0725, + "step": 18754 + }, + { + "epoch": 2.63, + "learning_rate": 4.5612951525360284e-05, + "loss": 0.0531, + "step": 18756 + }, + { + "epoch": 2.63, + "learning_rate": 4.561248362343253e-05, + "loss": 0.0847, + "step": 18758 + }, + { + "epoch": 2.63, + "learning_rate": 4.5612015721504776e-05, + "loss": 0.0603, + "step": 18760 + }, + { + "epoch": 2.63, + "learning_rate": 4.5611547819577015e-05, + "loss": 0.0646, + "step": 18762 + }, + { + "epoch": 2.63, + "learning_rate": 4.561107991764926e-05, + "loss": 0.0671, + "step": 18764 + }, + { + "epoch": 2.63, + "learning_rate": 4.561061201572151e-05, + "loss": 0.0626, + "step": 18766 + }, + { + "epoch": 2.63, + "learning_rate": 4.561014411379375e-05, + "loss": 0.0754, + "step": 18768 + }, + { + "epoch": 2.63, + "learning_rate": 4.560967621186599e-05, + "loss": 0.0583, + "step": 18770 + }, + { + "epoch": 2.64, + "learning_rate": 4.560920830993824e-05, + "loss": 0.0808, + "step": 18772 + }, + { + "epoch": 2.64, + "learning_rate": 4.5608740408010484e-05, + "loss": 0.0559, + "step": 18774 + }, + { + "epoch": 2.64, + "learning_rate": 4.560827250608273e-05, + "loss": 0.0659, + "step": 18776 + }, + { + "epoch": 2.64, + "learning_rate": 4.560780460415497e-05, + "loss": 0.0571, + "step": 18778 + }, + { + "epoch": 2.64, + "learning_rate": 4.5607336702227215e-05, + "loss": 0.0578, + "step": 18780 + }, + { + "epoch": 2.64, + "learning_rate": 4.5606868800299454e-05, + "loss": 0.0604, + "step": 18782 + }, + { + "epoch": 2.64, + "learning_rate": 4.560640089837171e-05, + "loss": 0.0682, + "step": 18784 + }, + { + "epoch": 2.64, + "learning_rate": 4.5605932996443946e-05, + "loss": 0.0456, + "step": 18786 + }, + { + "epoch": 2.64, + "learning_rate": 4.560546509451619e-05, + "loss": 0.0641, + "step": 18788 + }, + { + "epoch": 2.64, + "learning_rate": 4.560499719258843e-05, + "loss": 0.0702, + "step": 18790 + }, + { + "epoch": 2.64, + "learning_rate": 4.5604529290660683e-05, + "loss": 0.0541, + "step": 18792 + }, + { + "epoch": 2.64, + "learning_rate": 4.560406138873292e-05, + "loss": 0.0545, + "step": 18794 + }, + { + "epoch": 2.64, + "learning_rate": 4.560359348680517e-05, + "loss": 0.0776, + "step": 18796 + }, + { + "epoch": 2.64, + "learning_rate": 4.560312558487741e-05, + "loss": 0.0748, + "step": 18798 + }, + { + "epoch": 2.64, + "learning_rate": 4.5602657682949654e-05, + "loss": 0.062, + "step": 18800 + }, + { + "epoch": 2.64, + "learning_rate": 4.56021897810219e-05, + "loss": 0.0597, + "step": 18802 + }, + { + "epoch": 2.64, + "learning_rate": 4.5601721879094145e-05, + "loss": 0.0576, + "step": 18804 + }, + { + "epoch": 2.64, + "learning_rate": 4.5601253977166385e-05, + "loss": 0.0621, + "step": 18806 + }, + { + "epoch": 2.64, + "learning_rate": 4.560078607523863e-05, + "loss": 0.065, + "step": 18808 + }, + { + "epoch": 2.64, + "learning_rate": 4.5600318173310876e-05, + "loss": 0.0754, + "step": 18810 + }, + { + "epoch": 2.64, + "learning_rate": 4.559985027138312e-05, + "loss": 0.068, + "step": 18812 + }, + { + "epoch": 2.64, + "learning_rate": 4.559938236945536e-05, + "loss": 0.0525, + "step": 18814 + }, + { + "epoch": 2.64, + "learning_rate": 4.559891446752761e-05, + "loss": 0.0633, + "step": 18816 + }, + { + "epoch": 2.64, + "learning_rate": 4.559844656559985e-05, + "loss": 0.0751, + "step": 18818 + }, + { + "epoch": 2.64, + "learning_rate": 4.55979786636721e-05, + "loss": 0.0747, + "step": 18820 + }, + { + "epoch": 2.64, + "learning_rate": 4.559751076174434e-05, + "loss": 0.0561, + "step": 18822 + }, + { + "epoch": 2.64, + "learning_rate": 4.5597042859816584e-05, + "loss": 0.0641, + "step": 18824 + }, + { + "epoch": 2.64, + "learning_rate": 4.559657495788883e-05, + "loss": 0.0682, + "step": 18826 + }, + { + "epoch": 2.64, + "learning_rate": 4.5596107055961076e-05, + "loss": 0.0755, + "step": 18828 + }, + { + "epoch": 2.64, + "learning_rate": 4.5595639154033315e-05, + "loss": 0.0675, + "step": 18830 + }, + { + "epoch": 2.64, + "learning_rate": 4.559517125210556e-05, + "loss": 0.0602, + "step": 18832 + }, + { + "epoch": 2.64, + "learning_rate": 4.55947033501778e-05, + "loss": 0.0609, + "step": 18834 + }, + { + "epoch": 2.64, + "learning_rate": 4.559423544825005e-05, + "loss": 0.0635, + "step": 18836 + }, + { + "epoch": 2.64, + "learning_rate": 4.559376754632229e-05, + "loss": 0.0697, + "step": 18838 + }, + { + "epoch": 2.64, + "learning_rate": 4.559329964439454e-05, + "loss": 0.0515, + "step": 18840 + }, + { + "epoch": 2.64, + "learning_rate": 4.559283174246678e-05, + "loss": 0.0562, + "step": 18842 + }, + { + "epoch": 2.65, + "learning_rate": 4.559236384053902e-05, + "loss": 0.0694, + "step": 18844 + }, + { + "epoch": 2.65, + "learning_rate": 4.559189593861127e-05, + "loss": 0.0796, + "step": 18846 + }, + { + "epoch": 2.65, + "learning_rate": 4.5591428036683515e-05, + "loss": 0.0685, + "step": 18848 + }, + { + "epoch": 2.65, + "learning_rate": 4.5590960134755754e-05, + "loss": 0.0648, + "step": 18850 + }, + { + "epoch": 2.65, + "learning_rate": 4.5590492232828e-05, + "loss": 0.0689, + "step": 18852 + }, + { + "epoch": 2.65, + "learning_rate": 4.5590024330900246e-05, + "loss": 0.0586, + "step": 18854 + }, + { + "epoch": 2.65, + "learning_rate": 4.558955642897249e-05, + "loss": 0.0408, + "step": 18856 + }, + { + "epoch": 2.65, + "learning_rate": 4.558908852704473e-05, + "loss": 0.0653, + "step": 18858 + }, + { + "epoch": 2.65, + "learning_rate": 4.558862062511698e-05, + "loss": 0.0431, + "step": 18860 + }, + { + "epoch": 2.65, + "learning_rate": 4.558815272318922e-05, + "loss": 0.0681, + "step": 18862 + }, + { + "epoch": 2.65, + "learning_rate": 4.558768482126147e-05, + "loss": 0.0815, + "step": 18864 + }, + { + "epoch": 2.65, + "learning_rate": 4.558721691933371e-05, + "loss": 0.0746, + "step": 18866 + }, + { + "epoch": 2.65, + "learning_rate": 4.5586749017405954e-05, + "loss": 0.0648, + "step": 18868 + }, + { + "epoch": 2.65, + "learning_rate": 4.55862811154782e-05, + "loss": 0.0511, + "step": 18870 + }, + { + "epoch": 2.65, + "learning_rate": 4.5585813213550445e-05, + "loss": 0.0745, + "step": 18872 + }, + { + "epoch": 2.65, + "learning_rate": 4.5585345311622685e-05, + "loss": 0.0765, + "step": 18874 + }, + { + "epoch": 2.65, + "learning_rate": 4.558487740969493e-05, + "loss": 0.0779, + "step": 18876 + }, + { + "epoch": 2.65, + "learning_rate": 4.558440950776717e-05, + "loss": 0.0693, + "step": 18878 + }, + { + "epoch": 2.65, + "learning_rate": 4.558394160583942e-05, + "loss": 0.0779, + "step": 18880 + }, + { + "epoch": 2.65, + "learning_rate": 4.558347370391166e-05, + "loss": 0.0706, + "step": 18882 + }, + { + "epoch": 2.65, + "learning_rate": 4.558300580198391e-05, + "loss": 0.0539, + "step": 18884 + }, + { + "epoch": 2.65, + "learning_rate": 4.5582537900056146e-05, + "loss": 0.0695, + "step": 18886 + }, + { + "epoch": 2.65, + "learning_rate": 4.55820699981284e-05, + "loss": 0.0581, + "step": 18888 + }, + { + "epoch": 2.65, + "learning_rate": 4.558160209620064e-05, + "loss": 0.0647, + "step": 18890 + }, + { + "epoch": 2.65, + "learning_rate": 4.5581134194272884e-05, + "loss": 0.0665, + "step": 18892 + }, + { + "epoch": 2.65, + "learning_rate": 4.558066629234512e-05, + "loss": 0.063, + "step": 18894 + }, + { + "epoch": 2.65, + "learning_rate": 4.558019839041737e-05, + "loss": 0.06, + "step": 18896 + }, + { + "epoch": 2.65, + "learning_rate": 4.5579730488489615e-05, + "loss": 0.0868, + "step": 18898 + }, + { + "epoch": 2.65, + "learning_rate": 4.557926258656186e-05, + "loss": 0.059, + "step": 18900 + }, + { + "epoch": 2.65, + "learning_rate": 4.55787946846341e-05, + "loss": 0.0526, + "step": 18902 + }, + { + "epoch": 2.65, + "learning_rate": 4.5578326782706346e-05, + "loss": 0.0748, + "step": 18904 + }, + { + "epoch": 2.65, + "learning_rate": 4.557785888077859e-05, + "loss": 0.069, + "step": 18906 + }, + { + "epoch": 2.65, + "learning_rate": 4.557739097885084e-05, + "loss": 0.0612, + "step": 18908 + }, + { + "epoch": 2.65, + "learning_rate": 4.557692307692308e-05, + "loss": 0.049, + "step": 18910 + }, + { + "epoch": 2.65, + "learning_rate": 4.557645517499532e-05, + "loss": 0.0793, + "step": 18912 + }, + { + "epoch": 2.65, + "learning_rate": 4.557598727306757e-05, + "loss": 0.0985, + "step": 18914 + }, + { + "epoch": 2.66, + "learning_rate": 4.5575519371139815e-05, + "loss": 0.0623, + "step": 18916 + }, + { + "epoch": 2.66, + "learning_rate": 4.5575051469212054e-05, + "loss": 0.0652, + "step": 18918 + }, + { + "epoch": 2.66, + "learning_rate": 4.55745835672843e-05, + "loss": 0.0655, + "step": 18920 + }, + { + "epoch": 2.66, + "learning_rate": 4.5574115665356546e-05, + "loss": 0.0721, + "step": 18922 + }, + { + "epoch": 2.66, + "learning_rate": 4.557364776342879e-05, + "loss": 0.0596, + "step": 18924 + }, + { + "epoch": 2.66, + "learning_rate": 4.557317986150103e-05, + "loss": 0.0819, + "step": 18926 + }, + { + "epoch": 2.66, + "learning_rate": 4.557271195957328e-05, + "loss": 0.0507, + "step": 18928 + }, + { + "epoch": 2.66, + "learning_rate": 4.5572244057645516e-05, + "loss": 0.0519, + "step": 18930 + }, + { + "epoch": 2.66, + "learning_rate": 4.557177615571777e-05, + "loss": 0.0694, + "step": 18932 + }, + { + "epoch": 2.66, + "learning_rate": 4.557130825379001e-05, + "loss": 0.0723, + "step": 18934 + }, + { + "epoch": 2.66, + "learning_rate": 4.5570840351862254e-05, + "loss": 0.0775, + "step": 18936 + }, + { + "epoch": 2.66, + "learning_rate": 4.557037244993449e-05, + "loss": 0.0823, + "step": 18938 + }, + { + "epoch": 2.66, + "learning_rate": 4.5569904548006745e-05, + "loss": 0.063, + "step": 18940 + }, + { + "epoch": 2.66, + "learning_rate": 4.5569436646078985e-05, + "loss": 0.0498, + "step": 18942 + }, + { + "epoch": 2.66, + "learning_rate": 4.556896874415123e-05, + "loss": 0.0713, + "step": 18944 + }, + { + "epoch": 2.66, + "learning_rate": 4.556850084222347e-05, + "loss": 0.0698, + "step": 18946 + }, + { + "epoch": 2.66, + "learning_rate": 4.5568032940295716e-05, + "loss": 0.065, + "step": 18948 + }, + { + "epoch": 2.66, + "learning_rate": 4.556756503836796e-05, + "loss": 0.0562, + "step": 18950 + }, + { + "epoch": 2.66, + "learning_rate": 4.556709713644021e-05, + "loss": 0.0549, + "step": 18952 + }, + { + "epoch": 2.66, + "learning_rate": 4.5566629234512447e-05, + "loss": 0.058, + "step": 18954 + }, + { + "epoch": 2.66, + "learning_rate": 4.556616133258469e-05, + "loss": 0.0707, + "step": 18956 + }, + { + "epoch": 2.66, + "learning_rate": 4.556569343065694e-05, + "loss": 0.0784, + "step": 18958 + }, + { + "epoch": 2.66, + "learning_rate": 4.5565225528729184e-05, + "loss": 0.057, + "step": 18960 + }, + { + "epoch": 2.66, + "learning_rate": 4.556475762680142e-05, + "loss": 0.0569, + "step": 18962 + }, + { + "epoch": 2.66, + "learning_rate": 4.556428972487367e-05, + "loss": 0.0696, + "step": 18964 + }, + { + "epoch": 2.66, + "learning_rate": 4.5563821822945915e-05, + "loss": 0.0619, + "step": 18966 + }, + { + "epoch": 2.66, + "learning_rate": 4.556335392101816e-05, + "loss": 0.0745, + "step": 18968 + }, + { + "epoch": 2.66, + "learning_rate": 4.55628860190904e-05, + "loss": 0.0691, + "step": 18970 + }, + { + "epoch": 2.66, + "learning_rate": 4.5562418117162646e-05, + "loss": 0.084, + "step": 18972 + }, + { + "epoch": 2.66, + "learning_rate": 4.556195021523489e-05, + "loss": 0.0603, + "step": 18974 + }, + { + "epoch": 2.66, + "learning_rate": 4.556148231330714e-05, + "loss": 0.0585, + "step": 18976 + }, + { + "epoch": 2.66, + "learning_rate": 4.556101441137938e-05, + "loss": 0.0505, + "step": 18978 + }, + { + "epoch": 2.66, + "learning_rate": 4.556054650945162e-05, + "loss": 0.0765, + "step": 18980 + }, + { + "epoch": 2.66, + "learning_rate": 4.556007860752386e-05, + "loss": 0.0823, + "step": 18982 + }, + { + "epoch": 2.66, + "learning_rate": 4.5559610705596115e-05, + "loss": 0.0715, + "step": 18984 + }, + { + "epoch": 2.67, + "learning_rate": 4.5559142803668354e-05, + "loss": 0.0713, + "step": 18986 + }, + { + "epoch": 2.67, + "learning_rate": 4.55586749017406e-05, + "loss": 0.078, + "step": 18988 + }, + { + "epoch": 2.67, + "learning_rate": 4.555820699981284e-05, + "loss": 0.0643, + "step": 18990 + }, + { + "epoch": 2.67, + "learning_rate": 4.5557739097885085e-05, + "loss": 0.057, + "step": 18992 + }, + { + "epoch": 2.67, + "learning_rate": 4.555727119595733e-05, + "loss": 0.067, + "step": 18994 + }, + { + "epoch": 2.67, + "learning_rate": 4.555680329402957e-05, + "loss": 0.0474, + "step": 18996 + }, + { + "epoch": 2.67, + "learning_rate": 4.5556335392101816e-05, + "loss": 0.0659, + "step": 18998 + }, + { + "epoch": 2.67, + "learning_rate": 4.555586749017406e-05, + "loss": 0.0504, + "step": 19000 + }, + { + "epoch": 2.67, + "eval_gen_len": 30.431, + "eval_loss": 1.0705924034118652, + "eval_meteor": 0.0441, + "eval_runtime": 16.562, + "eval_samples_per_second": 3.502, + "eval_steps_per_second": 0.483, + "step": 19000 + }, + { + "epoch": 2.67, + "learning_rate": 4.555539958824631e-05, + "loss": 0.0504, + "step": 19002 + }, + { + "epoch": 2.67, + "learning_rate": 4.555493168631855e-05, + "loss": 0.0731, + "step": 19004 + }, + { + "epoch": 2.67, + "learning_rate": 4.555446378439079e-05, + "loss": 0.0599, + "step": 19006 + }, + { + "epoch": 2.67, + "learning_rate": 4.555399588246303e-05, + "loss": 0.0748, + "step": 19008 + }, + { + "epoch": 2.67, + "learning_rate": 4.5553527980535285e-05, + "loss": 0.06, + "step": 19010 + }, + { + "epoch": 2.67, + "learning_rate": 4.5553060078607524e-05, + "loss": 0.0765, + "step": 19012 + }, + { + "epoch": 2.67, + "learning_rate": 4.555259217667977e-05, + "loss": 0.069, + "step": 19014 + }, + { + "epoch": 2.67, + "learning_rate": 4.555212427475201e-05, + "loss": 0.0649, + "step": 19016 + }, + { + "epoch": 2.67, + "learning_rate": 4.555165637282426e-05, + "loss": 0.0666, + "step": 19018 + }, + { + "epoch": 2.67, + "learning_rate": 4.55511884708965e-05, + "loss": 0.0585, + "step": 19020 + }, + { + "epoch": 2.67, + "learning_rate": 4.5550720568968747e-05, + "loss": 0.0764, + "step": 19022 + }, + { + "epoch": 2.67, + "learning_rate": 4.5550252667040986e-05, + "loss": 0.0548, + "step": 19024 + }, + { + "epoch": 2.67, + "learning_rate": 4.554978476511323e-05, + "loss": 0.0594, + "step": 19026 + }, + { + "epoch": 2.67, + "learning_rate": 4.554931686318548e-05, + "loss": 0.0696, + "step": 19028 + }, + { + "epoch": 2.67, + "learning_rate": 4.5548848961257723e-05, + "loss": 0.0481, + "step": 19030 + }, + { + "epoch": 2.67, + "learning_rate": 4.554838105932996e-05, + "loss": 0.0692, + "step": 19032 + }, + { + "epoch": 2.67, + "learning_rate": 4.554791315740221e-05, + "loss": 0.0682, + "step": 19034 + }, + { + "epoch": 2.67, + "learning_rate": 4.5547445255474454e-05, + "loss": 0.0632, + "step": 19036 + }, + { + "epoch": 2.67, + "learning_rate": 4.55469773535467e-05, + "loss": 0.0829, + "step": 19038 + }, + { + "epoch": 2.67, + "learning_rate": 4.554650945161894e-05, + "loss": 0.0775, + "step": 19040 + }, + { + "epoch": 2.67, + "learning_rate": 4.5546041549691185e-05, + "loss": 0.0518, + "step": 19042 + }, + { + "epoch": 2.67, + "learning_rate": 4.554557364776343e-05, + "loss": 0.0752, + "step": 19044 + }, + { + "epoch": 2.67, + "learning_rate": 4.554510574583568e-05, + "loss": 0.0581, + "step": 19046 + }, + { + "epoch": 2.67, + "learning_rate": 4.5544637843907916e-05, + "loss": 0.0826, + "step": 19048 + }, + { + "epoch": 2.67, + "learning_rate": 4.554416994198016e-05, + "loss": 0.0852, + "step": 19050 + }, + { + "epoch": 2.67, + "learning_rate": 4.554370204005241e-05, + "loss": 0.0631, + "step": 19052 + }, + { + "epoch": 2.67, + "learning_rate": 4.5543234138124654e-05, + "loss": 0.0701, + "step": 19054 + }, + { + "epoch": 2.67, + "learning_rate": 4.554276623619689e-05, + "loss": 0.0688, + "step": 19056 + }, + { + "epoch": 2.68, + "learning_rate": 4.554229833426914e-05, + "loss": 0.0678, + "step": 19058 + }, + { + "epoch": 2.68, + "learning_rate": 4.554183043234138e-05, + "loss": 0.0652, + "step": 19060 + }, + { + "epoch": 2.68, + "learning_rate": 4.554136253041363e-05, + "loss": 0.0785, + "step": 19062 + }, + { + "epoch": 2.68, + "learning_rate": 4.554089462848587e-05, + "loss": 0.0489, + "step": 19064 + }, + { + "epoch": 2.68, + "learning_rate": 4.5540426726558116e-05, + "loss": 0.0755, + "step": 19066 + }, + { + "epoch": 2.68, + "learning_rate": 4.5539958824630355e-05, + "loss": 0.0735, + "step": 19068 + }, + { + "epoch": 2.68, + "learning_rate": 4.553949092270261e-05, + "loss": 0.044, + "step": 19070 + }, + { + "epoch": 2.68, + "learning_rate": 4.553902302077485e-05, + "loss": 0.0729, + "step": 19072 + }, + { + "epoch": 2.68, + "learning_rate": 4.553855511884709e-05, + "loss": 0.0615, + "step": 19074 + }, + { + "epoch": 2.68, + "learning_rate": 4.553808721691933e-05, + "loss": 0.0526, + "step": 19076 + }, + { + "epoch": 2.68, + "learning_rate": 4.553761931499158e-05, + "loss": 0.0603, + "step": 19078 + }, + { + "epoch": 2.68, + "learning_rate": 4.5537151413063824e-05, + "loss": 0.087, + "step": 19080 + }, + { + "epoch": 2.68, + "learning_rate": 4.553668351113607e-05, + "loss": 0.0739, + "step": 19082 + }, + { + "epoch": 2.68, + "learning_rate": 4.553621560920831e-05, + "loss": 0.0643, + "step": 19084 + }, + { + "epoch": 2.68, + "learning_rate": 4.5535747707280555e-05, + "loss": 0.0575, + "step": 19086 + }, + { + "epoch": 2.68, + "learning_rate": 4.55352798053528e-05, + "loss": 0.0624, + "step": 19088 + }, + { + "epoch": 2.68, + "learning_rate": 4.5534811903425047e-05, + "loss": 0.0632, + "step": 19090 + }, + { + "epoch": 2.68, + "learning_rate": 4.5534344001497286e-05, + "loss": 0.0607, + "step": 19092 + }, + { + "epoch": 2.68, + "learning_rate": 4.553387609956953e-05, + "loss": 0.0689, + "step": 19094 + }, + { + "epoch": 2.68, + "learning_rate": 4.553340819764178e-05, + "loss": 0.0604, + "step": 19096 + }, + { + "epoch": 2.68, + "learning_rate": 4.5532940295714023e-05, + "loss": 0.0714, + "step": 19098 + }, + { + "epoch": 2.68, + "learning_rate": 4.553247239378626e-05, + "loss": 0.0531, + "step": 19100 + }, + { + "epoch": 2.68, + "learning_rate": 4.553200449185851e-05, + "loss": 0.0674, + "step": 19102 + }, + { + "epoch": 2.68, + "learning_rate": 4.5531536589930754e-05, + "loss": 0.052, + "step": 19104 + }, + { + "epoch": 2.68, + "learning_rate": 4.5531068688003e-05, + "loss": 0.0647, + "step": 19106 + }, + { + "epoch": 2.68, + "learning_rate": 4.553060078607524e-05, + "loss": 0.0566, + "step": 19108 + }, + { + "epoch": 2.68, + "learning_rate": 4.5530132884147485e-05, + "loss": 0.0649, + "step": 19110 + }, + { + "epoch": 2.68, + "learning_rate": 4.5529664982219724e-05, + "loss": 0.0778, + "step": 19112 + }, + { + "epoch": 2.68, + "learning_rate": 4.552919708029198e-05, + "loss": 0.0608, + "step": 19114 + }, + { + "epoch": 2.68, + "learning_rate": 4.5528729178364216e-05, + "loss": 0.0413, + "step": 19116 + }, + { + "epoch": 2.68, + "learning_rate": 4.552826127643646e-05, + "loss": 0.0594, + "step": 19118 + }, + { + "epoch": 2.68, + "learning_rate": 4.55277933745087e-05, + "loss": 0.0597, + "step": 19120 + }, + { + "epoch": 2.68, + "learning_rate": 4.552732547258095e-05, + "loss": 0.0633, + "step": 19122 + }, + { + "epoch": 2.68, + "learning_rate": 4.552685757065319e-05, + "loss": 0.05, + "step": 19124 + }, + { + "epoch": 2.68, + "learning_rate": 4.552638966872544e-05, + "loss": 0.073, + "step": 19126 + }, + { + "epoch": 2.69, + "learning_rate": 4.552592176679768e-05, + "loss": 0.0781, + "step": 19128 + }, + { + "epoch": 2.69, + "learning_rate": 4.5525453864869924e-05, + "loss": 0.0681, + "step": 19130 + }, + { + "epoch": 2.69, + "learning_rate": 4.552498596294217e-05, + "loss": 0.0609, + "step": 19132 + }, + { + "epoch": 2.69, + "learning_rate": 4.5524518061014416e-05, + "loss": 0.0785, + "step": 19134 + }, + { + "epoch": 2.69, + "learning_rate": 4.5524050159086655e-05, + "loss": 0.061, + "step": 19136 + }, + { + "epoch": 2.69, + "learning_rate": 4.55235822571589e-05, + "loss": 0.0505, + "step": 19138 + }, + { + "epoch": 2.69, + "learning_rate": 4.552311435523115e-05, + "loss": 0.0731, + "step": 19140 + }, + { + "epoch": 2.69, + "learning_rate": 4.552264645330339e-05, + "loss": 0.0712, + "step": 19142 + }, + { + "epoch": 2.69, + "learning_rate": 4.552217855137563e-05, + "loss": 0.0726, + "step": 19144 + }, + { + "epoch": 2.69, + "learning_rate": 4.552171064944788e-05, + "loss": 0.0575, + "step": 19146 + }, + { + "epoch": 2.69, + "learning_rate": 4.5521242747520124e-05, + "loss": 0.0604, + "step": 19148 + }, + { + "epoch": 2.69, + "learning_rate": 4.552077484559237e-05, + "loss": 0.0475, + "step": 19150 + }, + { + "epoch": 2.69, + "learning_rate": 4.552030694366461e-05, + "loss": 0.0734, + "step": 19152 + }, + { + "epoch": 2.69, + "learning_rate": 4.5519839041736855e-05, + "loss": 0.0659, + "step": 19154 + }, + { + "epoch": 2.69, + "learning_rate": 4.5519371139809094e-05, + "loss": 0.0629, + "step": 19156 + }, + { + "epoch": 2.69, + "learning_rate": 4.5518903237881347e-05, + "loss": 0.0676, + "step": 19158 + }, + { + "epoch": 2.69, + "learning_rate": 4.5518435335953586e-05, + "loss": 0.0714, + "step": 19160 + }, + { + "epoch": 2.69, + "learning_rate": 4.551796743402583e-05, + "loss": 0.0646, + "step": 19162 + }, + { + "epoch": 2.69, + "learning_rate": 4.551749953209807e-05, + "loss": 0.0534, + "step": 19164 + }, + { + "epoch": 2.69, + "learning_rate": 4.5517031630170323e-05, + "loss": 0.0719, + "step": 19166 + }, + { + "epoch": 2.69, + "learning_rate": 4.551656372824256e-05, + "loss": 0.0642, + "step": 19168 + }, + { + "epoch": 2.69, + "learning_rate": 4.551609582631481e-05, + "loss": 0.066, + "step": 19170 + }, + { + "epoch": 2.69, + "learning_rate": 4.551562792438705e-05, + "loss": 0.0562, + "step": 19172 + }, + { + "epoch": 2.69, + "learning_rate": 4.5515160022459294e-05, + "loss": 0.0592, + "step": 19174 + }, + { + "epoch": 2.69, + "learning_rate": 4.551469212053154e-05, + "loss": 0.0606, + "step": 19176 + }, + { + "epoch": 2.69, + "learning_rate": 4.5514224218603785e-05, + "loss": 0.0603, + "step": 19178 + }, + { + "epoch": 2.69, + "learning_rate": 4.5513756316676024e-05, + "loss": 0.068, + "step": 19180 + }, + { + "epoch": 2.69, + "learning_rate": 4.551328841474827e-05, + "loss": 0.0748, + "step": 19182 + }, + { + "epoch": 2.69, + "learning_rate": 4.5512820512820516e-05, + "loss": 0.0691, + "step": 19184 + }, + { + "epoch": 2.69, + "learning_rate": 4.551235261089276e-05, + "loss": 0.0742, + "step": 19186 + }, + { + "epoch": 2.69, + "learning_rate": 4.5511884708965e-05, + "loss": 0.0575, + "step": 19188 + }, + { + "epoch": 2.69, + "learning_rate": 4.551141680703725e-05, + "loss": 0.0545, + "step": 19190 + }, + { + "epoch": 2.69, + "learning_rate": 4.551094890510949e-05, + "loss": 0.0569, + "step": 19192 + }, + { + "epoch": 2.69, + "learning_rate": 4.551048100318174e-05, + "loss": 0.0844, + "step": 19194 + }, + { + "epoch": 2.69, + "learning_rate": 4.551001310125398e-05, + "loss": 0.0789, + "step": 19196 + }, + { + "epoch": 2.69, + "learning_rate": 4.5509545199326224e-05, + "loss": 0.0648, + "step": 19198 + }, + { + "epoch": 2.7, + "learning_rate": 4.550907729739847e-05, + "loss": 0.0764, + "step": 19200 + }, + { + "epoch": 2.7, + "learning_rate": 4.5508609395470716e-05, + "loss": 0.0917, + "step": 19202 + }, + { + "epoch": 2.7, + "learning_rate": 4.5508141493542955e-05, + "loss": 0.0543, + "step": 19204 + }, + { + "epoch": 2.7, + "learning_rate": 4.55076735916152e-05, + "loss": 0.0609, + "step": 19206 + }, + { + "epoch": 2.7, + "learning_rate": 4.550720568968744e-05, + "loss": 0.0483, + "step": 19208 + }, + { + "epoch": 2.7, + "learning_rate": 4.550673778775969e-05, + "loss": 0.0676, + "step": 19210 + }, + { + "epoch": 2.7, + "learning_rate": 4.550626988583193e-05, + "loss": 0.0832, + "step": 19212 + }, + { + "epoch": 2.7, + "learning_rate": 4.550580198390418e-05, + "loss": 0.0605, + "step": 19214 + }, + { + "epoch": 2.7, + "learning_rate": 4.550533408197642e-05, + "loss": 0.068, + "step": 19216 + }, + { + "epoch": 2.7, + "learning_rate": 4.550486618004867e-05, + "loss": 0.066, + "step": 19218 + }, + { + "epoch": 2.7, + "learning_rate": 4.550439827812091e-05, + "loss": 0.0515, + "step": 19220 + }, + { + "epoch": 2.7, + "learning_rate": 4.5503930376193155e-05, + "loss": 0.086, + "step": 19222 + }, + { + "epoch": 2.7, + "learning_rate": 4.5503462474265394e-05, + "loss": 0.0766, + "step": 19224 + }, + { + "epoch": 2.7, + "learning_rate": 4.550299457233764e-05, + "loss": 0.0666, + "step": 19226 + }, + { + "epoch": 2.7, + "learning_rate": 4.5502526670409886e-05, + "loss": 0.0722, + "step": 19228 + }, + { + "epoch": 2.7, + "learning_rate": 4.550205876848213e-05, + "loss": 0.0682, + "step": 19230 + }, + { + "epoch": 2.7, + "learning_rate": 4.550159086655437e-05, + "loss": 0.0736, + "step": 19232 + }, + { + "epoch": 2.7, + "learning_rate": 4.550112296462662e-05, + "loss": 0.0439, + "step": 19234 + }, + { + "epoch": 2.7, + "learning_rate": 4.550065506269886e-05, + "loss": 0.0654, + "step": 19236 + }, + { + "epoch": 2.7, + "learning_rate": 4.550018716077111e-05, + "loss": 0.0754, + "step": 19238 + }, + { + "epoch": 2.7, + "learning_rate": 4.549971925884335e-05, + "loss": 0.0553, + "step": 19240 + }, + { + "epoch": 2.7, + "learning_rate": 4.5499251356915594e-05, + "loss": 0.0719, + "step": 19242 + }, + { + "epoch": 2.7, + "learning_rate": 4.549878345498784e-05, + "loss": 0.0593, + "step": 19244 + }, + { + "epoch": 2.7, + "learning_rate": 4.5498315553060085e-05, + "loss": 0.0577, + "step": 19246 + }, + { + "epoch": 2.7, + "learning_rate": 4.5497847651132324e-05, + "loss": 0.0555, + "step": 19248 + }, + { + "epoch": 2.7, + "learning_rate": 4.5497379749204564e-05, + "loss": 0.0575, + "step": 19250 + }, + { + "epoch": 2.7, + "learning_rate": 4.5496911847276816e-05, + "loss": 0.0824, + "step": 19252 + }, + { + "epoch": 2.7, + "learning_rate": 4.5496443945349055e-05, + "loss": 0.0802, + "step": 19254 + }, + { + "epoch": 2.7, + "learning_rate": 4.54959760434213e-05, + "loss": 0.0755, + "step": 19256 + }, + { + "epoch": 2.7, + "learning_rate": 4.549550814149354e-05, + "loss": 0.0592, + "step": 19258 + }, + { + "epoch": 2.7, + "learning_rate": 4.5495040239565786e-05, + "loss": 0.0548, + "step": 19260 + }, + { + "epoch": 2.7, + "learning_rate": 4.549457233763803e-05, + "loss": 0.0653, + "step": 19262 + }, + { + "epoch": 2.7, + "learning_rate": 4.549410443571028e-05, + "loss": 0.055, + "step": 19264 + }, + { + "epoch": 2.7, + "learning_rate": 4.549363653378252e-05, + "loss": 0.0724, + "step": 19266 + }, + { + "epoch": 2.7, + "learning_rate": 4.549316863185476e-05, + "loss": 0.0656, + "step": 19268 + }, + { + "epoch": 2.7, + "learning_rate": 4.549270072992701e-05, + "loss": 0.0491, + "step": 19270 + }, + { + "epoch": 2.71, + "learning_rate": 4.5492232827999255e-05, + "loss": 0.0565, + "step": 19272 + }, + { + "epoch": 2.71, + "learning_rate": 4.5491764926071494e-05, + "loss": 0.0735, + "step": 19274 + }, + { + "epoch": 2.71, + "learning_rate": 4.549129702414374e-05, + "loss": 0.0564, + "step": 19276 + }, + { + "epoch": 2.71, + "learning_rate": 4.5490829122215986e-05, + "loss": 0.074, + "step": 19278 + }, + { + "epoch": 2.71, + "learning_rate": 4.549036122028823e-05, + "loss": 0.068, + "step": 19280 + }, + { + "epoch": 2.71, + "learning_rate": 4.548989331836047e-05, + "loss": 0.067, + "step": 19282 + }, + { + "epoch": 2.71, + "learning_rate": 4.548942541643272e-05, + "loss": 0.0689, + "step": 19284 + }, + { + "epoch": 2.71, + "learning_rate": 4.548895751450496e-05, + "loss": 0.0556, + "step": 19286 + }, + { + "epoch": 2.71, + "learning_rate": 4.548848961257721e-05, + "loss": 0.0582, + "step": 19288 + }, + { + "epoch": 2.71, + "learning_rate": 4.548802171064945e-05, + "loss": 0.0523, + "step": 19290 + }, + { + "epoch": 2.71, + "learning_rate": 4.5487553808721694e-05, + "loss": 0.0598, + "step": 19292 + }, + { + "epoch": 2.71, + "learning_rate": 4.548708590679393e-05, + "loss": 0.0523, + "step": 19294 + }, + { + "epoch": 2.71, + "learning_rate": 4.5486618004866186e-05, + "loss": 0.0419, + "step": 19296 + }, + { + "epoch": 2.71, + "learning_rate": 4.5486150102938425e-05, + "loss": 0.0432, + "step": 19298 + }, + { + "epoch": 2.71, + "learning_rate": 4.548568220101067e-05, + "loss": 0.059, + "step": 19300 + }, + { + "epoch": 2.71, + "learning_rate": 4.548521429908291e-05, + "loss": 0.077, + "step": 19302 + }, + { + "epoch": 2.71, + "learning_rate": 4.5484746397155156e-05, + "loss": 0.0592, + "step": 19304 + }, + { + "epoch": 2.71, + "learning_rate": 4.54842784952274e-05, + "loss": 0.0649, + "step": 19306 + }, + { + "epoch": 2.71, + "learning_rate": 4.548381059329965e-05, + "loss": 0.0603, + "step": 19308 + }, + { + "epoch": 2.71, + "learning_rate": 4.548334269137189e-05, + "loss": 0.074, + "step": 19310 + }, + { + "epoch": 2.71, + "learning_rate": 4.548287478944413e-05, + "loss": 0.0663, + "step": 19312 + }, + { + "epoch": 2.71, + "learning_rate": 4.548240688751638e-05, + "loss": 0.0613, + "step": 19314 + }, + { + "epoch": 2.71, + "learning_rate": 4.5481938985588625e-05, + "loss": 0.0723, + "step": 19316 + }, + { + "epoch": 2.71, + "learning_rate": 4.5481471083660864e-05, + "loss": 0.0665, + "step": 19318 + }, + { + "epoch": 2.71, + "learning_rate": 4.548100318173311e-05, + "loss": 0.08, + "step": 19320 + }, + { + "epoch": 2.71, + "learning_rate": 4.5480535279805355e-05, + "loss": 0.0713, + "step": 19322 + }, + { + "epoch": 2.71, + "learning_rate": 4.54800673778776e-05, + "loss": 0.0742, + "step": 19324 + }, + { + "epoch": 2.71, + "learning_rate": 4.547959947594984e-05, + "loss": 0.0652, + "step": 19326 + }, + { + "epoch": 2.71, + "learning_rate": 4.5479131574022086e-05, + "loss": 0.0818, + "step": 19328 + }, + { + "epoch": 2.71, + "learning_rate": 4.547866367209433e-05, + "loss": 0.0762, + "step": 19330 + }, + { + "epoch": 2.71, + "learning_rate": 4.547819577016658e-05, + "loss": 0.0653, + "step": 19332 + }, + { + "epoch": 2.71, + "learning_rate": 4.547772786823882e-05, + "loss": 0.0663, + "step": 19334 + }, + { + "epoch": 2.71, + "learning_rate": 4.547725996631106e-05, + "loss": 0.0596, + "step": 19336 + }, + { + "epoch": 2.71, + "learning_rate": 4.54767920643833e-05, + "loss": 0.0731, + "step": 19338 + }, + { + "epoch": 2.71, + "learning_rate": 4.5476324162455555e-05, + "loss": 0.0628, + "step": 19340 + }, + { + "epoch": 2.72, + "learning_rate": 4.5475856260527794e-05, + "loss": 0.0415, + "step": 19342 + }, + { + "epoch": 2.72, + "learning_rate": 4.547538835860004e-05, + "loss": 0.0531, + "step": 19344 + }, + { + "epoch": 2.72, + "learning_rate": 4.547492045667228e-05, + "loss": 0.088, + "step": 19346 + }, + { + "epoch": 2.72, + "learning_rate": 4.547445255474453e-05, + "loss": 0.084, + "step": 19348 + }, + { + "epoch": 2.72, + "learning_rate": 4.547398465281677e-05, + "loss": 0.0515, + "step": 19350 + }, + { + "epoch": 2.72, + "learning_rate": 4.547351675088902e-05, + "loss": 0.0747, + "step": 19352 + }, + { + "epoch": 2.72, + "learning_rate": 4.5473048848961256e-05, + "loss": 0.0598, + "step": 19354 + }, + { + "epoch": 2.72, + "learning_rate": 4.54725809470335e-05, + "loss": 0.0594, + "step": 19356 + }, + { + "epoch": 2.72, + "learning_rate": 4.547211304510575e-05, + "loss": 0.0726, + "step": 19358 + }, + { + "epoch": 2.72, + "learning_rate": 4.5471645143177994e-05, + "loss": 0.0763, + "step": 19360 + }, + { + "epoch": 2.72, + "learning_rate": 4.547117724125023e-05, + "loss": 0.0562, + "step": 19362 + }, + { + "epoch": 2.72, + "learning_rate": 4.547070933932248e-05, + "loss": 0.0957, + "step": 19364 + }, + { + "epoch": 2.72, + "learning_rate": 4.5470241437394725e-05, + "loss": 0.0545, + "step": 19366 + }, + { + "epoch": 2.72, + "learning_rate": 4.546977353546697e-05, + "loss": 0.0679, + "step": 19368 + }, + { + "epoch": 2.72, + "learning_rate": 4.546930563353921e-05, + "loss": 0.0555, + "step": 19370 + }, + { + "epoch": 2.72, + "learning_rate": 4.5468837731611456e-05, + "loss": 0.0656, + "step": 19372 + }, + { + "epoch": 2.72, + "learning_rate": 4.54683698296837e-05, + "loss": 0.0852, + "step": 19374 + }, + { + "epoch": 2.72, + "learning_rate": 4.546790192775595e-05, + "loss": 0.0689, + "step": 19376 + }, + { + "epoch": 2.72, + "learning_rate": 4.546743402582819e-05, + "loss": 0.068, + "step": 19378 + }, + { + "epoch": 2.72, + "learning_rate": 4.546696612390043e-05, + "loss": 0.0604, + "step": 19380 + }, + { + "epoch": 2.72, + "learning_rate": 4.546649822197268e-05, + "loss": 0.0525, + "step": 19382 + }, + { + "epoch": 2.72, + "learning_rate": 4.5466030320044925e-05, + "loss": 0.0712, + "step": 19384 + }, + { + "epoch": 2.72, + "learning_rate": 4.5465562418117164e-05, + "loss": 0.0672, + "step": 19386 + }, + { + "epoch": 2.72, + "learning_rate": 4.546509451618941e-05, + "loss": 0.0593, + "step": 19388 + }, + { + "epoch": 2.72, + "learning_rate": 4.546462661426165e-05, + "loss": 0.0772, + "step": 19390 + }, + { + "epoch": 2.72, + "learning_rate": 4.54641587123339e-05, + "loss": 0.0518, + "step": 19392 + }, + { + "epoch": 2.72, + "learning_rate": 4.546369081040614e-05, + "loss": 0.0641, + "step": 19394 + }, + { + "epoch": 2.72, + "learning_rate": 4.5463222908478386e-05, + "loss": 0.0677, + "step": 19396 + }, + { + "epoch": 2.72, + "learning_rate": 4.5462755006550626e-05, + "loss": 0.0702, + "step": 19398 + }, + { + "epoch": 2.72, + "learning_rate": 4.546228710462287e-05, + "loss": 0.067, + "step": 19400 + }, + { + "epoch": 2.72, + "learning_rate": 4.546181920269512e-05, + "loss": 0.0771, + "step": 19402 + }, + { + "epoch": 2.72, + "learning_rate": 4.546135130076736e-05, + "loss": 0.0658, + "step": 19404 + }, + { + "epoch": 2.72, + "learning_rate": 4.54608833988396e-05, + "loss": 0.0665, + "step": 19406 + }, + { + "epoch": 2.72, + "learning_rate": 4.546041549691185e-05, + "loss": 0.0538, + "step": 19408 + }, + { + "epoch": 2.72, + "learning_rate": 4.5459947594984094e-05, + "loss": 0.0468, + "step": 19410 + }, + { + "epoch": 2.72, + "learning_rate": 4.545947969305634e-05, + "loss": 0.0793, + "step": 19412 + }, + { + "epoch": 2.73, + "learning_rate": 4.545901179112858e-05, + "loss": 0.0563, + "step": 19414 + }, + { + "epoch": 2.73, + "learning_rate": 4.5458543889200825e-05, + "loss": 0.0623, + "step": 19416 + }, + { + "epoch": 2.73, + "learning_rate": 4.545807598727307e-05, + "loss": 0.0568, + "step": 19418 + }, + { + "epoch": 2.73, + "learning_rate": 4.545760808534532e-05, + "loss": 0.0633, + "step": 19420 + }, + { + "epoch": 2.73, + "learning_rate": 4.5457140183417556e-05, + "loss": 0.0618, + "step": 19422 + }, + { + "epoch": 2.73, + "learning_rate": 4.54566722814898e-05, + "loss": 0.0648, + "step": 19424 + }, + { + "epoch": 2.73, + "learning_rate": 4.545620437956205e-05, + "loss": 0.0549, + "step": 19426 + }, + { + "epoch": 2.73, + "learning_rate": 4.5455736477634294e-05, + "loss": 0.0495, + "step": 19428 + }, + { + "epoch": 2.73, + "learning_rate": 4.545526857570653e-05, + "loss": 0.0602, + "step": 19430 + }, + { + "epoch": 2.73, + "learning_rate": 4.545480067377878e-05, + "loss": 0.0654, + "step": 19432 + }, + { + "epoch": 2.73, + "learning_rate": 4.545433277185102e-05, + "loss": 0.0697, + "step": 19434 + }, + { + "epoch": 2.73, + "learning_rate": 4.545386486992327e-05, + "loss": 0.0556, + "step": 19436 + }, + { + "epoch": 2.73, + "learning_rate": 4.545339696799551e-05, + "loss": 0.0901, + "step": 19438 + }, + { + "epoch": 2.73, + "learning_rate": 4.5452929066067756e-05, + "loss": 0.0668, + "step": 19440 + }, + { + "epoch": 2.73, + "learning_rate": 4.5452461164139995e-05, + "loss": 0.053, + "step": 19442 + }, + { + "epoch": 2.73, + "learning_rate": 4.545199326221225e-05, + "loss": 0.0725, + "step": 19444 + }, + { + "epoch": 2.73, + "learning_rate": 4.545152536028449e-05, + "loss": 0.0628, + "step": 19446 + }, + { + "epoch": 2.73, + "learning_rate": 4.545105745835673e-05, + "loss": 0.0901, + "step": 19448 + }, + { + "epoch": 2.73, + "learning_rate": 4.545058955642897e-05, + "loss": 0.0553, + "step": 19450 + }, + { + "epoch": 2.73, + "learning_rate": 4.545012165450122e-05, + "loss": 0.0812, + "step": 19452 + }, + { + "epoch": 2.73, + "learning_rate": 4.5449653752573464e-05, + "loss": 0.0603, + "step": 19454 + }, + { + "epoch": 2.73, + "learning_rate": 4.544918585064571e-05, + "loss": 0.0691, + "step": 19456 + }, + { + "epoch": 2.73, + "learning_rate": 4.544871794871795e-05, + "loss": 0.0566, + "step": 19458 + }, + { + "epoch": 2.73, + "learning_rate": 4.5448250046790195e-05, + "loss": 0.0724, + "step": 19460 + }, + { + "epoch": 2.73, + "learning_rate": 4.544778214486244e-05, + "loss": 0.0753, + "step": 19462 + }, + { + "epoch": 2.73, + "learning_rate": 4.5447314242934686e-05, + "loss": 0.0713, + "step": 19464 + }, + { + "epoch": 2.73, + "learning_rate": 4.5446846341006926e-05, + "loss": 0.0683, + "step": 19466 + }, + { + "epoch": 2.73, + "learning_rate": 4.544637843907917e-05, + "loss": 0.0846, + "step": 19468 + }, + { + "epoch": 2.73, + "learning_rate": 4.544591053715142e-05, + "loss": 0.0595, + "step": 19470 + }, + { + "epoch": 2.73, + "learning_rate": 4.544544263522366e-05, + "loss": 0.0471, + "step": 19472 + }, + { + "epoch": 2.73, + "learning_rate": 4.54449747332959e-05, + "loss": 0.0723, + "step": 19474 + }, + { + "epoch": 2.73, + "learning_rate": 4.544450683136815e-05, + "loss": 0.0627, + "step": 19476 + }, + { + "epoch": 2.73, + "learning_rate": 4.5444038929440394e-05, + "loss": 0.0688, + "step": 19478 + }, + { + "epoch": 2.73, + "learning_rate": 4.544357102751264e-05, + "loss": 0.069, + "step": 19480 + }, + { + "epoch": 2.73, + "learning_rate": 4.544310312558488e-05, + "loss": 0.0623, + "step": 19482 + }, + { + "epoch": 2.73, + "learning_rate": 4.5442635223657125e-05, + "loss": 0.0502, + "step": 19484 + }, + { + "epoch": 2.74, + "learning_rate": 4.5442167321729364e-05, + "loss": 0.0588, + "step": 19486 + }, + { + "epoch": 2.74, + "learning_rate": 4.544169941980162e-05, + "loss": 0.0726, + "step": 19488 + }, + { + "epoch": 2.74, + "learning_rate": 4.5441231517873856e-05, + "loss": 0.0762, + "step": 19490 + }, + { + "epoch": 2.74, + "learning_rate": 4.54407636159461e-05, + "loss": 0.0669, + "step": 19492 + }, + { + "epoch": 2.74, + "learning_rate": 4.544029571401834e-05, + "loss": 0.0543, + "step": 19494 + }, + { + "epoch": 2.74, + "learning_rate": 4.5439827812090594e-05, + "loss": 0.0528, + "step": 19496 + }, + { + "epoch": 2.74, + "learning_rate": 4.543935991016283e-05, + "loss": 0.0633, + "step": 19498 + }, + { + "epoch": 2.74, + "learning_rate": 4.543889200823508e-05, + "loss": 0.0552, + "step": 19500 + }, + { + "epoch": 2.74, + "learning_rate": 4.543842410630732e-05, + "loss": 0.0763, + "step": 19502 + }, + { + "epoch": 2.74, + "learning_rate": 4.5437956204379564e-05, + "loss": 0.0545, + "step": 19504 + }, + { + "epoch": 2.74, + "learning_rate": 4.543748830245181e-05, + "loss": 0.0695, + "step": 19506 + }, + { + "epoch": 2.74, + "learning_rate": 4.543702040052405e-05, + "loss": 0.069, + "step": 19508 + }, + { + "epoch": 2.74, + "learning_rate": 4.5436552498596295e-05, + "loss": 0.0894, + "step": 19510 + }, + { + "epoch": 2.74, + "learning_rate": 4.543608459666854e-05, + "loss": 0.0437, + "step": 19512 + }, + { + "epoch": 2.74, + "learning_rate": 4.543561669474079e-05, + "loss": 0.0792, + "step": 19514 + }, + { + "epoch": 2.74, + "learning_rate": 4.5435148792813026e-05, + "loss": 0.0734, + "step": 19516 + }, + { + "epoch": 2.74, + "learning_rate": 4.543468089088527e-05, + "loss": 0.0431, + "step": 19518 + }, + { + "epoch": 2.74, + "learning_rate": 4.543421298895751e-05, + "loss": 0.0727, + "step": 19520 + }, + { + "epoch": 2.74, + "learning_rate": 4.5433745087029764e-05, + "loss": 0.0816, + "step": 19522 + }, + { + "epoch": 2.74, + "learning_rate": 4.5433277185102e-05, + "loss": 0.0608, + "step": 19524 + }, + { + "epoch": 2.74, + "learning_rate": 4.543280928317425e-05, + "loss": 0.0633, + "step": 19526 + }, + { + "epoch": 2.74, + "learning_rate": 4.543234138124649e-05, + "loss": 0.0703, + "step": 19528 + }, + { + "epoch": 2.74, + "learning_rate": 4.543187347931874e-05, + "loss": 0.0581, + "step": 19530 + }, + { + "epoch": 2.74, + "learning_rate": 4.543140557739098e-05, + "loss": 0.0641, + "step": 19532 + }, + { + "epoch": 2.74, + "learning_rate": 4.5430937675463226e-05, + "loss": 0.0694, + "step": 19534 + }, + { + "epoch": 2.74, + "learning_rate": 4.5430469773535465e-05, + "loss": 0.0527, + "step": 19536 + }, + { + "epoch": 2.74, + "learning_rate": 4.543000187160771e-05, + "loss": 0.0524, + "step": 19538 + }, + { + "epoch": 2.74, + "learning_rate": 4.5429533969679957e-05, + "loss": 0.0767, + "step": 19540 + }, + { + "epoch": 2.74, + "learning_rate": 4.54290660677522e-05, + "loss": 0.0624, + "step": 19542 + }, + { + "epoch": 2.74, + "learning_rate": 4.542859816582444e-05, + "loss": 0.0769, + "step": 19544 + }, + { + "epoch": 2.74, + "learning_rate": 4.542813026389669e-05, + "loss": 0.0615, + "step": 19546 + }, + { + "epoch": 2.74, + "learning_rate": 4.5427662361968933e-05, + "loss": 0.0767, + "step": 19548 + }, + { + "epoch": 2.74, + "learning_rate": 4.542719446004118e-05, + "loss": 0.0764, + "step": 19550 + }, + { + "epoch": 2.74, + "learning_rate": 4.542672655811342e-05, + "loss": 0.0618, + "step": 19552 + }, + { + "epoch": 2.74, + "learning_rate": 4.5426258656185664e-05, + "loss": 0.0635, + "step": 19554 + }, + { + "epoch": 2.75, + "learning_rate": 4.542579075425791e-05, + "loss": 0.078, + "step": 19556 + }, + { + "epoch": 2.75, + "learning_rate": 4.5425322852330156e-05, + "loss": 0.0589, + "step": 19558 + }, + { + "epoch": 2.75, + "learning_rate": 4.5424854950402395e-05, + "loss": 0.0724, + "step": 19560 + }, + { + "epoch": 2.75, + "learning_rate": 4.542438704847464e-05, + "loss": 0.0552, + "step": 19562 + }, + { + "epoch": 2.75, + "learning_rate": 4.542391914654689e-05, + "loss": 0.0413, + "step": 19564 + }, + { + "epoch": 2.75, + "learning_rate": 4.542345124461913e-05, + "loss": 0.0676, + "step": 19566 + }, + { + "epoch": 2.75, + "learning_rate": 4.542298334269137e-05, + "loss": 0.0735, + "step": 19568 + }, + { + "epoch": 2.75, + "learning_rate": 4.542251544076362e-05, + "loss": 0.0909, + "step": 19570 + }, + { + "epoch": 2.75, + "learning_rate": 4.542204753883586e-05, + "loss": 0.0715, + "step": 19572 + }, + { + "epoch": 2.75, + "learning_rate": 4.542157963690811e-05, + "loss": 0.0639, + "step": 19574 + }, + { + "epoch": 2.75, + "learning_rate": 4.542111173498035e-05, + "loss": 0.0744, + "step": 19576 + }, + { + "epoch": 2.75, + "learning_rate": 4.5420643833052595e-05, + "loss": 0.0783, + "step": 19578 + }, + { + "epoch": 2.75, + "learning_rate": 4.5420175931124834e-05, + "loss": 0.0552, + "step": 19580 + }, + { + "epoch": 2.75, + "learning_rate": 4.541970802919708e-05, + "loss": 0.0751, + "step": 19582 + }, + { + "epoch": 2.75, + "learning_rate": 4.5419240127269326e-05, + "loss": 0.0764, + "step": 19584 + }, + { + "epoch": 2.75, + "learning_rate": 4.541877222534157e-05, + "loss": 0.0739, + "step": 19586 + }, + { + "epoch": 2.75, + "learning_rate": 4.541830432341381e-05, + "loss": 0.0482, + "step": 19588 + }, + { + "epoch": 2.75, + "learning_rate": 4.541783642148606e-05, + "loss": 0.0758, + "step": 19590 + }, + { + "epoch": 2.75, + "learning_rate": 4.54173685195583e-05, + "loss": 0.0751, + "step": 19592 + }, + { + "epoch": 2.75, + "learning_rate": 4.541690061763055e-05, + "loss": 0.0566, + "step": 19594 + }, + { + "epoch": 2.75, + "learning_rate": 4.541643271570279e-05, + "loss": 0.0504, + "step": 19596 + }, + { + "epoch": 2.75, + "learning_rate": 4.5415964813775034e-05, + "loss": 0.0535, + "step": 19598 + }, + { + "epoch": 2.75, + "learning_rate": 4.541549691184728e-05, + "loss": 0.0662, + "step": 19600 + }, + { + "epoch": 2.75, + "learning_rate": 4.5415029009919526e-05, + "loss": 0.0714, + "step": 19602 + }, + { + "epoch": 2.75, + "learning_rate": 4.5414561107991765e-05, + "loss": 0.0624, + "step": 19604 + }, + { + "epoch": 2.75, + "learning_rate": 4.541409320606401e-05, + "loss": 0.0571, + "step": 19606 + }, + { + "epoch": 2.75, + "learning_rate": 4.5413625304136257e-05, + "loss": 0.054, + "step": 19608 + }, + { + "epoch": 2.75, + "learning_rate": 4.54131574022085e-05, + "loss": 0.0696, + "step": 19610 + }, + { + "epoch": 2.75, + "learning_rate": 4.541268950028074e-05, + "loss": 0.0785, + "step": 19612 + }, + { + "epoch": 2.75, + "learning_rate": 4.541222159835299e-05, + "loss": 0.0435, + "step": 19614 + }, + { + "epoch": 2.75, + "learning_rate": 4.541175369642523e-05, + "loss": 0.0757, + "step": 19616 + }, + { + "epoch": 2.75, + "learning_rate": 4.541128579449748e-05, + "loss": 0.0575, + "step": 19618 + }, + { + "epoch": 2.75, + "learning_rate": 4.541081789256972e-05, + "loss": 0.089, + "step": 19620 + }, + { + "epoch": 2.75, + "learning_rate": 4.5410349990641964e-05, + "loss": 0.0588, + "step": 19622 + }, + { + "epoch": 2.75, + "learning_rate": 4.5409882088714204e-05, + "loss": 0.0677, + "step": 19624 + }, + { + "epoch": 2.75, + "learning_rate": 4.5409414186786456e-05, + "loss": 0.0845, + "step": 19626 + }, + { + "epoch": 2.76, + "learning_rate": 4.5408946284858695e-05, + "loss": 0.0549, + "step": 19628 + }, + { + "epoch": 2.76, + "learning_rate": 4.540847838293094e-05, + "loss": 0.0841, + "step": 19630 + }, + { + "epoch": 2.76, + "learning_rate": 4.540801048100318e-05, + "loss": 0.0744, + "step": 19632 + }, + { + "epoch": 2.76, + "learning_rate": 4.5407542579075426e-05, + "loss": 0.0634, + "step": 19634 + }, + { + "epoch": 2.76, + "learning_rate": 4.540707467714767e-05, + "loss": 0.0807, + "step": 19636 + }, + { + "epoch": 2.76, + "learning_rate": 4.540660677521992e-05, + "loss": 0.0731, + "step": 19638 + }, + { + "epoch": 2.76, + "learning_rate": 4.540613887329216e-05, + "loss": 0.0739, + "step": 19640 + }, + { + "epoch": 2.76, + "learning_rate": 4.54056709713644e-05, + "loss": 0.0634, + "step": 19642 + }, + { + "epoch": 2.76, + "learning_rate": 4.540520306943665e-05, + "loss": 0.0718, + "step": 19644 + }, + { + "epoch": 2.76, + "learning_rate": 4.5404735167508895e-05, + "loss": 0.0679, + "step": 19646 + }, + { + "epoch": 2.76, + "learning_rate": 4.5404267265581134e-05, + "loss": 0.065, + "step": 19648 + }, + { + "epoch": 2.76, + "learning_rate": 4.540379936365338e-05, + "loss": 0.0652, + "step": 19650 + }, + { + "epoch": 2.76, + "learning_rate": 4.5403331461725626e-05, + "loss": 0.0818, + "step": 19652 + }, + { + "epoch": 2.76, + "learning_rate": 4.540286355979787e-05, + "loss": 0.0758, + "step": 19654 + }, + { + "epoch": 2.76, + "learning_rate": 4.540239565787011e-05, + "loss": 0.0519, + "step": 19656 + }, + { + "epoch": 2.76, + "learning_rate": 4.540192775594236e-05, + "loss": 0.061, + "step": 19658 + }, + { + "epoch": 2.76, + "learning_rate": 4.54014598540146e-05, + "loss": 0.0553, + "step": 19660 + }, + { + "epoch": 2.76, + "learning_rate": 4.540099195208685e-05, + "loss": 0.0802, + "step": 19662 + }, + { + "epoch": 2.76, + "learning_rate": 4.540052405015909e-05, + "loss": 0.0617, + "step": 19664 + }, + { + "epoch": 2.76, + "learning_rate": 4.5400056148231334e-05, + "loss": 0.0555, + "step": 19666 + }, + { + "epoch": 2.76, + "learning_rate": 4.539958824630357e-05, + "loss": 0.0882, + "step": 19668 + }, + { + "epoch": 2.76, + "learning_rate": 4.5399120344375826e-05, + "loss": 0.0765, + "step": 19670 + }, + { + "epoch": 2.76, + "learning_rate": 4.5398652442448065e-05, + "loss": 0.0644, + "step": 19672 + }, + { + "epoch": 2.76, + "learning_rate": 4.539818454052031e-05, + "loss": 0.0619, + "step": 19674 + }, + { + "epoch": 2.76, + "learning_rate": 4.539771663859255e-05, + "loss": 0.0749, + "step": 19676 + }, + { + "epoch": 2.76, + "learning_rate": 4.53972487366648e-05, + "loss": 0.0661, + "step": 19678 + }, + { + "epoch": 2.76, + "learning_rate": 4.539678083473704e-05, + "loss": 0.0654, + "step": 19680 + }, + { + "epoch": 2.76, + "learning_rate": 4.539631293280929e-05, + "loss": 0.0752, + "step": 19682 + }, + { + "epoch": 2.76, + "learning_rate": 4.539584503088153e-05, + "loss": 0.0627, + "step": 19684 + }, + { + "epoch": 2.76, + "learning_rate": 4.539537712895377e-05, + "loss": 0.0608, + "step": 19686 + }, + { + "epoch": 2.76, + "learning_rate": 4.539490922702602e-05, + "loss": 0.0715, + "step": 19688 + }, + { + "epoch": 2.76, + "learning_rate": 4.5394441325098264e-05, + "loss": 0.0614, + "step": 19690 + }, + { + "epoch": 2.76, + "learning_rate": 4.5393973423170504e-05, + "loss": 0.071, + "step": 19692 + }, + { + "epoch": 2.76, + "learning_rate": 4.539350552124275e-05, + "loss": 0.0462, + "step": 19694 + }, + { + "epoch": 2.76, + "learning_rate": 4.5393037619314995e-05, + "loss": 0.0656, + "step": 19696 + }, + { + "epoch": 2.77, + "learning_rate": 4.539256971738724e-05, + "loss": 0.0698, + "step": 19698 + }, + { + "epoch": 2.77, + "learning_rate": 4.539210181545948e-05, + "loss": 0.0709, + "step": 19700 + }, + { + "epoch": 2.77, + "learning_rate": 4.5391633913531726e-05, + "loss": 0.0644, + "step": 19702 + }, + { + "epoch": 2.77, + "learning_rate": 4.539116601160397e-05, + "loss": 0.0823, + "step": 19704 + }, + { + "epoch": 2.77, + "learning_rate": 4.539069810967622e-05, + "loss": 0.0657, + "step": 19706 + }, + { + "epoch": 2.77, + "learning_rate": 4.539023020774846e-05, + "loss": 0.0682, + "step": 19708 + }, + { + "epoch": 2.77, + "learning_rate": 4.53897623058207e-05, + "loss": 0.0574, + "step": 19710 + }, + { + "epoch": 2.77, + "learning_rate": 4.538929440389294e-05, + "loss": 0.0621, + "step": 19712 + }, + { + "epoch": 2.77, + "learning_rate": 4.5388826501965195e-05, + "loss": 0.0777, + "step": 19714 + }, + { + "epoch": 2.77, + "learning_rate": 4.5388358600037434e-05, + "loss": 0.0759, + "step": 19716 + }, + { + "epoch": 2.77, + "learning_rate": 4.538789069810968e-05, + "loss": 0.0735, + "step": 19718 + }, + { + "epoch": 2.77, + "learning_rate": 4.538742279618192e-05, + "loss": 0.0611, + "step": 19720 + }, + { + "epoch": 2.77, + "learning_rate": 4.538695489425417e-05, + "loss": 0.0881, + "step": 19722 + }, + { + "epoch": 2.77, + "learning_rate": 4.538648699232641e-05, + "loss": 0.0563, + "step": 19724 + }, + { + "epoch": 2.77, + "learning_rate": 4.538601909039866e-05, + "loss": 0.0561, + "step": 19726 + }, + { + "epoch": 2.77, + "learning_rate": 4.5385551188470896e-05, + "loss": 0.0569, + "step": 19728 + }, + { + "epoch": 2.77, + "learning_rate": 4.538508328654314e-05, + "loss": 0.077, + "step": 19730 + }, + { + "epoch": 2.77, + "learning_rate": 4.538461538461539e-05, + "loss": 0.0643, + "step": 19732 + }, + { + "epoch": 2.77, + "learning_rate": 4.5384147482687634e-05, + "loss": 0.0555, + "step": 19734 + }, + { + "epoch": 2.77, + "learning_rate": 4.538367958075987e-05, + "loss": 0.0595, + "step": 19736 + }, + { + "epoch": 2.77, + "learning_rate": 4.538321167883212e-05, + "loss": 0.0582, + "step": 19738 + }, + { + "epoch": 2.77, + "learning_rate": 4.5382743776904365e-05, + "loss": 0.0724, + "step": 19740 + }, + { + "epoch": 2.77, + "learning_rate": 4.538227587497661e-05, + "loss": 0.0678, + "step": 19742 + }, + { + "epoch": 2.77, + "learning_rate": 4.538180797304885e-05, + "loss": 0.0564, + "step": 19744 + }, + { + "epoch": 2.77, + "learning_rate": 4.5381340071121096e-05, + "loss": 0.0886, + "step": 19746 + }, + { + "epoch": 2.77, + "learning_rate": 4.538087216919334e-05, + "loss": 0.0591, + "step": 19748 + }, + { + "epoch": 2.77, + "learning_rate": 4.538040426726559e-05, + "loss": 0.0716, + "step": 19750 + }, + { + "epoch": 2.77, + "learning_rate": 4.537993636533783e-05, + "loss": 0.0814, + "step": 19752 + }, + { + "epoch": 2.77, + "learning_rate": 4.537946846341007e-05, + "loss": 0.0917, + "step": 19754 + }, + { + "epoch": 2.77, + "learning_rate": 4.537900056148232e-05, + "loss": 0.0526, + "step": 19756 + }, + { + "epoch": 2.77, + "learning_rate": 4.537853265955456e-05, + "loss": 0.067, + "step": 19758 + }, + { + "epoch": 2.77, + "learning_rate": 4.5378064757626804e-05, + "loss": 0.0629, + "step": 19760 + }, + { + "epoch": 2.77, + "learning_rate": 4.537759685569904e-05, + "loss": 0.0868, + "step": 19762 + }, + { + "epoch": 2.77, + "learning_rate": 4.537712895377129e-05, + "loss": 0.077, + "step": 19764 + }, + { + "epoch": 2.77, + "learning_rate": 4.5376661051843535e-05, + "loss": 0.071, + "step": 19766 + }, + { + "epoch": 2.77, + "learning_rate": 4.537619314991578e-05, + "loss": 0.0748, + "step": 19768 + }, + { + "epoch": 2.78, + "learning_rate": 4.537572524798802e-05, + "loss": 0.0775, + "step": 19770 + }, + { + "epoch": 2.78, + "learning_rate": 4.5375257346060266e-05, + "loss": 0.0652, + "step": 19772 + }, + { + "epoch": 2.78, + "learning_rate": 4.537478944413251e-05, + "loss": 0.07, + "step": 19774 + }, + { + "epoch": 2.78, + "learning_rate": 4.537432154220476e-05, + "loss": 0.0767, + "step": 19776 + }, + { + "epoch": 2.78, + "learning_rate": 4.5373853640276996e-05, + "loss": 0.0756, + "step": 19778 + }, + { + "epoch": 2.78, + "learning_rate": 4.537338573834924e-05, + "loss": 0.0569, + "step": 19780 + }, + { + "epoch": 2.78, + "learning_rate": 4.537291783642149e-05, + "loss": 0.0516, + "step": 19782 + }, + { + "epoch": 2.78, + "learning_rate": 4.5372449934493734e-05, + "loss": 0.0617, + "step": 19784 + }, + { + "epoch": 2.78, + "learning_rate": 4.537198203256597e-05, + "loss": 0.0691, + "step": 19786 + }, + { + "epoch": 2.78, + "learning_rate": 4.537151413063822e-05, + "loss": 0.0813, + "step": 19788 + }, + { + "epoch": 2.78, + "learning_rate": 4.5371046228710465e-05, + "loss": 0.0938, + "step": 19790 + }, + { + "epoch": 2.78, + "learning_rate": 4.537057832678271e-05, + "loss": 0.0675, + "step": 19792 + }, + { + "epoch": 2.78, + "learning_rate": 4.537011042485495e-05, + "loss": 0.0616, + "step": 19794 + }, + { + "epoch": 2.78, + "learning_rate": 4.5369642522927196e-05, + "loss": 0.0643, + "step": 19796 + }, + { + "epoch": 2.78, + "learning_rate": 4.5369174620999435e-05, + "loss": 0.0681, + "step": 19798 + }, + { + "epoch": 2.78, + "learning_rate": 4.536870671907169e-05, + "loss": 0.0562, + "step": 19800 + }, + { + "epoch": 2.78, + "learning_rate": 4.536823881714393e-05, + "loss": 0.0901, + "step": 19802 + }, + { + "epoch": 2.78, + "learning_rate": 4.536777091521617e-05, + "loss": 0.0784, + "step": 19804 + }, + { + "epoch": 2.78, + "learning_rate": 4.536730301328841e-05, + "loss": 0.0787, + "step": 19806 + }, + { + "epoch": 2.78, + "learning_rate": 4.5366835111360665e-05, + "loss": 0.0533, + "step": 19808 + }, + { + "epoch": 2.78, + "learning_rate": 4.5366367209432904e-05, + "loss": 0.0456, + "step": 19810 + }, + { + "epoch": 2.78, + "learning_rate": 4.536589930750515e-05, + "loss": 0.0672, + "step": 19812 + }, + { + "epoch": 2.78, + "learning_rate": 4.536543140557739e-05, + "loss": 0.0406, + "step": 19814 + }, + { + "epoch": 2.78, + "learning_rate": 4.5364963503649635e-05, + "loss": 0.0633, + "step": 19816 + }, + { + "epoch": 2.78, + "learning_rate": 4.536449560172188e-05, + "loss": 0.0619, + "step": 19818 + }, + { + "epoch": 2.78, + "learning_rate": 4.536402769979413e-05, + "loss": 0.0761, + "step": 19820 + }, + { + "epoch": 2.78, + "learning_rate": 4.5363559797866366e-05, + "loss": 0.0581, + "step": 19822 + }, + { + "epoch": 2.78, + "learning_rate": 4.536309189593861e-05, + "loss": 0.0584, + "step": 19824 + }, + { + "epoch": 2.78, + "learning_rate": 4.536262399401086e-05, + "loss": 0.064, + "step": 19826 + }, + { + "epoch": 2.78, + "learning_rate": 4.5362156092083104e-05, + "loss": 0.0677, + "step": 19828 + }, + { + "epoch": 2.78, + "learning_rate": 4.536168819015534e-05, + "loss": 0.0465, + "step": 19830 + }, + { + "epoch": 2.78, + "learning_rate": 4.536122028822759e-05, + "loss": 0.0571, + "step": 19832 + }, + { + "epoch": 2.78, + "learning_rate": 4.5360752386299835e-05, + "loss": 0.0684, + "step": 19834 + }, + { + "epoch": 2.78, + "learning_rate": 4.536028448437208e-05, + "loss": 0.0612, + "step": 19836 + }, + { + "epoch": 2.78, + "learning_rate": 4.535981658244432e-05, + "loss": 0.054, + "step": 19838 + }, + { + "epoch": 2.78, + "learning_rate": 4.5359348680516566e-05, + "loss": 0.0587, + "step": 19840 + }, + { + "epoch": 2.79, + "learning_rate": 4.535888077858881e-05, + "loss": 0.0657, + "step": 19842 + }, + { + "epoch": 2.79, + "learning_rate": 4.535841287666106e-05, + "loss": 0.0591, + "step": 19844 + }, + { + "epoch": 2.79, + "learning_rate": 4.5357944974733296e-05, + "loss": 0.0682, + "step": 19846 + }, + { + "epoch": 2.79, + "learning_rate": 4.535747707280554e-05, + "loss": 0.0813, + "step": 19848 + }, + { + "epoch": 2.79, + "learning_rate": 4.535700917087778e-05, + "loss": 0.0878, + "step": 19850 + }, + { + "epoch": 2.79, + "learning_rate": 4.5356541268950034e-05, + "loss": 0.0768, + "step": 19852 + }, + { + "epoch": 2.79, + "learning_rate": 4.535607336702227e-05, + "loss": 0.0755, + "step": 19854 + }, + { + "epoch": 2.79, + "learning_rate": 4.535560546509452e-05, + "loss": 0.0582, + "step": 19856 + }, + { + "epoch": 2.79, + "learning_rate": 4.535513756316676e-05, + "loss": 0.064, + "step": 19858 + }, + { + "epoch": 2.79, + "learning_rate": 4.5354669661239004e-05, + "loss": 0.0593, + "step": 19860 + }, + { + "epoch": 2.79, + "learning_rate": 4.535420175931125e-05, + "loss": 0.0766, + "step": 19862 + }, + { + "epoch": 2.79, + "learning_rate": 4.5353733857383496e-05, + "loss": 0.088, + "step": 19864 + }, + { + "epoch": 2.79, + "learning_rate": 4.5353265955455735e-05, + "loss": 0.079, + "step": 19866 + }, + { + "epoch": 2.79, + "learning_rate": 4.535279805352798e-05, + "loss": 0.0584, + "step": 19868 + }, + { + "epoch": 2.79, + "learning_rate": 4.535233015160023e-05, + "loss": 0.0563, + "step": 19870 + }, + { + "epoch": 2.79, + "learning_rate": 4.535186224967247e-05, + "loss": 0.0555, + "step": 19872 + }, + { + "epoch": 2.79, + "learning_rate": 4.535139434774471e-05, + "loss": 0.0936, + "step": 19874 + }, + { + "epoch": 2.79, + "learning_rate": 4.535092644581696e-05, + "loss": 0.0635, + "step": 19876 + }, + { + "epoch": 2.79, + "learning_rate": 4.5350458543889204e-05, + "loss": 0.0664, + "step": 19878 + }, + { + "epoch": 2.79, + "learning_rate": 4.534999064196145e-05, + "loss": 0.0554, + "step": 19880 + }, + { + "epoch": 2.79, + "learning_rate": 4.534952274003369e-05, + "loss": 0.0917, + "step": 19882 + }, + { + "epoch": 2.79, + "learning_rate": 4.5349054838105935e-05, + "loss": 0.0565, + "step": 19884 + }, + { + "epoch": 2.79, + "learning_rate": 4.534858693617818e-05, + "loss": 0.0716, + "step": 19886 + }, + { + "epoch": 2.79, + "learning_rate": 4.534811903425043e-05, + "loss": 0.0707, + "step": 19888 + }, + { + "epoch": 2.79, + "learning_rate": 4.5347651132322666e-05, + "loss": 0.0745, + "step": 19890 + }, + { + "epoch": 2.79, + "learning_rate": 4.534718323039491e-05, + "loss": 0.0768, + "step": 19892 + }, + { + "epoch": 2.79, + "learning_rate": 4.534671532846715e-05, + "loss": 0.0776, + "step": 19894 + }, + { + "epoch": 2.79, + "learning_rate": 4.5346247426539404e-05, + "loss": 0.0834, + "step": 19896 + }, + { + "epoch": 2.79, + "learning_rate": 4.534577952461164e-05, + "loss": 0.0666, + "step": 19898 + }, + { + "epoch": 2.79, + "learning_rate": 4.534531162268389e-05, + "loss": 0.0813, + "step": 19900 + }, + { + "epoch": 2.79, + "learning_rate": 4.534484372075613e-05, + "loss": 0.077, + "step": 19902 + }, + { + "epoch": 2.79, + "learning_rate": 4.534437581882838e-05, + "loss": 0.0506, + "step": 19904 + }, + { + "epoch": 2.79, + "learning_rate": 4.534390791690062e-05, + "loss": 0.0766, + "step": 19906 + }, + { + "epoch": 2.79, + "learning_rate": 4.5343440014972866e-05, + "loss": 0.0815, + "step": 19908 + }, + { + "epoch": 2.79, + "learning_rate": 4.5342972113045105e-05, + "loss": 0.0618, + "step": 19910 + }, + { + "epoch": 2.8, + "learning_rate": 4.534250421111735e-05, + "loss": 0.0726, + "step": 19912 + }, + { + "epoch": 2.8, + "learning_rate": 4.5342036309189597e-05, + "loss": 0.0688, + "step": 19914 + }, + { + "epoch": 2.8, + "learning_rate": 4.534156840726184e-05, + "loss": 0.0624, + "step": 19916 + }, + { + "epoch": 2.8, + "learning_rate": 4.534110050533408e-05, + "loss": 0.0857, + "step": 19918 + }, + { + "epoch": 2.8, + "learning_rate": 4.534063260340633e-05, + "loss": 0.083, + "step": 19920 + }, + { + "epoch": 2.8, + "learning_rate": 4.534016470147857e-05, + "loss": 0.063, + "step": 19922 + }, + { + "epoch": 2.8, + "learning_rate": 4.533969679955082e-05, + "loss": 0.0587, + "step": 19924 + }, + { + "epoch": 2.8, + "learning_rate": 4.533922889762306e-05, + "loss": 0.0435, + "step": 19926 + }, + { + "epoch": 2.8, + "learning_rate": 4.5338760995695304e-05, + "loss": 0.0605, + "step": 19928 + }, + { + "epoch": 2.8, + "learning_rate": 4.533829309376755e-05, + "loss": 0.0856, + "step": 19930 + }, + { + "epoch": 2.8, + "learning_rate": 4.5337825191839796e-05, + "loss": 0.0854, + "step": 19932 + }, + { + "epoch": 2.8, + "learning_rate": 4.5337357289912035e-05, + "loss": 0.0674, + "step": 19934 + }, + { + "epoch": 2.8, + "learning_rate": 4.533688938798428e-05, + "loss": 0.0453, + "step": 19936 + }, + { + "epoch": 2.8, + "learning_rate": 4.533642148605653e-05, + "loss": 0.0656, + "step": 19938 + }, + { + "epoch": 2.8, + "learning_rate": 4.533595358412877e-05, + "loss": 0.077, + "step": 19940 + }, + { + "epoch": 2.8, + "learning_rate": 4.533548568220101e-05, + "loss": 0.0655, + "step": 19942 + }, + { + "epoch": 2.8, + "learning_rate": 4.533501778027326e-05, + "loss": 0.0688, + "step": 19944 + }, + { + "epoch": 2.8, + "learning_rate": 4.53345498783455e-05, + "loss": 0.0745, + "step": 19946 + }, + { + "epoch": 2.8, + "learning_rate": 4.533408197641775e-05, + "loss": 0.0589, + "step": 19948 + }, + { + "epoch": 2.8, + "learning_rate": 4.533361407448999e-05, + "loss": 0.0559, + "step": 19950 + }, + { + "epoch": 2.8, + "learning_rate": 4.5333146172562235e-05, + "loss": 0.0798, + "step": 19952 + }, + { + "epoch": 2.8, + "learning_rate": 4.5332678270634474e-05, + "loss": 0.0677, + "step": 19954 + }, + { + "epoch": 2.8, + "learning_rate": 4.533221036870673e-05, + "loss": 0.0581, + "step": 19956 + }, + { + "epoch": 2.8, + "learning_rate": 4.5331742466778966e-05, + "loss": 0.073, + "step": 19958 + }, + { + "epoch": 2.8, + "learning_rate": 4.533127456485121e-05, + "loss": 0.0769, + "step": 19960 + }, + { + "epoch": 2.8, + "learning_rate": 4.533080666292345e-05, + "loss": 0.0616, + "step": 19962 + }, + { + "epoch": 2.8, + "learning_rate": 4.53303387609957e-05, + "loss": 0.069, + "step": 19964 + }, + { + "epoch": 2.8, + "learning_rate": 4.532987085906794e-05, + "loss": 0.0505, + "step": 19966 + }, + { + "epoch": 2.8, + "learning_rate": 4.532940295714019e-05, + "loss": 0.0569, + "step": 19968 + }, + { + "epoch": 2.8, + "learning_rate": 4.532893505521243e-05, + "loss": 0.0925, + "step": 19970 + }, + { + "epoch": 2.8, + "learning_rate": 4.5328467153284674e-05, + "loss": 0.0643, + "step": 19972 + }, + { + "epoch": 2.8, + "learning_rate": 4.532799925135692e-05, + "loss": 0.0617, + "step": 19974 + }, + { + "epoch": 2.8, + "learning_rate": 4.5327531349429166e-05, + "loss": 0.0697, + "step": 19976 + }, + { + "epoch": 2.8, + "learning_rate": 4.5327063447501405e-05, + "loss": 0.0583, + "step": 19978 + }, + { + "epoch": 2.8, + "learning_rate": 4.532659554557365e-05, + "loss": 0.0478, + "step": 19980 + }, + { + "epoch": 2.8, + "learning_rate": 4.5326127643645897e-05, + "loss": 0.0592, + "step": 19982 + }, + { + "epoch": 2.81, + "learning_rate": 4.532565974171814e-05, + "loss": 0.0671, + "step": 19984 + }, + { + "epoch": 2.81, + "learning_rate": 4.532519183979038e-05, + "loss": 0.0581, + "step": 19986 + }, + { + "epoch": 2.81, + "learning_rate": 4.532472393786263e-05, + "loss": 0.0655, + "step": 19988 + }, + { + "epoch": 2.81, + "learning_rate": 4.5324256035934873e-05, + "loss": 0.0543, + "step": 19990 + }, + { + "epoch": 2.81, + "learning_rate": 4.532378813400712e-05, + "loss": 0.0843, + "step": 19992 + }, + { + "epoch": 2.81, + "learning_rate": 4.532332023207936e-05, + "loss": 0.0496, + "step": 19994 + }, + { + "epoch": 2.81, + "learning_rate": 4.5322852330151604e-05, + "loss": 0.0775, + "step": 19996 + }, + { + "epoch": 2.81, + "learning_rate": 4.5322384428223843e-05, + "loss": 0.071, + "step": 19998 + }, + { + "epoch": 2.81, + "learning_rate": 4.5321916526296096e-05, + "loss": 0.0646, + "step": 20000 + }, + { + "epoch": 2.81, + "eval_gen_len": 30.9655, + "eval_loss": 1.0668187141418457, + "eval_meteor": 0.0542, + "eval_runtime": 15.772, + "eval_samples_per_second": 3.677, + "eval_steps_per_second": 0.507, + "step": 20000 + } + ], + "max_steps": 213720, + "num_train_epochs": 30, + "total_flos": 2.878106005251529e+19, + "trial_name": null, + "trial_params": null +}