diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,46902 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.999936, + "eval_steps": 500, + "global_step": 7812, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.1276595744680853e-06, + "loss": 7.024, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4.255319148936171e-06, + "loss": 6.9759, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 6.3829787234042555e-06, + "loss": 7.0304, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 8.510638297872341e-06, + "loss": 7.0565, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 1.0638297872340426e-05, + "loss": 7.2636, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.2765957446808511e-05, + "loss": 7.1588, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 1.4893617021276598e-05, + "loss": 6.8573, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 1.7021276595744682e-05, + "loss": 7.0311, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 1.9148936170212766e-05, + "loss": 6.8257, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 2.1276595744680852e-05, + "loss": 6.4744, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.3404255319148935e-05, + "loss": 6.6088, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 2.5531914893617022e-05, + "loss": 6.756, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 2.7659574468085105e-05, + "loss": 6.5011, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 2.9787234042553195e-05, + "loss": 6.4492, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 3.1914893617021275e-05, + "loss": 6.2776, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 3.4042553191489365e-05, + "loss": 6.1486, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 3.617021276595744e-05, + "loss": 6.0797, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 3.829787234042553e-05, + "loss": 5.9599, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 4.042553191489362e-05, + "loss": 5.932, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 4.2553191489361704e-05, + "loss": 5.7754, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.468085106382979e-05, + "loss": 5.8122, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 4.680851063829787e-05, + "loss": 5.7687, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 4.893617021276596e-05, + "loss": 5.6369, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 5.1063829787234044e-05, + "loss": 5.4998, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 5.319148936170213e-05, + "loss": 5.7576, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 5.531914893617021e-05, + "loss": 5.669, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 5.74468085106383e-05, + "loss": 5.2979, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 5.957446808510639e-05, + "loss": 5.5214, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 6.170212765957447e-05, + "loss": 5.4971, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 6.382978723404255e-05, + "loss": 5.5076, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 6.595744680851063e-05, + "loss": 5.449, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 6.808510638297873e-05, + "loss": 5.5011, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 7.021276595744681e-05, + "loss": 5.3285, + "step": 33 + }, + { + "epoch": 0.0, + "learning_rate": 7.234042553191488e-05, + "loss": 5.2681, + "step": 34 + }, + { + "epoch": 0.0, + "learning_rate": 7.446808510638298e-05, + "loss": 5.2136, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 7.659574468085106e-05, + "loss": 5.3492, + "step": 36 + }, + { + "epoch": 0.0, + "learning_rate": 7.872340425531916e-05, + "loss": 5.2397, + "step": 37 + }, + { + "epoch": 0.0, + "learning_rate": 8.085106382978724e-05, + "loss": 5.3845, + "step": 38 + }, + { + "epoch": 0.0, + "learning_rate": 8.297872340425531e-05, + "loss": 5.2662, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 8.510638297872341e-05, + "loss": 5.2852, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 8.723404255319149e-05, + "loss": 5.045, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 8.936170212765958e-05, + "loss": 5.3274, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 9.148936170212766e-05, + "loss": 5.3526, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 9.361702127659574e-05, + "loss": 5.1935, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 9.574468085106382e-05, + "loss": 5.2358, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 9.787234042553192e-05, + "loss": 5.2331, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 5.33, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010212765957446809, + "loss": 5.2884, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010425531914893617, + "loss": 5.1936, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010638297872340425, + "loss": 5.1228, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 0.00010851063829787235, + "loss": 5.2074, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 0.00011063829787234042, + "loss": 5.1079, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001127659574468085, + "loss": 5.2891, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001148936170212766, + "loss": 5.298, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 0.00011702127659574468, + "loss": 5.219, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 0.00011914893617021278, + "loss": 5.169, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012127659574468085, + "loss": 5.2244, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012340425531914893, + "loss": 5.2145, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012553191489361702, + "loss": 5.0766, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001276595744680851, + "loss": 5.1381, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012978723404255318, + "loss": 5.1906, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 0.00013191489361702127, + "loss": 5.1199, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 0.00013404255319148938, + "loss": 5.1869, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 0.00013617021276595746, + "loss": 5.0917, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 0.00013829787234042554, + "loss": 5.186, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 0.00014042553191489363, + "loss": 5.0925, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001425531914893617, + "loss": 5.1989, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 0.00014468085106382977, + "loss": 5.1092, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 0.00014680851063829788, + "loss": 5.0359, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 0.00014893617021276596, + "loss": 5.2016, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 0.00015106382978723404, + "loss": 5.1215, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 0.00015319148936170213, + "loss": 4.9584, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001553191489361702, + "loss": 5.1438, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 0.00015744680851063832, + "loss": 4.9977, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001595744680851064, + "loss": 5.1192, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016170212765957449, + "loss": 5.1743, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016382978723404254, + "loss": 5.1069, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016595744680851062, + "loss": 5.0653, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001680851063829787, + "loss": 5.097, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017021276595744682, + "loss": 5.1989, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001723404255319149, + "loss": 5.1101, + "step": 81 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017446808510638298, + "loss": 5.1514, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017659574468085107, + "loss": 5.0901, + "step": 83 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017872340425531915, + "loss": 4.9582, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018085106382978726, + "loss": 5.155, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018297872340425532, + "loss": 5.1047, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001851063829787234, + "loss": 5.1678, + "step": 87 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018723404255319148, + "loss": 5.2018, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018936170212765957, + "loss": 5.2266, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019148936170212765, + "loss": 5.1776, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019361702127659576, + "loss": 5.2274, + "step": 91 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019574468085106384, + "loss": 5.0831, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019787234042553193, + "loss": 4.9544, + "step": 93 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002, + "loss": 5.0467, + "step": 94 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020212765957446807, + "loss": 5.0003, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020425531914893618, + "loss": 5.0073, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020638297872340426, + "loss": 4.9705, + "step": 97 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020851063829787234, + "loss": 5.1062, + "step": 98 + }, + { + "epoch": 0.01, + "learning_rate": 0.00021063829787234043, + "loss": 4.8748, + "step": 99 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002127659574468085, + "loss": 5.1105, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002148936170212766, + "loss": 5.003, + "step": 101 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002170212765957447, + "loss": 4.9593, + "step": 102 + }, + { + "epoch": 0.01, + "learning_rate": 0.00021914893617021279, + "loss": 5.0886, + "step": 103 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022127659574468084, + "loss": 4.7623, + "step": 104 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022340425531914892, + "loss": 4.9072, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 0.000225531914893617, + "loss": 5.0578, + "step": 106 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022765957446808512, + "loss": 4.9633, + "step": 107 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002297872340425532, + "loss": 5.0253, + "step": 108 + }, + { + "epoch": 0.01, + "learning_rate": 0.00023191489361702128, + "loss": 4.9662, + "step": 109 + }, + { + "epoch": 0.01, + "learning_rate": 0.00023404255319148937, + "loss": 5.046, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 0.00023617021276595745, + "loss": 4.9244, + "step": 111 + }, + { + "epoch": 0.01, + "learning_rate": 0.00023829787234042556, + "loss": 4.8525, + "step": 112 + }, + { + "epoch": 0.01, + "learning_rate": 0.00024042553191489362, + "loss": 5.0366, + "step": 113 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002425531914893617, + "loss": 5.1024, + "step": 114 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002446808510638298, + "loss": 4.9954, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 0.00024680851063829787, + "loss": 4.8879, + "step": 116 + }, + { + "epoch": 0.01, + "learning_rate": 0.000248936170212766, + "loss": 5.1033, + "step": 117 + }, + { + "epoch": 0.02, + "learning_rate": 0.00025106382978723403, + "loss": 4.9787, + "step": 118 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002531914893617021, + "loss": 4.9459, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002553191489361702, + "loss": 4.8887, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002574468085106383, + "loss": 5.0119, + "step": 121 + }, + { + "epoch": 0.02, + "learning_rate": 0.00025957446808510637, + "loss": 4.9482, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002617021276595745, + "loss": 5.0036, + "step": 123 + }, + { + "epoch": 0.02, + "learning_rate": 0.00026382978723404253, + "loss": 5.0999, + "step": 124 + }, + { + "epoch": 0.02, + "learning_rate": 0.00026595744680851064, + "loss": 4.7897, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 0.00026808510638297875, + "loss": 4.8308, + "step": 126 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002702127659574468, + "loss": 5.0703, + "step": 127 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002723404255319149, + "loss": 5.1111, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 0.000274468085106383, + "loss": 5.0103, + "step": 129 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002765957446808511, + "loss": 4.9165, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 0.00027872340425531914, + "loss": 4.8984, + "step": 131 + }, + { + "epoch": 0.02, + "learning_rate": 0.00028085106382978725, + "loss": 4.8666, + "step": 132 + }, + { + "epoch": 0.02, + "learning_rate": 0.00028297872340425536, + "loss": 5.0499, + "step": 133 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002851063829787234, + "loss": 5.002, + "step": 134 + }, + { + "epoch": 0.02, + "learning_rate": 0.00028723404255319153, + "loss": 4.8263, + "step": 135 + }, + { + "epoch": 0.02, + "learning_rate": 0.00028936170212765953, + "loss": 4.9619, + "step": 136 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029148936170212764, + "loss": 4.9058, + "step": 137 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029361702127659575, + "loss": 4.8237, + "step": 138 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002957446808510638, + "loss": 5.0297, + "step": 139 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002978723404255319, + "loss": 4.9091, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003, + "loss": 4.9883, + "step": 141 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003021276595744681, + "loss": 5.0575, + "step": 142 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003042553191489362, + "loss": 4.9783, + "step": 143 + }, + { + "epoch": 0.02, + "learning_rate": 0.00030638297872340425, + "loss": 4.8991, + "step": 144 + }, + { + "epoch": 0.02, + "learning_rate": 0.00030851063829787236, + "loss": 4.7746, + "step": 145 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003106382978723404, + "loss": 4.8732, + "step": 146 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003127659574468085, + "loss": 4.7298, + "step": 147 + }, + { + "epoch": 0.02, + "learning_rate": 0.00031489361702127664, + "loss": 4.8707, + "step": 148 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003170212765957447, + "loss": 4.9312, + "step": 149 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003191489361702128, + "loss": 4.8244, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 0.00032127659574468086, + "loss": 4.9428, + "step": 151 + }, + { + "epoch": 0.02, + "learning_rate": 0.00032340425531914897, + "loss": 4.8193, + "step": 152 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003255319148936171, + "loss": 4.7837, + "step": 153 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003276595744680851, + "loss": 4.807, + "step": 154 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003297872340425532, + "loss": 4.8266, + "step": 155 + }, + { + "epoch": 0.02, + "learning_rate": 0.00033191489361702125, + "loss": 4.8931, + "step": 156 + }, + { + "epoch": 0.02, + "learning_rate": 0.00033404255319148936, + "loss": 4.7448, + "step": 157 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003361702127659574, + "loss": 4.7498, + "step": 158 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003382978723404255, + "loss": 5.0025, + "step": 159 + }, + { + "epoch": 0.02, + "learning_rate": 0.00034042553191489364, + "loss": 4.846, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003425531914893617, + "loss": 4.7673, + "step": 161 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003446808510638298, + "loss": 4.9368, + "step": 162 + }, + { + "epoch": 0.02, + "learning_rate": 0.00034680851063829786, + "loss": 4.8729, + "step": 163 + }, + { + "epoch": 0.02, + "learning_rate": 0.00034893617021276597, + "loss": 4.8593, + "step": 164 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003510638297872341, + "loss": 4.8161, + "step": 165 + }, + { + "epoch": 0.02, + "learning_rate": 0.00035319148936170213, + "loss": 4.8341, + "step": 166 + }, + { + "epoch": 0.02, + "learning_rate": 0.00035531914893617025, + "loss": 4.9574, + "step": 167 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003574468085106383, + "loss": 4.6831, + "step": 168 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003595744680851064, + "loss": 4.9333, + "step": 169 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003617021276595745, + "loss": 4.6844, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003638297872340426, + "loss": 4.8516, + "step": 171 + }, + { + "epoch": 0.02, + "learning_rate": 0.00036595744680851063, + "loss": 4.8536, + "step": 172 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003680851063829787, + "loss": 4.7815, + "step": 173 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003702127659574468, + "loss": 4.8808, + "step": 174 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003723404255319149, + "loss": 4.954, + "step": 175 + }, + { + "epoch": 0.02, + "learning_rate": 0.00037446808510638297, + "loss": 4.7586, + "step": 176 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003765957446808511, + "loss": 4.9232, + "step": 177 + }, + { + "epoch": 0.02, + "learning_rate": 0.00037872340425531913, + "loss": 4.7629, + "step": 178 + }, + { + "epoch": 0.02, + "learning_rate": 0.00038085106382978724, + "loss": 4.7637, + "step": 179 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003829787234042553, + "loss": 4.8205, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003851063829787234, + "loss": 4.8066, + "step": 181 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003872340425531915, + "loss": 4.7646, + "step": 182 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003893617021276596, + "loss": 4.7591, + "step": 183 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003914893617021277, + "loss": 4.7744, + "step": 184 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039361702127659574, + "loss": 4.9201, + "step": 185 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039574468085106385, + "loss": 4.8599, + "step": 186 + }, + { + "epoch": 0.02, + "learning_rate": 0.00039787234042553196, + "loss": 4.8122, + "step": 187 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004, + "loss": 4.7388, + "step": 188 + }, + { + "epoch": 0.02, + "learning_rate": 0.00040212765957446813, + "loss": 4.7767, + "step": 189 + }, + { + "epoch": 0.02, + "learning_rate": 0.00040425531914893613, + "loss": 4.7764, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 0.00040638297872340424, + "loss": 4.6532, + "step": 191 + }, + { + "epoch": 0.02, + "learning_rate": 0.00040851063829787235, + "loss": 4.7337, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004106382978723404, + "loss": 4.8809, + "step": 193 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004127659574468085, + "loss": 4.6934, + "step": 194 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004148936170212766, + "loss": 4.9602, + "step": 195 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004170212765957447, + "loss": 4.8178, + "step": 196 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004191489361702128, + "loss": 4.7071, + "step": 197 + }, + { + "epoch": 0.03, + "learning_rate": 0.00042127659574468085, + "loss": 4.6844, + "step": 198 + }, + { + "epoch": 0.03, + "learning_rate": 0.00042340425531914896, + "loss": 4.7911, + "step": 199 + }, + { + "epoch": 0.03, + "learning_rate": 0.000425531914893617, + "loss": 4.816, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 0.00042765957446808513, + "loss": 4.7413, + "step": 201 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004297872340425532, + "loss": 4.7377, + "step": 202 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004319148936170213, + "loss": 4.8655, + "step": 203 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004340425531914894, + "loss": 4.7621, + "step": 204 + }, + { + "epoch": 0.03, + "learning_rate": 0.00043617021276595746, + "loss": 4.7663, + "step": 205 + }, + { + "epoch": 0.03, + "learning_rate": 0.00043829787234042557, + "loss": 4.7083, + "step": 206 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004404255319148936, + "loss": 4.6874, + "step": 207 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004425531914893617, + "loss": 4.8054, + "step": 208 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004446808510638298, + "loss": 4.6976, + "step": 209 + }, + { + "epoch": 0.03, + "learning_rate": 0.00044680851063829785, + "loss": 4.8494, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 0.00044893617021276596, + "loss": 4.6101, + "step": 211 + }, + { + "epoch": 0.03, + "learning_rate": 0.000451063829787234, + "loss": 4.7372, + "step": 212 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004531914893617021, + "loss": 4.7301, + "step": 213 + }, + { + "epoch": 0.03, + "learning_rate": 0.00045531914893617024, + "loss": 4.6185, + "step": 214 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004574468085106383, + "loss": 4.6232, + "step": 215 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004595744680851064, + "loss": 4.5684, + "step": 216 + }, + { + "epoch": 0.03, + "learning_rate": 0.00046170212765957446, + "loss": 4.6433, + "step": 217 + }, + { + "epoch": 0.03, + "learning_rate": 0.00046382978723404257, + "loss": 4.7176, + "step": 218 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004659574468085107, + "loss": 4.705, + "step": 219 + }, + { + "epoch": 0.03, + "learning_rate": 0.00046808510638297874, + "loss": 4.8312, + "step": 220 + }, + { + "epoch": 0.03, + "learning_rate": 0.00047021276595744685, + "loss": 4.7071, + "step": 221 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004723404255319149, + "loss": 4.7146, + "step": 222 + }, + { + "epoch": 0.03, + "learning_rate": 0.000474468085106383, + "loss": 4.6534, + "step": 223 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004765957446808511, + "loss": 4.7166, + "step": 224 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004787234042553192, + "loss": 4.6451, + "step": 225 + }, + { + "epoch": 0.03, + "learning_rate": 0.00048085106382978723, + "loss": 4.7097, + "step": 226 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004829787234042553, + "loss": 4.6763, + "step": 227 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004851063829787234, + "loss": 4.7253, + "step": 228 + }, + { + "epoch": 0.03, + "learning_rate": 0.00048723404255319146, + "loss": 4.741, + "step": 229 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004893617021276596, + "loss": 4.5312, + "step": 230 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004914893617021277, + "loss": 4.6941, + "step": 231 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004936170212765957, + "loss": 4.6746, + "step": 232 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004957446808510638, + "loss": 4.7585, + "step": 233 + }, + { + "epoch": 0.03, + "learning_rate": 0.000497872340425532, + "loss": 4.5201, + "step": 234 + }, + { + "epoch": 0.03, + "learning_rate": 0.0005, + "loss": 4.6774, + "step": 235 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999999785110509, + "loss": 4.6424, + "step": 236 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999999140442072, + "loss": 4.6048, + "step": 237 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999998065994801, + "loss": 4.8587, + "step": 238 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999996561768879, + "loss": 4.7153, + "step": 239 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999994627764566, + "loss": 4.631, + "step": 240 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999992263982194, + "loss": 4.5879, + "step": 241 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499998947042217, + "loss": 4.5206, + "step": 242 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999986247084974, + "loss": 4.6916, + "step": 243 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999982593971157, + "loss": 4.6773, + "step": 244 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999978511081353, + "loss": 4.6062, + "step": 245 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999973998416259, + "loss": 4.7101, + "step": 246 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999969055976653, + "loss": 4.7404, + "step": 247 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999963683763384, + "loss": 4.5602, + "step": 248 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999957881777376, + "loss": 4.5245, + "step": 249 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999951650019627, + "loss": 4.7778, + "step": 250 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999944988491207, + "loss": 4.5928, + "step": 251 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499993789719326, + "loss": 4.8413, + "step": 252 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999930376127007, + "loss": 4.683, + "step": 253 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999922425293743, + "loss": 4.6075, + "step": 254 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499991404469483, + "loss": 4.7269, + "step": 255 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999905234331712, + "loss": 4.683, + "step": 256 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999895994205903, + "loss": 4.744, + "step": 257 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999886324318992, + "loss": 4.5838, + "step": 258 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499987622467264, + "loss": 4.6589, + "step": 259 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999865695268584, + "loss": 4.6019, + "step": 260 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999854736108633, + "loss": 4.5268, + "step": 261 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999843347194674, + "loss": 4.6469, + "step": 262 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999831528528662, + "loss": 4.6734, + "step": 263 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999819280112629, + "loss": 4.711, + "step": 264 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999806601948682, + "loss": 4.5771, + "step": 265 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999793494039, + "loss": 4.6156, + "step": 266 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999779956385836, + "loss": 4.6433, + "step": 267 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999765988991518, + "loss": 4.5788, + "step": 268 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999751591858447, + "loss": 4.5647, + "step": 269 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999736764989096, + "loss": 4.5572, + "step": 270 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999721508386018, + "loss": 4.7535, + "step": 271 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999705822051832, + "loss": 4.6514, + "step": 272 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999689705989237, + "loss": 4.702, + "step": 273 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999673160201001, + "loss": 4.5488, + "step": 274 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999656184689972, + "loss": 4.6821, + "step": 275 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999638779459065, + "loss": 4.6311, + "step": 276 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999620944511274, + "loss": 4.6528, + "step": 277 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999602679849665, + "loss": 4.5597, + "step": 278 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999583985477377, + "loss": 4.6648, + "step": 279 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999564861397624, + "loss": 4.5228, + "step": 280 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999545307613695, + "loss": 4.6784, + "step": 281 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999525324128949, + "loss": 4.531, + "step": 282 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999504910946824, + "loss": 4.6095, + "step": 283 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999484068070827, + "loss": 4.5887, + "step": 284 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999462795504542, + "loss": 4.6187, + "step": 285 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999441093251627, + "loss": 4.635, + "step": 286 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999418961315812, + "loss": 4.5662, + "step": 287 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999396399700902, + "loss": 4.6561, + "step": 288 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999373408410775, + "loss": 4.6885, + "step": 289 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999349987449384, + "loss": 4.6416, + "step": 290 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999326136820754, + "loss": 4.6709, + "step": 291 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999301856528989, + "loss": 4.7176, + "step": 292 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999277146578258, + "loss": 4.5105, + "step": 293 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999252006972813, + "loss": 4.4936, + "step": 294 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999226437716974, + "loss": 4.5534, + "step": 295 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999200438815136, + "loss": 4.5009, + "step": 296 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499917401027177, + "loss": 4.495, + "step": 297 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999147152091419, + "loss": 4.6346, + "step": 298 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999119864278699, + "loss": 4.5174, + "step": 299 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999092146838302, + "loss": 4.5343, + "step": 300 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999063999774994, + "loss": 4.7216, + "step": 301 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999035423093612, + "loss": 4.6542, + "step": 302 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499900641679907, + "loss": 4.6059, + "step": 303 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998976980896354, + "loss": 4.7562, + "step": 304 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998947115390524, + "loss": 4.6407, + "step": 305 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998916820286714, + "loss": 4.6257, + "step": 306 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998886095590134, + "loss": 4.7084, + "step": 307 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998854941306064, + "loss": 4.5672, + "step": 308 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499882335743986, + "loss": 4.5577, + "step": 309 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998791343996952, + "loss": 4.5305, + "step": 310 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998758900982845, + "loss": 4.5139, + "step": 311 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998726028403114, + "loss": 4.5928, + "step": 312 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499869272626341, + "loss": 4.5932, + "step": 313 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998658994569459, + "loss": 4.7048, + "step": 314 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998624833327061, + "loss": 4.6108, + "step": 315 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998590242542087, + "loss": 4.5814, + "step": 316 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998555222220485, + "loss": 4.6398, + "step": 317 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998519772368273, + "loss": 4.6003, + "step": 318 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998483892991549, + "loss": 4.5051, + "step": 319 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998447584096477, + "loss": 4.5977, + "step": 320 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998410845689301, + "loss": 4.4886, + "step": 321 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998373677776337, + "loss": 4.5492, + "step": 322 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998336080363975, + "loss": 4.6802, + "step": 323 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998298053458676, + "loss": 4.5525, + "step": 324 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499825959706698, + "loss": 4.4299, + "step": 325 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998220711195496, + "loss": 4.5563, + "step": 326 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998181395850911, + "loss": 4.5631, + "step": 327 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998141651039982, + "loss": 4.5899, + "step": 328 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998101476769542, + "loss": 4.6376, + "step": 329 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998060873046498, + "loss": 4.6092, + "step": 330 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499801983987783, + "loss": 4.4953, + "step": 331 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997978377270591, + "loss": 4.5663, + "step": 332 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997936485231911, + "loss": 4.6547, + "step": 333 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997894163768992, + "loss": 4.5887, + "step": 334 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997851412889106, + "loss": 4.5218, + "step": 335 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997808232599604, + "loss": 4.438, + "step": 336 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997764622907911, + "loss": 4.497, + "step": 337 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997720583821523, + "loss": 4.4801, + "step": 338 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499767611534801, + "loss": 4.5117, + "step": 339 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997631217495018, + "loss": 4.5218, + "step": 340 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997585890270265, + "loss": 4.6044, + "step": 341 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997540133681541, + "loss": 4.4802, + "step": 342 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997493947736715, + "loss": 4.5683, + "step": 343 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997447332443727, + "loss": 4.5374, + "step": 344 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997400287810587, + "loss": 4.597, + "step": 345 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997352813845388, + "loss": 4.5688, + "step": 346 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997304910556288, + "loss": 4.4562, + "step": 347 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997256577951521, + "loss": 4.5315, + "step": 348 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997207816039398, + "loss": 4.5297, + "step": 349 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004997158624828303, + "loss": 4.4569, + "step": 350 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499710900432669, + "loss": 4.447, + "step": 351 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997058954543089, + "loss": 4.5307, + "step": 352 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997008475486107, + "loss": 4.5601, + "step": 353 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499695756716442, + "loss": 4.4618, + "step": 354 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996906229586778, + "loss": 4.5403, + "step": 355 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499685446276201, + "loss": 4.4345, + "step": 356 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996802266699014, + "loss": 4.5671, + "step": 357 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996749641406763, + "loss": 4.4683, + "step": 358 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996696586894304, + "loss": 4.6923, + "step": 359 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996643103170757, + "loss": 4.6299, + "step": 360 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996589190245318, + "loss": 4.6717, + "step": 361 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996534848127253, + "loss": 4.5126, + "step": 362 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996480076825906, + "loss": 4.6441, + "step": 363 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996424876350692, + "loss": 4.5981, + "step": 364 + }, + { + "epoch": 0.05, + "learning_rate": 0.00049963692467111, + "loss": 4.3955, + "step": 365 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996313187916694, + "loss": 4.4835, + "step": 366 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996256699977112, + "loss": 4.5081, + "step": 367 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996199782902064, + "loss": 4.3787, + "step": 368 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996142436701336, + "loss": 4.4815, + "step": 369 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996084661384783, + "loss": 4.5286, + "step": 370 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004996026456962341, + "loss": 4.6599, + "step": 371 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995967823444015, + "loss": 4.4566, + "step": 372 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995908760839884, + "loss": 4.6604, + "step": 373 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995849269160102, + "loss": 4.5357, + "step": 374 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995789348414897, + "loss": 4.3763, + "step": 375 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995728998614567, + "loss": 4.6139, + "step": 376 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995668219769492, + "loss": 4.4894, + "step": 377 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995607011890115, + "loss": 4.4425, + "step": 378 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995545374986963, + "loss": 4.5186, + "step": 379 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499548330907063, + "loss": 4.5658, + "step": 380 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995420814151786, + "loss": 4.5511, + "step": 381 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995357890241174, + "loss": 4.4611, + "step": 382 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995294537349612, + "loss": 4.5889, + "step": 383 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995230755487992, + "loss": 4.5257, + "step": 384 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995166544667278, + "loss": 4.5111, + "step": 385 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995101904898507, + "loss": 4.4801, + "step": 386 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004995036836192793, + "loss": 4.5363, + "step": 387 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994971338561323, + "loss": 4.4689, + "step": 388 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994905412015356, + "loss": 4.4937, + "step": 389 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994839056566225, + "loss": 4.5458, + "step": 390 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994772272225337, + "loss": 4.6056, + "step": 391 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994705059004174, + "loss": 4.2671, + "step": 392 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499463741691429, + "loss": 4.4442, + "step": 393 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994569345967314, + "loss": 4.4587, + "step": 394 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994500846174947, + "loss": 4.5909, + "step": 395 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994431917548966, + "loss": 4.4973, + "step": 396 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994362560101221, + "loss": 4.4324, + "step": 397 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994292773843635, + "loss": 4.4228, + "step": 398 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994222558788204, + "loss": 4.5179, + "step": 399 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994151914947001, + "loss": 4.3382, + "step": 400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994080842332168, + "loss": 4.5249, + "step": 401 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004994009340955924, + "loss": 4.5272, + "step": 402 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993937410830561, + "loss": 4.3719, + "step": 403 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993865051968447, + "loss": 4.5047, + "step": 404 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993792264382017, + "loss": 4.5042, + "step": 405 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993719048083788, + "loss": 4.4963, + "step": 406 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993645403086344, + "loss": 4.4584, + "step": 407 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993571329402346, + "loss": 4.6293, + "step": 408 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993496827044529, + "loss": 4.4831, + "step": 409 + }, + { + "epoch": 0.05, + "learning_rate": 0.00049934218960257, + "loss": 4.5925, + "step": 410 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993346536358742, + "loss": 4.4849, + "step": 411 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993270748056607, + "loss": 4.4801, + "step": 412 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993194531132328, + "loss": 4.4836, + "step": 413 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993117885599004, + "loss": 4.4295, + "step": 414 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993040811469815, + "loss": 4.4611, + "step": 415 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992963308758006, + "loss": 4.5412, + "step": 416 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992885377476904, + "loss": 4.5808, + "step": 417 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992807017639906, + "loss": 4.5832, + "step": 418 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992728229260484, + "loss": 4.4949, + "step": 419 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499264901235218, + "loss": 4.4132, + "step": 420 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992569366928612, + "loss": 4.3393, + "step": 421 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992489293003475, + "loss": 4.492, + "step": 422 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992408790590534, + "loss": 4.4672, + "step": 423 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992327859703626, + "loss": 4.6081, + "step": 424 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992246500356665, + "loss": 4.4581, + "step": 425 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992164712563639, + "loss": 4.5307, + "step": 426 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992082496338607, + "loss": 4.5025, + "step": 427 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991999851695704, + "loss": 4.3851, + "step": 428 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991916778649135, + "loss": 4.4762, + "step": 429 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004991833277213183, + "loss": 4.5882, + "step": 430 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004991749347402205, + "loss": 4.5873, + "step": 431 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004991664989230626, + "loss": 4.6166, + "step": 432 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004991580202712949, + "loss": 4.4891, + "step": 433 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004991494987863751, + "loss": 4.4605, + "step": 434 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004991409344697681, + "loss": 4.5469, + "step": 435 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004991323273229461, + "loss": 4.3767, + "step": 436 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004991236773473889, + "loss": 4.441, + "step": 437 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004991149845445834, + "loss": 4.4204, + "step": 438 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004991062489160241, + "loss": 4.4041, + "step": 439 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990974704632128, + "loss": 4.5234, + "step": 440 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990886491876583, + "loss": 4.5024, + "step": 441 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990797850908775, + "loss": 4.4319, + "step": 442 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990708781743942, + "loss": 4.4519, + "step": 443 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990619284397393, + "loss": 4.5382, + "step": 444 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990529358884515, + "loss": 4.4006, + "step": 445 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990439005220768, + "loss": 4.4633, + "step": 446 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990348223421685, + "loss": 4.3915, + "step": 447 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990257013502871, + "loss": 4.4918, + "step": 448 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990165375480007, + "loss": 4.3662, + "step": 449 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990073309368847, + "loss": 4.4996, + "step": 450 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989980815185217, + "loss": 4.4727, + "step": 451 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989887892945018, + "loss": 4.5084, + "step": 452 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989794542664226, + "loss": 4.5213, + "step": 453 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989700764358888, + "loss": 4.4763, + "step": 454 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989606558045126, + "loss": 4.2515, + "step": 455 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989511923739133, + "loss": 4.3524, + "step": 456 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989416861457181, + "loss": 4.4848, + "step": 457 + }, + { + "epoch": 0.06, + "learning_rate": 0.000498932137121561, + "loss": 4.5399, + "step": 458 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989225453030837, + "loss": 4.3947, + "step": 459 + }, + { + "epoch": 0.06, + "learning_rate": 0.000498912910691935, + "loss": 4.4025, + "step": 460 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989032332897714, + "loss": 4.4061, + "step": 461 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988935130982564, + "loss": 4.4679, + "step": 462 + }, + { + "epoch": 0.06, + "learning_rate": 0.000498883750119061, + "loss": 4.3469, + "step": 463 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988739443538638, + "loss": 4.5296, + "step": 464 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988640958043504, + "loss": 4.4016, + "step": 465 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988542044722138, + "loss": 4.3912, + "step": 466 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988442703591545, + "loss": 4.4679, + "step": 467 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988342934668801, + "loss": 4.4699, + "step": 468 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988242737971061, + "loss": 4.3951, + "step": 469 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988142113515548, + "loss": 4.3658, + "step": 470 + }, + { + "epoch": 0.06, + "learning_rate": 0.000498804106131956, + "loss": 4.479, + "step": 471 + }, + { + "epoch": 0.06, + "learning_rate": 0.000498793958140047, + "loss": 4.4312, + "step": 472 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987837673775723, + "loss": 4.4744, + "step": 473 + }, + { + "epoch": 0.06, + "learning_rate": 0.000498773533846284, + "loss": 4.4212, + "step": 474 + }, + { + "epoch": 0.06, + "learning_rate": 0.000498763257547941, + "loss": 4.4934, + "step": 475 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987529384843102, + "loss": 4.3311, + "step": 476 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987425766571655, + "loss": 4.3685, + "step": 477 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987321720682882, + "loss": 4.4967, + "step": 478 + }, + { + "epoch": 0.06, + "learning_rate": 0.000498721724719467, + "loss": 4.5753, + "step": 479 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987112346124978, + "loss": 4.338, + "step": 480 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987007017491842, + "loss": 4.3418, + "step": 481 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004986901261313366, + "loss": 4.4897, + "step": 482 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004986795077607733, + "loss": 4.4744, + "step": 483 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004986688466393198, + "loss": 4.3371, + "step": 484 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004986581427688086, + "loss": 4.5328, + "step": 485 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004986473961510801, + "loss": 4.5397, + "step": 486 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004986366067879815, + "loss": 4.505, + "step": 487 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004986257746813678, + "loss": 4.4183, + "step": 488 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004986148998331011, + "loss": 4.3742, + "step": 489 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004986039822450509, + "loss": 4.506, + "step": 490 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004985930219190942, + "loss": 4.4855, + "step": 491 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004985820188571151, + "loss": 4.3544, + "step": 492 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004985709730610051, + "loss": 4.4547, + "step": 493 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004985598845326631, + "loss": 4.3729, + "step": 494 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004985487532739955, + "loss": 4.4839, + "step": 495 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004985375792869157, + "loss": 4.3594, + "step": 496 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004985263625733447, + "loss": 4.3468, + "step": 497 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004985151031352108, + "loss": 4.5483, + "step": 498 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004985038009744498, + "loss": 4.4415, + "step": 499 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004984924560930044, + "loss": 4.4176, + "step": 500 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004984810684928251, + "loss": 4.5183, + "step": 501 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004984696381758693, + "loss": 4.4314, + "step": 502 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004984581651441024, + "loss": 4.3945, + "step": 503 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004984466493994964, + "loss": 4.4652, + "step": 504 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004984350909440311, + "loss": 4.3655, + "step": 505 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004984234897796937, + "loss": 4.4413, + "step": 506 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004984118459084783, + "loss": 4.4126, + "step": 507 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004984001593323868, + "loss": 4.3304, + "step": 508 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983884300534283, + "loss": 4.3766, + "step": 509 + }, + { + "epoch": 0.07, + "learning_rate": 0.000498376658073619, + "loss": 4.2708, + "step": 510 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983648433949828, + "loss": 4.3697, + "step": 511 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983529860195507, + "loss": 4.3742, + "step": 512 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983410859493611, + "loss": 4.4799, + "step": 513 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983291431864599, + "loss": 4.3975, + "step": 514 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983171577329001, + "loss": 4.3133, + "step": 515 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983051295907421, + "loss": 4.4423, + "step": 516 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004982930587620537, + "loss": 4.449, + "step": 517 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004982809452489101, + "loss": 4.5099, + "step": 518 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004982687890533936, + "loss": 4.3841, + "step": 519 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004982565901775943, + "loss": 4.599, + "step": 520 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004982443486236089, + "loss": 4.4075, + "step": 521 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004982320643935421, + "loss": 4.376, + "step": 522 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004982197374895058, + "loss": 4.3289, + "step": 523 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004982073679136189, + "loss": 4.4568, + "step": 524 + }, + { + "epoch": 0.07, + "learning_rate": 0.000498194955668008, + "loss": 4.366, + "step": 525 + }, + { + "epoch": 0.07, + "learning_rate": 0.000498182500754807, + "loss": 4.3421, + "step": 526 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004981700031761567, + "loss": 4.332, + "step": 527 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004981574629342061, + "loss": 4.4673, + "step": 528 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004981448800311105, + "loss": 4.4198, + "step": 529 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004981322544690335, + "loss": 4.4123, + "step": 530 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004981195862501452, + "loss": 4.3704, + "step": 531 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004981068753766237, + "loss": 4.4214, + "step": 532 + }, + { + "epoch": 0.07, + "learning_rate": 0.000498094121850654, + "loss": 4.3873, + "step": 533 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004980813256744286, + "loss": 4.4148, + "step": 534 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004980684868501472, + "loss": 4.3722, + "step": 535 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004980556053800171, + "loss": 4.4437, + "step": 536 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004980426812662527, + "loss": 4.2388, + "step": 537 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004980297145110759, + "loss": 4.307, + "step": 538 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004980167051167158, + "loss": 4.5136, + "step": 539 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004980036530854088, + "loss": 4.2747, + "step": 540 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004979905584193986, + "loss": 4.4357, + "step": 541 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004979774211209367, + "loss": 4.2891, + "step": 542 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004979642411922811, + "loss": 4.2277, + "step": 543 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004979510186356979, + "loss": 4.5532, + "step": 544 + }, + { + "epoch": 0.07, + "learning_rate": 0.00049793775345346, + "loss": 4.3415, + "step": 545 + }, + { + "epoch": 0.07, + "learning_rate": 0.000497924445647848, + "loss": 4.356, + "step": 546 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004979110952211496, + "loss": 4.3293, + "step": 547 + }, + { + "epoch": 0.07, + "learning_rate": 0.00049789770217566, + "loss": 4.2259, + "step": 548 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004978842665136814, + "loss": 4.3533, + "step": 549 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004978707882375237, + "loss": 4.3319, + "step": 550 + }, + { + "epoch": 0.07, + "learning_rate": 0.000497857267349504, + "loss": 4.3768, + "step": 551 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004978437038519465, + "loss": 4.3747, + "step": 552 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004978300977471832, + "loss": 4.5344, + "step": 553 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004978164490375529, + "loss": 4.3947, + "step": 554 + }, + { + "epoch": 0.07, + "learning_rate": 0.000497802757725402, + "loss": 4.3948, + "step": 555 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004977890238130844, + "loss": 4.389, + "step": 556 + }, + { + "epoch": 0.07, + "learning_rate": 0.000497775247302961, + "loss": 4.401, + "step": 557 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004977614281974002, + "loss": 4.3791, + "step": 558 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004977475664987773, + "loss": 4.331, + "step": 559 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004977336622094759, + "loss": 4.3169, + "step": 560 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004977197153318858, + "loss": 4.274, + "step": 561 + }, + { + "epoch": 0.07, + "learning_rate": 0.000497705725868405, + "loss": 4.3024, + "step": 562 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004976916938214381, + "loss": 4.3823, + "step": 563 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004976776191933976, + "loss": 4.3152, + "step": 564 + }, + { + "epoch": 0.07, + "learning_rate": 0.000497663501986703, + "loss": 4.2845, + "step": 565 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004976493422037813, + "loss": 4.3458, + "step": 566 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004976351398470667, + "loss": 4.4023, + "step": 567 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004976208949190007, + "loss": 4.3058, + "step": 568 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004976066074220322, + "loss": 4.4635, + "step": 569 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004975922773586173, + "loss": 4.3387, + "step": 570 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004975779047312198, + "loss": 4.2628, + "step": 571 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004975634895423101, + "loss": 4.4015, + "step": 572 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004975490317943666, + "loss": 4.3975, + "step": 573 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004975345314898747, + "loss": 4.2802, + "step": 574 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004975199886313272, + "loss": 4.3113, + "step": 575 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004975054032212241, + "loss": 4.3744, + "step": 576 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004974907752620729, + "loss": 4.3736, + "step": 577 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004974761047563883, + "loss": 4.1832, + "step": 578 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004974613917066923, + "loss": 4.3717, + "step": 579 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004974466361155141, + "loss": 4.3349, + "step": 580 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004974318379853907, + "loss": 4.3597, + "step": 581 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004974169973188658, + "loss": 4.3338, + "step": 582 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004974021141184908, + "loss": 4.4724, + "step": 583 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004973871883868242, + "loss": 4.2821, + "step": 584 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004973722201264319, + "loss": 4.2916, + "step": 585 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004973572093398873, + "loss": 4.4172, + "step": 586 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004973421560297707, + "loss": 4.3547, + "step": 587 + }, + { + "epoch": 0.08, + "learning_rate": 0.00049732706019867, + "loss": 4.246, + "step": 588 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004973119218491805, + "loss": 4.2595, + "step": 589 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004972967409839045, + "loss": 4.3037, + "step": 590 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004972815176054519, + "loss": 4.2909, + "step": 591 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004972662517164396, + "loss": 4.2707, + "step": 592 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004972509433194922, + "loss": 4.2739, + "step": 593 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004972355924172411, + "loss": 4.3051, + "step": 594 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004972201990123255, + "loss": 4.3365, + "step": 595 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004972047631073917, + "loss": 4.4756, + "step": 596 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004971892847050933, + "loss": 4.3048, + "step": 597 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004971737638080912, + "loss": 4.3296, + "step": 598 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004971582004190536, + "loss": 4.326, + "step": 599 + }, + { + "epoch": 0.08, + "learning_rate": 0.000497142594540656, + "loss": 4.3922, + "step": 600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004971269461755813, + "loss": 4.3615, + "step": 601 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004971112553265195, + "loss": 4.1929, + "step": 602 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004970955219961683, + "loss": 4.4233, + "step": 603 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004970797461872323, + "loss": 4.4581, + "step": 604 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004970639279024235, + "loss": 4.2686, + "step": 605 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004970480671444613, + "loss": 4.4099, + "step": 606 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004970321639160723, + "loss": 4.4116, + "step": 607 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004970162182199904, + "loss": 4.3081, + "step": 608 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004970002300589571, + "loss": 4.3503, + "step": 609 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004969841994357207, + "loss": 4.3922, + "step": 610 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004969681263530372, + "loss": 4.3151, + "step": 611 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004969520108136696, + "loss": 4.3402, + "step": 612 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004969358528203884, + "loss": 4.2949, + "step": 613 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004969196523759713, + "loss": 4.3257, + "step": 614 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004969034094832036, + "loss": 4.2943, + "step": 615 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004968871241448774, + "loss": 4.3045, + "step": 616 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004968707963637924, + "loss": 4.2451, + "step": 617 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004968544261427555, + "loss": 4.3215, + "step": 618 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004968380134845811, + "loss": 4.4184, + "step": 619 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004968215583920903, + "loss": 4.2883, + "step": 620 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004968050608681125, + "loss": 4.1952, + "step": 621 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004967885209154833, + "loss": 4.3987, + "step": 622 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004967719385370465, + "loss": 4.3176, + "step": 623 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004967553137356527, + "loss": 4.4916, + "step": 624 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004967386465141596, + "loss": 4.3199, + "step": 625 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004967219368754329, + "loss": 4.3377, + "step": 626 + }, + { + "epoch": 0.08, + "learning_rate": 0.000496705184822345, + "loss": 4.2602, + "step": 627 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004966883903577757, + "loss": 4.2431, + "step": 628 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004966715534846123, + "loss": 4.2344, + "step": 629 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004966546742057491, + "loss": 4.3571, + "step": 630 + }, + { + "epoch": 0.08, + "learning_rate": 0.000496637752524088, + "loss": 4.3229, + "step": 631 + }, + { + "epoch": 0.08, + "learning_rate": 0.000496620788442538, + "loss": 4.1981, + "step": 632 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004966037819640153, + "loss": 4.3227, + "step": 633 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004965867330914437, + "loss": 4.3538, + "step": 634 + }, + { + "epoch": 0.08, + "learning_rate": 0.000496569641827754, + "loss": 4.4017, + "step": 635 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004965525081758843, + "loss": 4.4732, + "step": 636 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004965353321387803, + "loss": 4.3342, + "step": 637 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004965181137193946, + "loss": 4.3139, + "step": 638 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004965008529206872, + "loss": 4.1165, + "step": 639 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004964835497456255, + "loss": 4.2712, + "step": 640 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004964662041971841, + "loss": 4.2252, + "step": 641 + }, + { + "epoch": 0.08, + "learning_rate": 0.000496448816278345, + "loss": 4.2737, + "step": 642 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004964313859920972, + "loss": 4.3351, + "step": 643 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004964139133414373, + "loss": 4.341, + "step": 644 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004963963983293691, + "loss": 4.3291, + "step": 645 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004963788409589035, + "loss": 4.3158, + "step": 646 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004963612412330589, + "loss": 4.2245, + "step": 647 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004963435991548608, + "loss": 4.2424, + "step": 648 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004963259147273422, + "loss": 4.3923, + "step": 649 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004963081879535431, + "loss": 4.2578, + "step": 650 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004962904188365112, + "loss": 4.415, + "step": 651 + }, + { + "epoch": 0.08, + "learning_rate": 0.000496272607379301, + "loss": 4.2244, + "step": 652 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004962547535849745, + "loss": 4.1748, + "step": 653 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004962368574566011, + "loss": 4.2844, + "step": 654 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004962189189972573, + "loss": 4.2328, + "step": 655 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004962009382100268, + "loss": 4.3287, + "step": 656 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004961829150980009, + "loss": 4.3582, + "step": 657 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004961648496642778, + "loss": 4.3382, + "step": 658 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004961467419119634, + "loss": 4.2137, + "step": 659 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004961285918441704, + "loss": 4.3002, + "step": 660 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004961103994640192, + "loss": 4.1849, + "step": 661 + }, + { + "epoch": 0.08, + "learning_rate": 0.000496092164774637, + "loss": 4.1923, + "step": 662 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004960738877791589, + "loss": 4.232, + "step": 663 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004960555684807266, + "loss": 4.314, + "step": 664 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004960372068824896, + "loss": 4.2828, + "step": 665 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004960188029876044, + "loss": 4.2291, + "step": 666 + }, + { + "epoch": 0.09, + "learning_rate": 0.000496000356799235, + "loss": 4.2705, + "step": 667 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004959818683205523, + "loss": 4.4088, + "step": 668 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004959633375547348, + "loss": 4.4048, + "step": 669 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004959447645049681, + "loss": 4.3704, + "step": 670 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004959261491744452, + "loss": 4.1972, + "step": 671 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004959074915663661, + "loss": 4.1795, + "step": 672 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004958887916839386, + "loss": 4.3834, + "step": 673 + }, + { + "epoch": 0.09, + "learning_rate": 0.000495870049530377, + "loss": 4.3535, + "step": 674 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004958512651089036, + "loss": 4.3607, + "step": 675 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004958324384227477, + "loss": 4.2224, + "step": 676 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004958135694751455, + "loss": 4.2188, + "step": 677 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004957946582693412, + "loss": 4.282, + "step": 678 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004957757048085856, + "loss": 4.37, + "step": 679 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004957567090961369, + "loss": 4.2364, + "step": 680 + }, + { + "epoch": 0.09, + "learning_rate": 0.000495737671135261, + "loss": 4.3403, + "step": 681 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004957185909292306, + "loss": 4.3555, + "step": 682 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004956994684813257, + "loss": 4.3167, + "step": 683 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004956803037948338, + "loss": 4.3009, + "step": 684 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004956610968730495, + "loss": 4.4175, + "step": 685 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004956418477192748, + "loss": 4.2695, + "step": 686 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004956225563368187, + "loss": 4.2661, + "step": 687 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004956032227289976, + "loss": 4.2297, + "step": 688 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004955838468991353, + "loss": 4.2865, + "step": 689 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004955644288505627, + "loss": 4.3801, + "step": 690 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004955449685866179, + "loss": 4.2906, + "step": 691 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004955254661106464, + "loss": 4.3527, + "step": 692 + }, + { + "epoch": 0.09, + "learning_rate": 0.000495505921426001, + "loss": 4.1816, + "step": 693 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004954863345360414, + "loss": 4.1234, + "step": 694 + }, + { + "epoch": 0.09, + "learning_rate": 0.000495466705444135, + "loss": 4.1887, + "step": 695 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004954470341536563, + "loss": 4.2989, + "step": 696 + }, + { + "epoch": 0.09, + "learning_rate": 0.000495427320667987, + "loss": 4.2669, + "step": 697 + }, + { + "epoch": 0.09, + "learning_rate": 0.000495407564990516, + "loss": 4.21, + "step": 698 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004953877671246395, + "loss": 4.1854, + "step": 699 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004953679270737611, + "loss": 4.4621, + "step": 700 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004953480448412914, + "loss": 4.1883, + "step": 701 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004953281204306487, + "loss": 4.1756, + "step": 702 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004953081538452579, + "loss": 4.0093, + "step": 703 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004952881450885515, + "loss": 4.3104, + "step": 704 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004952680941639694, + "loss": 4.2394, + "step": 705 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004952480010749585, + "loss": 4.2298, + "step": 706 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004952278658249731, + "loss": 4.2388, + "step": 707 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004952076884174746, + "loss": 4.2597, + "step": 708 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004951874688559318, + "loss": 4.2408, + "step": 709 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004951672071438207, + "loss": 4.3414, + "step": 710 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004951469032846243, + "loss": 4.3405, + "step": 711 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004951265572818334, + "loss": 4.1873, + "step": 712 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004951061691389455, + "loss": 4.1535, + "step": 713 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004950857388594656, + "loss": 4.3068, + "step": 714 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004950652664469059, + "loss": 4.2893, + "step": 715 + }, + { + "epoch": 0.09, + "learning_rate": 0.000495044751904786, + "loss": 4.2828, + "step": 716 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004950241952366322, + "loss": 4.2681, + "step": 717 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004950035964459789, + "loss": 4.1371, + "step": 718 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004949829555363669, + "loss": 4.2934, + "step": 719 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004949622725113448, + "loss": 4.1647, + "step": 720 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004949415473744683, + "loss": 4.205, + "step": 721 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004949207801293001, + "loss": 4.1718, + "step": 722 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004948999707794105, + "loss": 4.2447, + "step": 723 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004948791193283765, + "loss": 4.2515, + "step": 724 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004948582257797834, + "loss": 4.3018, + "step": 725 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004948372901372224, + "loss": 4.2239, + "step": 726 + }, + { + "epoch": 0.09, + "learning_rate": 0.000494816312404293, + "loss": 4.2911, + "step": 727 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004947952925846013, + "loss": 4.1938, + "step": 728 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004947742306817608, + "loss": 4.2104, + "step": 729 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004947531266993924, + "loss": 4.2331, + "step": 730 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004947319806411243, + "loss": 4.2187, + "step": 731 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004947107925105912, + "loss": 4.2547, + "step": 732 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004946895623114362, + "loss": 4.2272, + "step": 733 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004946682900473087, + "loss": 4.2638, + "step": 734 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004946469757218657, + "loss": 4.3074, + "step": 735 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004946256193387714, + "loss": 4.2809, + "step": 736 + }, + { + "epoch": 0.09, + "learning_rate": 0.000494604220901697, + "loss": 4.126, + "step": 737 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004945827804143216, + "loss": 4.2882, + "step": 738 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004945612978803307, + "loss": 4.0373, + "step": 739 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004945397733034175, + "loss": 4.2388, + "step": 740 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004945182066872823, + "loss": 4.3009, + "step": 741 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004944965980356326, + "loss": 4.1565, + "step": 742 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004944749473521834, + "loss": 4.2097, + "step": 743 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004944532546406565, + "loss": 4.2827, + "step": 744 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004944315199047812, + "loss": 4.2114, + "step": 745 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004944097431482939, + "loss": 4.2337, + "step": 746 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004943879243749382, + "loss": 4.2639, + "step": 747 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004943660635884652, + "loss": 4.121, + "step": 748 + }, + { + "epoch": 0.1, + "learning_rate": 0.000494344160792633, + "loss": 4.2165, + "step": 749 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004943222159912069, + "loss": 4.1482, + "step": 750 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004943002291879593, + "loss": 4.2623, + "step": 751 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004942782003866703, + "loss": 4.2787, + "step": 752 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004942561295911267, + "loss": 4.2767, + "step": 753 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004942340168051226, + "loss": 4.2755, + "step": 754 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004942118620324597, + "loss": 4.1874, + "step": 755 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004941896652769466, + "loss": 4.251, + "step": 756 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004941674265423992, + "loss": 4.1956, + "step": 757 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004941451458326404, + "loss": 4.3885, + "step": 758 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004941228231515008, + "loss": 4.0152, + "step": 759 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004941004585028177, + "loss": 4.2508, + "step": 760 + }, + { + "epoch": 0.1, + "learning_rate": 0.000494078051890436, + "loss": 4.2314, + "step": 761 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004940556033182075, + "loss": 4.1776, + "step": 762 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004940331127899915, + "loss": 4.394, + "step": 763 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004940105803096544, + "loss": 4.1682, + "step": 764 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004939880058810696, + "loss": 4.1124, + "step": 765 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004939653895081181, + "loss": 4.187, + "step": 766 + }, + { + "epoch": 0.1, + "learning_rate": 0.000493942731194688, + "loss": 4.0823, + "step": 767 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004939200309446741, + "loss": 4.1678, + "step": 768 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004938972887619793, + "loss": 4.2891, + "step": 769 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004938745046505129, + "loss": 4.236, + "step": 770 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004938516786141921, + "loss": 4.2548, + "step": 771 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004938288106569407, + "loss": 4.2915, + "step": 772 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004938059007826901, + "loss": 4.1657, + "step": 773 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004937829489953787, + "loss": 4.3297, + "step": 774 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004937599552989521, + "loss": 4.1626, + "step": 775 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004937369196973633, + "loss": 4.2399, + "step": 776 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004937138421945724, + "loss": 4.2127, + "step": 777 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004936907227945467, + "loss": 4.2848, + "step": 778 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004936675615012606, + "loss": 4.2064, + "step": 779 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004936443583186958, + "loss": 4.3065, + "step": 780 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004936211132508413, + "loss": 4.2114, + "step": 781 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004935978263016931, + "loss": 4.1466, + "step": 782 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004935744974752546, + "loss": 4.2123, + "step": 783 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004935511267755361, + "loss": 4.1627, + "step": 784 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004935277142065556, + "loss": 4.0829, + "step": 785 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004935042597723376, + "loss": 4.2594, + "step": 786 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004934807634769145, + "loss": 4.2686, + "step": 787 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004934572253243255, + "loss": 4.1844, + "step": 788 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004934336453186171, + "loss": 4.2273, + "step": 789 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004934100234638429, + "loss": 4.2768, + "step": 790 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004933863597640638, + "loss": 4.2901, + "step": 791 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004933626542233478, + "loss": 4.307, + "step": 792 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004933389068457704, + "loss": 4.182, + "step": 793 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004933151176354138, + "loss": 4.1657, + "step": 794 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004932912865963677, + "loss": 4.1593, + "step": 795 + }, + { + "epoch": 0.1, + "learning_rate": 0.000493267413732729, + "loss": 4.251, + "step": 796 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004932434990486017, + "loss": 4.0805, + "step": 797 + }, + { + "epoch": 0.1, + "learning_rate": 0.000493219542548097, + "loss": 4.0694, + "step": 798 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004931955442353333, + "loss": 4.3214, + "step": 799 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004931715041144361, + "loss": 4.3547, + "step": 800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004931474221895383, + "loss": 4.0936, + "step": 801 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004931232984647798, + "loss": 4.2201, + "step": 802 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004930991329443079, + "loss": 4.285, + "step": 803 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004930749256322766, + "loss": 4.2813, + "step": 804 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004930506765328477, + "loss": 4.2098, + "step": 805 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004930263856501899, + "loss": 4.1824, + "step": 806 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004930020529884789, + "loss": 4.2784, + "step": 807 + }, + { + "epoch": 0.1, + "learning_rate": 0.000492977678551898, + "loss": 4.2767, + "step": 808 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004929532623446372, + "loss": 4.1163, + "step": 809 + }, + { + "epoch": 0.1, + "learning_rate": 0.000492928804370894, + "loss": 4.2339, + "step": 810 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004929043046348732, + "loss": 4.1033, + "step": 811 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004928797631407863, + "loss": 4.2538, + "step": 812 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004928551798928525, + "loss": 4.2937, + "step": 813 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004928305548952978, + "loss": 4.2001, + "step": 814 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004928058881523557, + "loss": 4.2609, + "step": 815 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004927811796682666, + "loss": 4.289, + "step": 816 + }, + { + "epoch": 0.1, + "learning_rate": 0.000492756429447278, + "loss": 4.1035, + "step": 817 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004927316374936449, + "loss": 4.1713, + "step": 818 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004927068038116293, + "loss": 4.172, + "step": 819 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004926819284055006, + "loss": 4.2486, + "step": 820 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004926570112795349, + "loss": 4.2689, + "step": 821 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004926320524380159, + "loss": 4.1911, + "step": 822 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004926070518852341, + "loss": 4.2248, + "step": 823 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004925820096254877, + "loss": 4.0852, + "step": 824 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004925569256630816, + "loss": 4.2065, + "step": 825 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004925318000023279, + "loss": 4.1923, + "step": 826 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004925066326475461, + "loss": 4.1202, + "step": 827 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004924814236030629, + "loss": 4.1576, + "step": 828 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004924561728732118, + "loss": 4.1883, + "step": 829 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004924308804623339, + "loss": 4.1946, + "step": 830 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004924055463747769, + "loss": 4.1849, + "step": 831 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004923801706148965, + "loss": 4.2288, + "step": 832 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004923547531870548, + "loss": 4.1721, + "step": 833 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004923292940956215, + "loss": 4.1687, + "step": 834 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004923037933449731, + "loss": 4.2077, + "step": 835 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004922782509394937, + "loss": 4.2337, + "step": 836 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004922526668835741, + "loss": 4.2307, + "step": 837 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004922270411816126, + "loss": 4.2094, + "step": 838 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004922013738380147, + "loss": 4.1423, + "step": 839 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004921756648571928, + "loss": 4.1334, + "step": 840 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004921499142435666, + "loss": 4.1615, + "step": 841 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004921241220015627, + "loss": 4.1999, + "step": 842 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004920982881356156, + "loss": 4.136, + "step": 843 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004920724126501659, + "loss": 4.2756, + "step": 844 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004920464955496622, + "loss": 4.1504, + "step": 845 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004920205368385598, + "loss": 4.0901, + "step": 846 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004919945365213214, + "loss": 4.1955, + "step": 847 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004919684946024168, + "loss": 4.3476, + "step": 848 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004919424110863227, + "loss": 4.1455, + "step": 849 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004919162859775235, + "loss": 4.3172, + "step": 850 + }, + { + "epoch": 0.11, + "learning_rate": 0.00049189011928051, + "loss": 4.1604, + "step": 851 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004918639109997809, + "loss": 4.2831, + "step": 852 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004918376611398415, + "loss": 4.2587, + "step": 853 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004918113697052046, + "loss": 4.1271, + "step": 854 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004917850367003898, + "loss": 4.1855, + "step": 855 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004917586621299243, + "loss": 4.3033, + "step": 856 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004917322459983421, + "loss": 4.1401, + "step": 857 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004917057883101843, + "loss": 4.1845, + "step": 858 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004916792890699995, + "loss": 4.158, + "step": 859 + }, + { + "epoch": 0.11, + "learning_rate": 0.000491652748282343, + "loss": 4.239, + "step": 860 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004916261659517777, + "loss": 4.2849, + "step": 861 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004915995420828732, + "loss": 4.0962, + "step": 862 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004915728766802066, + "loss": 4.2386, + "step": 863 + }, + { + "epoch": 0.11, + "learning_rate": 0.000491546169748362, + "loss": 4.1596, + "step": 864 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004915194212919305, + "loss": 4.3037, + "step": 865 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004914926313155106, + "loss": 4.0943, + "step": 866 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004914657998237078, + "loss": 4.0754, + "step": 867 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004914389268211346, + "loss": 4.1971, + "step": 868 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004914120123124108, + "loss": 4.2508, + "step": 869 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004913850563021636, + "loss": 4.2121, + "step": 870 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004913580587950267, + "loss": 4.1293, + "step": 871 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004913310197956416, + "loss": 4.1778, + "step": 872 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004913039393086563, + "loss": 4.1255, + "step": 873 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004912768173387264, + "loss": 4.0067, + "step": 874 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004912496538905145, + "loss": 4.2463, + "step": 875 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004912224489686903, + "loss": 4.059, + "step": 876 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004911952025779306, + "loss": 4.2897, + "step": 877 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004911679147229194, + "loss": 4.2857, + "step": 878 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004911405854083479, + "loss": 4.2313, + "step": 879 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004911132146389141, + "loss": 4.2071, + "step": 880 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004910858024193236, + "loss": 4.0622, + "step": 881 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004910583487542886, + "loss": 4.2516, + "step": 882 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004910308536485291, + "loss": 4.0968, + "step": 883 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004910033171067713, + "loss": 4.2827, + "step": 884 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004909757391337496, + "loss": 4.2421, + "step": 885 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004909481197342046, + "loss": 4.1554, + "step": 886 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004909204589128845, + "loss": 4.2273, + "step": 887 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004908927566745446, + "loss": 4.2615, + "step": 888 + }, + { + "epoch": 0.11, + "learning_rate": 0.000490865013023947, + "loss": 4.1574, + "step": 889 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004908372279658614, + "loss": 4.081, + "step": 890 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004908094015050643, + "loss": 4.0671, + "step": 891 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004907815336463394, + "loss": 4.1022, + "step": 892 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004907536243944774, + "loss": 4.2456, + "step": 893 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004907256737542765, + "loss": 4.0625, + "step": 894 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004906976817305413, + "loss": 4.2017, + "step": 895 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004906696483280844, + "loss": 4.0595, + "step": 896 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004906415735517248, + "loss": 4.2213, + "step": 897 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004906134574062888, + "loss": 4.1107, + "step": 898 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004905852998966102, + "loss": 4.0951, + "step": 899 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004905571010275295, + "loss": 4.1659, + "step": 900 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004905288608038943, + "loss": 4.2729, + "step": 901 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004905005792305595, + "loss": 4.2096, + "step": 902 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004904722563123869, + "loss": 4.1403, + "step": 903 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004904438920542457, + "loss": 4.1145, + "step": 904 + }, + { + "epoch": 0.12, + "learning_rate": 0.000490415486461012, + "loss": 4.0488, + "step": 905 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004903870395375691, + "loss": 4.0561, + "step": 906 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004903585512888072, + "loss": 4.2437, + "step": 907 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004903300217196239, + "loss": 4.2191, + "step": 908 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004903014508349237, + "loss": 4.2241, + "step": 909 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004902728386396184, + "loss": 4.1496, + "step": 910 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004902441851386266, + "loss": 4.2026, + "step": 911 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004902154903368741, + "loss": 4.1459, + "step": 912 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004901867542392942, + "loss": 4.1303, + "step": 913 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004901579768508265, + "loss": 4.2429, + "step": 914 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004901291581764186, + "loss": 4.146, + "step": 915 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004901002982210246, + "loss": 4.1734, + "step": 916 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004900713969896059, + "loss": 4.1661, + "step": 917 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004900424544871308, + "loss": 4.1901, + "step": 918 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004900134707185751, + "loss": 4.2018, + "step": 919 + }, + { + "epoch": 0.12, + "learning_rate": 0.000489984445688921, + "loss": 4.336, + "step": 920 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004899553794031589, + "loss": 4.0959, + "step": 921 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004899262718662851, + "loss": 4.3157, + "step": 922 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004898971230833037, + "loss": 4.147, + "step": 923 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004898679330592259, + "loss": 4.2017, + "step": 924 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004898387017990694, + "loss": 4.081, + "step": 925 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004898094293078597, + "loss": 4.2413, + "step": 926 + }, + { + "epoch": 0.12, + "learning_rate": 0.000489780115590629, + "loss": 4.0267, + "step": 927 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004897507606524167, + "loss": 4.1788, + "step": 928 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004897213644982691, + "loss": 4.0096, + "step": 929 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004896919271332399, + "loss": 4.0833, + "step": 930 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004896624485623896, + "loss": 4.1198, + "step": 931 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004896329287907862, + "loss": 4.1598, + "step": 932 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004896033678235041, + "loss": 4.2445, + "step": 933 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004895737656656254, + "loss": 4.1618, + "step": 934 + }, + { + "epoch": 0.12, + "learning_rate": 0.000489544122322239, + "loss": 4.2137, + "step": 935 + }, + { + "epoch": 0.12, + "learning_rate": 0.000489514437798441, + "loss": 4.1978, + "step": 936 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004894847120993344, + "loss": 3.9815, + "step": 937 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004894549452300294, + "loss": 4.0579, + "step": 938 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004894251371956433, + "loss": 4.1845, + "step": 939 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004893952880013005, + "loss": 4.1157, + "step": 940 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004893653976521325, + "loss": 4.1358, + "step": 941 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004893354661532776, + "loss": 4.1673, + "step": 942 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004893054935098814, + "loss": 4.3069, + "step": 943 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004892754797270967, + "loss": 4.1752, + "step": 944 + }, + { + "epoch": 0.12, + "learning_rate": 0.000489245424810083, + "loss": 4.1732, + "step": 945 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004892153287640072, + "loss": 4.257, + "step": 946 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004891851915940433, + "loss": 4.1221, + "step": 947 + }, + { + "epoch": 0.12, + "learning_rate": 0.000489155013305372, + "loss": 4.1557, + "step": 948 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004891247939031814, + "loss": 4.035, + "step": 949 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004890945333926665, + "loss": 4.1362, + "step": 950 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004890642317790296, + "loss": 4.2581, + "step": 951 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004890338890674797, + "loss": 4.3468, + "step": 952 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004890035052632333, + "loss": 4.134, + "step": 953 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004889730803715133, + "loss": 4.2842, + "step": 954 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004889426143975507, + "loss": 4.1605, + "step": 955 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004889121073465825, + "loss": 4.1876, + "step": 956 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004888815592238532, + "loss": 4.0625, + "step": 957 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004888509700346146, + "loss": 4.167, + "step": 958 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004888203397841253, + "loss": 4.146, + "step": 959 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004887896684776508, + "loss": 4.0655, + "step": 960 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004887589561204642, + "loss": 4.0848, + "step": 961 + }, + { + "epoch": 0.12, + "learning_rate": 0.000488728202717845, + "loss": 4.0244, + "step": 962 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004886974082750803, + "loss": 4.0178, + "step": 963 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004886665727974638, + "loss": 4.0554, + "step": 964 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004886356962902965, + "loss": 4.2434, + "step": 965 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004886047787588866, + "loss": 4.0964, + "step": 966 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004885738202085491, + "loss": 4.088, + "step": 967 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004885428206446061, + "loss": 4.1858, + "step": 968 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004885117800723869, + "loss": 4.3301, + "step": 969 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004884806984972274, + "loss": 4.1327, + "step": 970 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004884495759244713, + "loss": 4.1871, + "step": 971 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004884184123594687, + "loss": 4.1552, + "step": 972 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004883872078075771, + "loss": 4.0931, + "step": 973 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004883559622741607, + "loss": 4.1683, + "step": 974 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004883246757645914, + "loss": 4.2774, + "step": 975 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004882933482842472, + "loss": 4.0794, + "step": 976 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048826197983851396, + "loss": 4.1157, + "step": 977 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048823057043278427, + "loss": 4.2297, + "step": 978 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048819912007245765, + "loss": 4.1789, + "step": 979 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004881676287629409, + "loss": 4.137, + "step": 980 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004881360965096476, + "loss": 4.1465, + "step": 981 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004881045233179987, + "loss": 4.0563, + "step": 982 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048807290919342187, + "loss": 4.1817, + "step": 983 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048804125414135194, + "loss": 4.1052, + "step": 984 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048800955816723083, + "loss": 4.0714, + "step": 985 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004879778212765075, + "loss": 4.0786, + "step": 986 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004879460434746377, + "loss": 4.2036, + "step": 987 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004879142247670846, + "loss": 4.1906, + "step": 988 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048788236515931807, + "loss": 4.1642, + "step": 989 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048785046465681515, + "loss": 4.0598, + "step": 990 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048781852326506, + "loss": 4.1799, + "step": 991 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004877865409895438, + "loss": 4.2518, + "step": 992 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004877545178357644, + "loss": 4.1942, + "step": 993 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004877224538092271, + "loss": 3.9985, + "step": 994 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048769034891544416, + "loss": 4.1416, + "step": 995 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048765820315993465, + "loss": 4.0925, + "step": 996 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004876260165482248, + "loss": 4.0515, + "step": 997 + }, + { + "epoch": 0.13, + "learning_rate": 0.000487593789085848, + "loss": 4.0033, + "step": 998 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048756152077834435, + "loss": 4.393, + "step": 999 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004875292116312613, + "loss": 4.2389, + "step": 1000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048749686165015305, + "loss": 3.9859, + "step": 1001 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048746447084058105, + "loss": 4.0825, + "step": 1002 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004874320392081135, + "loss": 4.2436, + "step": 1003 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048739956675832594, + "loss": 4.1428, + "step": 1004 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048736705349680063, + "loss": 4.13, + "step": 1005 + }, + { + "epoch": 0.13, + "learning_rate": 0.000487334499429127, + "loss": 4.1201, + "step": 1006 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048730190456090154, + "loss": 4.1488, + "step": 1007 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048726926889772773, + "loss": 4.1181, + "step": 1008 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048723659244521576, + "loss": 4.0505, + "step": 1009 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048720387520898343, + "loss": 4.0116, + "step": 1010 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004871711171946549, + "loss": 4.0749, + "step": 1011 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048713831840786195, + "loss": 4.0319, + "step": 1012 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004871054788542428, + "loss": 4.1929, + "step": 1013 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004870725985394431, + "loss": 4.0323, + "step": 1014 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048703967746911535, + "loss": 4.3231, + "step": 1015 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004870067156489191, + "loss": 4.0712, + "step": 1016 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048697371308452077, + "loss": 4.0787, + "step": 1017 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004869406697815939, + "loss": 4.1698, + "step": 1018 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048690758574581905, + "loss": 4.0051, + "step": 1019 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048687446098288374, + "loss": 4.0403, + "step": 1020 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004868412954984825, + "loss": 4.1008, + "step": 1021 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004868080892983169, + "loss": 4.204, + "step": 1022 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004867748423880954, + "loss": 4.1329, + "step": 1023 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048674155477353354, + "loss": 4.1001, + "step": 1024 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048670822646035397, + "loss": 4.1111, + "step": 1025 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048667485745428594, + "loss": 4.1021, + "step": 1026 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048664144776106635, + "loss": 4.1032, + "step": 1027 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048660799738643837, + "loss": 4.075, + "step": 1028 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004865745063361526, + "loss": 4.0908, + "step": 1029 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048654097461596675, + "loss": 4.1778, + "step": 1030 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048650740223164503, + "loss": 4.037, + "step": 1031 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048647378918895904, + "loss": 4.2646, + "step": 1032 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004864401354936873, + "loss": 3.9891, + "step": 1033 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048640644115161523, + "loss": 4.1972, + "step": 1034 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048637270616853527, + "loss": 4.1234, + "step": 1035 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004863389305502467, + "loss": 4.1665, + "step": 1036 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004863051143025563, + "loss": 4.1039, + "step": 1037 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004862712574312771, + "loss": 4.0983, + "step": 1038 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048623735994222976, + "loss": 4.1553, + "step": 1039 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048620342184124156, + "loss": 4.2219, + "step": 1040 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004861694431341468, + "loss": 4.1449, + "step": 1041 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004861354238267868, + "loss": 4.2251, + "step": 1042 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048610136392501004, + "loss": 4.2004, + "step": 1043 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004860672634346716, + "loss": 4.1751, + "step": 1044 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004860331223616339, + "loss": 4.1839, + "step": 1045 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004859989407117661, + "loss": 4.2916, + "step": 1046 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004859647184909445, + "loss": 4.1341, + "step": 1047 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048593045570505223, + "loss": 4.0182, + "step": 1048 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048589615235997954, + "loss": 4.1049, + "step": 1049 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004858618084616234, + "loss": 4.1843, + "step": 1050 + }, + { + "epoch": 0.13, + "learning_rate": 0.00048582742401588814, + "loss": 4.0779, + "step": 1051 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004857929990286847, + "loss": 4.1128, + "step": 1052 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004857585335059312, + "loss": 3.8983, + "step": 1053 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004857240274535526, + "loss": 4.0946, + "step": 1054 + }, + { + "epoch": 0.14, + "learning_rate": 0.000485689480877481, + "loss": 4.08, + "step": 1055 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048565489378365524, + "loss": 4.1481, + "step": 1056 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048562026617802133, + "loss": 4.1265, + "step": 1057 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048558559806653214, + "loss": 4.1703, + "step": 1058 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004855508894551474, + "loss": 4.1218, + "step": 1059 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004855161403498341, + "loss": 4.1259, + "step": 1060 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048548135075656595, + "loss": 4.2048, + "step": 1061 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048544652068132363, + "loss": 4.1954, + "step": 1062 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004854116501300949, + "loss": 4.0859, + "step": 1063 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048537673910887435, + "loss": 4.1866, + "step": 1064 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048534178762366364, + "loss": 4.2288, + "step": 1065 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004853067956804713, + "loss": 4.0676, + "step": 1066 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004852717632853129, + "loss": 4.0676, + "step": 1067 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004852366904442109, + "loss": 4.2297, + "step": 1068 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048520157716319467, + "loss": 3.9878, + "step": 1069 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048516642344830077, + "loss": 4.1292, + "step": 1070 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004851312293055722, + "loss": 4.0696, + "step": 1071 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004850959947410596, + "loss": 4.152, + "step": 1072 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048506071976081995, + "loss": 4.1378, + "step": 1073 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048502540437091754, + "loss": 4.0982, + "step": 1074 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048499004857742347, + "loss": 4.0957, + "step": 1075 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004849546523864158, + "loss": 4.1133, + "step": 1076 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048491921580397956, + "loss": 4.1408, + "step": 1077 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048488373883620676, + "loss": 4.0628, + "step": 1078 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048484822148919614, + "loss": 4.1714, + "step": 1079 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004848126637690537, + "loss": 4.1729, + "step": 1080 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048477706568189215, + "loss": 4.0717, + "step": 1081 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004847414272338313, + "loss": 4.2288, + "step": 1082 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004847057484309977, + "loss": 4.0948, + "step": 1083 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048467002927952507, + "loss": 4.0377, + "step": 1084 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004846342697855538, + "loss": 4.0763, + "step": 1085 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004845984699552315, + "loss": 4.1896, + "step": 1086 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004845626297947125, + "loss": 4.0716, + "step": 1087 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004845267493101582, + "loss": 4.0993, + "step": 1088 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004844908285077367, + "loss": 4.0102, + "step": 1089 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048445486739362345, + "loss": 3.9836, + "step": 1090 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048441886597400043, + "loss": 3.9555, + "step": 1091 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004843828242550566, + "loss": 4.0382, + "step": 1092 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048434674224298824, + "loss": 4.0448, + "step": 1093 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004843106199439981, + "loss": 4.2643, + "step": 1094 + }, + { + "epoch": 0.14, + "learning_rate": 0.000484274457364296, + "loss": 4.1004, + "step": 1095 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004842382545100987, + "loss": 4.0805, + "step": 1096 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048420201138762997, + "loss": 4.1518, + "step": 1097 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004841657280031204, + "loss": 3.9197, + "step": 1098 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004841294043628074, + "loss": 4.2017, + "step": 1099 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048409304047293566, + "loss": 4.0797, + "step": 1100 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004840566363397564, + "loss": 4.1563, + "step": 1101 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048402019196952783, + "loss": 4.0639, + "step": 1102 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004839837073685154, + "loss": 4.1563, + "step": 1103 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048394718254299096, + "loss": 4.1313, + "step": 1104 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004839106174992338, + "loss": 4.0353, + "step": 1105 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004838740122435298, + "loss": 4.0569, + "step": 1106 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048383736678217173, + "loss": 4.1503, + "step": 1107 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048380068112145936, + "loss": 4.1007, + "step": 1108 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004837639552676996, + "loss": 4.1827, + "step": 1109 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048372718922720583, + "loss": 4.2241, + "step": 1110 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048369038300629866, + "loss": 4.1254, + "step": 1111 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004836535366113054, + "loss": 3.976, + "step": 1112 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004836166500485605, + "loss": 4.0615, + "step": 1113 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004835797233244052, + "loss": 4.0476, + "step": 1114 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004835427564451875, + "loss": 4.129, + "step": 1115 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004835057494172625, + "loss": 4.002, + "step": 1116 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048346870224699225, + "loss": 4.0626, + "step": 1117 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004834316149407454, + "loss": 4.0063, + "step": 1118 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004833944875048978, + "loss": 3.9862, + "step": 1119 + }, + { + "epoch": 0.14, + "learning_rate": 0.000483357319945832, + "loss": 4.2208, + "step": 1120 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004833201122699377, + "loss": 4.0908, + "step": 1121 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004832828644836111, + "loss": 4.0375, + "step": 1122 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004832455765932557, + "loss": 4.1744, + "step": 1123 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048320824860528165, + "loss": 4.0832, + "step": 1124 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004831708805261061, + "loss": 4.1218, + "step": 1125 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048313347236215304, + "loss": 4.1064, + "step": 1126 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004830960241198534, + "loss": 4.0322, + "step": 1127 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048305853580564483, + "loss": 4.1082, + "step": 1128 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004830210074259722, + "loss": 4.1197, + "step": 1129 + }, + { + "epoch": 0.14, + "learning_rate": 0.000482983438987287, + "loss": 4.1502, + "step": 1130 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048294583049604756, + "loss": 4.1918, + "step": 1131 + }, + { + "epoch": 0.14, + "learning_rate": 0.00048290818195871946, + "loss": 4.1, + "step": 1132 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004828704933817746, + "loss": 4.0162, + "step": 1133 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004828327647716924, + "loss": 4.1587, + "step": 1134 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004827949961349587, + "loss": 4.2018, + "step": 1135 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004827571874780663, + "loss": 4.0657, + "step": 1136 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004827193388075151, + "loss": 4.0521, + "step": 1137 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048268145012981157, + "loss": 4.0523, + "step": 1138 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004826435214514693, + "loss": 4.0383, + "step": 1139 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004826055527790087, + "loss": 4.1952, + "step": 1140 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048256754411895697, + "loss": 4.0064, + "step": 1141 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004825294954778482, + "loss": 4.2781, + "step": 1142 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004824914068622235, + "loss": 4.0435, + "step": 1143 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004824532782786307, + "loss": 4.2571, + "step": 1144 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048241510973362446, + "loss": 4.1441, + "step": 1145 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048237690123376656, + "loss": 4.1472, + "step": 1146 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004823386527856253, + "loss": 4.0534, + "step": 1147 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048230036439577615, + "loss": 4.1122, + "step": 1148 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004822620360708014, + "loss": 4.2229, + "step": 1149 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048222366781729, + "loss": 4.2783, + "step": 1150 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004821852596418379, + "loss": 4.0979, + "step": 1151 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004821468115510479, + "loss": 4.2686, + "step": 1152 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004821083235515298, + "loss": 4.0831, + "step": 1153 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004820697956499, + "loss": 4.2313, + "step": 1154 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048203122785278195, + "loss": 4.174, + "step": 1155 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004819926201668059, + "loss": 4.1195, + "step": 1156 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048195397259860894, + "loss": 4.1262, + "step": 1157 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048191528515483504, + "loss": 4.1849, + "step": 1158 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004818765578421351, + "loss": 4.0697, + "step": 1159 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004818377906671667, + "loss": 4.2282, + "step": 1160 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048179898363659435, + "loss": 4.1661, + "step": 1161 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004817601367570895, + "loss": 4.1286, + "step": 1162 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048172125003533034, + "loss": 4.1356, + "step": 1163 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048168232347800197, + "loss": 4.0311, + "step": 1164 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004816433570917963, + "loss": 4.0822, + "step": 1165 + }, + { + "epoch": 0.15, + "learning_rate": 0.000481604350883412, + "loss": 4.0521, + "step": 1166 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048156530485955496, + "loss": 4.0076, + "step": 1167 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004815262190269374, + "loss": 3.9863, + "step": 1168 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048148709339227867, + "loss": 4.1538, + "step": 1169 + }, + { + "epoch": 0.15, + "learning_rate": 0.000481447927962305, + "loss": 4.0017, + "step": 1170 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048140872274374935, + "loss": 4.1218, + "step": 1171 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048136947774335154, + "loss": 4.119, + "step": 1172 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048133019296785825, + "loss": 4.3119, + "step": 1173 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004812908684240229, + "loss": 4.0426, + "step": 1174 + }, + { + "epoch": 0.15, + "learning_rate": 0.000481251504118606, + "loss": 4.1286, + "step": 1175 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048121210005837463, + "loss": 4.176, + "step": 1176 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004811726562501028, + "loss": 3.9998, + "step": 1177 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048113317270057136, + "loss": 4.199, + "step": 1178 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004810936494165681, + "loss": 4.1197, + "step": 1179 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048105408640488733, + "loss": 3.9367, + "step": 1180 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004810144836723305, + "loss": 4.0212, + "step": 1181 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048097484122570585, + "loss": 4.0013, + "step": 1182 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004809351590718283, + "loss": 4.2338, + "step": 1183 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048089543721751963, + "loss": 4.0462, + "step": 1184 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004808556756696085, + "loss": 4.1406, + "step": 1185 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048081587443493044, + "loss": 4.0662, + "step": 1186 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004807760335203277, + "loss": 4.0056, + "step": 1187 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004807361529326495, + "loss": 4.0547, + "step": 1188 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004806962326787516, + "loss": 4.0384, + "step": 1189 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048065627276549697, + "loss": 3.9933, + "step": 1190 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048061627319975493, + "loss": 4.1419, + "step": 1191 + }, + { + "epoch": 0.15, + "learning_rate": 0.000480576233988402, + "loss": 4.0957, + "step": 1192 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004805361551383214, + "loss": 3.9766, + "step": 1193 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004804960366564032, + "loss": 3.9591, + "step": 1194 + }, + { + "epoch": 0.15, + "learning_rate": 0.000480455878549544, + "loss": 4.1219, + "step": 1195 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048041568082464773, + "loss": 4.0409, + "step": 1196 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048037544348862474, + "loss": 3.9358, + "step": 1197 + }, + { + "epoch": 0.15, + "learning_rate": 0.00048033516654839213, + "loss": 4.1112, + "step": 1198 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004802948500108743, + "loss": 3.9216, + "step": 1199 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004802544938830018, + "loss": 4.2291, + "step": 1200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004802140981717125, + "loss": 4.2144, + "step": 1201 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004801736628839509, + "loss": 4.0662, + "step": 1202 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004801331880266682, + "loss": 4.0596, + "step": 1203 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004800926736068225, + "loss": 4.1489, + "step": 1204 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004800521196313788, + "loss": 3.9519, + "step": 1205 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004800115261073088, + "loss": 4.0664, + "step": 1206 + }, + { + "epoch": 0.15, + "learning_rate": 0.00047997089304159085, + "loss": 4.1342, + "step": 1207 + }, + { + "epoch": 0.15, + "learning_rate": 0.00047993022044121036, + "loss": 4.128, + "step": 1208 + }, + { + "epoch": 0.15, + "learning_rate": 0.00047988950831315947, + "loss": 4.0463, + "step": 1209 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004798487566644369, + "loss": 4.2508, + "step": 1210 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047980796550204844, + "loss": 4.0301, + "step": 1211 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004797671348330066, + "loss": 4.1524, + "step": 1212 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004797262646643305, + "loss": 4.2672, + "step": 1213 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004796853550030463, + "loss": 4.0777, + "step": 1214 + }, + { + "epoch": 0.16, + "learning_rate": 0.000479644405856187, + "loss": 4.0393, + "step": 1215 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047960341723079185, + "loss": 4.0376, + "step": 1216 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004795623891339076, + "loss": 4.0644, + "step": 1217 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004795213215725873, + "loss": 4.1342, + "step": 1218 + }, + { + "epoch": 0.16, + "learning_rate": 0.000479480214553891, + "loss": 4.1149, + "step": 1219 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047943906808488546, + "loss": 4.0007, + "step": 1220 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047939788217264424, + "loss": 4.1728, + "step": 1221 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047935665682424767, + "loss": 4.1594, + "step": 1222 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047931539204678286, + "loss": 4.0787, + "step": 1223 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004792740878473437, + "loss": 4.2022, + "step": 1224 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004792327442330309, + "loss": 3.9459, + "step": 1225 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047919136121095173, + "loss": 3.9986, + "step": 1226 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047914993878822067, + "loss": 4.2141, + "step": 1227 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004791084769719586, + "loss": 3.9933, + "step": 1228 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047906697576929327, + "loss": 4.1285, + "step": 1229 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004790254351873592, + "loss": 4.1425, + "step": 1230 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004789838552332978, + "loss": 4.1351, + "step": 1231 + }, + { + "epoch": 0.16, + "learning_rate": 0.000478942235914257, + "loss": 3.9809, + "step": 1232 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047890057723739177, + "loss": 4.1049, + "step": 1233 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004788588792098637, + "loss": 4.1119, + "step": 1234 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047881714183884105, + "loss": 4.2442, + "step": 1235 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004787753651314991, + "loss": 4.1816, + "step": 1236 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047873354909501963, + "loss": 3.9376, + "step": 1237 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047869169373659145, + "loss": 3.9678, + "step": 1238 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004786497990634099, + "loss": 4.1368, + "step": 1239 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004786078650826771, + "loss": 4.1303, + "step": 1240 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047856589180160204, + "loss": 4.0071, + "step": 1241 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047852387922740037, + "loss": 3.887, + "step": 1242 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004784818273672947, + "loss": 3.9942, + "step": 1243 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047843973622851407, + "loss": 4.0842, + "step": 1244 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047839760581829444, + "loss": 4.038, + "step": 1245 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004783554361438786, + "loss": 4.135, + "step": 1246 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004783132272125159, + "loss": 4.0931, + "step": 1247 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047827097903146266, + "loss": 4.0916, + "step": 1248 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047822869160798185, + "loss": 4.0196, + "step": 1249 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047818636494934295, + "loss": 4.1495, + "step": 1250 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004781439990628227, + "loss": 4.1027, + "step": 1251 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047810159395570405, + "loss": 4.1701, + "step": 1252 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047805914963527707, + "loss": 3.9884, + "step": 1253 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047801666610883833, + "loss": 3.771, + "step": 1254 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047797414338369125, + "loss": 4.0895, + "step": 1255 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004779315814671461, + "loss": 4.0703, + "step": 1256 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047788898036651967, + "loss": 4.1507, + "step": 1257 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047784634008913565, + "loss": 4.1673, + "step": 1258 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047780366064232427, + "loss": 3.9415, + "step": 1259 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004777609420334227, + "loss": 4.03, + "step": 1260 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047771818426977475, + "loss": 4.0087, + "step": 1261 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004776753873587311, + "loss": 4.0949, + "step": 1262 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004776325513076488, + "loss": 4.0784, + "step": 1263 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047758967612389206, + "loss": 4.0065, + "step": 1264 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047754676181483146, + "loss": 4.2579, + "step": 1265 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004775038083878446, + "loss": 3.9999, + "step": 1266 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047746081585031566, + "loss": 4.1208, + "step": 1267 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047741778420963554, + "loss": 4.1112, + "step": 1268 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004773747134732018, + "loss": 4.2973, + "step": 1269 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004773316036484189, + "loss": 4.0272, + "step": 1270 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004772884547426979, + "loss": 4.0988, + "step": 1271 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004772452667634565, + "loss": 4.0792, + "step": 1272 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004772020397181194, + "loss": 4.103, + "step": 1273 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004771587736141176, + "loss": 4.1888, + "step": 1274 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004771154684588892, + "loss": 4.0628, + "step": 1275 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047707212425987885, + "loss": 4.0736, + "step": 1276 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047702874102453785, + "loss": 3.9494, + "step": 1277 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047698531876032437, + "loss": 4.1016, + "step": 1278 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004769418574747032, + "loss": 4.0243, + "step": 1279 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047689835717514576, + "loss": 4.0747, + "step": 1280 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004768548178691302, + "loss": 4.1757, + "step": 1281 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047681123956414164, + "loss": 4.126, + "step": 1282 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004767676222676715, + "loss": 4.1688, + "step": 1283 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047672396598721824, + "loss": 4.1951, + "step": 1284 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047668027073028676, + "loss": 4.1232, + "step": 1285 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004766365365043889, + "loss": 4.0359, + "step": 1286 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047659276331704295, + "loss": 3.9651, + "step": 1287 + }, + { + "epoch": 0.16, + "learning_rate": 0.00047654895117577414, + "loss": 4.0014, + "step": 1288 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004765051000881142, + "loss": 4.1957, + "step": 1289 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004764612100616017, + "loss": 4.1248, + "step": 1290 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004764172811037818, + "loss": 4.0923, + "step": 1291 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004763733132222065, + "loss": 3.9351, + "step": 1292 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004763293064244343, + "loss": 4.1494, + "step": 1293 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047628526071803046, + "loss": 4.1088, + "step": 1294 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047624117611056696, + "loss": 4.0429, + "step": 1295 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004761970526096225, + "loss": 4.1567, + "step": 1296 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004761528902227824, + "loss": 4.1101, + "step": 1297 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047610868895763865, + "loss": 4.0072, + "step": 1298 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047606444882179, + "loss": 4.2085, + "step": 1299 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004760201698228419, + "loss": 3.948, + "step": 1300 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004759758519684063, + "loss": 4.1649, + "step": 1301 + }, + { + "epoch": 0.17, + "learning_rate": 0.000475931495266102, + "loss": 4.0532, + "step": 1302 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004758870997235545, + "loss": 4.1335, + "step": 1303 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047584266534839577, + "loss": 4.0868, + "step": 1304 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004757981921482647, + "loss": 3.9806, + "step": 1305 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047575368013080675, + "loss": 3.9585, + "step": 1306 + }, + { + "epoch": 0.17, + "learning_rate": 0.000475709129303674, + "loss": 4.086, + "step": 1307 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004756645396745253, + "loss": 4.0879, + "step": 1308 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004756199112510261, + "loss": 3.9662, + "step": 1309 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004755752440408485, + "loss": 4.0317, + "step": 1310 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047553053805167136, + "loss": 4.0294, + "step": 1311 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004754857932911802, + "loss": 4.0403, + "step": 1312 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004754410097670671, + "loss": 4.0611, + "step": 1313 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004753961874870309, + "loss": 3.9594, + "step": 1314 + }, + { + "epoch": 0.17, + "learning_rate": 0.000475351326458777, + "loss": 3.9412, + "step": 1315 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004753064266900177, + "loss": 4.0278, + "step": 1316 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047526148818847157, + "loss": 4.0728, + "step": 1317 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047521651096186417, + "loss": 4.1767, + "step": 1318 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047517149501792767, + "loss": 4.109, + "step": 1319 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004751264403644007, + "loss": 4.0935, + "step": 1320 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047508134700902874, + "loss": 4.1599, + "step": 1321 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004750362149595639, + "loss": 3.9915, + "step": 1322 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004749910442237648, + "loss": 3.8875, + "step": 1323 + }, + { + "epoch": 0.17, + "learning_rate": 0.000474945834809397, + "loss": 4.0336, + "step": 1324 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004749005867242323, + "loss": 4.1032, + "step": 1325 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047485529997604947, + "loss": 4.0625, + "step": 1326 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047480997457263394, + "loss": 4.0826, + "step": 1327 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004747646105217774, + "loss": 4.1114, + "step": 1328 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047471920783127874, + "loss": 4.1775, + "step": 1329 + }, + { + "epoch": 0.17, + "learning_rate": 0.000474673766508943, + "loss": 4.1345, + "step": 1330 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004746282865625822, + "loss": 4.1807, + "step": 1331 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047458276800001485, + "loss": 4.1207, + "step": 1332 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004745372108290661, + "loss": 3.9961, + "step": 1333 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047449161505756766, + "loss": 3.9986, + "step": 1334 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004744459806933581, + "loss": 3.9518, + "step": 1335 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004744003077442825, + "loss": 4.0714, + "step": 1336 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047435459621819257, + "loss": 4.1014, + "step": 1337 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047430884612294645, + "loss": 4.0573, + "step": 1338 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004742630574664094, + "loss": 3.9619, + "step": 1339 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004742172302564528, + "loss": 4.0295, + "step": 1340 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047417136450095504, + "loss": 4.1859, + "step": 1341 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004741254602078009, + "loss": 4.0308, + "step": 1342 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047407951738488187, + "loss": 4.1283, + "step": 1343 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047403353604009605, + "loss": 3.9526, + "step": 1344 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004739875161813481, + "loss": 4.1214, + "step": 1345 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004739414578165495, + "loss": 4.0881, + "step": 1346 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047389536095361807, + "loss": 4.2572, + "step": 1347 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047384922560047855, + "loss": 4.0573, + "step": 1348 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047380305176506203, + "loss": 4.0584, + "step": 1349 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004737568394553064, + "loss": 4.1619, + "step": 1350 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047371058867915606, + "loss": 3.994, + "step": 1351 + }, + { + "epoch": 0.17, + "learning_rate": 0.000473664299444562, + "loss": 4.2357, + "step": 1352 + }, + { + "epoch": 0.17, + "learning_rate": 0.000473617971759482, + "loss": 4.0251, + "step": 1353 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004735716056318802, + "loss": 3.9252, + "step": 1354 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004735252010697275, + "loss": 4.1162, + "step": 1355 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047347875808100145, + "loss": 4.107, + "step": 1356 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047343227667368605, + "loss": 4.0812, + "step": 1357 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004733857568557721, + "loss": 4.1274, + "step": 1358 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004733391986352568, + "loss": 3.9853, + "step": 1359 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004732926020201441, + "loss": 4.1631, + "step": 1360 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004732459670184445, + "loss": 4.1494, + "step": 1361 + }, + { + "epoch": 0.17, + "learning_rate": 0.000473199293638175, + "loss": 4.0575, + "step": 1362 + }, + { + "epoch": 0.17, + "learning_rate": 0.00047315258188735954, + "loss": 4.0702, + "step": 1363 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004731058317740281, + "loss": 4.0097, + "step": 1364 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004730590433062178, + "loss": 4.0108, + "step": 1365 + }, + { + "epoch": 0.17, + "learning_rate": 0.000473012216491972, + "loss": 4.1791, + "step": 1366 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004729653513393408, + "loss": 4.0069, + "step": 1367 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047291844785638085, + "loss": 4.2297, + "step": 1368 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004728715060511555, + "loss": 4.0267, + "step": 1369 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004728245259317344, + "loss": 3.9775, + "step": 1370 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047277750750619415, + "loss": 3.9575, + "step": 1371 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047273045078261765, + "loss": 4.0846, + "step": 1372 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004726833557690946, + "loss": 4.0691, + "step": 1373 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004726362224737211, + "loss": 4.0713, + "step": 1374 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047258905090459996, + "loss": 4.0641, + "step": 1375 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004725418410698405, + "loss": 4.0245, + "step": 1376 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004724945929775586, + "loss": 4.1633, + "step": 1377 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004724473066358768, + "loss": 3.983, + "step": 1378 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047239998205292425, + "loss": 4.0068, + "step": 1379 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004723526192368364, + "loss": 4.003, + "step": 1380 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004723052181957556, + "loss": 4.0362, + "step": 1381 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047225777893783054, + "loss": 4.091, + "step": 1382 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004722103014712167, + "loss": 4.1211, + "step": 1383 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047216278580407603, + "loss": 3.9953, + "step": 1384 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047211523194457683, + "loss": 4.0545, + "step": 1385 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004720676399008943, + "loss": 4.0101, + "step": 1386 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047202000968121007, + "loss": 4.1468, + "step": 1387 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047197234129371234, + "loss": 4.076, + "step": 1388 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047192463474659576, + "loss": 4.1623, + "step": 1389 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004718768900480617, + "loss": 4.0854, + "step": 1390 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047182910720631804, + "loss": 4.1383, + "step": 1391 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047178128622957916, + "loss": 3.9461, + "step": 1392 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047173342712606613, + "loss": 4.1619, + "step": 1393 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047168552990400637, + "loss": 4.1182, + "step": 1394 + }, + { + "epoch": 0.18, + "learning_rate": 0.000471637594571634, + "loss": 3.9955, + "step": 1395 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004715896211371897, + "loss": 4.0278, + "step": 1396 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047154160960892065, + "loss": 4.0897, + "step": 1397 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047149355999508066, + "loss": 4.1619, + "step": 1398 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004714454723039299, + "loss": 4.1358, + "step": 1399 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047139734654373514, + "loss": 4.0249, + "step": 1400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004713491827227699, + "loss": 4.0201, + "step": 1401 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047130098084931406, + "loss": 4.1492, + "step": 1402 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047125274093165405, + "loss": 4.1786, + "step": 1403 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047120446297808287, + "loss": 4.0685, + "step": 1404 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047115614699690014, + "loss": 4.0152, + "step": 1405 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004711077929964118, + "loss": 4.0826, + "step": 1406 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004710594009849306, + "loss": 4.1144, + "step": 1407 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004710109709707757, + "loss": 4.1651, + "step": 1408 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004709625029622726, + "loss": 3.9481, + "step": 1409 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004709139969677537, + "loss": 4.0129, + "step": 1410 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004708654529955576, + "loss": 4.2332, + "step": 1411 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047081687105402967, + "loss": 4.0605, + "step": 1412 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047076825115152166, + "loss": 4.0677, + "step": 1413 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004707195932963919, + "loss": 3.9364, + "step": 1414 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047067089749700534, + "loss": 3.9906, + "step": 1415 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047062216376173315, + "loss": 4.2168, + "step": 1416 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004705733920989534, + "loss": 3.9508, + "step": 1417 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047052458251705043, + "loss": 4.0805, + "step": 1418 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004704757350244152, + "loss": 4.1167, + "step": 1419 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004704268496294451, + "loss": 4.1509, + "step": 1420 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047037792634054416, + "loss": 4.1261, + "step": 1421 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004703289651661229, + "loss": 4.0768, + "step": 1422 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004702799661145981, + "loss": 4.1107, + "step": 1423 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004702309291943936, + "loss": 4.2519, + "step": 1424 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047018185441393914, + "loss": 4.1066, + "step": 1425 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047013274178167136, + "loss": 4.0113, + "step": 1426 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047008359130603326, + "loss": 4.1158, + "step": 1427 + }, + { + "epoch": 0.18, + "learning_rate": 0.00047003440299547437, + "loss": 4.1872, + "step": 1428 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004699851768584508, + "loss": 4.0373, + "step": 1429 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004699359129034251, + "loss": 3.966, + "step": 1430 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004698866111388661, + "loss": 4.0773, + "step": 1431 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046983727157324964, + "loss": 4.0221, + "step": 1432 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004697878942150575, + "loss": 3.937, + "step": 1433 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046973847907277844, + "loss": 3.9961, + "step": 1434 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004696890261549073, + "loss": 4.0911, + "step": 1435 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046963953546994583, + "loss": 3.9627, + "step": 1436 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046959000702640185, + "loss": 4.0355, + "step": 1437 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046954044083279004, + "loss": 3.9009, + "step": 1438 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046949083689763114, + "loss": 3.9883, + "step": 1439 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046944119522945307, + "loss": 3.9936, + "step": 1440 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004693915158367894, + "loss": 3.9077, + "step": 1441 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046934179872818073, + "loss": 4.0762, + "step": 1442 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046929204391217414, + "loss": 4.0061, + "step": 1443 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004692422513973229, + "loss": 4.0044, + "step": 1444 + }, + { + "epoch": 0.18, + "learning_rate": 0.000469192421192187, + "loss": 3.8452, + "step": 1445 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046914255330533273, + "loss": 4.194, + "step": 1446 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046909264774533307, + "loss": 3.9445, + "step": 1447 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004690427045207673, + "loss": 3.9638, + "step": 1448 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046899272364022126, + "loss": 3.9901, + "step": 1449 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004689427051122873, + "loss": 4.1059, + "step": 1450 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046889264894556406, + "loss": 4.136, + "step": 1451 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046884255514865694, + "loss": 3.9938, + "step": 1452 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046879242373017746, + "loss": 4.196, + "step": 1453 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004687422546987439, + "loss": 4.1762, + "step": 1454 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046869204806298094, + "loss": 3.9605, + "step": 1455 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004686418038315196, + "loss": 4.0414, + "step": 1456 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046859152201299736, + "loss": 4.0957, + "step": 1457 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004685412026160584, + "loss": 4.0512, + "step": 1458 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046849084564935323, + "loss": 4.0119, + "step": 1459 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046844045112153865, + "loss": 4.0126, + "step": 1460 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004683900190412782, + "loss": 3.9924, + "step": 1461 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004683395494172417, + "loss": 4.0651, + "step": 1462 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004682890422581054, + "loss": 4.0834, + "step": 1463 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004682384975725522, + "loss": 3.9616, + "step": 1464 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004681879153692711, + "loss": 4.0976, + "step": 1465 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046813729565695793, + "loss": 4.1866, + "step": 1466 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004680866384443149, + "loss": 4.0814, + "step": 1467 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004680359437400503, + "loss": 3.946, + "step": 1468 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004679852115528793, + "loss": 4.0316, + "step": 1469 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004679344418915234, + "loss": 4.0846, + "step": 1470 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004678836347647104, + "loss": 4.1105, + "step": 1471 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004678327901811746, + "loss": 4.0398, + "step": 1472 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046778190814965694, + "loss": 4.0277, + "step": 1473 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004677309886789044, + "loss": 4.085, + "step": 1474 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004676800317776708, + "loss": 4.1552, + "step": 1475 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004676290374547162, + "loss": 3.9984, + "step": 1476 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004675780057188071, + "loss": 4.0266, + "step": 1477 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046752693657871645, + "loss": 4.0931, + "step": 1478 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046747583004322357, + "loss": 4.2104, + "step": 1479 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004674246861211143, + "loss": 4.1177, + "step": 1480 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004673735048211809, + "loss": 4.1087, + "step": 1481 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046732228615222203, + "loss": 4.0664, + "step": 1482 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046727103012304274, + "loss": 3.9682, + "step": 1483 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046721973674245453, + "loss": 4.0439, + "step": 1484 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046716840601927534, + "loss": 4.1316, + "step": 1485 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046711703796232954, + "loss": 4.0281, + "step": 1486 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004670656325804479, + "loss": 3.9938, + "step": 1487 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004670141898824676, + "loss": 4.0526, + "step": 1488 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004669627098772321, + "loss": 4.1051, + "step": 1489 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004669111925735916, + "loss": 4.0372, + "step": 1490 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046685963798040247, + "loss": 3.9415, + "step": 1491 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004668080461065275, + "loss": 4.0555, + "step": 1492 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046675641696083595, + "loss": 4.1429, + "step": 1493 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046670475055220347, + "loss": 3.883, + "step": 1494 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004666530468895121, + "loss": 4.0704, + "step": 1495 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004666013059816503, + "loss": 4.0351, + "step": 1496 + }, + { + "epoch": 0.19, + "learning_rate": 0.000466549527837513, + "loss": 3.9115, + "step": 1497 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046649771246600136, + "loss": 3.8979, + "step": 1498 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046644585987602304, + "loss": 3.973, + "step": 1499 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004663939700764923, + "loss": 4.0078, + "step": 1500 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004663420430763293, + "loss": 3.9422, + "step": 1501 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046629007888446115, + "loss": 4.0192, + "step": 1502 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046623807750982094, + "loss": 4.0225, + "step": 1503 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046618603896134836, + "loss": 4.1158, + "step": 1504 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046613396324798943, + "loss": 3.9484, + "step": 1505 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004660818503786965, + "loss": 3.9921, + "step": 1506 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046602970036242866, + "loss": 4.115, + "step": 1507 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046597751320815074, + "loss": 3.9928, + "step": 1508 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046592528892483453, + "loss": 3.9271, + "step": 1509 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046587302752145793, + "loss": 3.9848, + "step": 1510 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046582072900700524, + "loss": 4.0796, + "step": 1511 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046576839339046726, + "loss": 4.1188, + "step": 1512 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046571602068084107, + "loss": 3.9721, + "step": 1513 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046566361088713016, + "loss": 4.044, + "step": 1514 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046561116401834426, + "loss": 3.9172, + "step": 1515 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004655586800834998, + "loss": 4.0128, + "step": 1516 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004655061590916191, + "loss": 3.9514, + "step": 1517 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004654536010517314, + "loss": 4.0918, + "step": 1518 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046540100597287193, + "loss": 4.0881, + "step": 1519 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046534837386408236, + "loss": 3.9169, + "step": 1520 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004652957047344108, + "loss": 4.0558, + "step": 1521 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046524299859291164, + "loss": 4.0767, + "step": 1522 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004651902554486458, + "loss": 3.9663, + "step": 1523 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004651374753106803, + "loss": 4.0981, + "step": 1524 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046508465818808866, + "loss": 4.0126, + "step": 1525 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046503180408995085, + "loss": 4.09, + "step": 1526 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046497891302535303, + "loss": 4.0559, + "step": 1527 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046492598500338787, + "loss": 4.0077, + "step": 1528 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004648730200331542, + "loss": 4.0353, + "step": 1529 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004648200181237574, + "loss": 3.8577, + "step": 1530 + }, + { + "epoch": 0.2, + "learning_rate": 0.000464766979284309, + "loss": 4.0692, + "step": 1531 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004647139035239272, + "loss": 4.0766, + "step": 1532 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004646607908517361, + "loss": 4.1161, + "step": 1533 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004646076412768665, + "loss": 4.2223, + "step": 1534 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046455445480845543, + "loss": 4.1164, + "step": 1535 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004645012314556463, + "loss": 4.1598, + "step": 1536 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004644479712275887, + "loss": 4.0292, + "step": 1537 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004643946741334387, + "loss": 4.0032, + "step": 1538 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046434134018235885, + "loss": 4.206, + "step": 1539 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004642879693835178, + "loss": 4.0512, + "step": 1540 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046423456174609045, + "loss": 4.118, + "step": 1541 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004641811172792584, + "loss": 3.9917, + "step": 1542 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046412763599220925, + "loss": 4.2138, + "step": 1543 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046407411789413714, + "loss": 4.083, + "step": 1544 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004640205629942423, + "loss": 3.9664, + "step": 1545 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046396697130173165, + "loss": 4.0256, + "step": 1546 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004639133428258181, + "loss": 4.1461, + "step": 1547 + }, + { + "epoch": 0.2, + "learning_rate": 0.000463859677575721, + "loss": 4.0198, + "step": 1548 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046380597556066607, + "loss": 4.052, + "step": 1549 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004637522367898852, + "loss": 4.0777, + "step": 1550 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046369846127261696, + "loss": 4.0551, + "step": 1551 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004636446490181057, + "loss": 4.1564, + "step": 1552 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004635908000356025, + "loss": 4.1473, + "step": 1553 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046353691433436464, + "loss": 4.0669, + "step": 1554 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046348299192365566, + "loss": 4.1048, + "step": 1555 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046342903281274553, + "loss": 4.0964, + "step": 1556 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046337503701091026, + "loss": 4.0863, + "step": 1557 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004633210045274325, + "loss": 4.2488, + "step": 1558 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046326693537160113, + "loss": 3.9806, + "step": 1559 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004632128295527111, + "loss": 4.0896, + "step": 1560 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046315868708006393, + "loss": 3.8495, + "step": 1561 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004631045079629672, + "loss": 4.0053, + "step": 1562 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046305029221073516, + "loss": 4.088, + "step": 1563 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004629960398326879, + "loss": 4.1, + "step": 1564 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046294175083815215, + "loss": 3.9591, + "step": 1565 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004628874252364609, + "loss": 4.0266, + "step": 1566 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046283306303695316, + "loss": 4.0693, + "step": 1567 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004627786642489745, + "loss": 4.0492, + "step": 1568 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004627242288818767, + "loss": 3.9594, + "step": 1569 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004626697569450179, + "loss": 3.9892, + "step": 1570 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046261524844776235, + "loss": 3.8918, + "step": 1571 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004625607033994808, + "loss": 4.1313, + "step": 1572 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004625061218095501, + "loss": 3.9956, + "step": 1573 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046245150368735344, + "loss": 4.0819, + "step": 1574 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004623968490422804, + "loss": 4.0702, + "step": 1575 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004623421578837267, + "loss": 3.96, + "step": 1576 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004622874302210943, + "loss": 4.0995, + "step": 1577 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046223266606379166, + "loss": 4.0713, + "step": 1578 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004621778654212333, + "loss": 4.0919, + "step": 1579 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046212302830284015, + "loss": 4.0356, + "step": 1580 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004620681547180392, + "loss": 4.0621, + "step": 1581 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046201324467626405, + "loss": 4.1044, + "step": 1582 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004619582981869542, + "loss": 4.0745, + "step": 1583 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046190331525955566, + "loss": 4.1086, + "step": 1584 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004618482959035206, + "loss": 4.0987, + "step": 1585 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004617932401283076, + "loss": 3.954, + "step": 1586 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004617381479433813, + "loss": 3.9597, + "step": 1587 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004616830193582127, + "loss": 3.9456, + "step": 1588 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046162785438227895, + "loss": 3.8973, + "step": 1589 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004615726530250637, + "loss": 4.0593, + "step": 1590 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046151741529605654, + "loss": 4.1235, + "step": 1591 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046146214120475367, + "loss": 4.1628, + "step": 1592 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004614068307606572, + "loss": 4.0466, + "step": 1593 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004613514839732757, + "loss": 4.1017, + "step": 1594 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046129610085212394, + "loss": 4.0909, + "step": 1595 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046124068140672284, + "loss": 3.9555, + "step": 1596 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004611852256465997, + "loss": 4.0905, + "step": 1597 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046112973358128796, + "loss": 4.0416, + "step": 1598 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004610742052203275, + "loss": 4.1607, + "step": 1599 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004610186405732641, + "loss": 4.0124, + "step": 1600 + }, + { + "epoch": 0.2, + "learning_rate": 0.00046096303964965004, + "loss": 3.9606, + "step": 1601 + }, + { + "epoch": 0.21, + "learning_rate": 0.00046090740245904383, + "loss": 3.9927, + "step": 1602 + }, + { + "epoch": 0.21, + "learning_rate": 0.00046085172901101006, + "loss": 4.0415, + "step": 1603 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004607960193151197, + "loss": 4.017, + "step": 1604 + }, + { + "epoch": 0.21, + "learning_rate": 0.00046074027338094983, + "loss": 4.0626, + "step": 1605 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004606844912180839, + "loss": 4.2053, + "step": 1606 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004606286728361113, + "loss": 4.1863, + "step": 1607 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004605728182446282, + "loss": 3.9083, + "step": 1608 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004605169274532364, + "loss": 3.9833, + "step": 1609 + }, + { + "epoch": 0.21, + "learning_rate": 0.00046046100047154425, + "loss": 4.1114, + "step": 1610 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004604050373091663, + "loss": 3.9282, + "step": 1611 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004603490379757232, + "loss": 4.1001, + "step": 1612 + }, + { + "epoch": 0.21, + "learning_rate": 0.00046029300248084183, + "loss": 3.977, + "step": 1613 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004602369308341555, + "loss": 3.9486, + "step": 1614 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004601808230453034, + "loss": 4.0121, + "step": 1615 + }, + { + "epoch": 0.21, + "learning_rate": 0.00046012467912393126, + "loss": 3.9856, + "step": 1616 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004600684990796907, + "loss": 4.0571, + "step": 1617 + }, + { + "epoch": 0.21, + "learning_rate": 0.00046001228292223993, + "loss": 4.088, + "step": 1618 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045995603066124305, + "loss": 3.8986, + "step": 1619 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045989974230637045, + "loss": 4.078, + "step": 1620 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004598434178672988, + "loss": 4.3037, + "step": 1621 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045978705735371083, + "loss": 4.1823, + "step": 1622 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045973066077529574, + "loss": 3.9531, + "step": 1623 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045967422814174863, + "loss": 4.0079, + "step": 1624 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004596177594627709, + "loss": 4.0994, + "step": 1625 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045956125474807034, + "loss": 3.9621, + "step": 1626 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004595047140073605, + "loss": 4.0593, + "step": 1627 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004594481372503616, + "loss": 4.0953, + "step": 1628 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045939152448679977, + "loss": 3.9986, + "step": 1629 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004593348757264074, + "loss": 4.1099, + "step": 1630 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004592781909789231, + "loss": 4.0416, + "step": 1631 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004592214702540916, + "loss": 4.0495, + "step": 1632 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045916471356166383, + "loss": 4.0295, + "step": 1633 + }, + { + "epoch": 0.21, + "learning_rate": 0.000459107920911397, + "loss": 4.0836, + "step": 1634 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045905109231305437, + "loss": 3.849, + "step": 1635 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045899422777640543, + "loss": 3.9728, + "step": 1636 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045893732731122584, + "loss": 4.0173, + "step": 1637 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004588803909272975, + "loss": 3.9639, + "step": 1638 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004588234186344084, + "loss": 3.9787, + "step": 1639 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004587664104423528, + "loss": 4.0367, + "step": 1640 + }, + { + "epoch": 0.21, + "learning_rate": 0.000458709366360931, + "loss": 4.0966, + "step": 1641 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004586522863999495, + "loss": 4.151, + "step": 1642 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004585951705692211, + "loss": 4.0714, + "step": 1643 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004585380188785646, + "loss": 3.9279, + "step": 1644 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004584808313378051, + "loss": 4.0589, + "step": 1645 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004584236079567738, + "loss": 4.1344, + "step": 1646 + }, + { + "epoch": 0.21, + "learning_rate": 0.000458366348745308, + "loss": 3.9958, + "step": 1647 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045830905371325125, + "loss": 4.0075, + "step": 1648 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004582517228704533, + "loss": 3.9957, + "step": 1649 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045819435622676985, + "loss": 4.1142, + "step": 1650 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004581369537920631, + "loss": 3.9473, + "step": 1651 + }, + { + "epoch": 0.21, + "learning_rate": 0.000458079515576201, + "loss": 4.0868, + "step": 1652 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045802204158905787, + "loss": 4.0439, + "step": 1653 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045796453184051417, + "loss": 4.1042, + "step": 1654 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004579069863404566, + "loss": 4.0023, + "step": 1655 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004578494050987777, + "loss": 4.1741, + "step": 1656 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004577917881253766, + "loss": 4.0817, + "step": 1657 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004577341354301581, + "loss": 4.0277, + "step": 1658 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045767644702303346, + "loss": 4.0674, + "step": 1659 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045761872291392005, + "loss": 3.9874, + "step": 1660 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045756096311274123, + "loss": 4.0419, + "step": 1661 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045750316762942656, + "loss": 4.037, + "step": 1662 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004574453364739118, + "loss": 3.8747, + "step": 1663 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045738746965613876, + "loss": 4.0749, + "step": 1664 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004573295671860555, + "loss": 4.0866, + "step": 1665 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004572716290736161, + "loss": 4.0838, + "step": 1666 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045721365532878065, + "loss": 3.9158, + "step": 1667 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004571556459615157, + "loss": 4.0505, + "step": 1668 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045709760098179363, + "loss": 4.1002, + "step": 1669 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045703952039959306, + "loss": 4.0659, + "step": 1670 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045698140422489875, + "loss": 4.0311, + "step": 1671 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045692325246770156, + "loss": 4.0259, + "step": 1672 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004568650651379984, + "loss": 3.9667, + "step": 1673 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004568068422457923, + "loss": 4.1356, + "step": 1674 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004567485838010925, + "loss": 4.136, + "step": 1675 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045669028981391434, + "loss": 4.0318, + "step": 1676 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045663196029427925, + "loss": 3.9699, + "step": 1677 + }, + { + "epoch": 0.21, + "learning_rate": 0.00045657359525221465, + "loss": 4.0048, + "step": 1678 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004565151946977542, + "loss": 4.0499, + "step": 1679 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045645675864093766, + "loss": 4.0881, + "step": 1680 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004563982870918109, + "loss": 4.0703, + "step": 1681 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045633978006042575, + "loss": 4.1222, + "step": 1682 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045628123755684036, + "loss": 4.0323, + "step": 1683 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004562226595911188, + "loss": 4.0273, + "step": 1684 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004561640461733313, + "loss": 3.9522, + "step": 1685 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004561053973135543, + "loss": 4.0097, + "step": 1686 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045604671302187, + "loss": 4.0873, + "step": 1687 + }, + { + "epoch": 0.22, + "learning_rate": 0.000455987993308367, + "loss": 4.0492, + "step": 1688 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045592923818314014, + "loss": 3.9688, + "step": 1689 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045587044765628973, + "loss": 3.9127, + "step": 1690 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004558116217379228, + "loss": 4.0844, + "step": 1691 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045575276043815203, + "loss": 4.0878, + "step": 1692 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045569386376709655, + "loss": 4.0207, + "step": 1693 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004556349317348812, + "loss": 3.8404, + "step": 1694 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004555759643516372, + "loss": 4.0899, + "step": 1695 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004555169616275017, + "loss": 4.0925, + "step": 1696 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045545792357261784, + "loss": 3.9984, + "step": 1697 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004553988501971351, + "loss": 4.0411, + "step": 1698 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045533974151120896, + "loss": 3.9763, + "step": 1699 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004552805975250006, + "loss": 4.1045, + "step": 1700 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004552214182486777, + "loss": 3.8542, + "step": 1701 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004551622036924139, + "loss": 4.168, + "step": 1702 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004551029538663889, + "loss": 4.1697, + "step": 1703 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045504366878078826, + "loss": 3.8318, + "step": 1704 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004549843484458041, + "loss": 3.854, + "step": 1705 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004549249928716338, + "loss": 3.9492, + "step": 1706 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004548656020684817, + "loss": 3.9894, + "step": 1707 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004548061760465575, + "loss": 3.8775, + "step": 1708 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045474671481607744, + "loss": 4.0294, + "step": 1709 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045468721838726336, + "loss": 4.0637, + "step": 1710 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045462768677034354, + "loss": 4.0511, + "step": 1711 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045456811997555215, + "loss": 3.9795, + "step": 1712 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004545085180131293, + "loss": 3.9944, + "step": 1713 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004544488808933214, + "loss": 4.0634, + "step": 1714 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004543892086263807, + "loss": 4.0374, + "step": 1715 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004543295012225656, + "loss": 3.9945, + "step": 1716 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045426975869214035, + "loss": 4.1307, + "step": 1717 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004542099810453755, + "loss": 4.1132, + "step": 1718 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004541501682925475, + "loss": 4.0963, + "step": 1719 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004540903204439389, + "loss": 4.0708, + "step": 1720 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004540304375098382, + "loss": 4.1223, + "step": 1721 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004539705195005399, + "loss": 4.0159, + "step": 1722 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045391056642634476, + "loss": 3.8655, + "step": 1723 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045385057829755925, + "loss": 3.8822, + "step": 1724 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045379055512449615, + "loss": 4.0362, + "step": 1725 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045373049691747403, + "loss": 4.1492, + "step": 1726 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004536704036868177, + "loss": 4.0212, + "step": 1727 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004536102754428577, + "loss": 4.0776, + "step": 1728 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045355011219593103, + "loss": 4.0047, + "step": 1729 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045348991395638026, + "loss": 4.0935, + "step": 1730 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045342968073455427, + "loss": 4.0654, + "step": 1731 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004533694125408078, + "loss": 3.9885, + "step": 1732 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045330910938550157, + "loss": 4.0872, + "step": 1733 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045324877127900253, + "loss": 3.9369, + "step": 1734 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045318839823168345, + "loss": 4.058, + "step": 1735 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045312799025392313, + "loss": 3.942, + "step": 1736 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045306754735610643, + "loss": 3.9509, + "step": 1737 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045300706954862425, + "loss": 3.916, + "step": 1738 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045294655684187325, + "loss": 4.0786, + "step": 1739 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045288600924625643, + "loss": 4.0468, + "step": 1740 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045282542677218255, + "loss": 3.9452, + "step": 1741 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045276480943006646, + "loss": 4.1224, + "step": 1742 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045270415723032897, + "loss": 3.9406, + "step": 1743 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004526434701833969, + "loss": 4.1079, + "step": 1744 + }, + { + "epoch": 0.22, + "learning_rate": 0.000452582748299703, + "loss": 4.028, + "step": 1745 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004525219915896863, + "loss": 4.0445, + "step": 1746 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045246120006379125, + "loss": 4.0552, + "step": 1747 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045240037373246885, + "loss": 4.1867, + "step": 1748 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004523395126061757, + "loss": 3.9939, + "step": 1749 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045227861669537475, + "loss": 4.1706, + "step": 1750 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004522176860105345, + "loss": 3.9962, + "step": 1751 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045215672056212966, + "loss": 4.0292, + "step": 1752 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045209572036064105, + "loss": 4.091, + "step": 1753 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045203468541655514, + "loss": 3.9123, + "step": 1754 + }, + { + "epoch": 0.22, + "learning_rate": 0.00045197361574036466, + "loss": 3.9851, + "step": 1755 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004519125113425681, + "loss": 4.1922, + "step": 1756 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004518513722336701, + "loss": 3.9329, + "step": 1757 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045179019842418126, + "loss": 4.1057, + "step": 1758 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004517289899246178, + "loss": 3.9908, + "step": 1759 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004516677467455024, + "loss": 4.0545, + "step": 1760 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045160646889736333, + "loss": 3.9859, + "step": 1761 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045154515639073513, + "loss": 3.9728, + "step": 1762 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045148380923615804, + "loss": 3.9129, + "step": 1763 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004514224274441783, + "loss": 3.9948, + "step": 1764 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045136101102534823, + "loss": 3.9435, + "step": 1765 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045129955999022585, + "loss": 3.8504, + "step": 1766 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004512380743493756, + "loss": 3.9456, + "step": 1767 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045117655411336735, + "loss": 3.9514, + "step": 1768 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045111499929277723, + "loss": 4.0649, + "step": 1769 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004510534098981872, + "loss": 3.907, + "step": 1770 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004509917859401852, + "loss": 3.9433, + "step": 1771 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004509301274293651, + "loss": 4.021, + "step": 1772 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045086843437632673, + "loss": 4.0255, + "step": 1773 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004508067067916758, + "loss": 3.9404, + "step": 1774 + }, + { + "epoch": 0.23, + "learning_rate": 0.000450744944686024, + "loss": 4.0188, + "step": 1775 + }, + { + "epoch": 0.23, + "learning_rate": 0.000450683148069989, + "loss": 4.0157, + "step": 1776 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045062131695419434, + "loss": 4.0448, + "step": 1777 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045055945134926944, + "loss": 4.0216, + "step": 1778 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045049755126584987, + "loss": 4.0422, + "step": 1779 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045043561671457677, + "loss": 4.1583, + "step": 1780 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004503736477060976, + "loss": 4.1637, + "step": 1781 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004503116442510654, + "loss": 4.0515, + "step": 1782 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045024960636013935, + "loss": 3.9994, + "step": 1783 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045018753404398444, + "loss": 4.0899, + "step": 1784 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004501254273132717, + "loss": 4.1362, + "step": 1785 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004500632861786779, + "loss": 3.9608, + "step": 1786 + }, + { + "epoch": 0.23, + "learning_rate": 0.00045000111065088597, + "loss": 4.0226, + "step": 1787 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044993890074058443, + "loss": 4.0738, + "step": 1788 + }, + { + "epoch": 0.23, + "learning_rate": 0.000449876656458468, + "loss": 4.0256, + "step": 1789 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044981437781523714, + "loss": 4.0179, + "step": 1790 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044975206482159827, + "loss": 3.9903, + "step": 1791 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004496897174882637, + "loss": 3.9379, + "step": 1792 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004496273358259517, + "loss": 3.9762, + "step": 1793 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004495649198453865, + "loss": 4.0203, + "step": 1794 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044950246955729794, + "loss": 4.1249, + "step": 1795 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044943998497242203, + "loss": 4.0036, + "step": 1796 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044937746610150065, + "loss": 3.984, + "step": 1797 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044931491295528144, + "loss": 4.1135, + "step": 1798 + }, + { + "epoch": 0.23, + "learning_rate": 0.000449252325544518, + "loss": 3.9935, + "step": 1799 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004491897038799699, + "loss": 4.1046, + "step": 1800 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044912704797240243, + "loss": 3.9857, + "step": 1801 + }, + { + "epoch": 0.23, + "learning_rate": 0.000449064357832587, + "loss": 4.1042, + "step": 1802 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044900163347130073, + "loss": 4.0665, + "step": 1803 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004489388748993266, + "loss": 4.0075, + "step": 1804 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004488760821274536, + "loss": 4.0085, + "step": 1805 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044881325516647654, + "loss": 4.1549, + "step": 1806 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044875039402719606, + "loss": 4.0251, + "step": 1807 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004486874987204187, + "loss": 4.1066, + "step": 1808 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004486245692569569, + "loss": 4.0637, + "step": 1809 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044856160564762904, + "loss": 4.0005, + "step": 1810 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004484986079032592, + "loss": 4.1248, + "step": 1811 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044843557603467754, + "loss": 3.9169, + "step": 1812 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044837251005271984, + "loss": 3.8621, + "step": 1813 + }, + { + "epoch": 0.23, + "learning_rate": 0.000448309409968228, + "loss": 4.1519, + "step": 1814 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044824627579204953, + "loss": 4.0112, + "step": 1815 + }, + { + "epoch": 0.23, + "learning_rate": 0.000448183107535038, + "loss": 3.8594, + "step": 1816 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004481199052080527, + "loss": 3.9444, + "step": 1817 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004480566688219589, + "loss": 4.0365, + "step": 1818 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004479933983876278, + "loss": 4.0893, + "step": 1819 + }, + { + "epoch": 0.23, + "learning_rate": 0.000447930093915936, + "loss": 4.0681, + "step": 1820 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044786675541776653, + "loss": 4.0547, + "step": 1821 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004478033829040079, + "loss": 3.9201, + "step": 1822 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004477399763855547, + "loss": 3.97, + "step": 1823 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004476765358733071, + "loss": 3.8903, + "step": 1824 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004476130613781713, + "loss": 4.0165, + "step": 1825 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004475495529110594, + "loss": 4.1496, + "step": 1826 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044748601048288906, + "loss": 3.9962, + "step": 1827 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044742243410458415, + "loss": 3.9737, + "step": 1828 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044735882378707406, + "loss": 3.9952, + "step": 1829 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044729517954129416, + "loss": 3.9928, + "step": 1830 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004472315013781857, + "loss": 4.1568, + "step": 1831 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004471677893086956, + "loss": 4.0174, + "step": 1832 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044710404334377684, + "loss": 3.9077, + "step": 1833 + }, + { + "epoch": 0.23, + "learning_rate": 0.000447040263494388, + "loss": 4.129, + "step": 1834 + }, + { + "epoch": 0.23, + "learning_rate": 0.00044697644977149346, + "loss": 4.0587, + "step": 1835 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004469126021860637, + "loss": 3.998, + "step": 1836 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004468487207490749, + "loss": 4.0047, + "step": 1837 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044678480547150886, + "loss": 4.0577, + "step": 1838 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044672085636435346, + "loss": 3.9952, + "step": 1839 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004466568734386023, + "loss": 3.9481, + "step": 1840 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044659285670525464, + "loss": 4.0717, + "step": 1841 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044652880617531587, + "loss": 3.9408, + "step": 1842 + }, + { + "epoch": 0.24, + "learning_rate": 0.000446464721859797, + "loss": 4.0484, + "step": 1843 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004464006037697148, + "loss": 3.8815, + "step": 1844 + }, + { + "epoch": 0.24, + "learning_rate": 0.000446336451916092, + "loss": 3.9654, + "step": 1845 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004462722663099569, + "loss": 4.014, + "step": 1846 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044620804696234387, + "loss": 3.9468, + "step": 1847 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004461437938842929, + "loss": 4.0623, + "step": 1848 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044607950708685, + "loss": 3.9074, + "step": 1849 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044601518658106653, + "loss": 3.9694, + "step": 1850 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004459508323780001, + "loss": 4.1161, + "step": 1851 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044588644448871395, + "loss": 3.9852, + "step": 1852 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004458220229242771, + "loss": 4.0608, + "step": 1853 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004457575676957644, + "loss": 4.108, + "step": 1854 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004456930788142563, + "loss": 3.9251, + "step": 1855 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004456285562908393, + "loss": 3.979, + "step": 1856 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004455640001366056, + "loss": 4.0796, + "step": 1857 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044549941036265306, + "loss": 4.0653, + "step": 1858 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044543478698008546, + "loss": 4.0191, + "step": 1859 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004453701300000124, + "loss": 4.0283, + "step": 1860 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044530543943354896, + "loss": 4.0125, + "step": 1861 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004452407152918163, + "loss": 4.0186, + "step": 1862 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004451759575859413, + "loss": 3.9131, + "step": 1863 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004451111663270565, + "loss": 4.0174, + "step": 1864 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044504634152630024, + "loss": 4.1313, + "step": 1865 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004449814831948168, + "loss": 4.0763, + "step": 1866 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044491659134375587, + "loss": 4.0738, + "step": 1867 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004448516659842733, + "loss": 4.0347, + "step": 1868 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044478670712753043, + "loss": 3.9077, + "step": 1869 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004447217147846944, + "loss": 4.0477, + "step": 1870 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004446566889669382, + "loss": 4.0661, + "step": 1871 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044459162968544055, + "loss": 4.0185, + "step": 1872 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044452653695138585, + "loss": 3.9813, + "step": 1873 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044446141077596424, + "loss": 4.1021, + "step": 1874 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044439625117037183, + "loss": 3.8781, + "step": 1875 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004443310581458102, + "loss": 4.0126, + "step": 1876 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044426583171348666, + "loss": 3.9303, + "step": 1877 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004442005718846147, + "loss": 4.057, + "step": 1878 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044413527867041293, + "loss": 4.0755, + "step": 1879 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004440699520821062, + "loss": 3.9156, + "step": 1880 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044400459213092487, + "loss": 4.0005, + "step": 1881 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004439391988281051, + "loss": 4.0716, + "step": 1882 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044387377218488863, + "loss": 3.9141, + "step": 1883 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044380831221252316, + "loss": 3.9045, + "step": 1884 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044374281892226204, + "loss": 3.9521, + "step": 1885 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044367729232536423, + "loss": 4.1467, + "step": 1886 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004436117324330946, + "loss": 3.9944, + "step": 1887 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004435461392567236, + "loss": 4.1514, + "step": 1888 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044348051280752756, + "loss": 3.8894, + "step": 1889 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004434148530967883, + "loss": 3.885, + "step": 1890 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004433491601357935, + "loss": 4.0538, + "step": 1891 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004432834339358367, + "loss": 3.8919, + "step": 1892 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044321767450821673, + "loss": 4.0552, + "step": 1893 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004431518818642386, + "loss": 3.9247, + "step": 1894 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004430860560152128, + "loss": 3.956, + "step": 1895 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044302019697245546, + "loss": 4.0251, + "step": 1896 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004429543047472886, + "loss": 4.0228, + "step": 1897 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004428883793510399, + "loss": 4.098, + "step": 1898 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004428224207950425, + "loss": 4.0111, + "step": 1899 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004427564290906357, + "loss": 4.0143, + "step": 1900 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044269040424916407, + "loss": 4.0003, + "step": 1901 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044262434628197805, + "loss": 4.0665, + "step": 1902 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044255825520043393, + "loss": 3.8685, + "step": 1903 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044249213101589323, + "loss": 3.9191, + "step": 1904 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004424259737397238, + "loss": 4.0447, + "step": 1905 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044235978338329863, + "loss": 3.9911, + "step": 1906 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004422935599579967, + "loss": 3.9774, + "step": 1907 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044222730347520255, + "loss": 4.0165, + "step": 1908 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004421610139463064, + "loss": 3.7829, + "step": 1909 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004420946913827043, + "loss": 3.9972, + "step": 1910 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044202833579579776, + "loss": 3.8938, + "step": 1911 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004419619471969941, + "loss": 3.7972, + "step": 1912 + }, + { + "epoch": 0.24, + "learning_rate": 0.00044189552559770635, + "loss": 4.0563, + "step": 1913 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004418290710093531, + "loss": 4.1089, + "step": 1914 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004417625834433586, + "loss": 4.0726, + "step": 1915 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044169606291115295, + "loss": 3.8492, + "step": 1916 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004416295094241718, + "loss": 3.8562, + "step": 1917 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044156292299385636, + "loss": 4.0232, + "step": 1918 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004414963036316537, + "loss": 4.0317, + "step": 1919 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044142965134901635, + "loss": 4.0787, + "step": 1920 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004413629661574028, + "loss": 3.8406, + "step": 1921 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044129624806827684, + "loss": 3.9884, + "step": 1922 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044122949709310817, + "loss": 3.9354, + "step": 1923 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044116271324337196, + "loss": 3.9165, + "step": 1924 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044109589653054925, + "loss": 4.0717, + "step": 1925 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004410290469661266, + "loss": 4.0505, + "step": 1926 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004409621645615961, + "loss": 3.9035, + "step": 1927 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004408952493284557, + "loss": 4.0334, + "step": 1928 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044082830127820897, + "loss": 3.9847, + "step": 1929 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004407613204223648, + "loss": 3.9883, + "step": 1930 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044069430677243834, + "loss": 3.9646, + "step": 1931 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004406272603399497, + "loss": 3.9541, + "step": 1932 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044056018113642514, + "loss": 3.9378, + "step": 1933 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044049306917339626, + "loss": 3.9302, + "step": 1934 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044042592446240044, + "loss": 4.0166, + "step": 1935 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004403587470149806, + "loss": 3.9469, + "step": 1936 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044029153684268526, + "loss": 3.9002, + "step": 1937 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004402242939570687, + "loss": 3.8483, + "step": 1938 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044015701836969075, + "loss": 4.0102, + "step": 1939 + }, + { + "epoch": 0.25, + "learning_rate": 0.00044008971009211684, + "loss": 4.03, + "step": 1940 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004400223691359181, + "loss": 3.9121, + "step": 1941 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043995499551267115, + "loss": 3.9625, + "step": 1942 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004398875892339583, + "loss": 4.0242, + "step": 1943 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004398201503113675, + "loss": 4.0642, + "step": 1944 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004397526787564923, + "loss": 4.1955, + "step": 1945 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043968517458093184, + "loss": 3.9161, + "step": 1946 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004396176377962908, + "loss": 4.0473, + "step": 1947 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004395500684141797, + "loss": 3.955, + "step": 1948 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043948246644621427, + "loss": 4.0171, + "step": 1949 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004394148319040162, + "loss": 3.8948, + "step": 1950 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043934716479921267, + "loss": 3.9664, + "step": 1951 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043927946514343635, + "loss": 3.9786, + "step": 1952 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043921173294832574, + "loss": 3.8691, + "step": 1953 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004391439682255247, + "loss": 3.9004, + "step": 1954 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004390761709866827, + "loss": 3.9509, + "step": 1955 + }, + { + "epoch": 0.25, + "learning_rate": 0.000439008341243455, + "loss": 4.0593, + "step": 1956 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043894047900750225, + "loss": 3.9904, + "step": 1957 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004388725842904908, + "loss": 4.0855, + "step": 1958 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043880465710409243, + "loss": 3.9821, + "step": 1959 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004387366974599848, + "loss": 3.9669, + "step": 1960 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004386687053698508, + "loss": 3.9224, + "step": 1961 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004386006808453792, + "loss": 3.9813, + "step": 1962 + }, + { + "epoch": 0.25, + "learning_rate": 0.000438532623898264, + "loss": 3.9435, + "step": 1963 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043846453454020513, + "loss": 4.0355, + "step": 1964 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043839641278290787, + "loss": 4.0529, + "step": 1965 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004383282586380832, + "loss": 4.0714, + "step": 1966 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004382600721174477, + "loss": 3.849, + "step": 1967 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043819185323272313, + "loss": 4.0802, + "step": 1968 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004381236019956374, + "loss": 3.9574, + "step": 1969 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043805531841792345, + "loss": 4.0657, + "step": 1970 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004379870025113203, + "loss": 4.0009, + "step": 1971 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043791865428757196, + "loss": 3.8451, + "step": 1972 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043785027375842846, + "loss": 3.9712, + "step": 1973 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004377818609356451, + "loss": 4.0285, + "step": 1974 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043771341583098293, + "loss": 3.9699, + "step": 1975 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043764493845620847, + "loss": 4.0963, + "step": 1976 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043757642882309364, + "loss": 4.0794, + "step": 1977 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043750788694341613, + "loss": 3.9508, + "step": 1978 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004374393128289591, + "loss": 4.0161, + "step": 1979 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004373707064915112, + "loss": 3.9268, + "step": 1980 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004373020679428667, + "loss": 4.0214, + "step": 1981 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004372333971948253, + "loss": 4.1528, + "step": 1982 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004371646942591924, + "loss": 3.9262, + "step": 1983 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043709595914777865, + "loss": 3.919, + "step": 1984 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004370271918724006, + "loss": 4.0873, + "step": 1985 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043695839244488, + "loss": 3.9983, + "step": 1986 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043688956087704434, + "loss": 3.93, + "step": 1987 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004368206971807266, + "loss": 3.9858, + "step": 1988 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043675180136776515, + "loss": 3.9936, + "step": 1989 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043668287345000403, + "loss": 4.0461, + "step": 1990 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004366139134392928, + "loss": 3.9647, + "step": 1991 + }, + { + "epoch": 0.25, + "learning_rate": 0.00043654492134748634, + "loss": 4.0031, + "step": 1992 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043647589718644544, + "loss": 3.8729, + "step": 1993 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043640684096803585, + "loss": 3.9346, + "step": 1994 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004363377527041294, + "loss": 4.0434, + "step": 1995 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043626863240660296, + "loss": 4.1361, + "step": 1996 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004361994800873392, + "loss": 3.9429, + "step": 1997 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043613029575822625, + "loss": 4.0036, + "step": 1998 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004360610794311577, + "loss": 3.9779, + "step": 1999 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004359918311180325, + "loss": 4.1588, + "step": 2000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004359225508307554, + "loss": 3.9793, + "step": 2001 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043585323858123635, + "loss": 4.0598, + "step": 2002 + }, + { + "epoch": 0.26, + "learning_rate": 0.000435783894381391, + "loss": 4.0263, + "step": 2003 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004357145182431405, + "loss": 4.0073, + "step": 2004 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043564511017841123, + "loss": 3.9148, + "step": 2005 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043557567019913534, + "loss": 4.0209, + "step": 2006 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043550619831725037, + "loss": 3.9553, + "step": 2007 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043543669454469935, + "loss": 3.8796, + "step": 2008 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043536715889343084, + "loss": 4.0791, + "step": 2009 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004352975913753987, + "loss": 4.108, + "step": 2010 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004352279920025624, + "loss": 3.971, + "step": 2011 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043515836078688696, + "loss": 3.896, + "step": 2012 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043508869774034275, + "loss": 4.0236, + "step": 2013 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004350190028749057, + "loss": 3.9298, + "step": 2014 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043494927620255715, + "loss": 4.0051, + "step": 2015 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004348795177352839, + "loss": 4.0002, + "step": 2016 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004348097274850782, + "loss": 4.0305, + "step": 2017 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043473990546393784, + "loss": 4.1124, + "step": 2018 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004346700516838661, + "loss": 4.0307, + "step": 2019 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004346001661568715, + "loss": 4.0155, + "step": 2020 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004345302488949684, + "loss": 3.9648, + "step": 2021 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004344602999101761, + "loss": 4.1053, + "step": 2022 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043439031921451994, + "loss": 3.8488, + "step": 2023 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004343203068200302, + "loss": 3.8731, + "step": 2024 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004342502627387429, + "loss": 4.0238, + "step": 2025 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043418018698269945, + "loss": 4.0154, + "step": 2026 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043411007956394666, + "loss": 3.938, + "step": 2027 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004340399404945368, + "loss": 4.0389, + "step": 2028 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004339697697865276, + "loss": 3.9005, + "step": 2029 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004338995674519822, + "loss": 4.0525, + "step": 2030 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043382933350296916, + "loss": 3.9812, + "step": 2031 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004337590679515626, + "loss": 4.0592, + "step": 2032 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043368877080984195, + "loss": 4.0082, + "step": 2033 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004336184420898921, + "loss": 4.063, + "step": 2034 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043354808180380333, + "loss": 3.9677, + "step": 2035 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004334776899636714, + "loss": 4.0996, + "step": 2036 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043340726658159764, + "loss": 4.0173, + "step": 2037 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004333368116696884, + "loss": 3.9103, + "step": 2038 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043326632524005583, + "loss": 3.9576, + "step": 2039 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004331958073048174, + "loss": 4.1326, + "step": 2040 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004331252578760959, + "loss": 4.0881, + "step": 2041 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043305467696601953, + "loss": 3.9423, + "step": 2042 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043298406458672207, + "loss": 4.0407, + "step": 2043 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043291342075034255, + "loss": 3.84, + "step": 2044 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043284274546902555, + "loss": 4.014, + "step": 2045 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043277203875492087, + "loss": 4.0582, + "step": 2046 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004327013006201839, + "loss": 3.9888, + "step": 2047 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043263053107697524, + "loss": 4.1291, + "step": 2048 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004325597301374611, + "loss": 4.0333, + "step": 2049 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043248889781381285, + "loss": 3.8757, + "step": 2050 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043241803411820756, + "loss": 3.7501, + "step": 2051 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004323471390628274, + "loss": 3.7427, + "step": 2052 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043227621265986004, + "loss": 4.014, + "step": 2053 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004322052549214987, + "loss": 4.025, + "step": 2054 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004321342658599416, + "loss": 4.0784, + "step": 2055 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004320632454873929, + "loss": 3.8249, + "step": 2056 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043199219381606153, + "loss": 4.1075, + "step": 2057 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004319211108581622, + "loss": 3.8809, + "step": 2058 + }, + { + "epoch": 0.26, + "learning_rate": 0.000431849996625915, + "loss": 4.0844, + "step": 2059 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043177885113154503, + "loss": 4.0307, + "step": 2060 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004317076743872833, + "loss": 3.9, + "step": 2061 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004316364664053658, + "loss": 3.8585, + "step": 2062 + }, + { + "epoch": 0.26, + "learning_rate": 0.000431565227198034, + "loss": 4.0697, + "step": 2063 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004314939567775347, + "loss": 4.0821, + "step": 2064 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004314226551561202, + "loss": 4.0268, + "step": 2065 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043135132234604814, + "loss": 4.0234, + "step": 2066 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004312799583595813, + "loss": 4.0779, + "step": 2067 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043120856320898806, + "loss": 3.9714, + "step": 2068 + }, + { + "epoch": 0.26, + "learning_rate": 0.000431137136906542, + "loss": 3.921, + "step": 2069 + }, + { + "epoch": 0.26, + "learning_rate": 0.00043106567946452225, + "loss": 4.0924, + "step": 2070 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004309941908952131, + "loss": 4.0276, + "step": 2071 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004309226712109042, + "loss": 3.9837, + "step": 2072 + }, + { + "epoch": 0.27, + "learning_rate": 0.00043085112042389075, + "loss": 4.0959, + "step": 2073 + }, + { + "epoch": 0.27, + "learning_rate": 0.000430779538546473, + "loss": 4.0236, + "step": 2074 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004307079255909569, + "loss": 3.9363, + "step": 2075 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004306362815696534, + "loss": 4.0457, + "step": 2076 + }, + { + "epoch": 0.27, + "learning_rate": 0.00043056460649487904, + "loss": 3.9269, + "step": 2077 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004304929003789555, + "loss": 3.8905, + "step": 2078 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004304211632342099, + "loss": 3.8059, + "step": 2079 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004303493950729748, + "loss": 3.8058, + "step": 2080 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004302775959075878, + "loss": 3.9016, + "step": 2081 + }, + { + "epoch": 0.27, + "learning_rate": 0.00043020576575039215, + "loss": 3.9861, + "step": 2082 + }, + { + "epoch": 0.27, + "learning_rate": 0.00043013390461373626, + "loss": 3.9183, + "step": 2083 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004300620125099738, + "loss": 3.923, + "step": 2084 + }, + { + "epoch": 0.27, + "learning_rate": 0.000429990089451464, + "loss": 4.0612, + "step": 2085 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004299181354505712, + "loss": 4.0257, + "step": 2086 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042984615051966515, + "loss": 4.0632, + "step": 2087 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042977413467112084, + "loss": 4.0759, + "step": 2088 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042970208791731857, + "loss": 3.8247, + "step": 2089 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042963001027064416, + "loss": 3.981, + "step": 2090 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042955790174348864, + "loss": 4.0133, + "step": 2091 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004294857623482481, + "loss": 4.0156, + "step": 2092 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004294135920973242, + "loss": 4.0088, + "step": 2093 + }, + { + "epoch": 0.27, + "learning_rate": 0.000429341391003124, + "loss": 4.0686, + "step": 2094 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004292691590780595, + "loss": 4.0183, + "step": 2095 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042919689633454827, + "loss": 3.9138, + "step": 2096 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004291246027850132, + "loss": 4.0146, + "step": 2097 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042905227844188226, + "loss": 3.9941, + "step": 2098 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042897992331758896, + "loss": 4.0688, + "step": 2099 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004289075374245719, + "loss": 3.918, + "step": 2100 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042883512077527506, + "loss": 4.0808, + "step": 2101 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004287626733821477, + "loss": 3.9975, + "step": 2102 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004286901952576445, + "loss": 4.0308, + "step": 2103 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042861768641422505, + "loss": 3.998, + "step": 2104 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004285451468643546, + "loss": 4.0209, + "step": 2105 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004284725766205037, + "loss": 3.96, + "step": 2106 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042839997569514766, + "loss": 4.0035, + "step": 2107 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004283273441007677, + "loss": 3.8187, + "step": 2108 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042825468184984995, + "loss": 3.8616, + "step": 2109 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004281819889548858, + "loss": 3.9964, + "step": 2110 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042810926542837213, + "loss": 4.0084, + "step": 2111 + }, + { + "epoch": 0.27, + "learning_rate": 0.000428036511282811, + "loss": 3.9264, + "step": 2112 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042796372653070946, + "loss": 4.0473, + "step": 2113 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042789091118458037, + "loss": 3.9586, + "step": 2114 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042781806525694124, + "loss": 4.03, + "step": 2115 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004277451887603152, + "loss": 3.9426, + "step": 2116 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004276722817072307, + "loss": 3.9425, + "step": 2117 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004275993441102212, + "loss": 3.9877, + "step": 2118 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042752637598182555, + "loss": 3.9551, + "step": 2119 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004274533773345878, + "loss": 3.9799, + "step": 2120 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004273803481810573, + "loss": 4.0181, + "step": 2121 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004273072885337885, + "loss": 4.1118, + "step": 2122 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004272341984053413, + "loss": 3.9217, + "step": 2123 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004271610778082807, + "loss": 3.9379, + "step": 2124 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042708792675517703, + "loss": 4.0146, + "step": 2125 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042701474525860583, + "loss": 3.9809, + "step": 2126 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004269415333311477, + "loss": 3.807, + "step": 2127 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004268682909853888, + "loss": 3.9638, + "step": 2128 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004267950182339201, + "loss": 4.03, + "step": 2129 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004267217150893383, + "loss": 3.9426, + "step": 2130 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004266483815642449, + "loss": 3.8551, + "step": 2131 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042657501767124685, + "loss": 3.9368, + "step": 2132 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004265016234229563, + "loss": 3.8583, + "step": 2133 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042642819883199033, + "loss": 3.9795, + "step": 2134 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004263547439109717, + "loss": 3.9877, + "step": 2135 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004262812586725282, + "loss": 4.1012, + "step": 2136 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042620774312929265, + "loss": 4.0688, + "step": 2137 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004261341972939033, + "loss": 3.8718, + "step": 2138 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004260606211790035, + "loss": 3.8787, + "step": 2139 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004259870147972419, + "loss": 4.009, + "step": 2140 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004259133781612722, + "loss": 4.0837, + "step": 2141 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004258397112837534, + "loss": 4.0197, + "step": 2142 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004257660141773497, + "loss": 4.0563, + "step": 2143 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004256922868547306, + "loss": 4.0224, + "step": 2144 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042561852932857045, + "loss": 3.9547, + "step": 2145 + }, + { + "epoch": 0.27, + "learning_rate": 0.00042554474161154933, + "loss": 4.0665, + "step": 2146 + }, + { + "epoch": 0.27, + "learning_rate": 0.000425470923716352, + "loss": 3.9321, + "step": 2147 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004253970756556685, + "loss": 3.9738, + "step": 2148 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004253231974421945, + "loss": 3.9338, + "step": 2149 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042524928908863025, + "loss": 3.9634, + "step": 2150 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004251753506076816, + "loss": 3.9875, + "step": 2151 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042510138201205935, + "loss": 4.0413, + "step": 2152 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004250273833144795, + "loss": 3.9972, + "step": 2153 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042495335452766346, + "loss": 4.0532, + "step": 2154 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004248792956643376, + "loss": 4.0766, + "step": 2155 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042480520673723334, + "loss": 4.077, + "step": 2156 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004247310877590875, + "loss": 3.9163, + "step": 2157 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042465693874264203, + "loss": 3.8994, + "step": 2158 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042458275970064404, + "loss": 3.8919, + "step": 2159 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004245085506458457, + "loss": 4.037, + "step": 2160 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004244343115910044, + "loss": 4.0626, + "step": 2161 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042436004254888286, + "loss": 3.9982, + "step": 2162 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042428574353224846, + "loss": 3.8342, + "step": 2163 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004242114145538744, + "loss": 3.942, + "step": 2164 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042413705562653847, + "loss": 3.9101, + "step": 2165 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004240626667630239, + "loss": 3.9872, + "step": 2166 + }, + { + "epoch": 0.28, + "learning_rate": 0.000423988247976119, + "loss": 3.9556, + "step": 2167 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004239137992786173, + "loss": 3.937, + "step": 2168 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042383932068331727, + "loss": 3.9638, + "step": 2169 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004237648122030227, + "loss": 3.871, + "step": 2170 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042369027385054245, + "loss": 3.8905, + "step": 2171 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042361570563869056, + "loss": 4.0431, + "step": 2172 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042354110758028614, + "loss": 3.9703, + "step": 2173 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042346647968815346, + "loss": 3.901, + "step": 2174 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042339182197512193, + "loss": 3.9943, + "step": 2175 + }, + { + "epoch": 0.28, + "learning_rate": 0.000423317134454026, + "loss": 4.0128, + "step": 2176 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004232424171377055, + "loss": 3.8454, + "step": 2177 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042316767003900503, + "loss": 3.9567, + "step": 2178 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004230928931707746, + "loss": 3.9688, + "step": 2179 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042301808654586915, + "loss": 3.9744, + "step": 2180 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042294325017714875, + "loss": 4.025, + "step": 2181 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042286838407747877, + "loss": 4.0373, + "step": 2182 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042279348825972955, + "loss": 3.9298, + "step": 2183 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004227185627367765, + "loss": 3.8063, + "step": 2184 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004226436075215001, + "loss": 3.9494, + "step": 2185 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004225686226267862, + "loss": 4.0184, + "step": 2186 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004224936080655255, + "loss": 3.9661, + "step": 2187 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004224185638506138, + "loss": 3.9119, + "step": 2188 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004223434899949521, + "loss": 3.8688, + "step": 2189 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004222683865114465, + "loss": 4.0561, + "step": 2190 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004221932534130082, + "loss": 3.9656, + "step": 2191 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042211809071255344, + "loss": 3.9183, + "step": 2192 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042204289842300344, + "loss": 3.8954, + "step": 2193 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042196767655728473, + "loss": 4.0159, + "step": 2194 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042189242512832875, + "loss": 4.0657, + "step": 2195 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042181714414907224, + "loss": 3.9874, + "step": 2196 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042174183363245674, + "loss": 4.0351, + "step": 2197 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004216664935914291, + "loss": 4.0298, + "step": 2198 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004215911240389409, + "loss": 3.8504, + "step": 2199 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004215157249879493, + "loss": 3.8874, + "step": 2200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004214402964514163, + "loss": 3.9231, + "step": 2201 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042136483844230877, + "loss": 3.9846, + "step": 2202 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004212893509735989, + "loss": 3.9067, + "step": 2203 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042121383405826373, + "loss": 4.0101, + "step": 2204 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042113828770928575, + "loss": 3.987, + "step": 2205 + }, + { + "epoch": 0.28, + "learning_rate": 0.000421062711939652, + "loss": 3.8617, + "step": 2206 + }, + { + "epoch": 0.28, + "learning_rate": 0.000420987106762355, + "loss": 4.116, + "step": 2207 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042091147219039203, + "loss": 3.8828, + "step": 2208 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004208358082367657, + "loss": 4.0237, + "step": 2209 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004207601149144834, + "loss": 3.9967, + "step": 2210 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042068439223655774, + "loss": 4.0607, + "step": 2211 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004206086402160063, + "loss": 3.8761, + "step": 2212 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004205328588658517, + "loss": 4.0512, + "step": 2213 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004204570481991217, + "loss": 3.9825, + "step": 2214 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042038120822884904, + "loss": 3.9363, + "step": 2215 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042030533896807143, + "loss": 3.9772, + "step": 2216 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004202294404298317, + "loss": 3.9812, + "step": 2217 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004201535126271777, + "loss": 4.0001, + "step": 2218 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004200775555731623, + "loss": 3.9617, + "step": 2219 + }, + { + "epoch": 0.28, + "learning_rate": 0.00042000156928084336, + "loss": 4.026, + "step": 2220 + }, + { + "epoch": 0.28, + "learning_rate": 0.00041992555376328385, + "loss": 4.0124, + "step": 2221 + }, + { + "epoch": 0.28, + "learning_rate": 0.00041984950903355166, + "loss": 3.9059, + "step": 2222 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004197734351047199, + "loss": 3.9465, + "step": 2223 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004196973319898664, + "loss": 3.9463, + "step": 2224 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004196211997020742, + "loss": 4.0764, + "step": 2225 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004195450382544315, + "loss": 3.9749, + "step": 2226 + }, + { + "epoch": 0.29, + "learning_rate": 0.000419468847660031, + "loss": 3.9277, + "step": 2227 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041939262793197105, + "loss": 3.9505, + "step": 2228 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041931637908335453, + "loss": 4.0137, + "step": 2229 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004192401011272896, + "loss": 3.8278, + "step": 2230 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004191637940768893, + "loss": 4.0697, + "step": 2231 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004190874579452716, + "loss": 3.9318, + "step": 2232 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004190110927455597, + "loss": 4.0597, + "step": 2233 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004189346984908816, + "loss": 3.819, + "step": 2234 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041885827519437047, + "loss": 3.862, + "step": 2235 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004187818228691641, + "loss": 3.9949, + "step": 2236 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004187053415284058, + "loss": 3.9632, + "step": 2237 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004186288311852435, + "loss": 3.9673, + "step": 2238 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004185522918528302, + "loss": 3.9374, + "step": 2239 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004184757235443238, + "loss": 3.9219, + "step": 2240 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004183991262728875, + "loss": 3.9166, + "step": 2241 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004183225000516891, + "loss": 3.9541, + "step": 2242 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004182458448939016, + "loss": 3.9957, + "step": 2243 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041816916081270286, + "loss": 4.0924, + "step": 2244 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041809244782127573, + "loss": 3.8705, + "step": 2245 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041801570593280824, + "loss": 3.8354, + "step": 2246 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041793893516049307, + "loss": 3.9413, + "step": 2247 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041786213551752804, + "loss": 4.0704, + "step": 2248 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004177853070171159, + "loss": 3.9227, + "step": 2249 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041770844967246423, + "loss": 3.9127, + "step": 2250 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004176315634967859, + "loss": 4.0515, + "step": 2251 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041755464850329847, + "loss": 4.0245, + "step": 2252 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004174777047052245, + "loss": 3.9593, + "step": 2253 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041740073211579156, + "loss": 3.991, + "step": 2254 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041732373074823204, + "loss": 4.0832, + "step": 2255 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004172467006157834, + "loss": 3.9518, + "step": 2256 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041716964173168803, + "loss": 3.8015, + "step": 2257 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041709255410919335, + "loss": 4.0493, + "step": 2258 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041701543776155136, + "loss": 4.0679, + "step": 2259 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004169382927020196, + "loss": 3.949, + "step": 2260 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004168611189438598, + "loss": 4.034, + "step": 2261 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041678391650033943, + "loss": 3.9331, + "step": 2262 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004167066853847302, + "loss": 4.0951, + "step": 2263 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041662942561030905, + "loss": 4.2155, + "step": 2264 + }, + { + "epoch": 0.29, + "learning_rate": 0.000416552137190358, + "loss": 4.1138, + "step": 2265 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041647482013816366, + "loss": 3.995, + "step": 2266 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004163974744670179, + "loss": 3.8665, + "step": 2267 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041632010019021715, + "loss": 3.9457, + "step": 2268 + }, + { + "epoch": 0.29, + "learning_rate": 0.000416242697321063, + "loss": 3.8559, + "step": 2269 + }, + { + "epoch": 0.29, + "learning_rate": 0.000416165265872862, + "loss": 3.8911, + "step": 2270 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004160878058589255, + "loss": 3.9366, + "step": 2271 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041601031729256963, + "loss": 3.8248, + "step": 2272 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041593280018711567, + "loss": 4.0805, + "step": 2273 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004158552545558897, + "loss": 3.9266, + "step": 2274 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041577768041222265, + "loss": 3.8876, + "step": 2275 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004157000777694506, + "loss": 3.839, + "step": 2276 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004156224466409141, + "loss": 3.88, + "step": 2277 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041554478703995903, + "loss": 4.1178, + "step": 2278 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041546709897993594, + "loss": 3.9389, + "step": 2279 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004153893824742002, + "loss": 4.095, + "step": 2280 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041531163753611236, + "loss": 4.0246, + "step": 2281 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041523386417903745, + "loss": 4.0173, + "step": 2282 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041515606241634577, + "loss": 3.91, + "step": 2283 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004150782322614124, + "loss": 3.9302, + "step": 2284 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041500037372761705, + "loss": 3.973, + "step": 2285 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041492248682834464, + "loss": 4.0018, + "step": 2286 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041484457157698473, + "loss": 4.1107, + "step": 2287 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041476662798693205, + "loss": 4.0224, + "step": 2288 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004146886560715858, + "loss": 4.0441, + "step": 2289 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004146106558443504, + "loss": 3.9515, + "step": 2290 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004145326273186348, + "loss": 3.9666, + "step": 2291 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004144545705078533, + "loss": 4.059, + "step": 2292 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004143764854254245, + "loss": 3.8449, + "step": 2293 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004142983720847723, + "loss": 3.9297, + "step": 2294 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004142202304993252, + "loss": 3.9787, + "step": 2295 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004141420606825166, + "loss": 3.9888, + "step": 2296 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041406386264778496, + "loss": 4.0416, + "step": 2297 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004139856364085733, + "loss": 3.9774, + "step": 2298 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041390738197832975, + "loss": 4.0372, + "step": 2299 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041382909937050694, + "loss": 3.9879, + "step": 2300 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041375078859856275, + "loss": 3.7982, + "step": 2301 + }, + { + "epoch": 0.29, + "learning_rate": 0.00041367244967595963, + "loss": 4.0712, + "step": 2302 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004135940826161649, + "loss": 4.0239, + "step": 2303 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004135156874326509, + "loss": 3.8995, + "step": 2304 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004134372641388946, + "loss": 4.0135, + "step": 2305 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004133588127483778, + "loss": 4.0791, + "step": 2306 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041328033327458726, + "loss": 3.8239, + "step": 2307 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041320182573101463, + "loss": 3.7982, + "step": 2308 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004131232901311561, + "loss": 3.9954, + "step": 2309 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041304472648851285, + "loss": 3.9002, + "step": 2310 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041296613481659096, + "loss": 3.8546, + "step": 2311 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004128875151289013, + "loss": 4.0074, + "step": 2312 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004128088674389594, + "loss": 3.9796, + "step": 2313 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004127301917602857, + "loss": 4.0044, + "step": 2314 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004126514881064055, + "loss": 4.0032, + "step": 2315 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041257275649084896, + "loss": 4.0311, + "step": 2316 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041249399692715085, + "loss": 3.7475, + "step": 2317 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041241520942885085, + "loss": 3.9329, + "step": 2318 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041233639400949345, + "loss": 4.045, + "step": 2319 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041225755068262804, + "loss": 4.1141, + "step": 2320 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041217867946180856, + "loss": 3.9051, + "step": 2321 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041209978036059393, + "loss": 3.9196, + "step": 2322 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041202085339254785, + "loss": 4.036, + "step": 2323 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041194189857123877, + "loss": 3.9615, + "step": 2324 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041186291591023994, + "loss": 4.0555, + "step": 2325 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041178390542312936, + "loss": 3.8785, + "step": 2326 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004117048671234899, + "loss": 3.8071, + "step": 2327 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041162580102490915, + "loss": 3.8083, + "step": 2328 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004115467071409794, + "loss": 3.9254, + "step": 2329 + }, + { + "epoch": 0.3, + "learning_rate": 0.000411467585485298, + "loss": 3.8767, + "step": 2330 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004113884360714667, + "loss": 3.8939, + "step": 2331 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004113092589130923, + "loss": 3.9551, + "step": 2332 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041123005402378615, + "loss": 3.9838, + "step": 2333 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041115082141716464, + "loss": 4.0329, + "step": 2334 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041107156110684875, + "loss": 4.0067, + "step": 2335 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041099227310646415, + "loss": 3.9892, + "step": 2336 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004109129574296414, + "loss": 3.8655, + "step": 2337 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004108336140900158, + "loss": 3.9885, + "step": 2338 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041075424310122745, + "loss": 3.8427, + "step": 2339 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041067484447692104, + "loss": 4.085, + "step": 2340 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004105954182307462, + "loss": 4.0046, + "step": 2341 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041051596437635717, + "loss": 3.9017, + "step": 2342 + }, + { + "epoch": 0.3, + "learning_rate": 0.000410436482927413, + "loss": 4.013, + "step": 2343 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041035697389757745, + "loss": 3.8822, + "step": 2344 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004102774373005191, + "loss": 3.8395, + "step": 2345 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004101978731499112, + "loss": 4.0164, + "step": 2346 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041011828145943167, + "loss": 3.863, + "step": 2347 + }, + { + "epoch": 0.3, + "learning_rate": 0.00041003866224276333, + "loss": 3.9352, + "step": 2348 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040995901551359366, + "loss": 4.0414, + "step": 2349 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040987934128561477, + "loss": 4.0618, + "step": 2350 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004097996395725237, + "loss": 3.872, + "step": 2351 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004097199103880219, + "loss": 3.8861, + "step": 2352 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040964015374581606, + "loss": 4.0226, + "step": 2353 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040956036965961694, + "loss": 4.035, + "step": 2354 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004094805581431406, + "loss": 4.0273, + "step": 2355 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040940071921010735, + "loss": 3.9562, + "step": 2356 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004093208528742426, + "loss": 3.88, + "step": 2357 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004092409591492762, + "loss": 3.9152, + "step": 2358 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004091610380489429, + "loss": 3.9757, + "step": 2359 + }, + { + "epoch": 0.3, + "learning_rate": 0.000409081089586982, + "loss": 4.0777, + "step": 2360 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040900111377713743, + "loss": 4.0716, + "step": 2361 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004089211106331583, + "loss": 3.9026, + "step": 2362 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004088410801687977, + "loss": 4.0104, + "step": 2363 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040876102239781407, + "loss": 3.9332, + "step": 2364 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004086809373339703, + "loss": 3.9663, + "step": 2365 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040860082499103356, + "loss": 4.0845, + "step": 2366 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004085206853827765, + "loss": 3.8788, + "step": 2367 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004084405185229759, + "loss": 3.9453, + "step": 2368 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004083603244254134, + "loss": 3.9729, + "step": 2369 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004082801031038751, + "loss": 3.9303, + "step": 2370 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040819985457215223, + "loss": 3.8404, + "step": 2371 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004081195788440404, + "loss": 4.0244, + "step": 2372 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004080392759333398, + "loss": 3.8224, + "step": 2373 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040795894585385557, + "loss": 3.9871, + "step": 2374 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004078785886193973, + "loss": 3.8366, + "step": 2375 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040779820424377935, + "loss": 3.9435, + "step": 2376 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004077177927408208, + "loss": 4.1237, + "step": 2377 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004076373541243452, + "loss": 3.9943, + "step": 2378 + }, + { + "epoch": 0.3, + "learning_rate": 0.00040755688840818095, + "loss": 3.8787, + "step": 2379 + }, + { + "epoch": 0.3, + "learning_rate": 0.000407476395606161, + "loss": 3.9913, + "step": 2380 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004073958757321231, + "loss": 3.9435, + "step": 2381 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004073153287999094, + "loss": 4.0731, + "step": 2382 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040723475482336703, + "loss": 4.0049, + "step": 2383 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040715415381634737, + "loss": 3.9221, + "step": 2384 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004070735257927068, + "loss": 3.818, + "step": 2385 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004069928707663062, + "loss": 3.9436, + "step": 2386 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040691218875101113, + "loss": 3.9024, + "step": 2387 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004068314797606917, + "loss": 3.8132, + "step": 2388 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004067507438092227, + "loss": 3.9632, + "step": 2389 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004066699809104837, + "loss": 3.9753, + "step": 2390 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004065891910783587, + "loss": 3.9294, + "step": 2391 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040650837432673647, + "loss": 3.9517, + "step": 2392 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040642753066951024, + "loss": 4.0014, + "step": 2393 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040634666012057797, + "loss": 3.9105, + "step": 2394 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040626576269384244, + "loss": 3.9108, + "step": 2395 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004061848384032106, + "loss": 3.8627, + "step": 2396 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004061038872625944, + "loss": 3.9314, + "step": 2397 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040602290928591026, + "loss": 3.9857, + "step": 2398 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040594190448707925, + "loss": 3.8502, + "step": 2399 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040586087288002707, + "loss": 3.8613, + "step": 2400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004057798144786839, + "loss": 3.9758, + "step": 2401 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004056987292969847, + "loss": 3.9448, + "step": 2402 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040561761734886885, + "loss": 3.956, + "step": 2403 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004055364786482806, + "loss": 3.8709, + "step": 2404 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040545531320916844, + "loss": 3.9827, + "step": 2405 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004053741210454859, + "loss": 3.8436, + "step": 2406 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040529290217119053, + "loss": 4.0275, + "step": 2407 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040521165660024503, + "loss": 4.0684, + "step": 2408 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004051303843466164, + "loss": 3.9842, + "step": 2409 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004050490854242763, + "loss": 3.8193, + "step": 2410 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004049677598472009, + "loss": 3.9617, + "step": 2411 + }, + { + "epoch": 0.31, + "learning_rate": 0.000404886407629371, + "loss": 4.0035, + "step": 2412 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004048050287847721, + "loss": 3.9366, + "step": 2413 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004047236233273941, + "loss": 4.0379, + "step": 2414 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040464219127123147, + "loss": 4.0045, + "step": 2415 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040456073263028347, + "loss": 4.0031, + "step": 2416 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004044792474185537, + "loss": 4.0328, + "step": 2417 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040439773565005034, + "loss": 3.8943, + "step": 2418 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040431619733878637, + "loss": 3.9217, + "step": 2419 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004042346324987791, + "loss": 3.7265, + "step": 2420 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004041530411440504, + "loss": 4.0082, + "step": 2421 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004040714232886269, + "loss": 3.9637, + "step": 2422 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040398977894653954, + "loss": 3.8473, + "step": 2423 + }, + { + "epoch": 0.31, + "learning_rate": 0.000403908108131824, + "loss": 4.0007, + "step": 2424 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004038264108585204, + "loss": 3.9093, + "step": 2425 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040374468714067345, + "loss": 3.9446, + "step": 2426 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040366293699233246, + "loss": 3.9394, + "step": 2427 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040358116042755115, + "loss": 3.8587, + "step": 2428 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040349935746038793, + "loss": 3.9728, + "step": 2429 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004034175281049056, + "loss": 3.8925, + "step": 2430 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040333567237517165, + "loss": 3.9328, + "step": 2431 + }, + { + "epoch": 0.31, + "learning_rate": 0.000403253790285258, + "loss": 3.9446, + "step": 2432 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040317188184924116, + "loss": 3.8924, + "step": 2433 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040308994708120207, + "loss": 3.8618, + "step": 2434 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040300798599522626, + "loss": 3.7714, + "step": 2435 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040292599860540393, + "loss": 4.0037, + "step": 2436 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040284398492582954, + "loss": 3.9986, + "step": 2437 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040276194497060227, + "loss": 3.9815, + "step": 2438 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004026798787538256, + "loss": 3.8736, + "step": 2439 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040259778628960773, + "loss": 3.8182, + "step": 2440 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004025156675920614, + "loss": 4.0031, + "step": 2441 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004024335226753037, + "loss": 3.9612, + "step": 2442 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004023513515534563, + "loss": 3.7824, + "step": 2443 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004022691542406453, + "loss": 4.0388, + "step": 2444 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040218693075100146, + "loss": 3.9481, + "step": 2445 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004021046810986599, + "loss": 3.9459, + "step": 2446 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004020224052977604, + "loss": 4.0479, + "step": 2447 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040194010336244696, + "loss": 3.9392, + "step": 2448 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004018577753068683, + "loss": 3.9184, + "step": 2449 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040177542114517765, + "loss": 4.0296, + "step": 2450 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004016930408915325, + "loss": 3.9668, + "step": 2451 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040161063456009506, + "loss": 3.87, + "step": 2452 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040152820216503196, + "loss": 4.0359, + "step": 2453 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040144574372051416, + "loss": 4.003, + "step": 2454 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004013632592407174, + "loss": 3.8485, + "step": 2455 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040128074873982156, + "loss": 3.9347, + "step": 2456 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004011982122320112, + "loss": 3.8684, + "step": 2457 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004011156497314754, + "loss": 3.8228, + "step": 2458 + }, + { + "epoch": 0.31, + "learning_rate": 0.00040103306125240746, + "loss": 4.1155, + "step": 2459 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004009504468090054, + "loss": 3.999, + "step": 2460 + }, + { + "epoch": 0.32, + "learning_rate": 0.00040086780641547157, + "loss": 4.009, + "step": 2461 + }, + { + "epoch": 0.32, + "learning_rate": 0.00040078514008601274, + "loss": 3.8697, + "step": 2462 + }, + { + "epoch": 0.32, + "learning_rate": 0.00040070244783484035, + "loss": 3.83, + "step": 2463 + }, + { + "epoch": 0.32, + "learning_rate": 0.00040061972967617013, + "loss": 3.9217, + "step": 2464 + }, + { + "epoch": 0.32, + "learning_rate": 0.00040053698562422216, + "loss": 3.9764, + "step": 2465 + }, + { + "epoch": 0.32, + "learning_rate": 0.00040045421569322116, + "loss": 3.9601, + "step": 2466 + }, + { + "epoch": 0.32, + "learning_rate": 0.00040037141989739626, + "loss": 3.9759, + "step": 2467 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004002885982509811, + "loss": 3.8677, + "step": 2468 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004002057507682136, + "loss": 4.1038, + "step": 2469 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004001228774633362, + "loss": 3.9375, + "step": 2470 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004000399783505957, + "loss": 3.9274, + "step": 2471 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003999570534442436, + "loss": 3.9404, + "step": 2472 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003998741027585356, + "loss": 3.9163, + "step": 2473 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003997911263077318, + "loss": 3.8723, + "step": 2474 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003997081241060968, + "loss": 4.008, + "step": 2475 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039962509616789984, + "loss": 3.8195, + "step": 2476 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003995420425074141, + "loss": 3.9458, + "step": 2477 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039945896313891774, + "loss": 3.8162, + "step": 2478 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039937585807669286, + "loss": 3.8253, + "step": 2479 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039929272733502623, + "loss": 4.0026, + "step": 2480 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003992095709282091, + "loss": 3.8511, + "step": 2481 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003991263888705369, + "loss": 3.9547, + "step": 2482 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039904318117630965, + "loss": 4.1186, + "step": 2483 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003989599478598317, + "loss": 3.902, + "step": 2484 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003988766889354118, + "loss": 3.9828, + "step": 2485 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039879340441736314, + "loss": 3.9632, + "step": 2486 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003987100943200033, + "loss": 4.0425, + "step": 2487 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003986267586576543, + "loss": 3.9371, + "step": 2488 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003985433974446424, + "loss": 3.9976, + "step": 2489 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003984600106952985, + "loss": 3.8574, + "step": 2490 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039837659842395756, + "loss": 4.0411, + "step": 2491 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003982931606449592, + "loss": 3.9693, + "step": 2492 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039820969737264743, + "loss": 3.9219, + "step": 2493 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039812620862137056, + "loss": 3.9049, + "step": 2494 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003980426944054811, + "loss": 3.8941, + "step": 2495 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039795915473933623, + "loss": 4.0803, + "step": 2496 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039787558963729744, + "loss": 3.8671, + "step": 2497 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003977919991137304, + "loss": 3.9015, + "step": 2498 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003977083831830054, + "loss": 3.9329, + "step": 2499 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039762474185949686, + "loss": 3.9453, + "step": 2500 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003975410751575839, + "loss": 4.0945, + "step": 2501 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003974573830916496, + "loss": 4.0834, + "step": 2502 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003973736656760817, + "loss": 4.1366, + "step": 2503 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003972899229252721, + "loss": 3.9488, + "step": 2504 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039720615485361733, + "loss": 4.0928, + "step": 2505 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039712236147551795, + "loss": 3.898, + "step": 2506 + }, + { + "epoch": 0.32, + "learning_rate": 0.000397038542805379, + "loss": 3.9937, + "step": 2507 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039695469885761, + "loss": 3.893, + "step": 2508 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003968708296466245, + "loss": 3.8942, + "step": 2509 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039678693518684083, + "loss": 3.9724, + "step": 2510 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003967030154926813, + "loss": 3.9059, + "step": 2511 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039661907057857263, + "loss": 3.8817, + "step": 2512 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003965351004589459, + "loss": 3.974, + "step": 2513 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003964511051482367, + "loss": 3.9135, + "step": 2514 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039636708466088476, + "loss": 4.0608, + "step": 2515 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039628303901133413, + "loss": 3.8452, + "step": 2516 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039619896821403315, + "loss": 3.8544, + "step": 2517 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003961148722834347, + "loss": 3.8736, + "step": 2518 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003960307512339958, + "loss": 3.8943, + "step": 2519 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039594660508017774, + "loss": 4.0124, + "step": 2520 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039586243383644645, + "loss": 3.8659, + "step": 2521 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039577823751727175, + "loss": 4.0561, + "step": 2522 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039569401613712797, + "loss": 3.988, + "step": 2523 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039560976971049386, + "loss": 3.8426, + "step": 2524 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039552549825185224, + "loss": 3.94, + "step": 2525 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003954412017756904, + "loss": 3.9331, + "step": 2526 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039535688029649984, + "loss": 3.9311, + "step": 2527 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003952725338287765, + "loss": 3.9123, + "step": 2528 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003951881623870204, + "loss": 3.9722, + "step": 2529 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039510376598573605, + "loss": 3.9777, + "step": 2530 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003950193446394321, + "loss": 4.0355, + "step": 2531 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039493489836262165, + "loss": 3.9843, + "step": 2532 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003948504271698219, + "loss": 3.9963, + "step": 2533 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003947659310755545, + "loss": 3.9243, + "step": 2534 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039468141009434533, + "loss": 4.0204, + "step": 2535 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003945968642407244, + "loss": 3.8095, + "step": 2536 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003945122935292261, + "loss": 3.876, + "step": 2537 + }, + { + "epoch": 0.32, + "learning_rate": 0.00039442769797438937, + "loss": 3.9652, + "step": 2538 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003943430775907569, + "loss": 3.8776, + "step": 2539 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003942584323928762, + "loss": 3.9427, + "step": 2540 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039417376239529847, + "loss": 3.9187, + "step": 2541 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039408906761257957, + "loss": 3.9405, + "step": 2542 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039400434805927947, + "loss": 3.995, + "step": 2543 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039391960374996263, + "loss": 3.9244, + "step": 2544 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003938348346991973, + "loss": 4.0099, + "step": 2545 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003937500409215565, + "loss": 3.851, + "step": 2546 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003936652224316172, + "loss": 3.8666, + "step": 2547 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039358037924396055, + "loss": 3.8744, + "step": 2548 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003934955113731723, + "loss": 4.0221, + "step": 2549 + }, + { + "epoch": 0.33, + "learning_rate": 0.000393410618833842, + "loss": 4.0439, + "step": 2550 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003933257016405638, + "loss": 3.8266, + "step": 2551 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003932407598079359, + "loss": 3.882, + "step": 2552 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003931557933505608, + "loss": 3.7991, + "step": 2553 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039307080228304526, + "loss": 4.1595, + "step": 2554 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003929857866200002, + "loss": 3.8698, + "step": 2555 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003929007463760407, + "loss": 3.9399, + "step": 2556 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003928156815657863, + "loss": 3.9423, + "step": 2557 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039273059220386065, + "loss": 4.0989, + "step": 2558 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003926454783048914, + "loss": 3.9522, + "step": 2559 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003925603398835108, + "loss": 3.8227, + "step": 2560 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039247517695435507, + "loss": 3.8477, + "step": 2561 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039238998953206473, + "loss": 3.9209, + "step": 2562 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003923047776312844, + "loss": 3.9802, + "step": 2563 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039221954126666313, + "loss": 4.016, + "step": 2564 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039213428045285385, + "loss": 3.9083, + "step": 2565 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003920489952045141, + "loss": 3.8441, + "step": 2566 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039196368553630535, + "loss": 3.8693, + "step": 2567 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003918783514628932, + "loss": 3.9251, + "step": 2568 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003917929929989476, + "loss": 3.8448, + "step": 2569 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003917076101591427, + "loss": 4.0809, + "step": 2570 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039162220295815684, + "loss": 4.0248, + "step": 2571 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039153677141067254, + "loss": 3.858, + "step": 2572 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003914513155313763, + "loss": 4.0136, + "step": 2573 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039136583533495905, + "loss": 3.8535, + "step": 2574 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003912803308361159, + "loss": 4.0809, + "step": 2575 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039119480204954606, + "loss": 3.9864, + "step": 2576 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039110924898995284, + "loss": 3.9649, + "step": 2577 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003910236716720438, + "loss": 3.954, + "step": 2578 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039093807011053073, + "loss": 4.0217, + "step": 2579 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003908524443201296, + "loss": 3.919, + "step": 2580 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003907667943155603, + "loss": 3.9342, + "step": 2581 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039068112011154715, + "loss": 3.9685, + "step": 2582 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003905954217228186, + "loss": 3.816, + "step": 2583 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003905096991641071, + "loss": 3.9185, + "step": 2584 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039042395245014933, + "loss": 4.021, + "step": 2585 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039033818159568624, + "loss": 3.9059, + "step": 2586 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003902523866154628, + "loss": 3.8256, + "step": 2587 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039016656752422814, + "loss": 3.8513, + "step": 2588 + }, + { + "epoch": 0.33, + "learning_rate": 0.00039008072433673556, + "loss": 3.9087, + "step": 2589 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038999485706774263, + "loss": 3.9406, + "step": 2590 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003899089657320107, + "loss": 3.971, + "step": 2591 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003898230503443055, + "loss": 3.8625, + "step": 2592 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003897371109193972, + "loss": 4.0544, + "step": 2593 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038965114747205944, + "loss": 3.8751, + "step": 2594 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003895651600170705, + "loss": 4.0281, + "step": 2595 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003894791485692125, + "loss": 3.9377, + "step": 2596 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003893931131432719, + "loss": 3.8027, + "step": 2597 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038930705375403923, + "loss": 3.9933, + "step": 2598 + }, + { + "epoch": 0.33, + "learning_rate": 0.000389220970416309, + "loss": 3.9849, + "step": 2599 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038913486314488, + "loss": 3.9117, + "step": 2600 + }, + { + "epoch": 0.33, + "learning_rate": 0.000389048731954555, + "loss": 3.9871, + "step": 2601 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003889625768601409, + "loss": 3.9169, + "step": 2602 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038887639787644904, + "loss": 3.9639, + "step": 2603 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038879019501829423, + "loss": 3.936, + "step": 2604 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038870396830049595, + "loss": 4.0163, + "step": 2605 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038861771773787744, + "loss": 3.9298, + "step": 2606 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038853144334526643, + "loss": 3.9745, + "step": 2607 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003884451451374942, + "loss": 3.8848, + "step": 2608 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038835882312939656, + "loss": 4.0468, + "step": 2609 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003882724773358133, + "loss": 3.8449, + "step": 2610 + }, + { + "epoch": 0.33, + "learning_rate": 0.00038818610777158804, + "loss": 3.9363, + "step": 2611 + }, + { + "epoch": 0.33, + "learning_rate": 0.000388099714451569, + "loss": 3.9017, + "step": 2612 + }, + { + "epoch": 0.33, + "learning_rate": 0.000388013297390608, + "loss": 3.9209, + "step": 2613 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003879268566035612, + "loss": 3.841, + "step": 2614 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003878403921052887, + "loss": 3.916, + "step": 2615 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003877539039106549, + "loss": 3.9108, + "step": 2616 + }, + { + "epoch": 0.33, + "learning_rate": 0.000387667392034528, + "loss": 3.927, + "step": 2617 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038758085649178044, + "loss": 3.9684, + "step": 2618 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003874942972972887, + "loss": 3.9905, + "step": 2619 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038740771446593323, + "loss": 4.0698, + "step": 2620 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038732110801259874, + "loss": 3.908, + "step": 2621 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038723447795217374, + "loss": 4.0176, + "step": 2622 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003871478242995511, + "loss": 3.9524, + "step": 2623 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003870611470696275, + "loss": 3.8409, + "step": 2624 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038697444627730374, + "loss": 4.0399, + "step": 2625 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038688772193748466, + "loss": 3.8435, + "step": 2626 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003868009740650794, + "loss": 4.0397, + "step": 2627 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038671420267500067, + "loss": 3.9488, + "step": 2628 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003866274077821655, + "loss": 4.0396, + "step": 2629 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003865405894014951, + "loss": 3.8993, + "step": 2630 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038645374754791445, + "loss": 4.0082, + "step": 2631 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038636688223635273, + "loss": 4.0282, + "step": 2632 + }, + { + "epoch": 0.34, + "learning_rate": 0.000386279993481743, + "loss": 3.9365, + "step": 2633 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003861930812990225, + "loss": 3.9987, + "step": 2634 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038610614570313244, + "loss": 3.9039, + "step": 2635 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038601918670901807, + "loss": 3.9871, + "step": 2636 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003859322043316287, + "loss": 3.9693, + "step": 2637 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003858451985859175, + "loss": 4.041, + "step": 2638 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003857581694868417, + "loss": 3.9381, + "step": 2639 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038567111704936285, + "loss": 3.868, + "step": 2640 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038558404128844604, + "loss": 3.895, + "step": 2641 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038549694221906084, + "loss": 4.0148, + "step": 2642 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038540981985618036, + "loss": 3.8726, + "step": 2643 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038532267421478205, + "loss": 4.0023, + "step": 2644 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003852355053098473, + "loss": 3.9039, + "step": 2645 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038514831315636134, + "loss": 3.9017, + "step": 2646 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038506109776931366, + "loss": 3.9461, + "step": 2647 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003849738591636974, + "loss": 3.9545, + "step": 2648 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038488659735451, + "loss": 3.9439, + "step": 2649 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038479931235675294, + "loss": 3.9303, + "step": 2650 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003847120041854312, + "loss": 4.006, + "step": 2651 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038462467285555423, + "loss": 3.9954, + "step": 2652 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038453731838213533, + "loss": 3.8907, + "step": 2653 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003844499407801918, + "loss": 3.8952, + "step": 2654 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003843625400647446, + "loss": 4.0891, + "step": 2655 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038427511625081925, + "loss": 3.9623, + "step": 2656 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038418766935344466, + "loss": 4.0915, + "step": 2657 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038410019938765413, + "loss": 3.9567, + "step": 2658 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038401270636848474, + "loss": 3.9786, + "step": 2659 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038392519031097745, + "loss": 3.9002, + "step": 2660 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003838376512301773, + "loss": 3.9242, + "step": 2661 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003837500891411334, + "loss": 3.993, + "step": 2662 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003836625040588986, + "loss": 3.9901, + "step": 2663 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038357489599852984, + "loss": 3.8698, + "step": 2664 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003834872649750879, + "loss": 3.869, + "step": 2665 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038339961100363753, + "loss": 3.8214, + "step": 2666 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003833119340992476, + "loss": 3.9931, + "step": 2667 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003832242342769907, + "loss": 3.937, + "step": 2668 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038313651155194345, + "loss": 3.8153, + "step": 2669 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038304876593918646, + "loss": 3.9457, + "step": 2670 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003829609974538041, + "loss": 3.9792, + "step": 2671 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038287320611088486, + "loss": 3.8912, + "step": 2672 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038278539192552113, + "loss": 3.8781, + "step": 2673 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038269755491280916, + "loss": 3.8554, + "step": 2674 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038260969508784916, + "loss": 3.8684, + "step": 2675 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038252181246574516, + "loss": 3.9272, + "step": 2676 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003824339070616053, + "loss": 4.0255, + "step": 2677 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003823459788905415, + "loss": 4.072, + "step": 2678 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038225802796766964, + "loss": 4.0949, + "step": 2679 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003821700543081095, + "loss": 4.0089, + "step": 2680 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038208205792698483, + "loss": 3.8871, + "step": 2681 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003819940388394232, + "loss": 3.9661, + "step": 2682 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038190599706055595, + "loss": 3.98, + "step": 2683 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038181793260551875, + "loss": 3.9881, + "step": 2684 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038172984548945067, + "loss": 3.8125, + "step": 2685 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038164173572749504, + "loss": 3.8706, + "step": 2686 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003815536033347989, + "loss": 4.086, + "step": 2687 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003814654483265134, + "loss": 3.788, + "step": 2688 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038137727071779304, + "loss": 4.0103, + "step": 2689 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003812890705237969, + "loss": 4.0302, + "step": 2690 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003812008477596875, + "loss": 3.8925, + "step": 2691 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003811126024406314, + "loss": 3.8741, + "step": 2692 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038102433458179883, + "loss": 3.9582, + "step": 2693 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003809360441983643, + "loss": 3.866, + "step": 2694 + }, + { + "epoch": 0.34, + "learning_rate": 0.00038084773130550576, + "loss": 3.829, + "step": 2695 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003807593959184053, + "loss": 4.0698, + "step": 2696 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003806710380522488, + "loss": 3.8969, + "step": 2697 + }, + { + "epoch": 0.35, + "learning_rate": 0.000380582657722226, + "loss": 3.9248, + "step": 2698 + }, + { + "epoch": 0.35, + "learning_rate": 0.00038049425494353047, + "loss": 3.9551, + "step": 2699 + }, + { + "epoch": 0.35, + "learning_rate": 0.00038040582973135974, + "loss": 4.0087, + "step": 2700 + }, + { + "epoch": 0.35, + "learning_rate": 0.00038031738210091506, + "loss": 3.992, + "step": 2701 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003802289120674016, + "loss": 3.9958, + "step": 2702 + }, + { + "epoch": 0.35, + "learning_rate": 0.00038014041964602843, + "loss": 3.9015, + "step": 2703 + }, + { + "epoch": 0.35, + "learning_rate": 0.00038005190485200844, + "loss": 4.0435, + "step": 2704 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003799633677005583, + "loss": 3.9363, + "step": 2705 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003798748082068986, + "loss": 3.9122, + "step": 2706 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003797862263862537, + "loss": 3.9683, + "step": 2707 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037969762225385176, + "loss": 4.0858, + "step": 2708 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037960899582492515, + "loss": 3.8399, + "step": 2709 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037952034711470953, + "loss": 3.9176, + "step": 2710 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037943167613844477, + "loss": 3.9553, + "step": 2711 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003793429829113743, + "loss": 3.8553, + "step": 2712 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003792542674487456, + "loss": 3.9646, + "step": 2713 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037916552976580993, + "loss": 4.027, + "step": 2714 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037907676987782225, + "loss": 3.9158, + "step": 2715 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003789879878000415, + "loss": 3.8238, + "step": 2716 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037889918354773016, + "loss": 3.6628, + "step": 2717 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037881035713615495, + "loss": 3.8294, + "step": 2718 + }, + { + "epoch": 0.35, + "learning_rate": 0.000378721508580586, + "loss": 3.7309, + "step": 2719 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037863263789629753, + "loss": 3.9116, + "step": 2720 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037854374509856733, + "loss": 3.89, + "step": 2721 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037845483020267713, + "loss": 4.169, + "step": 2722 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037836589322391234, + "loss": 3.7771, + "step": 2723 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037827693417756254, + "loss": 3.8848, + "step": 2724 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037818795307892057, + "loss": 3.9626, + "step": 2725 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003780989499432833, + "loss": 3.8827, + "step": 2726 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003780099247859516, + "loss": 3.9208, + "step": 2727 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003779208776222298, + "loss": 3.937, + "step": 2728 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037783180846742617, + "loss": 3.8995, + "step": 2729 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003777427173368526, + "loss": 3.8589, + "step": 2730 + }, + { + "epoch": 0.35, + "learning_rate": 0.000377653604245825, + "loss": 4.1228, + "step": 2731 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037756446920966303, + "loss": 3.8969, + "step": 2732 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037747531224368994, + "loss": 3.9445, + "step": 2733 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003773861333632328, + "loss": 4.0063, + "step": 2734 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037729693258362263, + "loss": 3.9836, + "step": 2735 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037720770992019393, + "loss": 3.9004, + "step": 2736 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003771184653882852, + "loss": 4.1088, + "step": 2737 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037702919900323856, + "loss": 3.9713, + "step": 2738 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003769399107804, + "loss": 3.916, + "step": 2739 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003768506007351191, + "loss": 3.9232, + "step": 2740 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037676126888274943, + "loss": 3.9608, + "step": 2741 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003766719152386481, + "loss": 3.9762, + "step": 2742 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037658253981817603, + "loss": 3.9277, + "step": 2743 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037649314263669785, + "loss": 4.075, + "step": 2744 + }, + { + "epoch": 0.35, + "learning_rate": 0.000376403723709582, + "loss": 4.012, + "step": 2745 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003763142830522007, + "loss": 3.8433, + "step": 2746 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037622482067992976, + "loss": 3.9621, + "step": 2747 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003761353366081488, + "loss": 4.0616, + "step": 2748 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037604583085224126, + "loss": 4.1352, + "step": 2749 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003759563034275941, + "loss": 3.9951, + "step": 2750 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003758667543495982, + "loss": 4.0015, + "step": 2751 + }, + { + "epoch": 0.35, + "learning_rate": 0.000375777183633648, + "loss": 4.0938, + "step": 2752 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037568759129514185, + "loss": 3.8132, + "step": 2753 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003755979773494816, + "loss": 3.8116, + "step": 2754 + }, + { + "epoch": 0.35, + "learning_rate": 0.000375508341812073, + "loss": 4.0637, + "step": 2755 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037541868469832547, + "loss": 3.9205, + "step": 2756 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037532900602365205, + "loss": 3.9685, + "step": 2757 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037523930580346955, + "loss": 4.0043, + "step": 2758 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037514958405319846, + "loss": 3.9553, + "step": 2759 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037505984078826303, + "loss": 3.9805, + "step": 2760 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003749700760240912, + "loss": 3.8386, + "step": 2761 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003748802897761144, + "loss": 3.9522, + "step": 2762 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003747904820597682, + "loss": 3.8263, + "step": 2763 + }, + { + "epoch": 0.35, + "learning_rate": 0.00037470065289049135, + "loss": 3.9656, + "step": 2764 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003746108022837266, + "loss": 4.0172, + "step": 2765 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003745209302549204, + "loss": 3.8895, + "step": 2766 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003744310368195227, + "loss": 3.9723, + "step": 2767 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003743411219929872, + "loss": 3.8469, + "step": 2768 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003742511857907713, + "loss": 3.891, + "step": 2769 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003741612282283362, + "loss": 4.0802, + "step": 2770 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003740712493211466, + "loss": 3.9156, + "step": 2771 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003739812490846708, + "loss": 3.9433, + "step": 2772 + }, + { + "epoch": 0.35, + "learning_rate": 0.000373891227534381, + "loss": 3.7503, + "step": 2773 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003738011846857529, + "loss": 3.8342, + "step": 2774 + }, + { + "epoch": 0.36, + "learning_rate": 0.000373711120554266, + "loss": 4.0223, + "step": 2775 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003736210351554032, + "loss": 4.0088, + "step": 2776 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003735309285046513, + "loss": 4.0418, + "step": 2777 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003734408006175008, + "loss": 3.9019, + "step": 2778 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037335065150944556, + "loss": 3.9134, + "step": 2779 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003732604811959834, + "loss": 3.9797, + "step": 2780 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037317028969261547, + "loss": 3.9052, + "step": 2781 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037308007701484684, + "loss": 3.8889, + "step": 2782 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037298984317818613, + "loss": 4.0642, + "step": 2783 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037289958819814557, + "loss": 3.902, + "step": 2784 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037280931209024106, + "loss": 3.9987, + "step": 2785 + }, + { + "epoch": 0.36, + "learning_rate": 0.000372719014869992, + "loss": 3.853, + "step": 2786 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003726286965529216, + "loss": 3.8916, + "step": 2787 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037253835715455664, + "loss": 3.9325, + "step": 2788 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037244799669042754, + "loss": 3.9577, + "step": 2789 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037235761517606826, + "loss": 3.929, + "step": 2790 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037226721262701633, + "loss": 3.8632, + "step": 2791 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037217678905881324, + "loss": 4.0292, + "step": 2792 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037208634448700374, + "loss": 3.8463, + "step": 2793 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037199587892713617, + "loss": 3.8601, + "step": 2794 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003719053923947628, + "loss": 3.8502, + "step": 2795 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003718148849054391, + "loss": 3.91, + "step": 2796 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037172435647472466, + "loss": 3.935, + "step": 2797 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003716338071181821, + "loss": 4.0243, + "step": 2798 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037154323685137803, + "loss": 3.9681, + "step": 2799 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003714526456898824, + "loss": 3.8358, + "step": 2800 + }, + { + "epoch": 0.36, + "learning_rate": 0.000371362033649269, + "loss": 3.9151, + "step": 2801 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037127140074511516, + "loss": 4.0081, + "step": 2802 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003711807469930016, + "loss": 3.8926, + "step": 2803 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003710900724085128, + "loss": 3.9265, + "step": 2804 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037099937700723663, + "loss": 3.8671, + "step": 2805 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037090866080476495, + "loss": 3.9584, + "step": 2806 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037081792381669275, + "loss": 3.8899, + "step": 2807 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003707271660586188, + "loss": 3.852, + "step": 2808 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003706363875461454, + "loss": 3.8865, + "step": 2809 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037054558829487837, + "loss": 3.8732, + "step": 2810 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037045476832042734, + "loss": 3.8703, + "step": 2811 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037036392763840513, + "loss": 3.9365, + "step": 2812 + }, + { + "epoch": 0.36, + "learning_rate": 0.00037027306626442847, + "loss": 3.9207, + "step": 2813 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003701821842141173, + "loss": 4.0098, + "step": 2814 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003700912815030955, + "loss": 3.9348, + "step": 2815 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003700003581469901, + "loss": 4.1022, + "step": 2816 + }, + { + "epoch": 0.36, + "learning_rate": 0.000369909414161432, + "loss": 4.0205, + "step": 2817 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003698184495620555, + "loss": 3.9662, + "step": 2818 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036972746436449844, + "loss": 3.8999, + "step": 2819 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003696364585844023, + "loss": 3.9927, + "step": 2820 + }, + { + "epoch": 0.36, + "learning_rate": 0.000369545432237412, + "loss": 3.9341, + "step": 2821 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003694543853391759, + "loss": 3.9018, + "step": 2822 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003693633179053462, + "loss": 3.8327, + "step": 2823 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036927222995157837, + "loss": 3.9851, + "step": 2824 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003691811214935315, + "loss": 3.8432, + "step": 2825 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036908999254686817, + "loss": 4.1855, + "step": 2826 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036899884312725453, + "loss": 3.9071, + "step": 2827 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003689076732503601, + "loss": 3.9637, + "step": 2828 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003688164829318583, + "loss": 3.9455, + "step": 2829 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036872527218742557, + "loss": 3.8347, + "step": 2830 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036863404103274215, + "loss": 3.9218, + "step": 2831 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036854278948349184, + "loss": 3.9836, + "step": 2832 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003684515175553616, + "loss": 3.8815, + "step": 2833 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036836022526404237, + "loss": 3.8754, + "step": 2834 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003682689126252283, + "loss": 3.9159, + "step": 2835 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003681775796546171, + "loss": 4.0068, + "step": 2836 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003680862263679097, + "loss": 3.8676, + "step": 2837 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036799485278081126, + "loss": 3.9698, + "step": 2838 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003679034589090296, + "loss": 3.9497, + "step": 2839 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003678120447682765, + "loss": 3.7896, + "step": 2840 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003677206103742671, + "loss": 3.9667, + "step": 2841 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003676291557427201, + "loss": 3.821, + "step": 2842 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003675376808893575, + "loss": 4.0463, + "step": 2843 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036744618582990496, + "loss": 3.8394, + "step": 2844 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036735467058009153, + "loss": 3.9844, + "step": 2845 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003672631351556498, + "loss": 3.8582, + "step": 2846 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003671715795723156, + "loss": 4.0331, + "step": 2847 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036708000384582854, + "loss": 3.8068, + "step": 2848 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036698840799193153, + "loss": 4.0488, + "step": 2849 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036689679202637094, + "loss": 4.0293, + "step": 2850 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003668051559648965, + "loss": 4.0192, + "step": 2851 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036671349982326173, + "loss": 3.9229, + "step": 2852 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036662182361722333, + "loss": 3.8002, + "step": 2853 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036653012736254136, + "loss": 4.0963, + "step": 2854 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003664384110749797, + "loss": 3.9599, + "step": 2855 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036634667477030515, + "loss": 3.8888, + "step": 2856 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003662549184642885, + "loss": 3.8584, + "step": 2857 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036616314217270354, + "loss": 3.86, + "step": 2858 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036607134591132783, + "loss": 3.914, + "step": 2859 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036597952969594217, + "loss": 4.0423, + "step": 2860 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003658876935423307, + "loss": 3.9611, + "step": 2861 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036579583746628125, + "loss": 3.9507, + "step": 2862 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036570396148358496, + "loss": 3.8017, + "step": 2863 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036561206561003636, + "loss": 3.9647, + "step": 2864 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036552014986143336, + "loss": 4.0131, + "step": 2865 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003654282142535773, + "loss": 3.9257, + "step": 2866 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003653362588022732, + "loss": 3.9444, + "step": 2867 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003652442835233291, + "loss": 3.7771, + "step": 2868 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003651522884325565, + "loss": 3.7826, + "step": 2869 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003650602735457706, + "loss": 3.8609, + "step": 2870 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003649682388787898, + "loss": 3.9914, + "step": 2871 + }, + { + "epoch": 0.37, + "learning_rate": 0.000364876184447436, + "loss": 3.9766, + "step": 2872 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003647841102675342, + "loss": 3.8305, + "step": 2873 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003646920163549132, + "loss": 3.9367, + "step": 2874 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036459990272540507, + "loss": 3.8795, + "step": 2875 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036450776939484495, + "loss": 3.915, + "step": 2876 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003644156163790719, + "loss": 4.0111, + "step": 2877 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036432344369392795, + "loss": 3.9242, + "step": 2878 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003642312513552586, + "loss": 3.9296, + "step": 2879 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003641390393789129, + "loss": 3.88, + "step": 2880 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036404680778074315, + "loss": 3.8904, + "step": 2881 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036395455657660505, + "loss": 3.8678, + "step": 2882 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003638622857823575, + "loss": 3.995, + "step": 2883 + }, + { + "epoch": 0.37, + "learning_rate": 0.000363769995413863, + "loss": 3.9806, + "step": 2884 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036367768548698745, + "loss": 3.9054, + "step": 2885 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003635853560175998, + "loss": 3.8984, + "step": 2886 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003634930070215727, + "loss": 3.9626, + "step": 2887 + }, + { + "epoch": 0.37, + "learning_rate": 0.000363400638514782, + "loss": 3.8012, + "step": 2888 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003633082505131069, + "loss": 3.9269, + "step": 2889 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003632158430324299, + "loss": 3.8721, + "step": 2890 + }, + { + "epoch": 0.37, + "learning_rate": 0.000363123416088637, + "loss": 3.7973, + "step": 2891 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003630309696976175, + "loss": 3.9936, + "step": 2892 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036293850387526377, + "loss": 3.9517, + "step": 2893 + }, + { + "epoch": 0.37, + "learning_rate": 0.000362846018637472, + "loss": 3.8462, + "step": 2894 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036275351400014144, + "loss": 3.828, + "step": 2895 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036266098997917476, + "loss": 3.9234, + "step": 2896 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036256844659047767, + "loss": 3.9266, + "step": 2897 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003624758838499596, + "loss": 4.0327, + "step": 2898 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003623833017735333, + "loss": 3.8849, + "step": 2899 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036229070037711446, + "loss": 3.8046, + "step": 2900 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003621980796766225, + "loss": 3.9543, + "step": 2901 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003621054396879798, + "loss": 3.8949, + "step": 2902 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036201278042711257, + "loss": 4.0171, + "step": 2903 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003619201019099497, + "loss": 4.0323, + "step": 2904 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003618274041524239, + "loss": 3.9614, + "step": 2905 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036173468717047086, + "loss": 3.932, + "step": 2906 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003616419509800297, + "loss": 4.0073, + "step": 2907 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036154919559704303, + "loss": 3.823, + "step": 2908 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003614564210374563, + "loss": 3.9035, + "step": 2909 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036136362731721886, + "loss": 3.9148, + "step": 2910 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003612708144522827, + "loss": 4.0431, + "step": 2911 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036117798245860364, + "loss": 3.7177, + "step": 2912 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003610851313521405, + "loss": 4.0139, + "step": 2913 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036099226114885545, + "loss": 3.9723, + "step": 2914 + }, + { + "epoch": 0.37, + "learning_rate": 0.000360899371864714, + "loss": 4.0539, + "step": 2915 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036080646351568485, + "loss": 3.7897, + "step": 2916 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003607135361177401, + "loss": 3.8069, + "step": 2917 + }, + { + "epoch": 0.37, + "learning_rate": 0.000360620589686855, + "loss": 3.8578, + "step": 2918 + }, + { + "epoch": 0.37, + "learning_rate": 0.000360527624239008, + "loss": 3.923, + "step": 2919 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003604346397901811, + "loss": 3.989, + "step": 2920 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003603416363563593, + "loss": 3.9291, + "step": 2921 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003602486139535311, + "loss": 3.942, + "step": 2922 + }, + { + "epoch": 0.37, + "learning_rate": 0.000360155572597688, + "loss": 3.8752, + "step": 2923 + }, + { + "epoch": 0.37, + "learning_rate": 0.000360062512304825, + "loss": 3.9155, + "step": 2924 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003599694330909401, + "loss": 4.0028, + "step": 2925 + }, + { + "epoch": 0.37, + "learning_rate": 0.00035987633497203483, + "loss": 3.7415, + "step": 2926 + }, + { + "epoch": 0.37, + "learning_rate": 0.00035978321796411374, + "loss": 3.9386, + "step": 2927 + }, + { + "epoch": 0.37, + "learning_rate": 0.00035969008208318483, + "loss": 3.8203, + "step": 2928 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003595969273452591, + "loss": 3.7857, + "step": 2929 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035950375376635104, + "loss": 3.965, + "step": 2930 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035941056136247826, + "loss": 3.9398, + "step": 2931 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003593173501496616, + "loss": 3.9288, + "step": 2932 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035922412014392503, + "loss": 3.9543, + "step": 2933 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003591308713612961, + "loss": 4.0823, + "step": 2934 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003590376038178051, + "loss": 3.9321, + "step": 2935 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035894431752948603, + "loss": 3.9534, + "step": 2936 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003588510125123757, + "loss": 4.1163, + "step": 2937 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035875768878251444, + "loss": 3.9864, + "step": 2938 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003586643463559457, + "loss": 3.9842, + "step": 2939 + }, + { + "epoch": 0.38, + "learning_rate": 0.000358570985248716, + "loss": 4.0256, + "step": 2940 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035847760547687535, + "loss": 3.8157, + "step": 2941 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003583842070564768, + "loss": 3.9226, + "step": 2942 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035829079000357643, + "loss": 4.0151, + "step": 2943 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003581973543342338, + "loss": 3.9247, + "step": 2944 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035810390006451177, + "loss": 3.9059, + "step": 2945 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035801042721047606, + "loss": 3.9938, + "step": 2946 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035791693578819564, + "loss": 3.8774, + "step": 2947 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035782342581374294, + "loss": 3.9245, + "step": 2948 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003577298973031933, + "loss": 3.856, + "step": 2949 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003576363502726255, + "loss": 3.9516, + "step": 2950 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003575427847381212, + "loss": 3.9455, + "step": 2951 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003574492007157655, + "loss": 3.8837, + "step": 2952 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003573555982216465, + "loss": 3.9361, + "step": 2953 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003572619772718556, + "loss": 3.9213, + "step": 2954 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003571683378824873, + "loss": 3.8461, + "step": 2955 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035707468006963936, + "loss": 3.9555, + "step": 2956 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035698100384941257, + "loss": 3.8357, + "step": 2957 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003568873092379109, + "loss": 3.9484, + "step": 2958 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035679359625124174, + "loss": 3.8682, + "step": 2959 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035669986490551535, + "loss": 3.9489, + "step": 2960 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035660611521684516, + "loss": 3.983, + "step": 2961 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003565123472013478, + "loss": 3.8334, + "step": 2962 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035641856087514337, + "loss": 3.82, + "step": 2963 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003563247562543545, + "loss": 3.8423, + "step": 2964 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035623093335510736, + "loss": 3.8551, + "step": 2965 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003561370921935313, + "loss": 3.8605, + "step": 2966 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035604323278575856, + "loss": 3.8961, + "step": 2967 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003559493551479249, + "loss": 4.0222, + "step": 2968 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035585545929616867, + "loss": 3.8299, + "step": 2969 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003557615452466319, + "loss": 4.0711, + "step": 2970 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003556676130154594, + "loss": 3.9175, + "step": 2971 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035557366261879916, + "loss": 3.85, + "step": 2972 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003554796940728024, + "loss": 3.8821, + "step": 2973 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003553857073936235, + "loss": 3.9925, + "step": 2974 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035529170259741973, + "loss": 3.8283, + "step": 2975 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035519767970035147, + "loss": 3.8463, + "step": 2976 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003551036387185827, + "loss": 3.8632, + "step": 2977 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035500957966827994, + "loss": 4.0758, + "step": 2978 + }, + { + "epoch": 0.38, + "learning_rate": 0.000354915502565613, + "loss": 3.9607, + "step": 2979 + }, + { + "epoch": 0.38, + "learning_rate": 0.000354821407426755, + "loss": 3.8548, + "step": 2980 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035472729426788176, + "loss": 3.9128, + "step": 2981 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003546331631051726, + "loss": 3.849, + "step": 2982 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003545390139548096, + "loss": 3.876, + "step": 2983 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003544448468329783, + "loss": 3.9287, + "step": 2984 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003543506617558669, + "loss": 3.9044, + "step": 2985 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003542564587396671, + "loss": 3.7934, + "step": 2986 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003541622378005733, + "loss": 3.8544, + "step": 2987 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003540679989547833, + "loss": 3.7665, + "step": 2988 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035397374221849786, + "loss": 3.9687, + "step": 2989 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035387946760792073, + "loss": 4.1109, + "step": 2990 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035378517513925885, + "loss": 3.846, + "step": 2991 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003536908648287222, + "loss": 3.9047, + "step": 2992 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003535965366925238, + "loss": 3.9521, + "step": 2993 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003535021907468797, + "loss": 3.8009, + "step": 2994 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003534078270080091, + "loss": 3.8645, + "step": 2995 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035331344549213435, + "loss": 3.9073, + "step": 2996 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003532190462154805, + "loss": 3.9784, + "step": 2997 + }, + { + "epoch": 0.38, + "learning_rate": 0.000353124629194276, + "loss": 3.8606, + "step": 2998 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035303019444475224, + "loss": 3.9157, + "step": 2999 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003529357419831437, + "loss": 3.882, + "step": 3000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035284127182568767, + "loss": 3.8283, + "step": 3001 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003527467839886248, + "loss": 3.8391, + "step": 3002 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035265227848819866, + "loss": 4.0006, + "step": 3003 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003525577553406557, + "loss": 3.9343, + "step": 3004 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003524632145622457, + "loss": 3.8625, + "step": 3005 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003523686561692213, + "loss": 3.9695, + "step": 3006 + }, + { + "epoch": 0.38, + "learning_rate": 0.00035227408017783813, + "loss": 3.8867, + "step": 3007 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003521794866043549, + "loss": 3.9534, + "step": 3008 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003520848754650333, + "loss": 3.9924, + "step": 3009 + }, + { + "epoch": 0.39, + "learning_rate": 0.00035199024677613826, + "loss": 4.0334, + "step": 3010 + }, + { + "epoch": 0.39, + "learning_rate": 0.00035189560055393744, + "loss": 3.9724, + "step": 3011 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003518009368147015, + "loss": 3.9444, + "step": 3012 + }, + { + "epoch": 0.39, + "learning_rate": 0.00035170625557470433, + "loss": 3.9924, + "step": 3013 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003516115568502228, + "loss": 3.8756, + "step": 3014 + }, + { + "epoch": 0.39, + "learning_rate": 0.00035151684065753665, + "loss": 3.8561, + "step": 3015 + }, + { + "epoch": 0.39, + "learning_rate": 0.00035142210701292873, + "loss": 3.9751, + "step": 3016 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003513273559326848, + "loss": 3.832, + "step": 3017 + }, + { + "epoch": 0.39, + "learning_rate": 0.00035123258743309375, + "loss": 3.8846, + "step": 3018 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003511378015304473, + "loss": 3.9705, + "step": 3019 + }, + { + "epoch": 0.39, + "learning_rate": 0.00035104299824104026, + "loss": 3.8308, + "step": 3020 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003509481775811705, + "loss": 3.9261, + "step": 3021 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003508533395671386, + "loss": 3.9218, + "step": 3022 + }, + { + "epoch": 0.39, + "learning_rate": 0.00035075848421524847, + "loss": 3.87, + "step": 3023 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003506636115418069, + "loss": 3.9208, + "step": 3024 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003505687215631235, + "loss": 4.0132, + "step": 3025 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003504738142955109, + "loss": 3.8822, + "step": 3026 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003503788897552848, + "loss": 3.9811, + "step": 3027 + }, + { + "epoch": 0.39, + "learning_rate": 0.00035028394795876396, + "loss": 4.0172, + "step": 3028 + }, + { + "epoch": 0.39, + "learning_rate": 0.00035018898892226984, + "loss": 3.9275, + "step": 3029 + }, + { + "epoch": 0.39, + "learning_rate": 0.00035009401266212706, + "loss": 3.8583, + "step": 3030 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034999901919466304, + "loss": 3.8766, + "step": 3031 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034990400853620836, + "loss": 3.9745, + "step": 3032 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034980898070309643, + "loss": 3.8912, + "step": 3033 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003497139357116637, + "loss": 3.8547, + "step": 3034 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034961887357824934, + "loss": 3.9639, + "step": 3035 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003495237943191957, + "loss": 3.8541, + "step": 3036 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034942869795084815, + "loss": 3.9652, + "step": 3037 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003493335844895547, + "loss": 3.7963, + "step": 3038 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034923845395166655, + "loss": 4.0056, + "step": 3039 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034914330635353765, + "loss": 3.8653, + "step": 3040 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034904814171152497, + "loss": 3.9971, + "step": 3041 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034895296004198854, + "loss": 3.872, + "step": 3042 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003488577613612911, + "loss": 3.8536, + "step": 3043 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003487625456857984, + "loss": 3.8747, + "step": 3044 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034866731303187916, + "loss": 3.9454, + "step": 3045 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034857206341590497, + "loss": 3.8004, + "step": 3046 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034847679685425024, + "loss": 3.9552, + "step": 3047 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034838151336329267, + "loss": 4.0538, + "step": 3048 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003482862129594123, + "loss": 3.9478, + "step": 3049 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034819089565899246, + "loss": 3.8805, + "step": 3050 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034809556147841935, + "loss": 3.9096, + "step": 3051 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003480002104340821, + "loss": 3.9967, + "step": 3052 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003479048425423725, + "loss": 3.9357, + "step": 3053 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003478094578196854, + "loss": 3.7942, + "step": 3054 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034771405628241867, + "loss": 3.9205, + "step": 3055 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034761863794697287, + "loss": 3.9491, + "step": 3056 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034752320282975145, + "loss": 3.819, + "step": 3057 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034742775094716093, + "loss": 3.9545, + "step": 3058 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034733228231561056, + "loss": 3.8278, + "step": 3059 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034723679695151244, + "loss": 4.0323, + "step": 3060 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034714129487128166, + "loss": 3.9206, + "step": 3061 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034704577609133614, + "loss": 3.9808, + "step": 3062 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034695024062809666, + "loss": 3.9285, + "step": 3063 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003468546884979868, + "loss": 3.9802, + "step": 3064 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003467591197174332, + "loss": 3.8742, + "step": 3065 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034666353430286534, + "loss": 3.8327, + "step": 3066 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003465679322707152, + "loss": 4.0036, + "step": 3067 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034647231363741797, + "loss": 3.7816, + "step": 3068 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034637667841941154, + "loss": 3.981, + "step": 3069 + }, + { + "epoch": 0.39, + "learning_rate": 0.000346281026633137, + "loss": 3.9844, + "step": 3070 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003461853582950377, + "loss": 3.9137, + "step": 3071 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034608967342156016, + "loss": 3.8885, + "step": 3072 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034599397202915384, + "loss": 3.7913, + "step": 3073 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003458982541342709, + "loss": 3.9355, + "step": 3074 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034580251975336636, + "loss": 3.938, + "step": 3075 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034570676890289794, + "loss": 3.9983, + "step": 3076 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034561100159932653, + "loss": 3.9772, + "step": 3077 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034551521785911537, + "loss": 3.9308, + "step": 3078 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003454194176987311, + "loss": 3.8266, + "step": 3079 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003453236011346427, + "loss": 3.8667, + "step": 3080 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003452277681833221, + "loss": 3.9071, + "step": 3081 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003451319188612443, + "loss": 3.9878, + "step": 3082 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003450360531848866, + "loss": 3.9623, + "step": 3083 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034494017117072973, + "loss": 3.8799, + "step": 3084 + }, + { + "epoch": 0.39, + "learning_rate": 0.00034484427283525685, + "loss": 3.886, + "step": 3085 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003447483581949538, + "loss": 3.9968, + "step": 3086 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003446524272663096, + "loss": 4.0339, + "step": 3087 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003445564800658159, + "loss": 3.9744, + "step": 3088 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034446051660996714, + "loss": 4.0783, + "step": 3089 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034436453691526035, + "loss": 3.9461, + "step": 3090 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034426854099819584, + "loss": 3.8135, + "step": 3091 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034417252887527616, + "loss": 3.8921, + "step": 3092 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003440765005630071, + "loss": 3.9633, + "step": 3093 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003439804560778968, + "loss": 3.8608, + "step": 3094 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034388439543645683, + "loss": 3.9993, + "step": 3095 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003437883186552008, + "loss": 3.9193, + "step": 3096 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003436922257506454, + "loss": 3.7256, + "step": 3097 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034359611673931027, + "loss": 3.8201, + "step": 3098 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003434999916377177, + "loss": 4.0465, + "step": 3099 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003434038504623926, + "loss": 3.9707, + "step": 3100 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034330769322986267, + "loss": 3.9674, + "step": 3101 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003432115199566586, + "loss": 3.9696, + "step": 3102 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034311533065931374, + "loss": 4.0254, + "step": 3103 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034301912535436395, + "loss": 3.9354, + "step": 3104 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003429229040583482, + "loss": 3.9262, + "step": 3105 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003428266667878079, + "loss": 3.8949, + "step": 3106 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003427304135592876, + "loss": 3.8995, + "step": 3107 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034263414438933407, + "loss": 4.0, + "step": 3108 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034253785929449723, + "loss": 4.0377, + "step": 3109 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034244155829132953, + "loss": 3.9724, + "step": 3110 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003423452413963864, + "loss": 3.9237, + "step": 3111 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034224890862622566, + "loss": 4.0018, + "step": 3112 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034215255999740806, + "loss": 3.9111, + "step": 3113 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034205619552649715, + "loss": 4.0137, + "step": 3114 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034195981523005894, + "loss": 3.8974, + "step": 3115 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003418634191246624, + "loss": 3.9575, + "step": 3116 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003417670072268792, + "loss": 3.9814, + "step": 3117 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034167057955328365, + "loss": 4.0279, + "step": 3118 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003415741361204526, + "loss": 3.9691, + "step": 3119 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034147767694496604, + "loss": 3.8986, + "step": 3120 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034138120204340623, + "loss": 3.9756, + "step": 3121 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034128471143235847, + "loss": 4.0835, + "step": 3122 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034118820512841054, + "loss": 3.9715, + "step": 3123 + }, + { + "epoch": 0.4, + "learning_rate": 0.000341091683148153, + "loss": 3.9334, + "step": 3124 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034099514550817904, + "loss": 3.9012, + "step": 3125 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034089859222508474, + "loss": 4.0435, + "step": 3126 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003408020233154686, + "loss": 3.9526, + "step": 3127 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034070543879593206, + "loss": 3.825, + "step": 3128 + }, + { + "epoch": 0.4, + "learning_rate": 0.00034060883868307893, + "loss": 3.7614, + "step": 3129 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003405122229935161, + "loss": 3.8847, + "step": 3130 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003404155917438528, + "loss": 3.8592, + "step": 3131 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003403189449507012, + "loss": 3.9037, + "step": 3132 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003402222826306757, + "loss": 3.8798, + "step": 3133 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003401256048003939, + "loss": 3.9021, + "step": 3134 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003400289114764759, + "loss": 3.9858, + "step": 3135 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003399322026755443, + "loss": 4.0434, + "step": 3136 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003398354784142244, + "loss": 3.9841, + "step": 3137 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033973873870914433, + "loss": 3.9987, + "step": 3138 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033964198357693475, + "loss": 3.9766, + "step": 3139 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033954521303422905, + "loss": 3.9227, + "step": 3140 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033944842709766307, + "loss": 3.9231, + "step": 3141 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033935162578387545, + "loss": 3.985, + "step": 3142 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033925480910950757, + "loss": 4.0009, + "step": 3143 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003391579770912032, + "loss": 4.0741, + "step": 3144 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033906112974560904, + "loss": 3.8853, + "step": 3145 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003389642670893742, + "loss": 3.957, + "step": 3146 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003388673891391504, + "loss": 3.6829, + "step": 3147 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033877049591159213, + "loss": 3.9288, + "step": 3148 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033867358742335665, + "loss": 3.8199, + "step": 3149 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003385766636911035, + "loss": 3.7886, + "step": 3150 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033847972473149485, + "loss": 3.9433, + "step": 3151 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003383827705611958, + "loss": 3.826, + "step": 3152 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033828580119687395, + "loss": 4.008, + "step": 3153 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003381888166551994, + "loss": 3.8807, + "step": 3154 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003380918169528448, + "loss": 3.935, + "step": 3155 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033799480210648566, + "loss": 3.8847, + "step": 3156 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003378977721327999, + "loss": 3.8543, + "step": 3157 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003378007270484681, + "loss": 3.9859, + "step": 3158 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033770366687017346, + "loss": 4.0039, + "step": 3159 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033760659161460177, + "loss": 3.917, + "step": 3160 + }, + { + "epoch": 0.4, + "learning_rate": 0.00033750950129844134, + "loss": 3.9987, + "step": 3161 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003374123959383831, + "loss": 3.9161, + "step": 3162 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003373152755511207, + "loss": 3.9092, + "step": 3163 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003372181401533501, + "loss": 3.8644, + "step": 3164 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033712098976177015, + "loss": 3.944, + "step": 3165 + }, + { + "epoch": 0.41, + "learning_rate": 0.000337023824393082, + "loss": 3.8356, + "step": 3166 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003369266440639897, + "loss": 3.8787, + "step": 3167 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003368294487911995, + "loss": 3.8908, + "step": 3168 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033673223859142034, + "loss": 3.964, + "step": 3169 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003366350134813639, + "loss": 3.9414, + "step": 3170 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003365377734777444, + "loss": 3.9071, + "step": 3171 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003364405185972783, + "loss": 3.9707, + "step": 3172 + }, + { + "epoch": 0.41, + "learning_rate": 0.000336343248856685, + "loss": 3.9191, + "step": 3173 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033624596427268627, + "loss": 3.9773, + "step": 3174 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033614866486200636, + "loss": 3.9415, + "step": 3175 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003360513506413722, + "loss": 3.886, + "step": 3176 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033595402162751344, + "loss": 3.8555, + "step": 3177 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033585667783716177, + "loss": 3.8942, + "step": 3178 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033575931928705186, + "loss": 3.9312, + "step": 3179 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033566194599392074, + "loss": 3.942, + "step": 3180 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033556455797450807, + "loss": 3.9417, + "step": 3181 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033546715524555593, + "loss": 3.8498, + "step": 3182 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033536973782380896, + "loss": 3.8944, + "step": 3183 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003352723057260144, + "loss": 3.9342, + "step": 3184 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003351748589689219, + "loss": 3.8914, + "step": 3185 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033507739756928373, + "loss": 3.8994, + "step": 3186 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003349799215438546, + "loss": 3.9497, + "step": 3187 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003348824309093918, + "loss": 3.9047, + "step": 3188 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033478492568265507, + "loss": 3.9001, + "step": 3189 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033468740588040667, + "loss": 3.8066, + "step": 3190 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003345898715194115, + "loss": 3.8933, + "step": 3191 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033449232261643675, + "loss": 3.8721, + "step": 3192 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033439475918825215, + "loss": 3.8401, + "step": 3193 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033429718125163013, + "loss": 4.006, + "step": 3194 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003341995888233453, + "loss": 3.9697, + "step": 3195 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033410198192017517, + "loss": 3.8407, + "step": 3196 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033400436055889925, + "loss": 3.8169, + "step": 3197 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003339067247562999, + "loss": 3.8896, + "step": 3198 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003338090745291619, + "loss": 3.8155, + "step": 3199 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003337114098942723, + "loss": 4.0015, + "step": 3200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003336137308684209, + "loss": 3.8409, + "step": 3201 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033351603746839987, + "loss": 3.8571, + "step": 3202 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033341832971100376, + "loss": 3.8959, + "step": 3203 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033332060761302973, + "loss": 3.9764, + "step": 3204 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003332228711912773, + "loss": 3.9449, + "step": 3205 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003331251204625485, + "loss": 3.8627, + "step": 3206 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033302735544364794, + "loss": 3.8956, + "step": 3207 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003329295761513822, + "loss": 3.9146, + "step": 3208 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003328317826025611, + "loss": 3.8203, + "step": 3209 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003327339748139963, + "loss": 3.9349, + "step": 3210 + }, + { + "epoch": 0.41, + "learning_rate": 0.000332636152802502, + "loss": 3.9227, + "step": 3211 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003325383165848951, + "loss": 3.8688, + "step": 3212 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033244046617799473, + "loss": 3.868, + "step": 3213 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033234260159862244, + "loss": 3.8031, + "step": 3214 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033224472286360235, + "loss": 3.8394, + "step": 3215 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033214682998976096, + "loss": 3.9325, + "step": 3216 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033204892299392715, + "loss": 3.8294, + "step": 3217 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003319510018929323, + "loss": 3.8166, + "step": 3218 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033185306670361015, + "loss": 3.8983, + "step": 3219 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033175511744279695, + "loss": 3.9261, + "step": 3220 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003316571541273312, + "loss": 3.8377, + "step": 3221 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033155917677405404, + "loss": 3.9409, + "step": 3222 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033146118539980886, + "loss": 3.9374, + "step": 3223 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003313631800214416, + "loss": 3.9646, + "step": 3224 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003312651606558004, + "loss": 3.9214, + "step": 3225 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033116712731973597, + "loss": 3.8415, + "step": 3226 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003310690800301014, + "loss": 4.0768, + "step": 3227 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003309710188037521, + "loss": 3.7425, + "step": 3228 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033087294365754605, + "loss": 4.0714, + "step": 3229 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003307748546083434, + "loss": 4.0826, + "step": 3230 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033067675167300673, + "loss": 3.8665, + "step": 3231 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003305786348684013, + "loss": 4.0145, + "step": 3232 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003304805042113943, + "loss": 3.8493, + "step": 3233 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033038235971885566, + "loss": 3.9475, + "step": 3234 + }, + { + "epoch": 0.41, + "learning_rate": 0.00033028420140765745, + "loss": 3.9245, + "step": 3235 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003301860292946743, + "loss": 4.0419, + "step": 3236 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003300878433967832, + "loss": 3.8817, + "step": 3237 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003299896437308633, + "loss": 3.8677, + "step": 3238 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003298914303137964, + "loss": 3.8864, + "step": 3239 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003297932031624663, + "loss": 3.9735, + "step": 3240 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003296949622937596, + "loss": 3.774, + "step": 3241 + }, + { + "epoch": 0.41, + "learning_rate": 0.000329596707724565, + "loss": 3.9524, + "step": 3242 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003294984394717736, + "loss": 3.8626, + "step": 3243 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003294001575522788, + "loss": 3.8983, + "step": 3244 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003293018619829764, + "loss": 3.9068, + "step": 3245 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032920355278076466, + "loss": 3.8537, + "step": 3246 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003291052299625439, + "loss": 3.9468, + "step": 3247 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032900689354521705, + "loss": 3.9951, + "step": 3248 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032890854354568935, + "loss": 4.007, + "step": 3249 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032881017998086817, + "loss": 3.8816, + "step": 3250 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003287118028676634, + "loss": 3.8757, + "step": 3251 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032861341222298725, + "loss": 3.8082, + "step": 3252 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003285150080637542, + "loss": 3.9894, + "step": 3253 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032841659040688093, + "loss": 3.8418, + "step": 3254 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032831815926928675, + "loss": 3.965, + "step": 3255 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032821971466789304, + "loss": 3.8218, + "step": 3256 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003281212566196236, + "loss": 4.0054, + "step": 3257 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032802278514140444, + "loss": 3.9881, + "step": 3258 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003279243002501641, + "loss": 3.941, + "step": 3259 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032782580196283307, + "loss": 3.9701, + "step": 3260 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003277272902963445, + "loss": 4.0429, + "step": 3261 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032762876526763355, + "loss": 3.9448, + "step": 3262 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032753022689363797, + "loss": 3.845, + "step": 3263 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003274316751912976, + "loss": 3.8646, + "step": 3264 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003273331101775546, + "loss": 3.9707, + "step": 3265 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003272345318693534, + "loss": 3.9452, + "step": 3266 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003271359402836408, + "loss": 3.8168, + "step": 3267 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032703733543736584, + "loss": 3.8669, + "step": 3268 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032693871734747984, + "loss": 3.8844, + "step": 3269 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032684008603093634, + "loss": 3.8348, + "step": 3270 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032674144150469133, + "loss": 3.8999, + "step": 3271 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003266427837857027, + "loss": 3.7566, + "step": 3272 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003265441128909311, + "loss": 3.9355, + "step": 3273 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003264454288373392, + "loss": 4.0392, + "step": 3274 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032634673164189175, + "loss": 3.9993, + "step": 3275 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003262480213215561, + "loss": 3.8321, + "step": 3276 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003261492978933016, + "loss": 3.8977, + "step": 3277 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003260505613741, + "loss": 4.0421, + "step": 3278 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032595181178092525, + "loss": 3.8665, + "step": 3279 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032585304913075353, + "loss": 4.0099, + "step": 3280 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003257542734405634, + "loss": 3.9756, + "step": 3281 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003256554847273353, + "loss": 3.8825, + "step": 3282 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032555668300805236, + "loss": 3.8997, + "step": 3283 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003254578682996997, + "loss": 3.7328, + "step": 3284 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032535904061926473, + "loss": 4.012, + "step": 3285 + }, + { + "epoch": 0.42, + "learning_rate": 0.000325260199983737, + "loss": 3.8504, + "step": 3286 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032516134641010836, + "loss": 3.9049, + "step": 3287 + }, + { + "epoch": 0.42, + "learning_rate": 0.000325062479915373, + "loss": 3.9704, + "step": 3288 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032496360051652713, + "loss": 3.96, + "step": 3289 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032486470823056925, + "loss": 3.8499, + "step": 3290 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003247658030745001, + "loss": 3.8733, + "step": 3291 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003246668850653227, + "loss": 3.9393, + "step": 3292 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032456795422004217, + "loss": 4.0212, + "step": 3293 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032446901055566574, + "loss": 4.0222, + "step": 3294 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003243700540892031, + "loss": 3.9097, + "step": 3295 + }, + { + "epoch": 0.42, + "learning_rate": 0.000324271084837666, + "loss": 3.9615, + "step": 3296 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003241721028180684, + "loss": 3.9373, + "step": 3297 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003240731080474265, + "loss": 3.7905, + "step": 3298 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003239741005427586, + "loss": 3.9622, + "step": 3299 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003238750803210851, + "loss": 3.9364, + "step": 3300 + }, + { + "epoch": 0.42, + "learning_rate": 0.000323776047399429, + "loss": 3.9669, + "step": 3301 + }, + { + "epoch": 0.42, + "learning_rate": 0.000323677001794815, + "loss": 3.9852, + "step": 3302 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032357794352427035, + "loss": 3.7565, + "step": 3303 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003234788726048241, + "loss": 3.8924, + "step": 3304 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032337978905350786, + "loss": 3.8227, + "step": 3305 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003232806928873552, + "loss": 3.7803, + "step": 3306 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032318158412340185, + "loss": 3.9227, + "step": 3307 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003230824627786858, + "loss": 4.0976, + "step": 3308 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032298332887024716, + "loss": 3.9138, + "step": 3309 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032288418241512826, + "loss": 3.8135, + "step": 3310 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032278502343037344, + "loss": 3.8815, + "step": 3311 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003226858519330292, + "loss": 3.947, + "step": 3312 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003225866679401444, + "loss": 3.7831, + "step": 3313 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003224874714687699, + "loss": 3.9035, + "step": 3314 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003223882625359587, + "loss": 3.8268, + "step": 3315 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032228904115876603, + "loss": 3.9269, + "step": 3316 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032218980735424906, + "loss": 4.0665, + "step": 3317 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032209056113946733, + "loss": 3.9882, + "step": 3318 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003219913025314824, + "loss": 3.8271, + "step": 3319 + }, + { + "epoch": 0.42, + "learning_rate": 0.00032189203154735804, + "loss": 3.9587, + "step": 3320 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003217927482041599, + "loss": 3.8633, + "step": 3321 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003216934525189561, + "loss": 3.8555, + "step": 3322 + }, + { + "epoch": 0.43, + "learning_rate": 0.00032159414450881667, + "loss": 3.9827, + "step": 3323 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003214948241908138, + "loss": 4.0743, + "step": 3324 + }, + { + "epoch": 0.43, + "learning_rate": 0.00032139549158202187, + "loss": 3.9801, + "step": 3325 + }, + { + "epoch": 0.43, + "learning_rate": 0.00032129614669951717, + "loss": 4.0643, + "step": 3326 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003211967895603784, + "loss": 4.0306, + "step": 3327 + }, + { + "epoch": 0.43, + "learning_rate": 0.00032109742018168605, + "loss": 3.77, + "step": 3328 + }, + { + "epoch": 0.43, + "learning_rate": 0.000320998038580523, + "loss": 3.9384, + "step": 3329 + }, + { + "epoch": 0.43, + "learning_rate": 0.000320898644773974, + "loss": 3.8111, + "step": 3330 + }, + { + "epoch": 0.43, + "learning_rate": 0.00032079923877912606, + "loss": 3.8257, + "step": 3331 + }, + { + "epoch": 0.43, + "learning_rate": 0.00032069982061306814, + "loss": 3.9878, + "step": 3332 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003206003902928914, + "loss": 3.8168, + "step": 3333 + }, + { + "epoch": 0.43, + "learning_rate": 0.00032050094783568915, + "loss": 3.7927, + "step": 3334 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003204014932585567, + "loss": 3.8328, + "step": 3335 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003203020265785914, + "loss": 3.9526, + "step": 3336 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003202025478128926, + "loss": 4.014, + "step": 3337 + }, + { + "epoch": 0.43, + "learning_rate": 0.00032010305697856214, + "loss": 3.9206, + "step": 3338 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003200035540927034, + "loss": 3.9946, + "step": 3339 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003199040391724221, + "loss": 3.8523, + "step": 3340 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031980451223482615, + "loss": 3.802, + "step": 3341 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031970497329702525, + "loss": 3.774, + "step": 3342 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031960542237613145, + "loss": 3.8838, + "step": 3343 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003195058594892585, + "loss": 3.8491, + "step": 3344 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031940628465352254, + "loss": 3.9698, + "step": 3345 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003193066978860416, + "loss": 3.8766, + "step": 3346 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031920709920393593, + "loss": 3.8088, + "step": 3347 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003191074886243275, + "loss": 3.8389, + "step": 3348 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003190078661643406, + "loss": 3.9683, + "step": 3349 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003189082318411016, + "loss": 3.9296, + "step": 3350 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031880858567173855, + "loss": 3.951, + "step": 3351 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031870892767338197, + "loss": 3.981, + "step": 3352 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003186092578631643, + "loss": 3.9066, + "step": 3353 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003185095762582197, + "loss": 3.944, + "step": 3354 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031840988287568474, + "loss": 3.9073, + "step": 3355 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031831017773269784, + "loss": 3.8921, + "step": 3356 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003182104608463995, + "loss": 3.952, + "step": 3357 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031811073223393205, + "loss": 3.9457, + "step": 3358 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003180109919124403, + "loss": 3.9191, + "step": 3359 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031791123989907043, + "loss": 3.7622, + "step": 3360 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031781147621097125, + "loss": 3.9875, + "step": 3361 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003177117008652931, + "loss": 3.9128, + "step": 3362 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031761191387918867, + "loss": 3.8698, + "step": 3363 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031751211526981235, + "loss": 3.9277, + "step": 3364 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031741230505432066, + "loss": 3.8688, + "step": 3365 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003173124832498724, + "loss": 3.8508, + "step": 3366 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003172126498736278, + "loss": 3.9989, + "step": 3367 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003171128049427496, + "loss": 3.8284, + "step": 3368 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031701294847440197, + "loss": 3.9126, + "step": 3369 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031691308048575185, + "loss": 3.9195, + "step": 3370 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003168132009939674, + "loss": 3.8806, + "step": 3371 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031671331001621906, + "loss": 3.902, + "step": 3372 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031661340756967943, + "loss": 4.0059, + "step": 3373 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003165134936715227, + "loss": 3.8925, + "step": 3374 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031641356833892544, + "loss": 3.9161, + "step": 3375 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003163136315890657, + "loss": 3.8743, + "step": 3376 + }, + { + "epoch": 0.43, + "learning_rate": 0.000316213683439124, + "loss": 3.8464, + "step": 3377 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003161137239062825, + "loss": 3.7761, + "step": 3378 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031601375300772537, + "loss": 3.9602, + "step": 3379 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003159137707606388, + "loss": 3.8898, + "step": 3380 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003158137771822109, + "loss": 3.9038, + "step": 3381 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031571377228963184, + "loss": 3.8881, + "step": 3382 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003156137561000933, + "loss": 3.9643, + "step": 3383 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031551372863078956, + "loss": 3.8924, + "step": 3384 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003154136898989164, + "loss": 3.9059, + "step": 3385 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003153136399216715, + "loss": 3.6819, + "step": 3386 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003152135787162548, + "loss": 3.7535, + "step": 3387 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003151135062998678, + "loss": 3.8978, + "step": 3388 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031501342268971434, + "loss": 3.9756, + "step": 3389 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031491332790299973, + "loss": 3.875, + "step": 3390 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003148132219569315, + "loss": 4.0174, + "step": 3391 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003147131048687191, + "loss": 3.8072, + "step": 3392 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003146129766555737, + "loss": 3.9221, + "step": 3393 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003145128373347086, + "loss": 3.8968, + "step": 3394 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003144126869233389, + "loss": 3.8189, + "step": 3395 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003143125254386815, + "loss": 3.9671, + "step": 3396 + }, + { + "epoch": 0.43, + "learning_rate": 0.00031421235289795534, + "loss": 4.0682, + "step": 3397 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003141121693183814, + "loss": 3.8855, + "step": 3398 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031401197471718235, + "loss": 3.7871, + "step": 3399 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003139117691115827, + "loss": 3.8495, + "step": 3400 + }, + { + "epoch": 0.44, + "learning_rate": 0.000313811552518809, + "loss": 3.8799, + "step": 3401 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003137113249560896, + "loss": 3.9321, + "step": 3402 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031361108644065497, + "loss": 3.8286, + "step": 3403 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031351083698973704, + "loss": 3.7745, + "step": 3404 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031341057662057, + "loss": 3.8702, + "step": 3405 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031331030535038963, + "loss": 3.917, + "step": 3406 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031321002319643394, + "loss": 3.8482, + "step": 3407 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031310973017594233, + "loss": 3.9637, + "step": 3408 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031300942630615666, + "loss": 3.9314, + "step": 3409 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031290911160432, + "loss": 4.0102, + "step": 3410 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003128087860876778, + "loss": 3.7536, + "step": 3411 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031270844977347706, + "loss": 3.9367, + "step": 3412 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031260810267896686, + "loss": 3.8819, + "step": 3413 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003125077448213981, + "loss": 3.9777, + "step": 3414 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003124073762180232, + "loss": 3.9792, + "step": 3415 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031230699688609693, + "loss": 3.7409, + "step": 3416 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003122066068428756, + "loss": 3.7771, + "step": 3417 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031210620610561736, + "loss": 3.8912, + "step": 3418 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003120057946915822, + "loss": 3.8809, + "step": 3419 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031190537261803215, + "loss": 3.9646, + "step": 3420 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003118049399022309, + "loss": 3.8362, + "step": 3421 + }, + { + "epoch": 0.44, + "learning_rate": 0.000311704496561444, + "loss": 3.8622, + "step": 3422 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003116040426129388, + "loss": 3.9063, + "step": 3423 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003115035780739845, + "loss": 3.9672, + "step": 3424 + }, + { + "epoch": 0.44, + "learning_rate": 0.000311403102961852, + "loss": 3.7866, + "step": 3425 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003113026172938144, + "loss": 3.8962, + "step": 3426 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003112021210871462, + "loss": 3.8887, + "step": 3427 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031110161435912384, + "loss": 3.9645, + "step": 3428 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003110010971270256, + "loss": 3.8836, + "step": 3429 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003109005694081315, + "loss": 3.6718, + "step": 3430 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003108000312197236, + "loss": 3.8916, + "step": 3431 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003106994825790855, + "loss": 3.966, + "step": 3432 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003105989235035026, + "loss": 3.8793, + "step": 3433 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003104983540102622, + "loss": 3.9721, + "step": 3434 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003103977741166535, + "loss": 3.7532, + "step": 3435 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003102971838399672, + "loss": 3.9219, + "step": 3436 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003101965831974959, + "loss": 3.7108, + "step": 3437 + }, + { + "epoch": 0.44, + "learning_rate": 0.00031009597220653417, + "loss": 3.8471, + "step": 3438 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030999535088437806, + "loss": 3.9094, + "step": 3439 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003098947192483256, + "loss": 3.853, + "step": 3440 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003097940773156766, + "loss": 3.9861, + "step": 3441 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030969342510373253, + "loss": 3.921, + "step": 3442 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030959276262979663, + "loss": 3.9384, + "step": 3443 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030949208991117396, + "loss": 3.8036, + "step": 3444 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030939140696517137, + "loss": 3.7994, + "step": 3445 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003092907138090974, + "loss": 3.7907, + "step": 3446 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003091900104602623, + "loss": 3.9078, + "step": 3447 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030908929693597827, + "loss": 3.8834, + "step": 3448 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030898857325355905, + "loss": 4.004, + "step": 3449 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030888783943032027, + "loss": 3.7814, + "step": 3450 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003087870954835792, + "loss": 3.9482, + "step": 3451 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030868634143065487, + "loss": 3.9065, + "step": 3452 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030858557728886813, + "loss": 3.8569, + "step": 3453 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003084848030755415, + "loss": 3.8084, + "step": 3454 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003083840188079991, + "loss": 3.946, + "step": 3455 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003082832245035671, + "loss": 4.0277, + "step": 3456 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003081824201795731, + "loss": 3.8029, + "step": 3457 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030808160585334653, + "loss": 3.9666, + "step": 3458 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003079807815422187, + "loss": 3.9132, + "step": 3459 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003078799472635223, + "loss": 3.8668, + "step": 3460 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030777910303459187, + "loss": 3.8517, + "step": 3461 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030767824887276385, + "loss": 3.7855, + "step": 3462 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030757738479537624, + "loss": 4.0877, + "step": 3463 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003074765108197687, + "loss": 3.8893, + "step": 3464 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030737562696328254, + "loss": 3.9555, + "step": 3465 + }, + { + "epoch": 0.44, + "learning_rate": 0.000307274733243261, + "loss": 3.8316, + "step": 3466 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003071738296770488, + "loss": 4.0676, + "step": 3467 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003070729162819925, + "loss": 3.9204, + "step": 3468 + }, + { + "epoch": 0.44, + "learning_rate": 0.00030697199307544014, + "loss": 3.8255, + "step": 3469 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003068710600747418, + "loss": 3.967, + "step": 3470 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003067701172972489, + "loss": 3.9322, + "step": 3471 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003066691647603147, + "loss": 3.8838, + "step": 3472 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003065682024812941, + "loss": 3.8632, + "step": 3473 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003064672304775438, + "loss": 3.9008, + "step": 3474 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003063662487664218, + "loss": 4.092, + "step": 3475 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003062652573652882, + "loss": 3.8169, + "step": 3476 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030616425629150467, + "loss": 3.9035, + "step": 3477 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030606324556243435, + "loss": 3.8105, + "step": 3478 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030596222519544216, + "loss": 3.862, + "step": 3479 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030586119520789465, + "loss": 3.9592, + "step": 3480 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030576015561716014, + "loss": 3.781, + "step": 3481 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030565910644060845, + "loss": 3.9666, + "step": 3482 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030555804769561107, + "loss": 3.7998, + "step": 3483 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030545697939954124, + "loss": 3.732, + "step": 3484 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030535590156977375, + "loss": 3.7606, + "step": 3485 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030525481422368503, + "loss": 3.9553, + "step": 3486 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030515371737865313, + "loss": 3.9375, + "step": 3487 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030505261105205785, + "loss": 4.0322, + "step": 3488 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030495149526128053, + "loss": 3.8904, + "step": 3489 + }, + { + "epoch": 0.45, + "learning_rate": 0.000304850370023704, + "loss": 3.7862, + "step": 3490 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003047492353567131, + "loss": 3.9188, + "step": 3491 + }, + { + "epoch": 0.45, + "learning_rate": 0.000304648091277694, + "loss": 3.7326, + "step": 3492 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003045469378040344, + "loss": 3.8899, + "step": 3493 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030444577495312373, + "loss": 3.7884, + "step": 3494 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003043446027423533, + "loss": 3.8272, + "step": 3495 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003042434211891156, + "loss": 3.9342, + "step": 3496 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003041422303108049, + "loss": 4.0903, + "step": 3497 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003040410301248172, + "loss": 3.8715, + "step": 3498 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030393982064854985, + "loss": 3.7642, + "step": 3499 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003038386018994021, + "loss": 3.9508, + "step": 3500 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030373737389477446, + "loss": 3.9771, + "step": 3501 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030363613665206936, + "loss": 3.9432, + "step": 3502 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003035348901886905, + "loss": 3.9097, + "step": 3503 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030343363452204334, + "loss": 3.8684, + "step": 3504 + }, + { + "epoch": 0.45, + "learning_rate": 0.000303332369669535, + "loss": 3.8515, + "step": 3505 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003032310956485741, + "loss": 3.9512, + "step": 3506 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003031298124765707, + "loss": 3.9425, + "step": 3507 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030302852017093654, + "loss": 4.1172, + "step": 3508 + }, + { + "epoch": 0.45, + "learning_rate": 0.000302927218749085, + "loss": 3.8481, + "step": 3509 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003028259082284311, + "loss": 3.8393, + "step": 3510 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030272458862639105, + "loss": 3.8341, + "step": 3511 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030262325996038305, + "loss": 3.7742, + "step": 3512 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030252192224782654, + "loss": 3.7798, + "step": 3513 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030242057550614276, + "loss": 3.9592, + "step": 3514 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003023192197527543, + "loss": 3.9109, + "step": 3515 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003022178550050855, + "loss": 3.8853, + "step": 3516 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030211648128056196, + "loss": 3.9841, + "step": 3517 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003020150985966111, + "loss": 3.9415, + "step": 3518 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030191370697066187, + "loss": 4.0515, + "step": 3519 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030181230642014456, + "loss": 3.8735, + "step": 3520 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003017108969624911, + "loss": 3.9503, + "step": 3521 + }, + { + "epoch": 0.45, + "learning_rate": 0.000301609478615135, + "loss": 3.838, + "step": 3522 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030150805139551114, + "loss": 3.8149, + "step": 3523 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003014066153210562, + "loss": 3.9181, + "step": 3524 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003013051704092081, + "loss": 3.8473, + "step": 3525 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003012037166774063, + "loss": 3.9921, + "step": 3526 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030110225414309213, + "loss": 3.9558, + "step": 3527 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030100078282370804, + "loss": 3.9037, + "step": 3528 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003008993027366981, + "loss": 3.7957, + "step": 3529 + }, + { + "epoch": 0.45, + "learning_rate": 0.000300797813899508, + "loss": 3.807, + "step": 3530 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030069631632958474, + "loss": 3.917, + "step": 3531 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030059481004437696, + "loss": 3.9154, + "step": 3532 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030049329506133483, + "loss": 3.827, + "step": 3533 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003003917713979099, + "loss": 4.0668, + "step": 3534 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003002902390715553, + "loss": 3.8593, + "step": 3535 + }, + { + "epoch": 0.45, + "learning_rate": 0.00030018869809972555, + "loss": 3.7762, + "step": 3536 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003000871484998767, + "loss": 3.8779, + "step": 3537 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029998559028946646, + "loss": 3.8859, + "step": 3538 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002998840234859538, + "loss": 3.8122, + "step": 3539 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029978244810679913, + "loss": 3.8833, + "step": 3540 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002996808641694644, + "loss": 3.9145, + "step": 3541 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029957927169141336, + "loss": 3.8225, + "step": 3542 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002994776706901107, + "loss": 3.8087, + "step": 3543 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002993760611830228, + "loss": 3.7555, + "step": 3544 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029927444318761755, + "loss": 3.8174, + "step": 3545 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002991728167213643, + "loss": 3.7857, + "step": 3546 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002990711818017338, + "loss": 3.8262, + "step": 3547 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029896953844619833, + "loss": 3.9531, + "step": 3548 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002988678866722315, + "loss": 3.8462, + "step": 3549 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002987662264973083, + "loss": 4.0309, + "step": 3550 + }, + { + "epoch": 0.45, + "learning_rate": 0.00029866455793890544, + "loss": 3.8597, + "step": 3551 + }, + { + "epoch": 0.45, + "learning_rate": 0.000298562881014501, + "loss": 3.9318, + "step": 3552 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002984611957415743, + "loss": 3.8157, + "step": 3553 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002983595021376062, + "loss": 3.7826, + "step": 3554 + }, + { + "epoch": 0.46, + "learning_rate": 0.000298257800220079, + "loss": 3.9172, + "step": 3555 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002981560900064766, + "loss": 3.9088, + "step": 3556 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029805437151428403, + "loss": 3.9207, + "step": 3557 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029795264476098786, + "loss": 3.8926, + "step": 3558 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029785090976407614, + "loss": 3.7556, + "step": 3559 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002977491665410383, + "loss": 3.9184, + "step": 3560 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002976474151093653, + "loss": 3.8866, + "step": 3561 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002975456554865491, + "loss": 3.8225, + "step": 3562 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029744388769008365, + "loss": 3.9229, + "step": 3563 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002973421117374637, + "loss": 3.7959, + "step": 3564 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029724032764618605, + "loss": 3.9507, + "step": 3565 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029713853543374846, + "loss": 4.0091, + "step": 3566 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002970367351176501, + "loss": 3.8621, + "step": 3567 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029693492671539165, + "loss": 3.8743, + "step": 3568 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002968331102444752, + "loss": 3.9108, + "step": 3569 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002967312857224042, + "loss": 3.8165, + "step": 3570 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002966294531666834, + "loss": 3.9474, + "step": 3571 + }, + { + "epoch": 0.46, + "learning_rate": 0.000296527612594819, + "loss": 4.0001, + "step": 3572 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002964257640243186, + "loss": 3.9462, + "step": 3573 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029632390747269115, + "loss": 3.8829, + "step": 3574 + }, + { + "epoch": 0.46, + "learning_rate": 0.000296222042957447, + "loss": 3.9109, + "step": 3575 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002961201704960978, + "loss": 3.9793, + "step": 3576 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029601829010615665, + "loss": 3.9817, + "step": 3577 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002959164018051379, + "loss": 3.841, + "step": 3578 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002958145056105575, + "loss": 3.8753, + "step": 3579 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029571260153993224, + "loss": 3.7746, + "step": 3580 + }, + { + "epoch": 0.46, + "learning_rate": 0.000295610689610781, + "loss": 3.8335, + "step": 3581 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029550876984062337, + "loss": 3.826, + "step": 3582 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029540684224698056, + "loss": 3.8324, + "step": 3583 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002953049068473753, + "loss": 3.8055, + "step": 3584 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002952029636593313, + "loss": 3.8624, + "step": 3585 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029510101270037377, + "loss": 3.896, + "step": 3586 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029499905398802927, + "loss": 3.9239, + "step": 3587 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002948970875398258, + "loss": 3.8973, + "step": 3588 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002947951133732925, + "loss": 3.8622, + "step": 3589 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029469313150595983, + "loss": 4.0062, + "step": 3590 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029459114195535977, + "loss": 3.9787, + "step": 3591 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029448914473902544, + "loss": 3.8963, + "step": 3592 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002943871398744914, + "loss": 3.9288, + "step": 3593 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029428512737929334, + "loss": 3.8418, + "step": 3594 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029418310727096864, + "loss": 3.8986, + "step": 3595 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029408107956705544, + "loss": 3.6463, + "step": 3596 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002939790442850937, + "loss": 3.8754, + "step": 3597 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002938770014426244, + "loss": 3.8199, + "step": 3598 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029377495105718993, + "loss": 4.0097, + "step": 3599 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029367289314633383, + "loss": 3.9161, + "step": 3600 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029357082772760103, + "loss": 3.8838, + "step": 3601 + }, + { + "epoch": 0.46, + "learning_rate": 0.000293468754818538, + "loss": 3.7499, + "step": 3602 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029336667443669197, + "loss": 3.8812, + "step": 3603 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002932645865996119, + "loss": 3.7838, + "step": 3604 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029316249132484775, + "loss": 4.0172, + "step": 3605 + }, + { + "epoch": 0.46, + "learning_rate": 0.000293060388629951, + "loss": 3.865, + "step": 3606 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002929582785324743, + "loss": 3.8885, + "step": 3607 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002928561610499714, + "loss": 3.8529, + "step": 3608 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002927540361999977, + "loss": 3.9452, + "step": 3609 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002926519040001094, + "loss": 3.8203, + "step": 3610 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029254976446786447, + "loss": 3.8743, + "step": 3611 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029244761762082166, + "loss": 3.8112, + "step": 3612 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029234546347654133, + "loss": 3.8232, + "step": 3613 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002922433020525849, + "loss": 3.9024, + "step": 3614 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002921411333665151, + "loss": 3.9951, + "step": 3615 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029203895743589597, + "loss": 3.9021, + "step": 3616 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029193677427829276, + "loss": 3.9071, + "step": 3617 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002918345839112717, + "loss": 3.778, + "step": 3618 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002917323863524008, + "loss": 3.886, + "step": 3619 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029163018161924885, + "loss": 3.914, + "step": 3620 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002915279697293862, + "loss": 3.8387, + "step": 3621 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002914257507003839, + "loss": 3.9938, + "step": 3622 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002913235245498149, + "loss": 3.9039, + "step": 3623 + }, + { + "epoch": 0.46, + "learning_rate": 0.000291221291295253, + "loss": 3.9079, + "step": 3624 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029111905095427325, + "loss": 3.7952, + "step": 3625 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029101680354445185, + "loss": 3.8688, + "step": 3626 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002909145490833666, + "loss": 3.815, + "step": 3627 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029081228758859583, + "loss": 3.8363, + "step": 3628 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029071001907771983, + "loss": 3.9334, + "step": 3629 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029060774356831946, + "loss": 3.813, + "step": 3630 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002905054610779774, + "loss": 3.8902, + "step": 3631 + }, + { + "epoch": 0.46, + "learning_rate": 0.00029040317162427686, + "loss": 3.8943, + "step": 3632 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002903008752248027, + "loss": 3.8898, + "step": 3633 + }, + { + "epoch": 0.47, + "learning_rate": 0.00029019857189714095, + "loss": 3.9013, + "step": 3634 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002900962616588787, + "loss": 3.8211, + "step": 3635 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028999394452760413, + "loss": 3.8178, + "step": 3636 + }, + { + "epoch": 0.47, + "learning_rate": 0.000289891620520907, + "loss": 3.7968, + "step": 3637 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002897892896563777, + "loss": 3.9137, + "step": 3638 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028968695195160824, + "loss": 4.0254, + "step": 3639 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002895846074241917, + "loss": 3.9046, + "step": 3640 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028948225609172224, + "loss": 3.8943, + "step": 3641 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002893798979717952, + "loss": 3.8885, + "step": 3642 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002892775330820072, + "loss": 3.8965, + "step": 3643 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028917516143995594, + "loss": 3.9384, + "step": 3644 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028907278306324025, + "loss": 3.8913, + "step": 3645 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002889703979694602, + "loss": 3.869, + "step": 3646 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002888680061762169, + "loss": 3.821, + "step": 3647 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028876560770111276, + "loss": 3.9797, + "step": 3648 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028866320256175126, + "loss": 3.8471, + "step": 3649 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028856079077573704, + "loss": 3.8398, + "step": 3650 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028845837236067583, + "loss": 3.7877, + "step": 3651 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028835594733417455, + "loss": 3.9525, + "step": 3652 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002882535157138413, + "loss": 3.7899, + "step": 3653 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002881510775172851, + "loss": 3.9862, + "step": 3654 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002880486327621166, + "loss": 4.0003, + "step": 3655 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028794618146594684, + "loss": 3.8901, + "step": 3656 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002878437236463887, + "loss": 3.8364, + "step": 3657 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028774125932105567, + "loss": 3.9687, + "step": 3658 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002876387885075627, + "loss": 3.9301, + "step": 3659 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028753631122352565, + "loss": 3.9692, + "step": 3660 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028743382748656145, + "loss": 3.8995, + "step": 3661 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028733133731428836, + "loss": 3.9209, + "step": 3662 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002872288407243256, + "loss": 3.9804, + "step": 3663 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028712633773429356, + "loss": 3.8816, + "step": 3664 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002870238283618136, + "loss": 3.9589, + "step": 3665 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002869213126245084, + "loss": 3.904, + "step": 3666 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002868187905400015, + "loss": 3.8315, + "step": 3667 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002867162621259177, + "loss": 3.7119, + "step": 3668 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028661372739988275, + "loss": 3.9151, + "step": 3669 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028651118637952356, + "loss": 3.9228, + "step": 3670 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028640863908246825, + "loss": 3.9611, + "step": 3671 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028630608552634567, + "loss": 3.9494, + "step": 3672 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002862035257287862, + "loss": 3.8096, + "step": 3673 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028610095970742097, + "loss": 3.8468, + "step": 3674 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028599838747988215, + "loss": 3.9155, + "step": 3675 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028589580906380324, + "loss": 3.855, + "step": 3676 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002857932244768186, + "loss": 4.0399, + "step": 3677 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002856906337365638, + "loss": 3.8823, + "step": 3678 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002855880368606752, + "loss": 3.8779, + "step": 3679 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002854854338667905, + "loss": 4.0015, + "step": 3680 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002853828247725484, + "loss": 3.8166, + "step": 3681 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002852802095955885, + "loss": 4.0089, + "step": 3682 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002851775883535515, + "loss": 3.9383, + "step": 3683 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002850749610640793, + "loss": 3.8613, + "step": 3684 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028497232774481467, + "loss": 4.0671, + "step": 3685 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002848696884134016, + "loss": 3.9327, + "step": 3686 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028476704308748465, + "loss": 3.8072, + "step": 3687 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028466439178471, + "loss": 3.7755, + "step": 3688 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028456173452272456, + "loss": 3.9838, + "step": 3689 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002844590713191762, + "loss": 3.886, + "step": 3690 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002843564021917141, + "loss": 3.8392, + "step": 3691 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028425372715798816, + "loss": 3.9524, + "step": 3692 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002841510462356494, + "loss": 3.8846, + "step": 3693 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002840483594423498, + "loss": 3.9569, + "step": 3694 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028394566679574256, + "loss": 4.0283, + "step": 3695 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002838429683134817, + "loss": 3.8745, + "step": 3696 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028374026401322224, + "loss": 3.9387, + "step": 3697 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028363755391262014, + "loss": 3.8054, + "step": 3698 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028353483802933267, + "loss": 3.8814, + "step": 3699 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002834321163810178, + "loss": 3.7493, + "step": 3700 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002833293889853345, + "loss": 3.7925, + "step": 3701 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002832266558599428, + "loss": 3.7523, + "step": 3702 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002831239170225038, + "loss": 3.7106, + "step": 3703 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002830211724906794, + "loss": 4.0267, + "step": 3704 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002829184222821327, + "loss": 3.8998, + "step": 3705 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028281566641452763, + "loss": 3.9235, + "step": 3706 + }, + { + "epoch": 0.47, + "learning_rate": 0.000282712904905529, + "loss": 3.8428, + "step": 3707 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028261013777280275, + "loss": 4.1082, + "step": 3708 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028250736503401586, + "loss": 3.8457, + "step": 3709 + }, + { + "epoch": 0.47, + "learning_rate": 0.000282404586706836, + "loss": 3.7184, + "step": 3710 + }, + { + "epoch": 0.48, + "learning_rate": 0.000282301802808932, + "loss": 3.99, + "step": 3711 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028219901335797365, + "loss": 3.9297, + "step": 3712 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028209621837163167, + "loss": 4.0245, + "step": 3713 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028199341786757764, + "loss": 3.789, + "step": 3714 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002818906118634841, + "loss": 4.0504, + "step": 3715 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002817878003770247, + "loss": 3.9803, + "step": 3716 + }, + { + "epoch": 0.48, + "learning_rate": 0.000281684983425874, + "loss": 3.8087, + "step": 3717 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002815821610277073, + "loss": 3.853, + "step": 3718 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028147933320020084, + "loss": 3.8688, + "step": 3719 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002813764999610322, + "loss": 3.8644, + "step": 3720 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002812736613278794, + "loss": 3.8048, + "step": 3721 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002811708173184217, + "loss": 3.913, + "step": 3722 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028106796795033916, + "loss": 3.8109, + "step": 3723 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028096511324131275, + "loss": 3.8916, + "step": 3724 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002808622532090244, + "loss": 4.0019, + "step": 3725 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028075938787115685, + "loss": 3.8573, + "step": 3726 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028065651724539404, + "loss": 3.8284, + "step": 3727 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028055364134942047, + "loss": 3.8811, + "step": 3728 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028045076020092176, + "loss": 3.8873, + "step": 3729 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028034787381758433, + "loss": 3.9289, + "step": 3730 + }, + { + "epoch": 0.48, + "learning_rate": 0.00028024498221709555, + "loss": 3.7521, + "step": 3731 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002801420854171438, + "loss": 3.9046, + "step": 3732 + }, + { + "epoch": 0.48, + "learning_rate": 0.000280039183435418, + "loss": 4.0387, + "step": 3733 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002799362762896084, + "loss": 3.9674, + "step": 3734 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027983336399740584, + "loss": 3.9258, + "step": 3735 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027973044657650213, + "loss": 3.8073, + "step": 3736 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027962752404459, + "loss": 3.9689, + "step": 3737 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002795245964193631, + "loss": 3.9288, + "step": 3738 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027942166371851566, + "loss": 3.8571, + "step": 3739 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002793187259597432, + "loss": 3.8321, + "step": 3740 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002792157831607418, + "loss": 3.8771, + "step": 3741 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027911283533920865, + "loss": 3.9656, + "step": 3742 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002790098825128416, + "loss": 3.8734, + "step": 3743 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002789069246993394, + "loss": 3.7302, + "step": 3744 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002788039619164017, + "loss": 3.9218, + "step": 3745 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002787009941817291, + "loss": 4.0322, + "step": 3746 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002785980215130228, + "loss": 3.9068, + "step": 3747 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002784950439279852, + "loss": 3.9843, + "step": 3748 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027839206144431906, + "loss": 3.8436, + "step": 3749 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002782890740797285, + "loss": 3.9711, + "step": 3750 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002781860818519181, + "loss": 3.9936, + "step": 3751 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002780830847785935, + "loss": 3.9212, + "step": 3752 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027798008287746096, + "loss": 3.8818, + "step": 3753 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002778770761662279, + "loss": 3.8371, + "step": 3754 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002777740646626022, + "loss": 3.9482, + "step": 3755 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027767104838429283, + "loss": 4.0151, + "step": 3756 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027756802734900957, + "loss": 3.9286, + "step": 3757 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002774650015744626, + "loss": 3.8742, + "step": 3758 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027736197107836356, + "loss": 3.9109, + "step": 3759 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027725893587842457, + "loss": 3.9441, + "step": 3760 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027715589599235834, + "loss": 4.0124, + "step": 3761 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002770528514378788, + "loss": 3.9331, + "step": 3762 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027694980223270054, + "loss": 3.9064, + "step": 3763 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027684674839453886, + "loss": 3.9754, + "step": 3764 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002767436899411098, + "loss": 3.9156, + "step": 3765 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027664062689013055, + "loss": 3.8496, + "step": 3766 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002765375592593186, + "loss": 3.9098, + "step": 3767 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002764344870663925, + "loss": 3.9027, + "step": 3768 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027633141032907166, + "loss": 3.9622, + "step": 3769 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027622832906507624, + "loss": 3.8036, + "step": 3770 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027612524329212685, + "loss": 3.8032, + "step": 3771 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002760221530279453, + "loss": 3.9652, + "step": 3772 + }, + { + "epoch": 0.48, + "learning_rate": 0.000275919058290254, + "loss": 4.0472, + "step": 3773 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027581595909677607, + "loss": 3.7368, + "step": 3774 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027571285546523555, + "loss": 3.7725, + "step": 3775 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027560974741335696, + "loss": 3.8756, + "step": 3776 + }, + { + "epoch": 0.48, + "learning_rate": 0.000275506634958866, + "loss": 3.9271, + "step": 3777 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002754035181194888, + "loss": 3.9885, + "step": 3778 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027530039691295224, + "loss": 3.8075, + "step": 3779 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002751972713569842, + "loss": 3.8157, + "step": 3780 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027509414146931313, + "loss": 3.9534, + "step": 3781 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027499100726766817, + "loss": 3.7694, + "step": 3782 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002748878687697794, + "loss": 3.8344, + "step": 3783 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002747847259933774, + "loss": 3.976, + "step": 3784 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002746815789561938, + "loss": 4.0054, + "step": 3785 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002745784276759605, + "loss": 3.8959, + "step": 3786 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027447527217041055, + "loss": 3.9489, + "step": 3787 + }, + { + "epoch": 0.48, + "learning_rate": 0.00027437211245727764, + "loss": 3.8571, + "step": 3788 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002742689485542961, + "loss": 3.9471, + "step": 3789 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002741657804792008, + "loss": 3.7113, + "step": 3790 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002740626082497277, + "loss": 3.7668, + "step": 3791 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027395943188361345, + "loss": 4.0002, + "step": 3792 + }, + { + "epoch": 0.49, + "learning_rate": 0.000273856251398595, + "loss": 3.9482, + "step": 3793 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002737530668124104, + "loss": 3.9292, + "step": 3794 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002736498781427981, + "loss": 3.7309, + "step": 3795 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027354668540749774, + "loss": 3.8523, + "step": 3796 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002734434886242491, + "loss": 3.8794, + "step": 3797 + }, + { + "epoch": 0.49, + "learning_rate": 0.000273340287810793, + "loss": 3.8836, + "step": 3798 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027323708298487094, + "loss": 3.8851, + "step": 3799 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002731338741642248, + "loss": 3.8801, + "step": 3800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002730306613665975, + "loss": 3.9352, + "step": 3801 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027292744460973256, + "loss": 3.8166, + "step": 3802 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002728242239113741, + "loss": 3.917, + "step": 3803 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027272099928926695, + "loss": 3.8652, + "step": 3804 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027261777076115657, + "loss": 3.8644, + "step": 3805 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002725145383447893, + "loss": 3.8971, + "step": 3806 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002724113020579118, + "loss": 3.8099, + "step": 3807 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027230806191827164, + "loss": 3.8713, + "step": 3808 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027220481794361704, + "loss": 3.8969, + "step": 3809 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027210157015169687, + "loss": 3.7344, + "step": 3810 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027199831856026044, + "loss": 3.7889, + "step": 3811 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027189506318705815, + "loss": 3.8807, + "step": 3812 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002717918040498406, + "loss": 3.8619, + "step": 3813 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002716885411663593, + "loss": 4.0551, + "step": 3814 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002715852745543663, + "loss": 3.8855, + "step": 3815 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002714820042316145, + "loss": 3.7957, + "step": 3816 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002713787302158571, + "loss": 3.7331, + "step": 3817 + }, + { + "epoch": 0.49, + "learning_rate": 0.000271275452524848, + "loss": 3.778, + "step": 3818 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027117217117634206, + "loss": 3.8667, + "step": 3819 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002710688861880945, + "loss": 3.9889, + "step": 3820 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002709655975778611, + "loss": 3.9807, + "step": 3821 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002708623053633984, + "loss": 3.9075, + "step": 3822 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027075900956246353, + "loss": 3.8001, + "step": 3823 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002706557101928143, + "loss": 3.8531, + "step": 3824 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002705524072722091, + "loss": 3.9541, + "step": 3825 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027044910081840665, + "loss": 3.8646, + "step": 3826 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002703457908491668, + "loss": 3.8777, + "step": 3827 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002702424773822497, + "loss": 3.8639, + "step": 3828 + }, + { + "epoch": 0.49, + "learning_rate": 0.00027013916043541595, + "loss": 3.8053, + "step": 3829 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002700358400264271, + "loss": 3.8983, + "step": 3830 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026993251617304513, + "loss": 3.8152, + "step": 3831 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002698291888930325, + "loss": 3.877, + "step": 3832 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026972585820415234, + "loss": 3.9378, + "step": 3833 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002696225241241686, + "loss": 3.8495, + "step": 3834 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026951918667084547, + "loss": 3.7633, + "step": 3835 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026941584586194777, + "loss": 3.9092, + "step": 3836 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026931250171524114, + "loss": 4.0166, + "step": 3837 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026920915424849155, + "loss": 3.9132, + "step": 3838 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026910580347946575, + "loss": 3.9106, + "step": 3839 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026900244942593067, + "loss": 3.7292, + "step": 3840 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026889909210565434, + "loss": 3.9718, + "step": 3841 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026879573153640497, + "loss": 3.9736, + "step": 3842 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026869236773595145, + "loss": 3.8777, + "step": 3843 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002685890007220632, + "loss": 3.7759, + "step": 3844 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002684856305125103, + "loss": 3.8737, + "step": 3845 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002683822571250631, + "loss": 3.9161, + "step": 3846 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026827888057749275, + "loss": 3.8849, + "step": 3847 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002681755008875711, + "loss": 3.8002, + "step": 3848 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002680721180730701, + "loss": 3.8285, + "step": 3849 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002679687321517624, + "loss": 3.9473, + "step": 3850 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026786534314142136, + "loss": 3.9442, + "step": 3851 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002677619510598208, + "loss": 3.9126, + "step": 3852 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002676585559247349, + "loss": 3.8254, + "step": 3853 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002675551577539384, + "loss": 3.7943, + "step": 3854 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002674517565652069, + "loss": 3.6568, + "step": 3855 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026734835237631605, + "loss": 4.0065, + "step": 3856 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002672449452050424, + "loss": 3.874, + "step": 3857 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002671415350691627, + "loss": 3.8069, + "step": 3858 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002670381219864544, + "loss": 3.8341, + "step": 3859 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002669347059746954, + "loss": 3.8664, + "step": 3860 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026683128705166416, + "loss": 3.8016, + "step": 3861 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026672786523513947, + "loss": 3.7237, + "step": 3862 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026662444054290085, + "loss": 3.8703, + "step": 3863 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002665210129927282, + "loss": 3.9013, + "step": 3864 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026641758260240184, + "loss": 4.0097, + "step": 3865 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002663141493897028, + "loss": 3.965, + "step": 3866 + }, + { + "epoch": 0.49, + "learning_rate": 0.00026621071337241226, + "loss": 3.803, + "step": 3867 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026610727456831217, + "loss": 3.7547, + "step": 3868 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002660038329951849, + "loss": 3.8357, + "step": 3869 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026590038867081316, + "loss": 3.8051, + "step": 3870 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002657969416129803, + "loss": 3.7578, + "step": 3871 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026569349183947, + "loss": 3.8595, + "step": 3872 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002655900393680666, + "loss": 3.8952, + "step": 3873 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026548658421655464, + "loss": 3.8566, + "step": 3874 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026538312640271935, + "loss": 3.9487, + "step": 3875 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002652796659443462, + "loss": 3.9683, + "step": 3876 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026517620285922144, + "loss": 3.9167, + "step": 3877 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026507273716513144, + "loss": 3.8778, + "step": 3878 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026496926887986324, + "loss": 3.9178, + "step": 3879 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026486579802120406, + "loss": 3.8602, + "step": 3880 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026476232460694195, + "loss": 3.8056, + "step": 3881 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002646588486548651, + "loss": 3.8631, + "step": 3882 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002645553701827621, + "loss": 3.9545, + "step": 3883 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002644518892084224, + "loss": 3.9123, + "step": 3884 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002643484057496353, + "loss": 3.7795, + "step": 3885 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026424491982419095, + "loss": 3.9306, + "step": 3886 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026414143144987975, + "loss": 4.0308, + "step": 3887 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002640379406444925, + "loss": 3.8383, + "step": 3888 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002639344474258206, + "loss": 3.7695, + "step": 3889 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026383095181165553, + "loss": 3.8537, + "step": 3890 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002637274538197896, + "loss": 3.954, + "step": 3891 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002636239534680151, + "loss": 3.865, + "step": 3892 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026352045077412516, + "loss": 3.9662, + "step": 3893 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002634169457559129, + "loss": 3.9601, + "step": 3894 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002633134384311722, + "loss": 3.8687, + "step": 3895 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002632099288176971, + "loss": 3.9698, + "step": 3896 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026310641693328205, + "loss": 3.9158, + "step": 3897 + }, + { + "epoch": 0.5, + "learning_rate": 0.000263002902795722, + "loss": 3.8223, + "step": 3898 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002628993864228122, + "loss": 3.8544, + "step": 3899 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002627958678323484, + "loss": 3.7313, + "step": 3900 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002626923470421264, + "loss": 4.0006, + "step": 3901 + }, + { + "epoch": 0.5, + "learning_rate": 0.000262588824069943, + "loss": 3.897, + "step": 3902 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026248529893359475, + "loss": 3.8559, + "step": 3903 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002623817716508788, + "loss": 3.905, + "step": 3904 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026227824223959287, + "loss": 3.8216, + "step": 3905 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026217471071753463, + "loss": 3.7363, + "step": 3906 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026207117710250254, + "loss": 3.8011, + "step": 3907 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002619676414122952, + "loss": 3.9157, + "step": 3908 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002618641036647115, + "loss": 3.903, + "step": 3909 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002617605638775509, + "loss": 3.808, + "step": 3910 + }, + { + "epoch": 0.5, + "learning_rate": 0.000261657022068613, + "loss": 3.9086, + "step": 3911 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026155347825569783, + "loss": 3.7958, + "step": 3912 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002614499324566059, + "loss": 3.9009, + "step": 3913 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002613463846891377, + "loss": 3.9075, + "step": 3914 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026124283497109445, + "loss": 4.0372, + "step": 3915 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026113928332027763, + "loss": 3.7627, + "step": 3916 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026103572975448884, + "loss": 3.7272, + "step": 3917 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002609321742915302, + "loss": 3.871, + "step": 3918 + }, + { + "epoch": 0.5, + "learning_rate": 0.000260828616949204, + "loss": 3.8518, + "step": 3919 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026072505774531304, + "loss": 3.7879, + "step": 3920 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002606214966976603, + "loss": 3.8682, + "step": 3921 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026051793382404916, + "loss": 3.9432, + "step": 3922 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002604143691422833, + "loss": 3.8048, + "step": 3923 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026031080267016664, + "loss": 3.8793, + "step": 3924 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002602072344255034, + "loss": 3.8867, + "step": 3925 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002601036644260982, + "loss": 3.8264, + "step": 3926 + }, + { + "epoch": 0.5, + "learning_rate": 0.00026000009268975613, + "loss": 3.9244, + "step": 3927 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002598965192342821, + "loss": 3.7008, + "step": 3928 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002597929440774816, + "loss": 4.0193, + "step": 3929 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002596893672371605, + "loss": 3.8065, + "step": 3930 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025958578873112496, + "loss": 4.0149, + "step": 3931 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002594822085771812, + "loss": 3.8226, + "step": 3932 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025937862679313573, + "loss": 3.9149, + "step": 3933 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025927504339679565, + "loss": 3.7425, + "step": 3934 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002591714584059681, + "loss": 3.8534, + "step": 3935 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002590678718384605, + "loss": 3.9397, + "step": 3936 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002589642837120807, + "loss": 3.8376, + "step": 3937 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025886069404463646, + "loss": 3.9541, + "step": 3938 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025875710285393634, + "loss": 3.8778, + "step": 3939 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002586535101577886, + "loss": 3.9374, + "step": 3940 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025854991597400217, + "loss": 3.8088, + "step": 3941 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002584463203203861, + "loss": 3.8829, + "step": 3942 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025834272321474964, + "loss": 3.8441, + "step": 3943 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002582391246749023, + "loss": 3.8763, + "step": 3944 + }, + { + "epoch": 0.5, + "learning_rate": 0.000258135524718654, + "loss": 3.7413, + "step": 3945 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025803192336381465, + "loss": 3.7678, + "step": 3946 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002579283206281945, + "loss": 3.9555, + "step": 3947 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002578247165296041, + "loss": 3.8918, + "step": 3948 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002577211110858543, + "loss": 3.9629, + "step": 3949 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025761750431475605, + "loss": 3.7664, + "step": 3950 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002575138962341204, + "loss": 3.8763, + "step": 3951 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025741028686175887, + "loss": 3.8576, + "step": 3952 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002573066762154832, + "loss": 3.9443, + "step": 3953 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002572030643131051, + "loss": 3.8292, + "step": 3954 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025709945117243676, + "loss": 3.7995, + "step": 3955 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002569958368112905, + "loss": 3.8608, + "step": 3956 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025689222124747877, + "loss": 3.8778, + "step": 3957 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025678860449881427, + "loss": 3.8299, + "step": 3958 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025668498658311, + "loss": 3.94, + "step": 3959 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025658136751817906, + "loss": 3.939, + "step": 3960 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002564777473218347, + "loss": 3.7435, + "step": 3961 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002563741260118904, + "loss": 3.8985, + "step": 3962 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002562705036061601, + "loss": 4.0283, + "step": 3963 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002561668801224575, + "loss": 4.1025, + "step": 3964 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025606325557859665, + "loss": 3.8529, + "step": 3965 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025595962999239185, + "loss": 3.8781, + "step": 3966 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025585600338165764, + "loss": 4.072, + "step": 3967 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025575237576420864, + "loss": 3.9647, + "step": 3968 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025564874715785943, + "loss": 3.726, + "step": 3969 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025554511758042514, + "loss": 3.5921, + "step": 3970 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025544148704972084, + "loss": 3.7737, + "step": 3971 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025533785558356175, + "loss": 3.774, + "step": 3972 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025523422319976354, + "loss": 3.9024, + "step": 3973 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025513058991614166, + "loss": 3.9558, + "step": 3974 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002550269557505118, + "loss": 3.791, + "step": 3975 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025492332072069, + "loss": 3.8784, + "step": 3976 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025481968484449236, + "loss": 3.7797, + "step": 3977 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025471604813973503, + "loss": 3.8705, + "step": 3978 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002546124106242343, + "loss": 3.9638, + "step": 3979 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025450877231580665, + "loss": 3.9685, + "step": 3980 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002544051332322689, + "loss": 3.9009, + "step": 3981 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002543014933914376, + "loss": 4.0275, + "step": 3982 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002541978528111297, + "loss": 3.8452, + "step": 3983 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025409421150916235, + "loss": 3.7343, + "step": 3984 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002539905695033524, + "loss": 3.806, + "step": 3985 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002538869268115175, + "loss": 3.8528, + "step": 3986 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002537832834514747, + "loss": 3.847, + "step": 3987 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002536796394410416, + "loss": 3.8535, + "step": 3988 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025357599479803586, + "loss": 3.7803, + "step": 3989 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025347234954027506, + "loss": 3.7724, + "step": 3990 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002533687036855772, + "loss": 3.9515, + "step": 3991 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002532650572517602, + "loss": 3.7699, + "step": 3992 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025316141025664193, + "loss": 3.7605, + "step": 3993 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002530577627180405, + "loss": 3.9212, + "step": 3994 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002529541146537743, + "loss": 3.8154, + "step": 3995 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025285046608166165, + "loss": 3.9191, + "step": 3996 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002527468170195207, + "loss": 3.8356, + "step": 3997 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002526431674851701, + "loss": 3.8742, + "step": 3998 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002525395174964283, + "loss": 4.0652, + "step": 3999 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002524358670711141, + "loss": 4.0676, + "step": 4000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025233221622704606, + "loss": 3.8342, + "step": 4001 + }, + { + "epoch": 0.51, + "learning_rate": 0.000252228564982043, + "loss": 3.8492, + "step": 4002 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002521249133539238, + "loss": 3.8841, + "step": 4003 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002520212613605074, + "loss": 3.7939, + "step": 4004 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002519176090196127, + "loss": 3.8743, + "step": 4005 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002518139563490588, + "loss": 3.8197, + "step": 4006 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002517103033666648, + "loss": 3.8451, + "step": 4007 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002516066500902497, + "loss": 3.6909, + "step": 4008 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002515029965376329, + "loss": 3.6843, + "step": 4009 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002513993427266336, + "loss": 3.8982, + "step": 4010 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025129568867507103, + "loss": 3.9074, + "step": 4011 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025119203440076453, + "loss": 3.8338, + "step": 4012 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025108837992153353, + "loss": 4.0045, + "step": 4013 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002509847252551974, + "loss": 3.8938, + "step": 4014 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025088107041957566, + "loss": 3.8918, + "step": 4015 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002507774154324876, + "loss": 3.8916, + "step": 4016 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002506737603117528, + "loss": 3.8975, + "step": 4017 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002505701050751909, + "loss": 3.7691, + "step": 4018 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025046644974062123, + "loss": 3.7655, + "step": 4019 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002503627943258635, + "loss": 3.7854, + "step": 4020 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025025913884873727, + "loss": 3.9247, + "step": 4021 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002501554833270619, + "loss": 3.9576, + "step": 4022 + }, + { + "epoch": 0.51, + "learning_rate": 0.00025005182777865725, + "loss": 3.8002, + "step": 4023 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024994817222134276, + "loss": 3.7872, + "step": 4024 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002498445166729381, + "loss": 3.8094, + "step": 4025 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002497408611512628, + "loss": 3.8906, + "step": 4026 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002496372056741365, + "loss": 4.0508, + "step": 4027 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002495335502593788, + "loss": 3.8526, + "step": 4028 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024942989492480913, + "loss": 3.7954, + "step": 4029 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024932623968824724, + "loss": 3.7929, + "step": 4030 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024922258456751246, + "loss": 3.8373, + "step": 4031 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002491189295804244, + "loss": 3.926, + "step": 4032 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002490152747448026, + "loss": 3.8786, + "step": 4033 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002489116200784664, + "loss": 3.8569, + "step": 4034 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002488079655992355, + "loss": 3.9321, + "step": 4035 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024870431132492904, + "loss": 3.8958, + "step": 4036 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024860065727336646, + "loss": 3.9823, + "step": 4037 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024849700346236714, + "loss": 3.729, + "step": 4038 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002483933499097504, + "loss": 3.8956, + "step": 4039 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024828969663333533, + "loss": 3.815, + "step": 4040 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002481860436509413, + "loss": 3.9463, + "step": 4041 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024808239098038744, + "loss": 3.8898, + "step": 4042 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024797873863949266, + "loss": 3.8998, + "step": 4043 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002478750866460762, + "loss": 3.8183, + "step": 4044 + }, + { + "epoch": 0.52, + "learning_rate": 0.000247771435017957, + "loss": 3.8803, + "step": 4045 + }, + { + "epoch": 0.52, + "learning_rate": 0.000247667783772954, + "loss": 3.8975, + "step": 4046 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002475641329288859, + "loss": 3.8464, + "step": 4047 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024746048250357173, + "loss": 3.9169, + "step": 4048 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024735683251483003, + "loss": 3.9524, + "step": 4049 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024725318298047936, + "loss": 3.7892, + "step": 4050 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024714953391833847, + "loss": 3.8135, + "step": 4051 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024704588534622563, + "loss": 3.8706, + "step": 4052 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002469422372819595, + "loss": 3.9537, + "step": 4053 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024683858974335814, + "loss": 3.8856, + "step": 4054 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024673494274823985, + "loss": 3.9073, + "step": 4055 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002466312963144228, + "loss": 3.9833, + "step": 4056 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024652765045972495, + "loss": 3.911, + "step": 4057 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002464240052019642, + "loss": 3.9005, + "step": 4058 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002463203605589585, + "loss": 3.7541, + "step": 4059 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024621671654852543, + "loss": 3.7981, + "step": 4060 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024611307318848257, + "loss": 4.0581, + "step": 4061 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024600943049664755, + "loss": 3.7202, + "step": 4062 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002459057884908377, + "loss": 3.8895, + "step": 4063 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002458021471888703, + "loss": 3.879, + "step": 4064 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002456985066085624, + "loss": 3.9059, + "step": 4065 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002455948667677312, + "loss": 3.8704, + "step": 4066 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024549122768419336, + "loss": 3.9528, + "step": 4067 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024538758937576576, + "loss": 3.7614, + "step": 4068 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002452839518602651, + "loss": 3.7882, + "step": 4069 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002451803151555076, + "loss": 3.8621, + "step": 4070 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024507667927930993, + "loss": 3.7456, + "step": 4071 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002449730442494882, + "loss": 3.8629, + "step": 4072 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002448694100838584, + "loss": 3.9288, + "step": 4073 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002447657768002365, + "loss": 3.9079, + "step": 4074 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024466214441643826, + "loss": 3.9371, + "step": 4075 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024455851295027923, + "loss": 3.9416, + "step": 4076 + }, + { + "epoch": 0.52, + "learning_rate": 0.000244454882419575, + "loss": 3.7874, + "step": 4077 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002443512528421407, + "loss": 3.923, + "step": 4078 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002442476242357915, + "loss": 3.8138, + "step": 4079 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002441439966183423, + "loss": 3.7962, + "step": 4080 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002440403700076081, + "loss": 3.9213, + "step": 4081 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002439367444214034, + "loss": 3.9243, + "step": 4082 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024383311987754254, + "loss": 3.8565, + "step": 4083 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024372949639383992, + "loss": 3.9012, + "step": 4084 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024362587398810959, + "loss": 3.908, + "step": 4085 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024352225267816537, + "loss": 3.8208, + "step": 4086 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024341863248182103, + "loss": 3.7851, + "step": 4087 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024331501341689007, + "loss": 3.8561, + "step": 4088 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024321139550118572, + "loss": 3.8182, + "step": 4089 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024310777875252127, + "loss": 3.6873, + "step": 4090 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002430041631887095, + "loss": 3.7627, + "step": 4091 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024290054882756325, + "loss": 3.9866, + "step": 4092 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024279693568689495, + "loss": 4.0238, + "step": 4093 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024269332378451687, + "loss": 3.9574, + "step": 4094 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024258971313824115, + "loss": 3.8509, + "step": 4095 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024248610376587971, + "loss": 3.828, + "step": 4096 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024238249568524404, + "loss": 3.8622, + "step": 4097 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024227888891414562, + "loss": 3.7275, + "step": 4098 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024217528347039583, + "loss": 3.8959, + "step": 4099 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002420716793718055, + "loss": 3.8713, + "step": 4100 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002419680766361854, + "loss": 3.8139, + "step": 4101 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024186447528134605, + "loss": 3.9847, + "step": 4102 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002417608753250977, + "loss": 3.9814, + "step": 4103 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002416572767852504, + "loss": 3.9618, + "step": 4104 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024155367967961395, + "loss": 3.8364, + "step": 4105 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002414500840259979, + "loss": 3.8499, + "step": 4106 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002413464898422114, + "loss": 3.7052, + "step": 4107 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024124289714606368, + "loss": 3.8076, + "step": 4108 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024113930595536353, + "loss": 3.8643, + "step": 4109 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024103571628791937, + "loss": 3.9234, + "step": 4110 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024093212816153953, + "loss": 4.0114, + "step": 4111 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024082854159403192, + "loss": 3.9365, + "step": 4112 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024072495660320439, + "loss": 3.8984, + "step": 4113 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024062137320686436, + "loss": 3.9259, + "step": 4114 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024051779142281892, + "loss": 3.8681, + "step": 4115 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024041421126887514, + "loss": 3.8117, + "step": 4116 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024031063276283945, + "loss": 3.8287, + "step": 4117 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024020705592251842, + "loss": 3.8458, + "step": 4118 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024010348076571798, + "loss": 3.8161, + "step": 4119 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023999990731024396, + "loss": 3.8489, + "step": 4120 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002398963355739018, + "loss": 3.7949, + "step": 4121 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023979276557449663, + "loss": 3.7581, + "step": 4122 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023968919732983345, + "loss": 3.7977, + "step": 4123 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002395856308577168, + "loss": 3.7108, + "step": 4124 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023948206617595088, + "loss": 3.6279, + "step": 4125 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023937850330233966, + "loss": 3.8979, + "step": 4126 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023927494225468694, + "loss": 3.9152, + "step": 4127 + }, + { + "epoch": 0.53, + "learning_rate": 0.000239171383050796, + "loss": 3.9118, + "step": 4128 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002390678257084698, + "loss": 3.8448, + "step": 4129 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023896427024551115, + "loss": 3.8967, + "step": 4130 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002388607166797224, + "loss": 3.8906, + "step": 4131 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023875716502890556, + "loss": 3.9843, + "step": 4132 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023865361531086234, + "loss": 3.9774, + "step": 4133 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023855006754339424, + "loss": 3.8641, + "step": 4134 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023844652174430218, + "loss": 4.0305, + "step": 4135 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023834297793138708, + "loss": 3.9246, + "step": 4136 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023823943612244914, + "loss": 3.9234, + "step": 4137 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023813589633528854, + "loss": 3.8442, + "step": 4138 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023803235858770489, + "loss": 3.7697, + "step": 4139 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023792882289749747, + "loss": 3.8613, + "step": 4140 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002378252892824654, + "loss": 3.7364, + "step": 4141 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023772175776040727, + "loss": 3.9799, + "step": 4142 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023761822834912126, + "loss": 3.6548, + "step": 4143 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023751470106640526, + "loss": 3.7431, + "step": 4144 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023741117593005702, + "loss": 3.9118, + "step": 4145 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023730765295787356, + "loss": 3.8423, + "step": 4146 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023720413216765166, + "loss": 3.9033, + "step": 4147 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023710061357718783, + "loss": 3.9378, + "step": 4148 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023699709720427807, + "loss": 3.8092, + "step": 4149 + }, + { + "epoch": 0.53, + "learning_rate": 0.000236893583066718, + "loss": 3.7799, + "step": 4150 + }, + { + "epoch": 0.53, + "learning_rate": 0.000236790071182303, + "loss": 4.0035, + "step": 4151 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023668656156882787, + "loss": 4.0038, + "step": 4152 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023658305424408718, + "loss": 3.8698, + "step": 4153 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002364795492258749, + "loss": 3.7773, + "step": 4154 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002363760465319849, + "loss": 3.9057, + "step": 4155 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023627254618021048, + "loss": 3.8008, + "step": 4156 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023616904818834453, + "loss": 3.8535, + "step": 4157 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002360655525741795, + "loss": 3.8065, + "step": 4158 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023596205935550756, + "loss": 3.8891, + "step": 4159 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023585856855012037, + "loss": 3.8496, + "step": 4160 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002357550801758091, + "loss": 3.7957, + "step": 4161 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002356515942503648, + "loss": 3.7828, + "step": 4162 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023554811079157763, + "loss": 3.9553, + "step": 4163 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023544462981723788, + "loss": 3.9182, + "step": 4164 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023534115134513496, + "loss": 3.9482, + "step": 4165 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002352376753930581, + "loss": 3.7753, + "step": 4166 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023513420197879598, + "loss": 3.6705, + "step": 4167 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023503073112013685, + "loss": 3.8897, + "step": 4168 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023492726283486862, + "loss": 3.9076, + "step": 4169 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023482379714077865, + "loss": 4.0084, + "step": 4170 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023472033405565388, + "loss": 3.8027, + "step": 4171 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023461687359728071, + "loss": 3.8416, + "step": 4172 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023451341578344538, + "loss": 3.86, + "step": 4173 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023440996063193344, + "loss": 3.8012, + "step": 4174 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023430650816053, + "loss": 3.8281, + "step": 4175 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023420305838701971, + "loss": 4.0163, + "step": 4176 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023409961132918688, + "loss": 3.8295, + "step": 4177 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023399616700481518, + "loss": 3.9123, + "step": 4178 + }, + { + "epoch": 0.53, + "learning_rate": 0.00023389272543168784, + "loss": 3.9554, + "step": 4179 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002337892866275878, + "loss": 3.7995, + "step": 4180 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023368585061029723, + "loss": 3.8682, + "step": 4181 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023358241739759815, + "loss": 3.8141, + "step": 4182 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002334789870072718, + "loss": 3.8893, + "step": 4183 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023337555945709916, + "loss": 3.7704, + "step": 4184 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002332721347648606, + "loss": 3.8507, + "step": 4185 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002331687129483359, + "loss": 3.8233, + "step": 4186 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023306529402530467, + "loss": 3.8844, + "step": 4187 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002329618780135457, + "loss": 3.9278, + "step": 4188 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023285846493083736, + "loss": 3.9152, + "step": 4189 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023275505479495768, + "loss": 3.942, + "step": 4190 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002326516476236839, + "loss": 3.9301, + "step": 4191 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023254824343479314, + "loss": 3.9498, + "step": 4192 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002324448422460616, + "loss": 4.0234, + "step": 4193 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002323414440752652, + "loss": 3.7695, + "step": 4194 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002322380489401793, + "loss": 3.7967, + "step": 4195 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002321346568585787, + "loss": 3.749, + "step": 4196 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023203126784823765, + "loss": 3.7653, + "step": 4197 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023192788192693002, + "loss": 3.9755, + "step": 4198 + }, + { + "epoch": 0.54, + "learning_rate": 0.000231824499112429, + "loss": 3.6491, + "step": 4199 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023172111942250717, + "loss": 3.656, + "step": 4200 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002316177428749369, + "loss": 3.9382, + "step": 4201 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023151436948748978, + "loss": 3.8459, + "step": 4202 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023141099927793683, + "loss": 3.9966, + "step": 4203 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023130763226404858, + "loss": 3.8737, + "step": 4204 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023120426846359507, + "loss": 3.7722, + "step": 4205 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023110090789434573, + "loss": 3.8001, + "step": 4206 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023099755057406934, + "loss": 3.8689, + "step": 4207 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002308941965205344, + "loss": 3.9071, + "step": 4208 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023079084575150844, + "loss": 3.6734, + "step": 4209 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023068749828475887, + "loss": 3.8067, + "step": 4210 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023058415413805227, + "loss": 3.9878, + "step": 4211 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002304808133291546, + "loss": 3.9408, + "step": 4212 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023037747587583145, + "loss": 3.9806, + "step": 4213 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002302741417958477, + "loss": 3.7683, + "step": 4214 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002301708111069676, + "loss": 3.8937, + "step": 4215 + }, + { + "epoch": 0.54, + "learning_rate": 0.00023006748382695496, + "loss": 4.0089, + "step": 4216 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022996415997357297, + "loss": 3.799, + "step": 4217 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022986083956458403, + "loss": 3.7985, + "step": 4218 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002297575226177503, + "loss": 3.906, + "step": 4219 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002296542091508332, + "loss": 3.9099, + "step": 4220 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022955089918159333, + "loss": 3.8687, + "step": 4221 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022944759272779098, + "loss": 3.8561, + "step": 4222 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022934428980718571, + "loss": 3.754, + "step": 4223 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022924099043753648, + "loss": 4.0015, + "step": 4224 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022913769463660163, + "loss": 3.9, + "step": 4225 + }, + { + "epoch": 0.54, + "learning_rate": 0.000229034402422139, + "loss": 3.8789, + "step": 4226 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022893111381190563, + "loss": 3.8472, + "step": 4227 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002288278288236579, + "loss": 3.8479, + "step": 4228 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022872454747515197, + "loss": 3.8169, + "step": 4229 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022862126978414295, + "loss": 3.9463, + "step": 4230 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022851799576838552, + "loss": 3.7594, + "step": 4231 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022841472544563368, + "loss": 3.8032, + "step": 4232 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002283114588336407, + "loss": 3.7329, + "step": 4233 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022820819595015945, + "loss": 3.8383, + "step": 4234 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022810493681294197, + "loss": 3.8479, + "step": 4235 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002280016814397396, + "loss": 3.7129, + "step": 4236 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022789842984830317, + "loss": 3.9178, + "step": 4237 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022779518205638297, + "loss": 3.8374, + "step": 4238 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022769193808172843, + "loss": 3.9703, + "step": 4239 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022758869794208824, + "loss": 3.8422, + "step": 4240 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022748546165521079, + "loss": 4.0271, + "step": 4241 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022738222923884347, + "loss": 3.7704, + "step": 4242 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022727900071073308, + "loss": 3.7666, + "step": 4243 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022717577608862596, + "loss": 3.9156, + "step": 4244 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022707255539026753, + "loss": 3.8207, + "step": 4245 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002269693386334025, + "loss": 3.9078, + "step": 4246 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022686612583577525, + "loss": 3.8335, + "step": 4247 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022676291701512912, + "loss": 3.8202, + "step": 4248 + }, + { + "epoch": 0.54, + "learning_rate": 0.000226659712189207, + "loss": 3.8088, + "step": 4249 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022655651137575095, + "loss": 3.8453, + "step": 4250 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022645331459250233, + "loss": 3.9662, + "step": 4251 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022635012185720193, + "loss": 3.914, + "step": 4252 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022624693318758977, + "loss": 3.9974, + "step": 4253 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022614374860140511, + "loss": 3.8575, + "step": 4254 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022604056811638656, + "loss": 3.8513, + "step": 4255 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022593739175027222, + "loss": 3.9305, + "step": 4256 + }, + { + "epoch": 0.54, + "learning_rate": 0.00022583421952079925, + "loss": 3.7853, + "step": 4257 + }, + { + "epoch": 0.55, + "learning_rate": 0.000225731051445704, + "loss": 3.8598, + "step": 4258 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002256278875427224, + "loss": 3.9319, + "step": 4259 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002255247278295895, + "loss": 3.9505, + "step": 4260 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022542157232403957, + "loss": 3.8624, + "step": 4261 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022531842104380633, + "loss": 3.792, + "step": 4262 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022521527400662267, + "loss": 3.88, + "step": 4263 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022511213123022067, + "loss": 3.8824, + "step": 4264 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022500899273233184, + "loss": 3.9503, + "step": 4265 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022490585853068688, + "loss": 3.8778, + "step": 4266 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022480272864301582, + "loss": 3.871, + "step": 4267 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002246996030870478, + "loss": 4.0719, + "step": 4268 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022459648188051127, + "loss": 3.9072, + "step": 4269 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022449336504113405, + "loss": 3.7622, + "step": 4270 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002243902525866431, + "loss": 3.7779, + "step": 4271 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022428714453476457, + "loss": 3.7122, + "step": 4272 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022418404090322403, + "loss": 3.7471, + "step": 4273 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022408094170974603, + "loss": 3.9968, + "step": 4274 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022397784697205473, + "loss": 3.9207, + "step": 4275 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022387475670787317, + "loss": 3.6862, + "step": 4276 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022377167093492385, + "loss": 3.9228, + "step": 4277 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022366858967092835, + "loss": 3.9049, + "step": 4278 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002235655129336075, + "loss": 3.93, + "step": 4279 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002234624407406815, + "loss": 3.8753, + "step": 4280 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002233593731098696, + "loss": 3.7117, + "step": 4281 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022325631005889023, + "loss": 3.9513, + "step": 4282 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022315325160546118, + "loss": 3.7961, + "step": 4283 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022305019776729942, + "loss": 3.7307, + "step": 4284 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022294714856212116, + "loss": 3.8608, + "step": 4285 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002228441040076417, + "loss": 3.8974, + "step": 4286 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022274106412157552, + "loss": 3.9227, + "step": 4287 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022263802892163645, + "loss": 3.8743, + "step": 4288 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022253499842553746, + "loss": 3.8402, + "step": 4289 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022243197265099058, + "loss": 3.8926, + "step": 4290 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022232895161570723, + "loss": 3.965, + "step": 4291 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002222259353373978, + "loss": 3.8677, + "step": 4292 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022212292383377215, + "loss": 3.7602, + "step": 4293 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022201991712253905, + "loss": 3.7643, + "step": 4294 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002219169152214066, + "loss": 3.8372, + "step": 4295 + }, + { + "epoch": 0.55, + "learning_rate": 0.000221813918148082, + "loss": 3.8742, + "step": 4296 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022171092592027157, + "loss": 3.899, + "step": 4297 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022160793855568098, + "loss": 3.9563, + "step": 4298 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022150495607201493, + "loss": 3.8191, + "step": 4299 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022140197848697718, + "loss": 3.8053, + "step": 4300 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022129900581827094, + "loss": 3.9362, + "step": 4301 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022119603808359823, + "loss": 3.8516, + "step": 4302 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022109307530066062, + "loss": 3.916, + "step": 4303 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022099011748715844, + "loss": 3.9067, + "step": 4304 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022088716466079134, + "loss": 3.8365, + "step": 4305 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002207842168392582, + "loss": 3.8272, + "step": 4306 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002206812740402569, + "loss": 3.8364, + "step": 4307 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002205783362814844, + "loss": 3.7423, + "step": 4308 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022047540358063707, + "loss": 3.8403, + "step": 4309 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002203724759554101, + "loss": 3.6815, + "step": 4310 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022026955342349788, + "loss": 3.8266, + "step": 4311 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022016663600259417, + "loss": 3.8448, + "step": 4312 + }, + { + "epoch": 0.55, + "learning_rate": 0.00022006372371039163, + "loss": 3.8455, + "step": 4313 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021996081656458204, + "loss": 3.8887, + "step": 4314 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021985791458285626, + "loss": 4.0069, + "step": 4315 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021975501778290446, + "loss": 3.8789, + "step": 4316 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021965212618241576, + "loss": 3.8225, + "step": 4317 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002195492397990783, + "loss": 3.8878, + "step": 4318 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002194463586505796, + "loss": 3.7402, + "step": 4319 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021934348275460597, + "loss": 3.8341, + "step": 4320 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021924061212884313, + "loss": 3.8332, + "step": 4321 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021913774679097568, + "loss": 3.9132, + "step": 4322 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021903488675868726, + "loss": 3.8772, + "step": 4323 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021893203204966088, + "loss": 4.0563, + "step": 4324 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021882918268157834, + "loss": 3.8399, + "step": 4325 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002187263386721206, + "loss": 3.8733, + "step": 4326 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021862350003896787, + "loss": 3.8156, + "step": 4327 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021852066679979923, + "loss": 3.9739, + "step": 4328 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021841783897229278, + "loss": 3.8571, + "step": 4329 + }, + { + "epoch": 0.55, + "learning_rate": 0.000218315016574126, + "loss": 3.8641, + "step": 4330 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021821219962297529, + "loss": 3.7774, + "step": 4331 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021810938813651592, + "loss": 3.8373, + "step": 4332 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021800658213242243, + "loss": 3.8822, + "step": 4333 + }, + { + "epoch": 0.55, + "learning_rate": 0.00021790378162836837, + "loss": 3.8518, + "step": 4334 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002178009866420264, + "loss": 3.804, + "step": 4335 + }, + { + "epoch": 0.56, + "learning_rate": 0.000217698197191068, + "loss": 3.8276, + "step": 4336 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021759541329316408, + "loss": 3.974, + "step": 4337 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021749263496598426, + "loss": 4.019, + "step": 4338 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021738986222719723, + "loss": 3.8763, + "step": 4339 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021728709509447102, + "loss": 3.8779, + "step": 4340 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002171843335854724, + "loss": 3.7821, + "step": 4341 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021708157771786732, + "loss": 3.923, + "step": 4342 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021697882750932064, + "loss": 3.9031, + "step": 4343 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021687608297749625, + "loss": 3.81, + "step": 4344 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002167733441400573, + "loss": 3.7067, + "step": 4345 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021667061101466565, + "loss": 3.976, + "step": 4346 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002165678836189823, + "loss": 3.8156, + "step": 4347 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002164651619706673, + "loss": 3.9061, + "step": 4348 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021636244608737982, + "loss": 3.7773, + "step": 4349 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021625973598677785, + "loss": 3.7525, + "step": 4350 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021615703168651832, + "loss": 3.9694, + "step": 4351 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021605433320425743, + "loss": 3.8064, + "step": 4352 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021595164055765022, + "loss": 3.9029, + "step": 4353 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021584895376435068, + "loss": 3.8013, + "step": 4354 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021574627284201193, + "loss": 3.7852, + "step": 4355 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021564359780828598, + "loss": 3.8148, + "step": 4356 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002155409286808238, + "loss": 3.9204, + "step": 4357 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021543826547727545, + "loss": 3.8628, + "step": 4358 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021533560821529002, + "loss": 3.8327, + "step": 4359 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021523295691251544, + "loss": 3.8447, + "step": 4360 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021513031158659852, + "loss": 3.9011, + "step": 4361 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021502767225518532, + "loss": 3.8446, + "step": 4362 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021492503893592074, + "loss": 3.848, + "step": 4363 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021482241164644855, + "loss": 3.7873, + "step": 4364 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002147197904044116, + "loss": 3.6673, + "step": 4365 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021461717522745162, + "loss": 3.8948, + "step": 4366 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002145145661332095, + "loss": 3.8496, + "step": 4367 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021441196313932485, + "loss": 3.9201, + "step": 4368 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021430936626343626, + "loss": 3.71, + "step": 4369 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021420677552318141, + "loss": 3.9736, + "step": 4370 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021410419093619682, + "loss": 3.6768, + "step": 4371 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021400161252011786, + "loss": 3.7686, + "step": 4372 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021389904029257912, + "loss": 3.7654, + "step": 4373 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021379647427121387, + "loss": 3.8857, + "step": 4374 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021369391447365437, + "loss": 3.8871, + "step": 4375 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021359136091753176, + "loss": 3.8558, + "step": 4376 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021348881362047643, + "loss": 3.7594, + "step": 4377 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021338627260011732, + "loss": 3.7724, + "step": 4378 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021328373787408235, + "loss": 4.0097, + "step": 4379 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021318120945999853, + "loss": 3.9453, + "step": 4380 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021307868737549166, + "loss": 3.7096, + "step": 4381 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021297617163818639, + "loss": 3.7592, + "step": 4382 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002128736622657065, + "loss": 3.8288, + "step": 4383 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021277115927567446, + "loss": 3.7947, + "step": 4384 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021266866268571168, + "loss": 3.893, + "step": 4385 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021256617251343862, + "loss": 3.8617, + "step": 4386 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021246368877647442, + "loss": 3.7789, + "step": 4387 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021236121149243733, + "loss": 3.8513, + "step": 4388 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021225874067894434, + "loss": 3.727, + "step": 4389 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021215627635361135, + "loss": 3.9808, + "step": 4390 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021205381853405317, + "loss": 3.978, + "step": 4391 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002119513672378835, + "loss": 3.7964, + "step": 4392 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021184892248271489, + "loss": 3.8384, + "step": 4393 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002117464842861587, + "loss": 3.8618, + "step": 4394 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021164405266582546, + "loss": 3.9861, + "step": 4395 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002115416276393242, + "loss": 3.7618, + "step": 4396 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021143920922426298, + "loss": 3.8636, + "step": 4397 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021133679743824877, + "loss": 4.066, + "step": 4398 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002112343922988873, + "loss": 3.8443, + "step": 4399 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021113199382378312, + "loss": 3.9738, + "step": 4400 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002110296020305399, + "loss": 3.8196, + "step": 4401 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021092721693675984, + "loss": 3.945, + "step": 4402 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021082483856004405, + "loss": 3.9437, + "step": 4403 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002107224669179928, + "loss": 3.8161, + "step": 4404 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021062010202820477, + "loss": 3.6977, + "step": 4405 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021051774390827777, + "loss": 3.7776, + "step": 4406 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021041539257580832, + "loss": 3.9208, + "step": 4407 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021031304804839177, + "loss": 3.8898, + "step": 4408 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002102107103436224, + "loss": 3.8875, + "step": 4409 + }, + { + "epoch": 0.56, + "learning_rate": 0.00021010837947909314, + "loss": 3.7377, + "step": 4410 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002100060554723959, + "loss": 3.9554, + "step": 4411 + }, + { + "epoch": 0.56, + "learning_rate": 0.00020990373834112142, + "loss": 3.7216, + "step": 4412 + }, + { + "epoch": 0.56, + "learning_rate": 0.00020980142810285904, + "loss": 3.9264, + "step": 4413 + }, + { + "epoch": 0.56, + "learning_rate": 0.00020969912477519732, + "loss": 3.8647, + "step": 4414 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020959682837572318, + "loss": 3.904, + "step": 4415 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002094945389220227, + "loss": 3.8189, + "step": 4416 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020939225643168055, + "loss": 3.8737, + "step": 4417 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020928998092228023, + "loss": 3.6628, + "step": 4418 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020918771241140423, + "loss": 3.847, + "step": 4419 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020908545091663356, + "loss": 3.8538, + "step": 4420 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020898319645554816, + "loss": 3.7713, + "step": 4421 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002088809490457268, + "loss": 3.7997, + "step": 4422 + }, + { + "epoch": 0.57, + "learning_rate": 0.000208778708704747, + "loss": 3.791, + "step": 4423 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002086764754501851, + "loss": 3.8876, + "step": 4424 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020857424929961613, + "loss": 3.9327, + "step": 4425 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002084720302706139, + "loss": 3.8892, + "step": 4426 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020836981838075113, + "loss": 4.0586, + "step": 4427 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020826761364759925, + "loss": 3.8599, + "step": 4428 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002081654160887283, + "loss": 3.8871, + "step": 4429 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002080632257217074, + "loss": 3.8547, + "step": 4430 + }, + { + "epoch": 0.57, + "learning_rate": 0.000207961042564104, + "loss": 3.9026, + "step": 4431 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020785886663348492, + "loss": 3.9097, + "step": 4432 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020775669794741508, + "loss": 3.8451, + "step": 4433 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002076545365234587, + "loss": 3.7786, + "step": 4434 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002075523823791784, + "loss": 3.9081, + "step": 4435 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020745023553213557, + "loss": 3.7733, + "step": 4436 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020734809599989062, + "loss": 3.7964, + "step": 4437 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002072459638000024, + "loss": 3.8779, + "step": 4438 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020714383895002863, + "loss": 3.9907, + "step": 4439 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020704172146752576, + "loss": 3.9461, + "step": 4440 + }, + { + "epoch": 0.57, + "learning_rate": 0.000206939611370049, + "loss": 3.8543, + "step": 4441 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020683750867515226, + "loss": 3.8704, + "step": 4442 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002067354134003882, + "loss": 4.0506, + "step": 4443 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020663332556330807, + "loss": 3.8336, + "step": 4444 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002065312451814621, + "loss": 3.8766, + "step": 4445 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020642917227239898, + "loss": 3.7369, + "step": 4446 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020632710685366623, + "loss": 3.8469, + "step": 4447 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020622504894281018, + "loss": 3.9429, + "step": 4448 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002061229985573757, + "loss": 3.8777, + "step": 4449 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002060209557149063, + "loss": 3.8357, + "step": 4450 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020591892043294452, + "loss": 3.9114, + "step": 4451 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020581689272903143, + "loss": 3.8944, + "step": 4452 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020571487262070664, + "loss": 3.7403, + "step": 4453 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020561286012550864, + "loss": 3.7945, + "step": 4454 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002055108552609746, + "loss": 3.7529, + "step": 4455 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020540885804464033, + "loss": 3.8366, + "step": 4456 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002053068684940402, + "loss": 3.7253, + "step": 4457 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002052048866267076, + "loss": 3.8257, + "step": 4458 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020510291246017415, + "loss": 4.0183, + "step": 4459 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002050009460119707, + "loss": 3.9245, + "step": 4460 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020489898729962627, + "loss": 3.9791, + "step": 4461 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020479703634066873, + "loss": 3.7815, + "step": 4462 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020469509315262474, + "loss": 3.7823, + "step": 4463 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020459315775301945, + "loss": 3.9861, + "step": 4464 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002044912301593767, + "loss": 3.9027, + "step": 4465 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020438931038921913, + "loss": 3.8175, + "step": 4466 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020428739846006783, + "loss": 3.8958, + "step": 4467 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020418549438944262, + "loss": 3.8541, + "step": 4468 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020408359819486206, + "loss": 3.8402, + "step": 4469 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020398170989384336, + "loss": 3.7174, + "step": 4470 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020387982950390222, + "loss": 3.7715, + "step": 4471 + }, + { + "epoch": 0.57, + "learning_rate": 0.000203777957042553, + "loss": 3.638, + "step": 4472 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020367609252730886, + "loss": 4.133, + "step": 4473 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020357423597568147, + "loss": 3.8902, + "step": 4474 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020347238740518107, + "loss": 3.7686, + "step": 4475 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020337054683331672, + "loss": 3.8894, + "step": 4476 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020326871427759583, + "loss": 3.835, + "step": 4477 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020316688975552483, + "loss": 3.8053, + "step": 4478 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020306507328460838, + "loss": 3.7915, + "step": 4479 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020296326488234996, + "loss": 3.7649, + "step": 4480 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002028614645662516, + "loss": 3.8612, + "step": 4481 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020275967235381398, + "loss": 3.8359, + "step": 4482 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020265788826253627, + "loss": 3.9662, + "step": 4483 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002025561123099165, + "loss": 3.826, + "step": 4484 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020245434451345102, + "loss": 3.8275, + "step": 4485 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020235258489063486, + "loss": 3.9672, + "step": 4486 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020225083345896163, + "loss": 3.7228, + "step": 4487 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020214909023592387, + "loss": 3.8046, + "step": 4488 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020204735523901218, + "loss": 3.8588, + "step": 4489 + }, + { + "epoch": 0.57, + "learning_rate": 0.000201945628485716, + "loss": 3.849, + "step": 4490 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020184390999352346, + "loss": 3.8449, + "step": 4491 + }, + { + "epoch": 0.57, + "learning_rate": 0.00020174219977992102, + "loss": 3.9291, + "step": 4492 + }, + { + "epoch": 0.58, + "learning_rate": 0.00020164049786239386, + "loss": 3.7768, + "step": 4493 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002015388042584258, + "loss": 3.6946, + "step": 4494 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002014371189854991, + "loss": 3.8827, + "step": 4495 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002013354420610945, + "loss": 3.7958, + "step": 4496 + }, + { + "epoch": 0.58, + "learning_rate": 0.00020123377350269176, + "loss": 3.8772, + "step": 4497 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002011321133277686, + "loss": 3.9771, + "step": 4498 + }, + { + "epoch": 0.58, + "learning_rate": 0.00020103046155380173, + "loss": 3.8635, + "step": 4499 + }, + { + "epoch": 0.58, + "learning_rate": 0.00020092881819826623, + "loss": 3.7698, + "step": 4500 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002008271832786357, + "loss": 3.85, + "step": 4501 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002007255568123825, + "loss": 3.9239, + "step": 4502 + }, + { + "epoch": 0.58, + "learning_rate": 0.00020062393881697732, + "loss": 3.8833, + "step": 4503 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002005223293098894, + "loss": 3.9684, + "step": 4504 + }, + { + "epoch": 0.58, + "learning_rate": 0.00020042072830858663, + "loss": 3.9154, + "step": 4505 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002003191358305355, + "loss": 4.0093, + "step": 4506 + }, + { + "epoch": 0.58, + "learning_rate": 0.00020021755189320096, + "loss": 3.9395, + "step": 4507 + }, + { + "epoch": 0.58, + "learning_rate": 0.00020011597651404625, + "loss": 3.7806, + "step": 4508 + }, + { + "epoch": 0.58, + "learning_rate": 0.00020001440971053355, + "loss": 3.7515, + "step": 4509 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019991285150012332, + "loss": 3.8475, + "step": 4510 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019981130190027452, + "loss": 3.9071, + "step": 4511 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001997097609284448, + "loss": 3.7947, + "step": 4512 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001996082286020902, + "loss": 3.9237, + "step": 4513 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019950670493866518, + "loss": 3.953, + "step": 4514 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019940518995562306, + "loss": 3.9554, + "step": 4515 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001993036836704153, + "loss": 3.9368, + "step": 4516 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019920218610049205, + "loss": 3.857, + "step": 4517 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019910069726330196, + "loss": 4.0226, + "step": 4518 + }, + { + "epoch": 0.58, + "learning_rate": 0.000198999217176292, + "loss": 3.8446, + "step": 4519 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019889774585690794, + "loss": 3.7447, + "step": 4520 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019879628332259376, + "loss": 3.7306, + "step": 4521 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019869482959079205, + "loss": 3.854, + "step": 4522 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019859338467894395, + "loss": 3.8102, + "step": 4523 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019849194860448887, + "loss": 3.8972, + "step": 4524 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019839052138486508, + "loss": 3.9351, + "step": 4525 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001982891030375089, + "loss": 3.7717, + "step": 4526 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019818769357985547, + "loss": 3.7166, + "step": 4527 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019808629302933817, + "loss": 3.9236, + "step": 4528 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019798490140338887, + "loss": 3.9696, + "step": 4529 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001978835187194381, + "loss": 3.9562, + "step": 4530 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019778214499491462, + "loss": 3.7532, + "step": 4531 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019768078024724576, + "loss": 3.7254, + "step": 4532 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019757942449385725, + "loss": 3.9524, + "step": 4533 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019747807775217344, + "loss": 3.9046, + "step": 4534 + }, + { + "epoch": 0.58, + "learning_rate": 0.000197376740039617, + "loss": 3.9569, + "step": 4535 + }, + { + "epoch": 0.58, + "learning_rate": 0.000197275411373609, + "loss": 3.7851, + "step": 4536 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019717409177156893, + "loss": 3.7795, + "step": 4537 + }, + { + "epoch": 0.58, + "learning_rate": 0.000197072781250915, + "loss": 3.7999, + "step": 4538 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019697147982906355, + "loss": 3.9142, + "step": 4539 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019687018752342938, + "loss": 3.6168, + "step": 4540 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019676890435142602, + "loss": 3.9025, + "step": 4541 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019666763033046497, + "loss": 3.8562, + "step": 4542 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019656636547795664, + "loss": 3.8201, + "step": 4543 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001964651098113095, + "loss": 3.8896, + "step": 4544 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001963638633479307, + "loss": 3.855, + "step": 4545 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019626262610522558, + "loss": 3.7969, + "step": 4546 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019616139810059793, + "loss": 3.9251, + "step": 4547 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019606017935145018, + "loss": 3.747, + "step": 4548 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019595896987518292, + "loss": 4.0152, + "step": 4549 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019585776968919516, + "loss": 3.7817, + "step": 4550 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001957565788108844, + "loss": 3.7771, + "step": 4551 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001956553972576467, + "loss": 3.8316, + "step": 4552 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019555422504687625, + "loss": 3.7123, + "step": 4553 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019545306219596564, + "loss": 3.8362, + "step": 4554 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019535190872230608, + "loss": 3.7362, + "step": 4555 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001952507646432869, + "loss": 3.9601, + "step": 4556 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019514962997629602, + "loss": 3.7651, + "step": 4557 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019504850473871954, + "loss": 3.8503, + "step": 4558 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019494738894794222, + "loss": 3.8218, + "step": 4559 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019484628262134696, + "loss": 3.9067, + "step": 4560 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019474518577631503, + "loss": 3.7732, + "step": 4561 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019464409843022627, + "loss": 3.7727, + "step": 4562 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019454302060045877, + "loss": 3.7831, + "step": 4563 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019444195230438897, + "loss": 3.8678, + "step": 4564 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019434089355939159, + "loss": 3.7944, + "step": 4565 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001942398443828399, + "loss": 3.8289, + "step": 4566 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019413880479210538, + "loss": 3.8823, + "step": 4567 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001940377748045579, + "loss": 3.7622, + "step": 4568 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019393675443756572, + "loss": 3.9409, + "step": 4569 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019383574370849532, + "loss": 3.8734, + "step": 4570 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019373474263471177, + "loss": 3.8141, + "step": 4571 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001936337512335782, + "loss": 3.794, + "step": 4572 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019353276952245628, + "loss": 3.8878, + "step": 4573 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019343179751870593, + "loss": 3.7528, + "step": 4574 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001933308352396853, + "loss": 3.7493, + "step": 4575 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019322988270275115, + "loss": 3.819, + "step": 4576 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019312893992525827, + "loss": 3.9115, + "step": 4577 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019302800692455995, + "loss": 3.875, + "step": 4578 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019292708371800756, + "loss": 3.9751, + "step": 4579 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019282617032295122, + "loss": 3.808, + "step": 4580 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019272526675673908, + "loss": 3.7209, + "step": 4581 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019262437303671753, + "loss": 3.8612, + "step": 4582 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019252348918023137, + "loss": 3.8169, + "step": 4583 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019242261520462382, + "loss": 3.8424, + "step": 4584 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001923217511272362, + "loss": 3.7141, + "step": 4585 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019222089696540814, + "loss": 3.7796, + "step": 4586 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001921200527364778, + "loss": 3.8652, + "step": 4587 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001920192184577813, + "loss": 3.7282, + "step": 4588 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001919183941466534, + "loss": 3.8248, + "step": 4589 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019181757982042686, + "loss": 3.8031, + "step": 4590 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019171677549643292, + "loss": 3.8466, + "step": 4591 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001916159811920009, + "loss": 3.8143, + "step": 4592 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019151519692445858, + "loss": 3.9658, + "step": 4593 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001914144227111319, + "loss": 3.7235, + "step": 4594 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001913136585693452, + "loss": 3.8035, + "step": 4595 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001912129045164209, + "loss": 3.9255, + "step": 4596 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001911121605696798, + "loss": 3.8056, + "step": 4597 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001910114267464409, + "loss": 3.7879, + "step": 4598 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019091070306402174, + "loss": 3.8769, + "step": 4599 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001908099895397377, + "loss": 3.9319, + "step": 4600 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019070928619090266, + "loss": 3.8316, + "step": 4601 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001906085930348287, + "loss": 3.9907, + "step": 4602 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019050791008882613, + "loss": 3.6698, + "step": 4603 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019040723737020343, + "loss": 3.7266, + "step": 4604 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019030657489626753, + "loss": 3.7912, + "step": 4605 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019020592268432346, + "loss": 3.8368, + "step": 4606 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001901052807516744, + "loss": 4.0666, + "step": 4607 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019000464911562192, + "loss": 3.8147, + "step": 4608 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001899040277934659, + "loss": 3.7312, + "step": 4609 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018980341680250413, + "loss": 3.7839, + "step": 4610 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018970281616003285, + "loss": 3.8657, + "step": 4611 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018960222588334656, + "loss": 3.7898, + "step": 4612 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018950164598973782, + "loss": 3.8281, + "step": 4613 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018940107649649743, + "loss": 3.8499, + "step": 4614 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001893005174209146, + "loss": 3.8584, + "step": 4615 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018919996878027635, + "loss": 3.8007, + "step": 4616 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018909943059186845, + "loss": 3.982, + "step": 4617 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001889989028729745, + "loss": 4.0222, + "step": 4618 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018889838564087623, + "loss": 3.8832, + "step": 4619 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001887978789128539, + "loss": 3.8322, + "step": 4620 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018869738270618566, + "loss": 3.8444, + "step": 4621 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018859689703814797, + "loss": 3.9032, + "step": 4622 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001884964219260156, + "loss": 3.8505, + "step": 4623 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001883959573870613, + "loss": 4.0383, + "step": 4624 + }, + { + "epoch": 0.59, + "learning_rate": 0.000188295503438556, + "loss": 3.727, + "step": 4625 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018819506009776904, + "loss": 3.7151, + "step": 4626 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018809462738196783, + "loss": 3.893, + "step": 4627 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001879942053084178, + "loss": 3.9551, + "step": 4628 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018789379389438273, + "loss": 3.8031, + "step": 4629 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001877933931571245, + "loss": 3.8945, + "step": 4630 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018769300311390313, + "loss": 3.8202, + "step": 4631 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001875926237819768, + "loss": 3.8583, + "step": 4632 + }, + { + "epoch": 0.59, + "learning_rate": 0.000187492255178602, + "loss": 3.9714, + "step": 4633 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018739189732103317, + "loss": 3.8386, + "step": 4634 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018729155022652292, + "loss": 3.9587, + "step": 4635 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018719121391232225, + "loss": 3.7962, + "step": 4636 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018709088839568, + "loss": 3.6877, + "step": 4637 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018699057369384343, + "loss": 3.7752, + "step": 4638 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018689026982405765, + "loss": 3.8664, + "step": 4639 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001867899768035661, + "loss": 3.7937, + "step": 4640 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018668969464961038, + "loss": 3.6311, + "step": 4641 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018658942337943012, + "loss": 3.8087, + "step": 4642 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018648916301026303, + "loss": 3.7688, + "step": 4643 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018638891355934504, + "loss": 3.7569, + "step": 4644 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018628867504391037, + "loss": 3.8431, + "step": 4645 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018618844748119107, + "loss": 3.9029, + "step": 4646 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018608823088841732, + "loss": 3.795, + "step": 4647 + }, + { + "epoch": 0.59, + "learning_rate": 0.00018598802528281772, + "loss": 3.8824, + "step": 4648 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018588783068161863, + "loss": 3.8696, + "step": 4649 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018578764710204467, + "loss": 3.9285, + "step": 4650 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001856874745613186, + "loss": 3.9091, + "step": 4651 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018558731307666127, + "loss": 3.8833, + "step": 4652 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018548716266529141, + "loss": 3.7179, + "step": 4653 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018538702334442632, + "loss": 3.7828, + "step": 4654 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018528689513128092, + "loss": 3.8648, + "step": 4655 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001851867780430685, + "loss": 3.9126, + "step": 4656 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018508667209700034, + "loss": 3.8705, + "step": 4657 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018498657731028572, + "loss": 3.8486, + "step": 4658 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018488649370013223, + "loss": 3.9367, + "step": 4659 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001847864212837453, + "loss": 3.5666, + "step": 4660 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018468636007832852, + "loss": 3.7075, + "step": 4661 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018458631010108363, + "loss": 3.7807, + "step": 4662 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018448627136921042, + "loss": 3.8227, + "step": 4663 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018438624389990665, + "loss": 3.8691, + "step": 4664 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018428622771036822, + "loss": 3.9125, + "step": 4665 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018418622281778907, + "loss": 3.7998, + "step": 4666 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018408622923936124, + "loss": 3.7937, + "step": 4667 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018398624699227467, + "loss": 3.7939, + "step": 4668 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018388627609371757, + "loss": 3.9336, + "step": 4669 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018378631656087608, + "loss": 3.7999, + "step": 4670 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018368636841093434, + "loss": 3.9123, + "step": 4671 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018358643166107463, + "loss": 3.8434, + "step": 4672 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018348650632847726, + "loss": 3.7359, + "step": 4673 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018338659243032063, + "loss": 3.7845, + "step": 4674 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018328668998378095, + "loss": 3.8407, + "step": 4675 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018318679900603265, + "loss": 4.0945, + "step": 4676 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018308691951424822, + "loss": 3.7955, + "step": 4677 + }, + { + "epoch": 0.6, + "learning_rate": 0.000182987051525598, + "loss": 3.8017, + "step": 4678 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018288719505725056, + "loss": 3.8606, + "step": 4679 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018278735012637227, + "loss": 3.7998, + "step": 4680 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001826875167501276, + "loss": 3.9884, + "step": 4681 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018258769494567932, + "loss": 3.9149, + "step": 4682 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001824878847301877, + "loss": 3.8656, + "step": 4683 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018238808612081143, + "loss": 4.0099, + "step": 4684 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018228829913470695, + "loss": 3.7518, + "step": 4685 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001821885237890288, + "loss": 3.8419, + "step": 4686 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018208876010092958, + "loss": 3.8075, + "step": 4687 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001819890080875598, + "loss": 3.7883, + "step": 4688 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018188926776606796, + "loss": 3.9013, + "step": 4689 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018178953915360058, + "loss": 3.802, + "step": 4690 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001816898222673022, + "loss": 3.7843, + "step": 4691 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018159011712431527, + "loss": 3.7726, + "step": 4692 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018149042374178038, + "loss": 3.7536, + "step": 4693 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018139074213683576, + "loss": 3.7786, + "step": 4694 + }, + { + "epoch": 0.6, + "learning_rate": 0.000181291072326618, + "loss": 3.8426, + "step": 4695 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001811914143282615, + "loss": 3.9876, + "step": 4696 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001810917681588985, + "loss": 3.9475, + "step": 4697 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018099213383565945, + "loss": 3.8931, + "step": 4698 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018089251137567254, + "loss": 3.8532, + "step": 4699 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018079290079606414, + "loss": 3.7568, + "step": 4700 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018069330211395833, + "loss": 3.8194, + "step": 4701 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001805937153464775, + "loss": 3.9214, + "step": 4702 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001804941405107416, + "loss": 3.9203, + "step": 4703 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018039457762386864, + "loss": 3.8535, + "step": 4704 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018029502670297479, + "loss": 3.9201, + "step": 4705 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018019548776517395, + "loss": 3.6989, + "step": 4706 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018009596082757794, + "loss": 3.7334, + "step": 4707 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001799964459072967, + "loss": 3.8209, + "step": 4708 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017989694302143788, + "loss": 3.8439, + "step": 4709 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017979745218710735, + "loss": 3.7949, + "step": 4710 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017969797342140868, + "loss": 3.7439, + "step": 4711 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001795985067414433, + "loss": 3.8779, + "step": 4712 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017949905216431083, + "loss": 3.8351, + "step": 4713 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017939960970710865, + "loss": 3.8426, + "step": 4714 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017930017938693193, + "loss": 3.9348, + "step": 4715 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001792007612208741, + "loss": 3.8444, + "step": 4716 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017910135522602614, + "loss": 3.8788, + "step": 4717 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017900196141947705, + "loss": 3.8218, + "step": 4718 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017890257981831393, + "loss": 3.6373, + "step": 4719 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017880321043962165, + "loss": 3.925, + "step": 4720 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017870385330048284, + "loss": 3.8094, + "step": 4721 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017860450841797814, + "loss": 3.7461, + "step": 4722 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001785051758091862, + "loss": 3.8131, + "step": 4723 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017840585549118337, + "loss": 3.9754, + "step": 4724 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001783065474810439, + "loss": 3.7299, + "step": 4725 + }, + { + "epoch": 0.6, + "learning_rate": 0.00017820725179584014, + "loss": 3.9469, + "step": 4726 + }, + { + "epoch": 0.61, + "learning_rate": 0.000178107968452642, + "loss": 3.9713, + "step": 4727 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017800869746851757, + "loss": 3.8387, + "step": 4728 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017790943886053268, + "loss": 3.9232, + "step": 4729 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017781019264575092, + "loss": 3.9189, + "step": 4730 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017771095884123404, + "loss": 3.826, + "step": 4731 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017761173746404135, + "loss": 3.6786, + "step": 4732 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017751252853123012, + "loss": 3.8017, + "step": 4733 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017741333205985565, + "loss": 3.6934, + "step": 4734 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001773141480669709, + "loss": 3.6931, + "step": 4735 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017721497656962665, + "loss": 3.8475, + "step": 4736 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017711581758487178, + "loss": 3.9477, + "step": 4737 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017701667112975285, + "loss": 3.7796, + "step": 4738 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017691753722131424, + "loss": 3.865, + "step": 4739 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017681841587659816, + "loss": 3.8738, + "step": 4740 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017671930711264487, + "loss": 3.7379, + "step": 4741 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017662021094649223, + "loss": 3.8854, + "step": 4742 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017652112739517596, + "loss": 3.8435, + "step": 4743 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017642205647572975, + "loss": 3.9188, + "step": 4744 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017632299820518504, + "loss": 3.796, + "step": 4745 + }, + { + "epoch": 0.61, + "learning_rate": 0.000176223952600571, + "loss": 3.7505, + "step": 4746 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017612491967891486, + "loss": 3.9916, + "step": 4747 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017602589945724144, + "loss": 3.8734, + "step": 4748 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017592689195257354, + "loss": 3.823, + "step": 4749 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001758278971819316, + "loss": 3.8803, + "step": 4750 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017572891516233398, + "loss": 3.7511, + "step": 4751 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017562994591079696, + "loss": 3.9741, + "step": 4752 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017553098944433435, + "loss": 3.835, + "step": 4753 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017543204577995795, + "loss": 3.9151, + "step": 4754 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001753331149346773, + "loss": 3.7847, + "step": 4755 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017523419692549988, + "loss": 3.7682, + "step": 4756 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001751352917694308, + "loss": 3.8262, + "step": 4757 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017503639948347288, + "loss": 3.8621, + "step": 4758 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017493752008462704, + "loss": 3.8346, + "step": 4759 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017483865358989168, + "loss": 3.9349, + "step": 4760 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017473980001626304, + "loss": 3.9178, + "step": 4761 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017464095938073536, + "loss": 3.8445, + "step": 4762 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017454213170030037, + "loss": 3.7265, + "step": 4763 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017444331699194762, + "loss": 3.8095, + "step": 4764 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017434451527266465, + "loss": 3.9328, + "step": 4765 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017424572655943665, + "loss": 3.7402, + "step": 4766 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017414695086924648, + "loss": 3.8467, + "step": 4767 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017404818821907482, + "loss": 3.6584, + "step": 4768 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017394943862590004, + "loss": 3.9564, + "step": 4769 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001738507021066985, + "loss": 3.7957, + "step": 4770 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017375197867844401, + "loss": 3.8144, + "step": 4771 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017365326835810832, + "loss": 3.8035, + "step": 4772 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001735545711626608, + "loss": 3.8486, + "step": 4773 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001734558871090689, + "loss": 3.7691, + "step": 4774 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017335721621429732, + "loss": 3.8415, + "step": 4775 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017325855849530876, + "loss": 3.8055, + "step": 4776 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017315991396906372, + "loss": 3.8753, + "step": 4777 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017306128265252025, + "loss": 3.8525, + "step": 4778 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001729626645626342, + "loss": 3.7133, + "step": 4779 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017286405971635928, + "loss": 3.8502, + "step": 4780 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001727654681306467, + "loss": 3.781, + "step": 4781 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001726668898224455, + "loss": 3.8366, + "step": 4782 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017256832480870237, + "loss": 3.8316, + "step": 4783 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017246977310636201, + "loss": 3.7847, + "step": 4784 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017237123473236643, + "loss": 3.7918, + "step": 4785 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017227270970365555, + "loss": 3.963, + "step": 4786 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017217419803716697, + "loss": 3.8533, + "step": 4787 + }, + { + "epoch": 0.61, + "learning_rate": 0.000172075699749836, + "loss": 3.9529, + "step": 4788 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017197721485859562, + "loss": 3.9692, + "step": 4789 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017187874338037644, + "loss": 3.7846, + "step": 4790 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017178028533210705, + "loss": 3.8281, + "step": 4791 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017168184073071324, + "loss": 3.8976, + "step": 4792 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001715834095931191, + "loss": 3.6901, + "step": 4793 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017148499193624586, + "loss": 3.7584, + "step": 4794 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001713865877770128, + "loss": 3.7512, + "step": 4795 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017128819713233665, + "loss": 3.7156, + "step": 4796 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017118982001913187, + "loss": 3.8569, + "step": 4797 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017109145645431074, + "loss": 3.7356, + "step": 4798 + }, + { + "epoch": 0.61, + "learning_rate": 0.000170993106454783, + "loss": 3.9224, + "step": 4799 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017089477003745618, + "loss": 3.8835, + "step": 4800 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017079644721923538, + "loss": 3.9532, + "step": 4801 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017069813801702362, + "loss": 4.0215, + "step": 4802 + }, + { + "epoch": 0.61, + "learning_rate": 0.00017059984244772124, + "loss": 3.8239, + "step": 4803 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001705015605282264, + "loss": 3.7934, + "step": 4804 + }, + { + "epoch": 0.62, + "learning_rate": 0.000170403292275435, + "loss": 3.9006, + "step": 4805 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017030503770624046, + "loss": 3.8376, + "step": 4806 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001702067968375337, + "loss": 3.8161, + "step": 4807 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017010856968620373, + "loss": 3.9587, + "step": 4808 + }, + { + "epoch": 0.62, + "learning_rate": 0.00017001035626913678, + "loss": 3.8574, + "step": 4809 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016991215660321679, + "loss": 4.0101, + "step": 4810 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016981397070532566, + "loss": 3.8319, + "step": 4811 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001697157985923425, + "loss": 3.7131, + "step": 4812 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016961764028114437, + "loss": 3.8634, + "step": 4813 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016951949578860575, + "loss": 3.7434, + "step": 4814 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016942136513159873, + "loss": 3.6028, + "step": 4815 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016932324832699325, + "loss": 3.9238, + "step": 4816 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001692251453916567, + "loss": 3.9384, + "step": 4817 + }, + { + "epoch": 0.62, + "learning_rate": 0.000169127056342454, + "loss": 3.8277, + "step": 4818 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016902898119624794, + "loss": 3.7849, + "step": 4819 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001689309199698986, + "loss": 3.679, + "step": 4820 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016883287268026404, + "loss": 3.9021, + "step": 4821 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016873483934419959, + "loss": 3.8123, + "step": 4822 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016863681997855844, + "loss": 3.8193, + "step": 4823 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016853881460019115, + "loss": 3.953, + "step": 4824 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016844082322594597, + "loss": 3.8929, + "step": 4825 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016834284587266885, + "loss": 3.9337, + "step": 4826 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016824488255720317, + "loss": 3.856, + "step": 4827 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016814693329638992, + "loss": 3.7287, + "step": 4828 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016804899810706774, + "loss": 3.8386, + "step": 4829 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016795107700607286, + "loss": 3.8569, + "step": 4830 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016785317001023905, + "loss": 3.7829, + "step": 4831 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001677552771363977, + "loss": 3.8382, + "step": 4832 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016765739840137757, + "loss": 3.7418, + "step": 4833 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016755953382200533, + "loss": 3.8608, + "step": 4834 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016746168341510499, + "loss": 3.9332, + "step": 4835 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016736384719749804, + "loss": 3.8557, + "step": 4836 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016726602518600382, + "loss": 3.804, + "step": 4837 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016716821739743887, + "loss": 3.8969, + "step": 4838 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016707042384861775, + "loss": 3.7321, + "step": 4839 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016697264455635213, + "loss": 3.7331, + "step": 4840 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001668748795374515, + "loss": 3.7408, + "step": 4841 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016677712880872275, + "loss": 3.8894, + "step": 4842 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016667939238697028, + "loss": 3.7409, + "step": 4843 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001665816702889963, + "loss": 3.893, + "step": 4844 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001664839625316002, + "loss": 3.7712, + "step": 4845 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001663862691315791, + "loss": 3.6619, + "step": 4846 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016628859010572771, + "loss": 3.8583, + "step": 4847 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016619092547083813, + "loss": 3.9171, + "step": 4848 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016609327524370012, + "loss": 3.7806, + "step": 4849 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001659956394411008, + "loss": 3.747, + "step": 4850 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016589801807982487, + "loss": 3.7768, + "step": 4851 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016580041117665467, + "loss": 3.9282, + "step": 4852 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016570281874836996, + "loss": 3.8324, + "step": 4853 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001656052408117479, + "loss": 3.8437, + "step": 4854 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016550767738356337, + "loss": 3.7875, + "step": 4855 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001654101284805886, + "loss": 3.7194, + "step": 4856 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016531259411959332, + "loss": 3.7118, + "step": 4857 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016521507431734492, + "loss": 3.8773, + "step": 4858 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001651175690906082, + "loss": 3.8661, + "step": 4859 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001650200784561454, + "loss": 3.6902, + "step": 4860 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016492260243071628, + "loss": 3.889, + "step": 4861 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016482514103107813, + "loss": 3.8466, + "step": 4862 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016472769427398564, + "loss": 4.0104, + "step": 4863 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016463026217619105, + "loss": 3.741, + "step": 4864 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016453284475444413, + "loss": 3.7671, + "step": 4865 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016443544202549189, + "loss": 3.9214, + "step": 4866 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016433805400607924, + "loss": 3.8743, + "step": 4867 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016424068071294818, + "loss": 3.8154, + "step": 4868 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016414332216283827, + "loss": 3.7502, + "step": 4869 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016404597837248663, + "loss": 3.8969, + "step": 4870 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001639486493586278, + "loss": 3.8258, + "step": 4871 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016385133513799368, + "loss": 3.7549, + "step": 4872 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016375403572731385, + "loss": 3.8092, + "step": 4873 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016365675114331512, + "loss": 4.0241, + "step": 4874 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001635594814027217, + "loss": 3.8377, + "step": 4875 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001634622265222556, + "loss": 3.8534, + "step": 4876 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016336498651863609, + "loss": 3.8841, + "step": 4877 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001632677614085797, + "loss": 3.8844, + "step": 4878 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016317055120880058, + "loss": 3.9008, + "step": 4879 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016307335593601036, + "loss": 3.9812, + "step": 4880 + }, + { + "epoch": 0.62, + "learning_rate": 0.000162976175606918, + "loss": 3.9968, + "step": 4881 + }, + { + "epoch": 0.62, + "learning_rate": 0.00016287901023822988, + "loss": 3.8911, + "step": 4882 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016278185984664996, + "loss": 3.9363, + "step": 4883 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016268472444887932, + "loss": 3.8305, + "step": 4884 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001625876040616169, + "loss": 3.9436, + "step": 4885 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016249049870155875, + "loss": 3.8914, + "step": 4886 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016239340838539827, + "loss": 3.8293, + "step": 4887 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016229633312982655, + "loss": 3.8368, + "step": 4888 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016219927295153198, + "loss": 3.8361, + "step": 4889 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016210222786720014, + "loss": 3.8507, + "step": 4890 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016200519789351443, + "loss": 3.847, + "step": 4891 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001619081830471553, + "loss": 3.7504, + "step": 4892 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016181118334480073, + "loss": 3.7876, + "step": 4893 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016171419880312604, + "loss": 3.9416, + "step": 4894 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016161722943880416, + "loss": 3.8392, + "step": 4895 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016152027526850519, + "loss": 3.8191, + "step": 4896 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001614233363088966, + "loss": 3.7971, + "step": 4897 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001613264125766434, + "loss": 3.8372, + "step": 4898 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016122950408840785, + "loss": 3.7575, + "step": 4899 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016113261086084962, + "loss": 3.9158, + "step": 4900 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001610357329106259, + "loss": 3.8935, + "step": 4901 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016093887025439105, + "loss": 3.8398, + "step": 4902 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016084202290879678, + "loss": 3.8698, + "step": 4903 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001607451908904925, + "loss": 3.7919, + "step": 4904 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016064837421612456, + "loss": 3.7257, + "step": 4905 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016055157290233702, + "loss": 4.0441, + "step": 4906 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016045478696577104, + "loss": 3.8028, + "step": 4907 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016035801642306526, + "loss": 3.9468, + "step": 4908 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016026126129085568, + "loss": 3.8332, + "step": 4909 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016016452158577565, + "loss": 3.9557, + "step": 4910 + }, + { + "epoch": 0.63, + "learning_rate": 0.00016006779732445582, + "loss": 3.8405, + "step": 4911 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001599710885235241, + "loss": 3.9238, + "step": 4912 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015987439519960607, + "loss": 3.7463, + "step": 4913 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015977771736932434, + "loss": 3.781, + "step": 4914 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015968105504929892, + "loss": 3.7317, + "step": 4915 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015958440825614723, + "loss": 3.742, + "step": 4916 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015948777700648396, + "loss": 3.718, + "step": 4917 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015939116131692105, + "loss": 3.7271, + "step": 4918 + }, + { + "epoch": 0.63, + "learning_rate": 0.000159294561204068, + "loss": 3.8743, + "step": 4919 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015919797668453146, + "loss": 3.8576, + "step": 4920 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015910140777491527, + "loss": 3.8746, + "step": 4921 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015900485449182094, + "loss": 3.7759, + "step": 4922 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015890831685184704, + "loss": 3.8293, + "step": 4923 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015881179487158952, + "loss": 3.7863, + "step": 4924 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015871528856764163, + "loss": 3.9973, + "step": 4925 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015861879795659378, + "loss": 3.8642, + "step": 4926 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015852232305503406, + "loss": 3.9524, + "step": 4927 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001584258638795475, + "loss": 3.8009, + "step": 4928 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015832942044671647, + "loss": 3.8513, + "step": 4929 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015823299277312086, + "loss": 3.7923, + "step": 4930 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015813658087533757, + "loss": 4.064, + "step": 4931 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001580401847699411, + "loss": 3.8008, + "step": 4932 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015794380447350288, + "loss": 3.8232, + "step": 4933 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015784744000259195, + "loss": 3.9322, + "step": 4934 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001577510913737744, + "loss": 3.7826, + "step": 4935 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015765475860361365, + "loss": 3.908, + "step": 4936 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015755844170867048, + "loss": 3.6982, + "step": 4937 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015746214070550286, + "loss": 3.9018, + "step": 4938 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015736585561066603, + "loss": 3.7893, + "step": 4939 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015726958644071248, + "loss": 3.8001, + "step": 4940 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015717333321219206, + "loss": 3.8397, + "step": 4941 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015707709594165185, + "loss": 3.9198, + "step": 4942 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001569808746456361, + "loss": 3.7783, + "step": 4943 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015688466934068632, + "loss": 3.7729, + "step": 4944 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001567884800433414, + "loss": 3.772, + "step": 4945 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001566923067701374, + "loss": 3.6454, + "step": 4946 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001565961495376075, + "loss": 3.7542, + "step": 4947 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001565000083622824, + "loss": 3.7597, + "step": 4948 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015640388326068969, + "loss": 3.8512, + "step": 4949 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001563077742493546, + "loss": 3.8881, + "step": 4950 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015621168134479925, + "loss": 3.846, + "step": 4951 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015611560456354323, + "loss": 3.7733, + "step": 4952 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015601954392210316, + "loss": 3.7001, + "step": 4953 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015592349943699296, + "loss": 3.7655, + "step": 4954 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001558274711247239, + "loss": 3.7969, + "step": 4955 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001557314590018043, + "loss": 3.7281, + "step": 4956 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015563546308473966, + "loss": 3.7952, + "step": 4957 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015553948339003287, + "loss": 3.7988, + "step": 4958 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015544351993418404, + "loss": 3.8819, + "step": 4959 + }, + { + "epoch": 0.63, + "learning_rate": 0.00015534757273369038, + "loss": 3.7774, + "step": 4960 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015525164180504622, + "loss": 3.8698, + "step": 4961 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015515572716474322, + "loss": 3.8826, + "step": 4962 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015505982882927028, + "loss": 3.8076, + "step": 4963 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015496394681511344, + "loss": 3.6724, + "step": 4964 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015486808113875582, + "loss": 3.8982, + "step": 4965 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015477223181667796, + "loss": 3.8639, + "step": 4966 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015467639886535746, + "loss": 3.7323, + "step": 4967 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015458058230126892, + "loss": 3.9422, + "step": 4968 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015448478214088456, + "loss": 3.9195, + "step": 4969 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015438899840067356, + "loss": 3.7781, + "step": 4970 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015429323109710207, + "loss": 3.7282, + "step": 4971 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001541974802466337, + "loss": 3.8546, + "step": 4972 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015410174586572912, + "loss": 3.754, + "step": 4973 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001540060279708462, + "loss": 3.8567, + "step": 4974 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015391032657843988, + "loss": 3.7351, + "step": 4975 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001538146417049624, + "loss": 3.9017, + "step": 4976 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015371897336686303, + "loss": 3.882, + "step": 4977 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001536233215805884, + "loss": 3.7924, + "step": 4978 + }, + { + "epoch": 0.64, + "learning_rate": 0.000153527686362582, + "loss": 3.6992, + "step": 4979 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015343206772928486, + "loss": 3.9463, + "step": 4980 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015333646569713473, + "loss": 3.784, + "step": 4981 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015324088028256677, + "loss": 3.9063, + "step": 4982 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015314531150201316, + "loss": 3.8179, + "step": 4983 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001530497593719034, + "loss": 3.796, + "step": 4984 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015295422390866398, + "loss": 3.7755, + "step": 4985 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015285870512871835, + "loss": 3.7689, + "step": 4986 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015276320304848757, + "loss": 3.7719, + "step": 4987 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015266771768438948, + "loss": 3.8117, + "step": 4988 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015257224905283913, + "loss": 3.7177, + "step": 4989 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015247679717024854, + "loss": 3.8249, + "step": 4990 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001523813620530272, + "loss": 3.8431, + "step": 4991 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015228594371758137, + "loss": 3.8385, + "step": 4992 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015219054218031458, + "loss": 3.8136, + "step": 4993 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001520951574576276, + "loss": 3.7424, + "step": 4994 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001519997895659179, + "loss": 3.7284, + "step": 4995 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015190443852158058, + "loss": 3.7735, + "step": 4996 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015180910434100747, + "loss": 3.7853, + "step": 4997 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015171378704058772, + "loss": 3.8087, + "step": 4998 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001516184866367074, + "loss": 3.7253, + "step": 4999 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015152320314574974, + "loss": 3.7838, + "step": 5000 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001514279365840951, + "loss": 3.724, + "step": 5001 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001513326869681209, + "loss": 3.7979, + "step": 5002 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015123745431420169, + "loss": 3.8644, + "step": 5003 + }, + { + "epoch": 0.64, + "learning_rate": 0.000151142238638709, + "loss": 3.7716, + "step": 5004 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015104703995801145, + "loss": 3.7755, + "step": 5005 + }, + { + "epoch": 0.64, + "learning_rate": 0.000150951858288475, + "loss": 3.8406, + "step": 5006 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015085669364646242, + "loss": 3.8315, + "step": 5007 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001507615460483335, + "loss": 3.8297, + "step": 5008 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015066641551044532, + "loss": 3.8227, + "step": 5009 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015057130204915192, + "loss": 3.8887, + "step": 5010 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015047620568080428, + "loss": 3.6706, + "step": 5011 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015038112642175072, + "loss": 3.8229, + "step": 5012 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015028606428833645, + "loss": 3.8383, + "step": 5013 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015019101929690358, + "loss": 3.8172, + "step": 5014 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015009599146379162, + "loss": 3.9406, + "step": 5015 + }, + { + "epoch": 0.64, + "learning_rate": 0.00015000098080533697, + "loss": 3.8746, + "step": 5016 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014990598733787304, + "loss": 3.6762, + "step": 5017 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001498110110777302, + "loss": 3.6218, + "step": 5018 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014971605204123608, + "loss": 3.8251, + "step": 5019 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014962111024471522, + "loss": 3.8233, + "step": 5020 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014952618570448923, + "loss": 3.8232, + "step": 5021 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014943127843687658, + "loss": 3.8137, + "step": 5022 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001493363884581931, + "loss": 3.8823, + "step": 5023 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001492415157847515, + "loss": 3.7525, + "step": 5024 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001491466604328614, + "loss": 3.8383, + "step": 5025 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014905182241882955, + "loss": 3.7885, + "step": 5026 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014895700175895978, + "loss": 3.7819, + "step": 5027 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014886219846955276, + "loss": 3.7372, + "step": 5028 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001487674125669063, + "loss": 3.8765, + "step": 5029 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014867264406731524, + "loss": 3.8746, + "step": 5030 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014857789298707133, + "loss": 3.7508, + "step": 5031 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001484831593424633, + "loss": 3.5782, + "step": 5032 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014838844314977719, + "loss": 4.055, + "step": 5033 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014829374442529563, + "loss": 3.9113, + "step": 5034 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014819906318529858, + "loss": 3.9308, + "step": 5035 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014810439944606263, + "loss": 3.7477, + "step": 5036 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014800975322386175, + "loss": 3.7854, + "step": 5037 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014791512453496669, + "loss": 3.8698, + "step": 5038 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014782051339564512, + "loss": 3.9145, + "step": 5039 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014772591982216193, + "loss": 3.8363, + "step": 5040 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014763134383077875, + "loss": 3.912, + "step": 5041 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014753678543775428, + "loss": 3.8281, + "step": 5042 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001474422446593443, + "loss": 3.9184, + "step": 5043 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001473477215118014, + "loss": 3.7317, + "step": 5044 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014725321601137526, + "loss": 3.8709, + "step": 5045 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014715872817431242, + "loss": 3.8241, + "step": 5046 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001470642580168564, + "loss": 3.8226, + "step": 5047 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001469698055552478, + "loss": 3.9322, + "step": 5048 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014687537080572405, + "loss": 3.6783, + "step": 5049 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014678095378451955, + "loss": 3.9462, + "step": 5050 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014668655450786566, + "loss": 3.7896, + "step": 5051 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014659217299199084, + "loss": 3.8385, + "step": 5052 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014649780925312034, + "loss": 3.9773, + "step": 5053 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014640346330747623, + "loss": 3.8409, + "step": 5054 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014630913517127786, + "loss": 3.7678, + "step": 5055 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014621482486074121, + "loss": 3.713, + "step": 5056 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014612053239207928, + "loss": 3.7963, + "step": 5057 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001460262577815022, + "loss": 3.7022, + "step": 5058 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001459320010452167, + "loss": 3.7499, + "step": 5059 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014583776219942665, + "loss": 3.8874, + "step": 5060 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014574354126033293, + "loss": 3.7996, + "step": 5061 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001456493382441331, + "loss": 3.7659, + "step": 5062 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014555515316702175, + "loss": 3.7795, + "step": 5063 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001454609860451904, + "loss": 3.8391, + "step": 5064 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001453668368948275, + "loss": 3.7261, + "step": 5065 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014527270573211823, + "loss": 3.7934, + "step": 5066 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014517859257324507, + "loss": 3.6762, + "step": 5067 + }, + { + "epoch": 0.65, + "learning_rate": 0.000145084497434387, + "loss": 3.7415, + "step": 5068 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014499042033172, + "loss": 3.7005, + "step": 5069 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014489636128141726, + "loss": 3.8057, + "step": 5070 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014480232029964849, + "loss": 3.8451, + "step": 5071 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001447082974025804, + "loss": 3.7694, + "step": 5072 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001446142926063766, + "loss": 3.7455, + "step": 5073 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014452030592719756, + "loss": 3.737, + "step": 5074 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014442633738120087, + "loss": 3.8605, + "step": 5075 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001443323869845407, + "loss": 3.9079, + "step": 5076 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001442384547533682, + "loss": 3.8816, + "step": 5077 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014414454070383142, + "loss": 3.717, + "step": 5078 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014405064485207517, + "loss": 3.6431, + "step": 5079 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014395676721424145, + "loss": 3.9233, + "step": 5080 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014386290780646872, + "loss": 3.8311, + "step": 5081 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014376906664489265, + "loss": 3.8787, + "step": 5082 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014367524374564556, + "loss": 3.7456, + "step": 5083 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014358143912485672, + "loss": 3.8817, + "step": 5084 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001434876527986522, + "loss": 3.8497, + "step": 5085 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014339388478315496, + "loss": 3.87, + "step": 5086 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014330013509448468, + "loss": 3.8685, + "step": 5087 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014320640374875827, + "loss": 3.7326, + "step": 5088 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014311269076208903, + "loss": 3.8829, + "step": 5089 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014301899615058747, + "loss": 3.7961, + "step": 5090 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001429253199303607, + "loss": 3.8582, + "step": 5091 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014283166211751276, + "loss": 3.8273, + "step": 5092 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001427380227281445, + "loss": 3.5907, + "step": 5093 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014264440177835363, + "loss": 3.8071, + "step": 5094 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014255079928423455, + "loss": 3.9064, + "step": 5095 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014245721526187882, + "loss": 3.8977, + "step": 5096 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014236364972737447, + "loss": 3.9154, + "step": 5097 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014227010269680663, + "loss": 3.8088, + "step": 5098 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014217657418625707, + "loss": 3.7672, + "step": 5099 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001420830642118044, + "loss": 3.9052, + "step": 5100 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014198957278952406, + "loss": 3.8072, + "step": 5101 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014189609993548824, + "loss": 3.8472, + "step": 5102 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014180264566576617, + "loss": 3.8489, + "step": 5103 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001417092099964236, + "loss": 3.9005, + "step": 5104 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014161579294352333, + "loss": 3.8342, + "step": 5105 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001415223945231246, + "loss": 3.7913, + "step": 5106 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014142901475128395, + "loss": 3.8144, + "step": 5107 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014133565364405433, + "loss": 3.841, + "step": 5108 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001412423112174856, + "loss": 3.8766, + "step": 5109 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001411489874876243, + "loss": 3.8302, + "step": 5110 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014105568247051403, + "loss": 3.8046, + "step": 5111 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014096239618219492, + "loss": 3.8827, + "step": 5112 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014086912863870403, + "loss": 3.7287, + "step": 5113 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014077587985607504, + "loss": 3.9162, + "step": 5114 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014068264985033857, + "loss": 3.8533, + "step": 5115 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014058943863752178, + "loss": 3.6691, + "step": 5116 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001404962462336489, + "loss": 3.8757, + "step": 5117 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014040307265474086, + "loss": 3.7832, + "step": 5118 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014030991791681518, + "loss": 3.7595, + "step": 5119 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014021678203588627, + "loss": 3.7876, + "step": 5120 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014012366502796526, + "loss": 3.7268, + "step": 5121 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014003056690906, + "loss": 3.7985, + "step": 5122 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013993748769517507, + "loss": 3.9383, + "step": 5123 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013984442740231203, + "loss": 3.8217, + "step": 5124 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013975138604646888, + "loss": 3.9065, + "step": 5125 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013965836364364067, + "loss": 3.7197, + "step": 5126 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013956536020981897, + "loss": 3.6887, + "step": 5127 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001394723757609921, + "loss": 4.0084, + "step": 5128 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013937941031314516, + "loss": 3.8224, + "step": 5129 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013928646388226002, + "loss": 3.8512, + "step": 5130 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013919353648431516, + "loss": 3.8638, + "step": 5131 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013910062813528605, + "loss": 3.876, + "step": 5132 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001390077388511446, + "loss": 3.8528, + "step": 5133 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001389148686478595, + "loss": 3.8329, + "step": 5134 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013882201754139638, + "loss": 3.868, + "step": 5135 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001387291855477173, + "loss": 3.6783, + "step": 5136 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013863637268278123, + "loss": 3.7683, + "step": 5137 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001385435789625436, + "loss": 3.7679, + "step": 5138 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013845080440295698, + "loss": 3.8296, + "step": 5139 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013835804901997029, + "loss": 4.0001, + "step": 5140 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001382653128295292, + "loss": 3.747, + "step": 5141 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013817259584757619, + "loss": 3.844, + "step": 5142 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001380798980900503, + "loss": 3.915, + "step": 5143 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013798721957288747, + "loss": 3.9289, + "step": 5144 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001378945603120202, + "loss": 3.9076, + "step": 5145 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013780192032337752, + "loss": 3.8193, + "step": 5146 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013770929962288552, + "loss": 3.8796, + "step": 5147 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013761669822646676, + "loss": 3.6126, + "step": 5148 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001375241161500404, + "loss": 3.7534, + "step": 5149 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013743155340952242, + "loss": 3.7155, + "step": 5150 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013733901002082544, + "loss": 3.7919, + "step": 5151 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013724648599985857, + "loss": 3.8299, + "step": 5152 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013715398136252794, + "loss": 3.8232, + "step": 5153 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001370614961247362, + "loss": 3.984, + "step": 5154 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013696903030238262, + "loss": 3.7193, + "step": 5155 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013687658391136305, + "loss": 3.9145, + "step": 5156 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013678415696757016, + "loss": 3.7941, + "step": 5157 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013669174948689318, + "loss": 3.7396, + "step": 5158 + }, + { + "epoch": 0.66, + "learning_rate": 0.000136599361485218, + "loss": 3.8833, + "step": 5159 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001365069929784273, + "loss": 3.8085, + "step": 5160 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013641464398240021, + "loss": 3.836, + "step": 5161 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013632231451301256, + "loss": 3.8958, + "step": 5162 + }, + { + "epoch": 0.66, + "learning_rate": 0.000136230004586137, + "loss": 3.9697, + "step": 5163 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013613771421764254, + "loss": 3.8956, + "step": 5164 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013604544342339506, + "loss": 3.7922, + "step": 5165 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001359531922192569, + "loss": 3.7311, + "step": 5166 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001358609606210871, + "loss": 3.82, + "step": 5167 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013576874864474142, + "loss": 3.7838, + "step": 5168 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001356765563060721, + "loss": 3.8477, + "step": 5169 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013558438362092816, + "loss": 3.8478, + "step": 5170 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013549223060515503, + "loss": 3.7467, + "step": 5171 + }, + { + "epoch": 0.66, + "learning_rate": 0.000135400097274595, + "loss": 3.7967, + "step": 5172 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013530798364508678, + "loss": 3.9468, + "step": 5173 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013521588973246573, + "loss": 3.6938, + "step": 5174 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013512381555256403, + "loss": 3.7237, + "step": 5175 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001350317611212102, + "loss": 3.7099, + "step": 5176 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013493972645422942, + "loss": 3.7762, + "step": 5177 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013484771156744356, + "loss": 3.9263, + "step": 5178 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001347557164766711, + "loss": 3.8198, + "step": 5179 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013466374119772685, + "loss": 3.7999, + "step": 5180 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001345717857464226, + "loss": 3.8842, + "step": 5181 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013447985013856665, + "loss": 3.8151, + "step": 5182 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013438793438996365, + "loss": 3.7784, + "step": 5183 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013429603851641505, + "loss": 3.857, + "step": 5184 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001342041625337188, + "loss": 3.605, + "step": 5185 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013411230645766936, + "loss": 3.7224, + "step": 5186 + }, + { + "epoch": 0.66, + "learning_rate": 0.000134020470304058, + "loss": 3.834, + "step": 5187 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013392865408867223, + "loss": 3.8024, + "step": 5188 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001338368578272965, + "loss": 3.9132, + "step": 5189 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013374508153571153, + "loss": 3.6688, + "step": 5190 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013365332522969486, + "loss": 3.9424, + "step": 5191 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013356158892502038, + "loss": 3.8739, + "step": 5192 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013346987263745862, + "loss": 3.7626, + "step": 5193 + }, + { + "epoch": 0.66, + "learning_rate": 0.00013337817638277673, + "loss": 3.7741, + "step": 5194 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001332865001767382, + "loss": 3.717, + "step": 5195 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013319484403510345, + "loss": 3.7725, + "step": 5196 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013310320797362915, + "loss": 3.883, + "step": 5197 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013301159200806856, + "loss": 3.7737, + "step": 5198 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013291999615417147, + "loss": 3.9506, + "step": 5199 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013282842042768446, + "loss": 3.7772, + "step": 5200 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001327368648443503, + "loss": 3.8291, + "step": 5201 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013264532941990853, + "loss": 3.8698, + "step": 5202 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013255381417009502, + "loss": 3.7241, + "step": 5203 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001324623191106425, + "loss": 3.6949, + "step": 5204 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013237084425727995, + "loss": 3.9337, + "step": 5205 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013227938962573295, + "loss": 3.7836, + "step": 5206 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001321879552317236, + "loss": 3.7886, + "step": 5207 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013209654109097043, + "loss": 3.8771, + "step": 5208 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013200514721918883, + "loss": 3.6691, + "step": 5209 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013191377363209022, + "loss": 3.8298, + "step": 5210 + }, + { + "epoch": 0.67, + "learning_rate": 0.000131822420345383, + "loss": 3.7485, + "step": 5211 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013173108737477173, + "loss": 3.8268, + "step": 5212 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013163977473595767, + "loss": 3.9313, + "step": 5213 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013154848244463846, + "loss": 3.9035, + "step": 5214 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013145721051650833, + "loss": 3.8329, + "step": 5215 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013136595896725786, + "loss": 3.8637, + "step": 5216 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013127472781257439, + "loss": 3.8025, + "step": 5217 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001311835170681417, + "loss": 3.8244, + "step": 5218 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013109232674963982, + "loss": 3.8608, + "step": 5219 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001310011568727455, + "loss": 3.799, + "step": 5220 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013091000745313187, + "loss": 3.8088, + "step": 5221 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013081887850646857, + "loss": 3.7981, + "step": 5222 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001307277700484217, + "loss": 3.674, + "step": 5223 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001306366820946538, + "loss": 3.9577, + "step": 5224 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013054561466082411, + "loss": 3.7878, + "step": 5225 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013045456776258812, + "loss": 4.0177, + "step": 5226 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001303635414155977, + "loss": 3.8041, + "step": 5227 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013027253563550157, + "loss": 3.8869, + "step": 5228 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013018155043794454, + "loss": 3.6836, + "step": 5229 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013009058583856808, + "loss": 3.7782, + "step": 5230 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001299996418530099, + "loss": 3.874, + "step": 5231 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012990871849690455, + "loss": 3.7247, + "step": 5232 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001298178157858827, + "loss": 3.7936, + "step": 5233 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001297269337355716, + "loss": 3.7374, + "step": 5234 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001296360723615949, + "loss": 3.8866, + "step": 5235 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012954523167957267, + "loss": 3.902, + "step": 5236 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001294544117051216, + "loss": 3.9452, + "step": 5237 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012936361245385457, + "loss": 3.7427, + "step": 5238 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012927283394138122, + "loss": 3.8394, + "step": 5239 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001291820761833073, + "loss": 3.6473, + "step": 5240 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001290913391952351, + "loss": 3.7733, + "step": 5241 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012900062299276338, + "loss": 3.9145, + "step": 5242 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012890992759148734, + "loss": 3.9172, + "step": 5243 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012881925300699853, + "loss": 3.8198, + "step": 5244 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012872859925488488, + "loss": 3.8418, + "step": 5245 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001286379663507309, + "loss": 3.7478, + "step": 5246 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012854735431011758, + "loss": 3.7354, + "step": 5247 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012845676314862203, + "loss": 3.8125, + "step": 5248 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012836619288181795, + "loss": 3.7118, + "step": 5249 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012827564352527543, + "loss": 3.8992, + "step": 5250 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012818511509456092, + "loss": 3.8037, + "step": 5251 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001280946076052372, + "loss": 3.7819, + "step": 5252 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012800412107286384, + "loss": 3.8774, + "step": 5253 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012791365551299624, + "loss": 3.7757, + "step": 5254 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012782321094118672, + "loss": 3.8651, + "step": 5255 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001277327873729836, + "loss": 3.8014, + "step": 5256 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001276423848239318, + "loss": 3.8437, + "step": 5257 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001275520033095725, + "loss": 3.7908, + "step": 5258 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012746164284544332, + "loss": 3.8933, + "step": 5259 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012737130344707843, + "loss": 3.8598, + "step": 5260 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012728098513000805, + "loss": 3.7507, + "step": 5261 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012719068790975906, + "loss": 3.88, + "step": 5262 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001271004118018545, + "loss": 3.7589, + "step": 5263 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012701015682181385, + "loss": 3.789, + "step": 5264 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012691992298515317, + "loss": 3.7968, + "step": 5265 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001268297103073846, + "loss": 3.8561, + "step": 5266 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012673951880401663, + "loss": 3.8456, + "step": 5267 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012664934849055442, + "loss": 3.9313, + "step": 5268 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012655919938249922, + "loss": 3.6814, + "step": 5269 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001264690714953487, + "loss": 3.8568, + "step": 5270 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012637896484459687, + "loss": 3.9018, + "step": 5271 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012628887944573413, + "loss": 3.8118, + "step": 5272 + }, + { + "epoch": 0.67, + "learning_rate": 0.00012619881531424713, + "loss": 3.7677, + "step": 5273 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012610877246561897, + "loss": 3.8426, + "step": 5274 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001260187509153292, + "loss": 3.8082, + "step": 5275 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012592875067885345, + "loss": 3.7778, + "step": 5276 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001258387717716638, + "loss": 3.7894, + "step": 5277 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012574881420922873, + "loss": 3.7139, + "step": 5278 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012565887800701291, + "loss": 3.7601, + "step": 5279 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012556896318047733, + "loss": 3.8518, + "step": 5280 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012547906974507968, + "loss": 3.8922, + "step": 5281 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012538919771627334, + "loss": 3.7884, + "step": 5282 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012529934710950864, + "loss": 3.8757, + "step": 5283 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012520951794023184, + "loss": 3.9187, + "step": 5284 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012511971022388557, + "loss": 3.8281, + "step": 5285 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001250299239759089, + "loss": 3.9924, + "step": 5286 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012494015921173704, + "loss": 3.8527, + "step": 5287 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012485041594680155, + "loss": 3.7974, + "step": 5288 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001247606941965305, + "loss": 3.8827, + "step": 5289 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012467099397634802, + "loss": 3.8734, + "step": 5290 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012458131530167452, + "loss": 3.8639, + "step": 5291 + }, + { + "epoch": 0.68, + "learning_rate": 0.000124491658187927, + "loss": 3.8561, + "step": 5292 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012440202265051844, + "loss": 3.8842, + "step": 5293 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012431240870485824, + "loss": 3.7677, + "step": 5294 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012422281636635202, + "loss": 3.8532, + "step": 5295 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012413324565040186, + "loss": 3.834, + "step": 5296 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012404369657240596, + "loss": 3.9817, + "step": 5297 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001239541691477588, + "loss": 3.8339, + "step": 5298 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012386466339185125, + "loss": 3.953, + "step": 5299 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012377517932007033, + "loss": 3.8358, + "step": 5300 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012368571694779934, + "loss": 3.8847, + "step": 5301 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012359627629041805, + "loss": 3.7142, + "step": 5302 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012350685736330216, + "loss": 3.8399, + "step": 5303 + }, + { + "epoch": 0.68, + "learning_rate": 0.000123417460181824, + "loss": 3.7674, + "step": 5304 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012332808476135193, + "loss": 3.7219, + "step": 5305 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012323873111725063, + "loss": 3.8875, + "step": 5306 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012314939926488095, + "loss": 3.8816, + "step": 5307 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001230600892196001, + "loss": 3.8469, + "step": 5308 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012297080099676146, + "loss": 3.9018, + "step": 5309 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001228815346117148, + "loss": 3.7831, + "step": 5310 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012279229007980605, + "loss": 3.8952, + "step": 5311 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001227030674163774, + "loss": 3.8285, + "step": 5312 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012261386663676722, + "loss": 3.8406, + "step": 5313 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012252468775631012, + "loss": 3.8662, + "step": 5314 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012243553079033703, + "loss": 3.8545, + "step": 5315 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012234639575417497, + "loss": 3.9497, + "step": 5316 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012225728266314746, + "loss": 3.7928, + "step": 5317 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012216819153257398, + "loss": 3.6717, + "step": 5318 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012207912237777022, + "loss": 3.8895, + "step": 5319 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001219900752140484, + "loss": 3.7012, + "step": 5320 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001219010500567167, + "loss": 3.7881, + "step": 5321 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012181204692107952, + "loss": 3.9193, + "step": 5322 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012172306582243756, + "loss": 3.9214, + "step": 5323 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001216341067760876, + "loss": 3.754, + "step": 5324 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012154516979732295, + "loss": 3.6506, + "step": 5325 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012145625490143275, + "loss": 3.8918, + "step": 5326 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012136736210370255, + "loss": 3.719, + "step": 5327 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012127849141941396, + "loss": 3.7609, + "step": 5328 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012118964286384506, + "loss": 3.7746, + "step": 5329 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012110081645226986, + "loss": 3.816, + "step": 5330 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012101201219995853, + "loss": 3.6948, + "step": 5331 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012092323012217774, + "loss": 3.8873, + "step": 5332 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012083447023419009, + "loss": 3.8575, + "step": 5333 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012074573255125442, + "loss": 3.6837, + "step": 5334 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012065701708862578, + "loss": 3.8263, + "step": 5335 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012056832386155536, + "loss": 3.8019, + "step": 5336 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001204796528852905, + "loss": 3.7033, + "step": 5337 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001203910041750749, + "loss": 3.8669, + "step": 5338 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012030237774614816, + "loss": 3.9708, + "step": 5339 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012021377361374636, + "loss": 3.7183, + "step": 5340 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001201251917931015, + "loss": 3.8343, + "step": 5341 + }, + { + "epoch": 0.68, + "learning_rate": 0.00012003663229944178, + "loss": 3.8254, + "step": 5342 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011994809514799166, + "loss": 3.8522, + "step": 5343 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011985958035397169, + "loss": 3.6948, + "step": 5344 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011977108793259845, + "loss": 3.7783, + "step": 5345 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011968261789908502, + "loss": 3.7825, + "step": 5346 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011959417026864025, + "loss": 3.8865, + "step": 5347 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011950574505646952, + "loss": 3.8958, + "step": 5348 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011941734227777403, + "loss": 3.8584, + "step": 5349 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011932896194775125, + "loss": 3.8334, + "step": 5350 + }, + { + "epoch": 0.68, + "learning_rate": 0.00011924060408159477, + "loss": 3.9825, + "step": 5351 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011915226869449425, + "loss": 3.8001, + "step": 5352 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011906395580163576, + "loss": 3.771, + "step": 5353 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001189756654182012, + "loss": 3.8619, + "step": 5354 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011888739755936873, + "loss": 3.9401, + "step": 5355 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011879915224031249, + "loss": 3.9106, + "step": 5356 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001187109294762031, + "loss": 3.7929, + "step": 5357 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011862272928220696, + "loss": 3.8168, + "step": 5358 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011853455167348673, + "loss": 3.8195, + "step": 5359 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011844639666520105, + "loss": 3.8452, + "step": 5360 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011835826427250496, + "loss": 3.9046, + "step": 5361 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011827015451054937, + "loss": 3.7875, + "step": 5362 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011818206739448137, + "loss": 3.7839, + "step": 5363 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011809400293944414, + "loss": 3.6961, + "step": 5364 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011800596116057688, + "loss": 3.6733, + "step": 5365 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011791794207301524, + "loss": 3.8426, + "step": 5366 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011782994569189045, + "loss": 3.7794, + "step": 5367 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011774197203233037, + "loss": 3.8863, + "step": 5368 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011765402110945852, + "loss": 3.7505, + "step": 5369 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011756609293839477, + "loss": 3.7847, + "step": 5370 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011747818753425493, + "loss": 3.7177, + "step": 5371 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011739030491215097, + "loss": 3.7509, + "step": 5372 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011730244508719087, + "loss": 3.8133, + "step": 5373 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011721460807447889, + "loss": 3.7951, + "step": 5374 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001171267938891151, + "loss": 3.7458, + "step": 5375 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001170390025461959, + "loss": 3.8332, + "step": 5376 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011695123406081359, + "loss": 3.9078, + "step": 5377 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011686348844805659, + "loss": 3.8717, + "step": 5378 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011677576572300935, + "loss": 3.9072, + "step": 5379 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011668806590075248, + "loss": 3.8012, + "step": 5380 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011660038899636247, + "loss": 3.8732, + "step": 5381 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011651273502491216, + "loss": 3.8475, + "step": 5382 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011642510400147025, + "loss": 3.8047, + "step": 5383 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011633749594110139, + "loss": 3.7558, + "step": 5384 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011624991085886661, + "loss": 3.8303, + "step": 5385 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001161623487698227, + "loss": 3.7953, + "step": 5386 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011607480968902264, + "loss": 3.8116, + "step": 5387 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001159872936315153, + "loss": 3.716, + "step": 5388 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011589980061234587, + "loss": 3.7232, + "step": 5389 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011581233064655536, + "loss": 3.7958, + "step": 5390 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011572488374918083, + "loss": 3.9251, + "step": 5391 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011563745993525543, + "loss": 3.8178, + "step": 5392 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011555005921980825, + "loss": 3.8644, + "step": 5393 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011546268161786466, + "loss": 3.6991, + "step": 5394 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001153753271444458, + "loss": 3.873, + "step": 5395 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011528799581456878, + "loss": 3.7316, + "step": 5396 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011520068764324712, + "loss": 3.8557, + "step": 5397 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011511340264548997, + "loss": 3.9291, + "step": 5398 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011502614083630264, + "loss": 3.9376, + "step": 5399 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011493890223068646, + "loss": 3.8908, + "step": 5400 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011485168684363876, + "loss": 3.8084, + "step": 5401 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011476449469015276, + "loss": 3.918, + "step": 5402 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001146773257852179, + "loss": 3.8447, + "step": 5403 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011459018014381963, + "loss": 3.9126, + "step": 5404 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001145030577809392, + "loss": 3.816, + "step": 5405 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011441595871155397, + "loss": 3.8329, + "step": 5406 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011432888295063723, + "loss": 3.7933, + "step": 5407 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011424183051315837, + "loss": 3.6948, + "step": 5408 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011415480141408258, + "loss": 3.7276, + "step": 5409 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011406779566837139, + "loss": 3.7967, + "step": 5410 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011398081329098198, + "loss": 3.8496, + "step": 5411 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011389385429686752, + "loss": 3.6666, + "step": 5412 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001138069187009775, + "loss": 3.7422, + "step": 5413 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011372000651825703, + "loss": 3.8537, + "step": 5414 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011363311776364735, + "loss": 3.8568, + "step": 5415 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001135462524520856, + "loss": 3.8115, + "step": 5416 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001134594105985049, + "loss": 3.8126, + "step": 5417 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011337259221783453, + "loss": 3.7973, + "step": 5418 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011328579732499944, + "loss": 3.8261, + "step": 5419 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011319902593492074, + "loss": 3.8509, + "step": 5420 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011311227806251531, + "loss": 3.7266, + "step": 5421 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011302555372269633, + "loss": 3.7749, + "step": 5422 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011293885293037259, + "loss": 3.9055, + "step": 5423 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001128521757004489, + "loss": 3.7346, + "step": 5424 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011276552204782625, + "loss": 3.9381, + "step": 5425 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011267889198740131, + "loss": 3.8309, + "step": 5426 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001125922855340668, + "loss": 3.6812, + "step": 5427 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001125057027027114, + "loss": 3.5822, + "step": 5428 + }, + { + "epoch": 0.69, + "learning_rate": 0.00011241914350821967, + "loss": 3.891, + "step": 5429 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011233260796547201, + "loss": 3.8618, + "step": 5430 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011224609608934505, + "loss": 3.7978, + "step": 5431 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011215960789471125, + "loss": 3.7817, + "step": 5432 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011207314339643884, + "loss": 3.9724, + "step": 5433 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011198670260939206, + "loss": 3.6747, + "step": 5434 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011190028554843107, + "loss": 3.7108, + "step": 5435 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011181389222841201, + "loss": 3.7888, + "step": 5436 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011172752266418684, + "loss": 3.8179, + "step": 5437 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011164117687060346, + "loss": 3.8427, + "step": 5438 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011155485486250574, + "loss": 3.7356, + "step": 5439 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011146855665473355, + "loss": 3.9233, + "step": 5440 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011138228226212249, + "loss": 3.8695, + "step": 5441 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011129603169950409, + "loss": 3.928, + "step": 5442 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011120980498170583, + "loss": 3.8911, + "step": 5443 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001111236021235511, + "loss": 3.8696, + "step": 5444 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011103742313985906, + "loss": 3.9523, + "step": 5445 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011095126804544505, + "loss": 3.701, + "step": 5446 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001108651368551201, + "loss": 3.9522, + "step": 5447 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011077902958369106, + "loss": 3.8038, + "step": 5448 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011069294624596077, + "loss": 3.6743, + "step": 5449 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001106068868567281, + "loss": 3.8473, + "step": 5450 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011052085143078752, + "loss": 3.807, + "step": 5451 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011043483998292949, + "loss": 3.7283, + "step": 5452 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011034885252794056, + "loss": 3.758, + "step": 5453 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011026288908060284, + "loss": 3.8091, + "step": 5454 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011017694965569447, + "loss": 3.777, + "step": 5455 + }, + { + "epoch": 0.7, + "learning_rate": 0.00011009103426798939, + "loss": 3.8185, + "step": 5456 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001100051429322575, + "loss": 3.7685, + "step": 5457 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010991927566326443, + "loss": 3.8078, + "step": 5458 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010983343247577187, + "loss": 3.7709, + "step": 5459 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010974761338453718, + "loss": 3.7374, + "step": 5460 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010966181840431375, + "loss": 3.8706, + "step": 5461 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001095760475498507, + "loss": 3.8547, + "step": 5462 + }, + { + "epoch": 0.7, + "learning_rate": 0.000109490300835893, + "loss": 3.7919, + "step": 5463 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010940457827718151, + "loss": 3.7817, + "step": 5464 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010931887988845294, + "loss": 3.8442, + "step": 5465 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010923320568443972, + "loss": 3.8254, + "step": 5466 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001091475556798704, + "loss": 3.8622, + "step": 5467 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010906192988946922, + "loss": 3.796, + "step": 5468 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010897632832795618, + "loss": 3.712, + "step": 5469 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001088907510100472, + "loss": 3.7616, + "step": 5470 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010880519795045399, + "loss": 3.7969, + "step": 5471 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010871966916388415, + "loss": 3.7606, + "step": 5472 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010863416466504092, + "loss": 3.8026, + "step": 5473 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010854868446862373, + "loss": 3.914, + "step": 5474 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010846322858932756, + "loss": 3.7687, + "step": 5475 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010837779704184311, + "loss": 3.8588, + "step": 5476 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010829238984085727, + "loss": 3.7978, + "step": 5477 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010820700700105244, + "loss": 3.7868, + "step": 5478 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010812164853710687, + "loss": 3.7593, + "step": 5479 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010803631446369477, + "loss": 3.8112, + "step": 5480 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010795100479548586, + "loss": 3.7992, + "step": 5481 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010786571954714613, + "loss": 3.8357, + "step": 5482 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010778045873333695, + "loss": 3.8775, + "step": 5483 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010769522236871568, + "loss": 3.7146, + "step": 5484 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010761001046793539, + "loss": 3.7934, + "step": 5485 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010752482304564495, + "loss": 3.7715, + "step": 5486 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010743966011648926, + "loss": 3.8349, + "step": 5487 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001073545216951086, + "loss": 3.8459, + "step": 5488 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010726940779613942, + "loss": 3.6793, + "step": 5489 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001071843184342137, + "loss": 3.8648, + "step": 5490 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010709925362395933, + "loss": 3.7756, + "step": 5491 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001070142133799999, + "loss": 3.8001, + "step": 5492 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010692919771695483, + "loss": 3.8064, + "step": 5493 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001068442066494392, + "loss": 3.851, + "step": 5494 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010675924019206415, + "loss": 3.8049, + "step": 5495 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010667429835943617, + "loss": 3.9761, + "step": 5496 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010658938116615802, + "loss": 3.9133, + "step": 5497 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010650448862682777, + "loss": 3.8371, + "step": 5498 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010641962075603948, + "loss": 3.7212, + "step": 5499 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010633477756838292, + "loss": 3.6665, + "step": 5500 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001062499590784436, + "loss": 3.8421, + "step": 5501 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010616516530080269, + "loss": 3.7905, + "step": 5502 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010608039625003746, + "loss": 3.7654, + "step": 5503 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010599565194072047, + "loss": 3.9373, + "step": 5504 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010591093238742047, + "loss": 3.7846, + "step": 5505 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010582623760470159, + "loss": 3.8808, + "step": 5506 + }, + { + "epoch": 0.7, + "learning_rate": 0.00010574156760712389, + "loss": 3.7954, + "step": 5507 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010565692240924307, + "loss": 3.8886, + "step": 5508 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001055723020256106, + "loss": 3.8399, + "step": 5509 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010548770647077385, + "loss": 3.7562, + "step": 5510 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010540313575927568, + "loss": 3.7556, + "step": 5511 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010531858990565477, + "loss": 3.7683, + "step": 5512 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010523406892444549, + "loss": 3.8894, + "step": 5513 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010514957283017809, + "loss": 3.6843, + "step": 5514 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001050651016373784, + "loss": 3.8219, + "step": 5515 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010498065536056794, + "loss": 3.8949, + "step": 5516 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010489623401426396, + "loss": 3.8311, + "step": 5517 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010481183761297961, + "loss": 3.8507, + "step": 5518 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010472746617122356, + "loss": 3.7113, + "step": 5519 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010464311970350021, + "loss": 3.5195, + "step": 5520 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010455879822430969, + "loss": 3.7667, + "step": 5521 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010447450174814787, + "loss": 3.8112, + "step": 5522 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001043902302895062, + "loss": 3.7515, + "step": 5523 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010430598386287199, + "loss": 3.6899, + "step": 5524 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010422176248272825, + "loss": 3.9529, + "step": 5525 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010413756616355358, + "loss": 3.8027, + "step": 5526 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010405339491982224, + "loss": 3.6847, + "step": 5527 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010396924876600428, + "loss": 3.9386, + "step": 5528 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010388512771656539, + "loss": 3.717, + "step": 5529 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010380103178596686, + "loss": 3.9159, + "step": 5530 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010371696098866596, + "loss": 3.8728, + "step": 5531 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010363291533911523, + "loss": 3.8557, + "step": 5532 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010354889485176328, + "loss": 3.8127, + "step": 5533 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001034648995410541, + "loss": 3.8082, + "step": 5534 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010338092942142746, + "loss": 3.971, + "step": 5535 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001032969845073188, + "loss": 3.8159, + "step": 5536 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010321306481315926, + "loss": 3.75, + "step": 5537 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010312917035337546, + "loss": 3.8749, + "step": 5538 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010304530114239008, + "loss": 3.8087, + "step": 5539 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010296145719462105, + "loss": 3.7361, + "step": 5540 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010287763852448207, + "loss": 3.8425, + "step": 5541 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010279384514638268, + "loss": 3.7133, + "step": 5542 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010271007707472788, + "loss": 3.7999, + "step": 5543 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010262633432391838, + "loss": 3.7309, + "step": 5544 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001025426169083504, + "loss": 3.82, + "step": 5545 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010245892484241615, + "loss": 3.8344, + "step": 5546 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010237525814050316, + "loss": 3.8056, + "step": 5547 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001022916168169947, + "loss": 3.6759, + "step": 5548 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010220800088626969, + "loss": 3.6524, + "step": 5549 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010212441036270271, + "loss": 3.8776, + "step": 5550 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001020408452606638, + "loss": 3.7679, + "step": 5551 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010195730559451893, + "loss": 3.7081, + "step": 5552 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010187379137862945, + "loss": 3.8167, + "step": 5553 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010179030262735254, + "loss": 3.8453, + "step": 5554 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010170683935504077, + "loss": 3.7957, + "step": 5555 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010162340157604252, + "loss": 3.7665, + "step": 5556 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010153998930470165, + "loss": 3.7508, + "step": 5557 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010145660255535771, + "loss": 3.8518, + "step": 5558 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010137324134234577, + "loss": 3.9196, + "step": 5559 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010128990567999666, + "loss": 3.7772, + "step": 5560 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010120659558263687, + "loss": 3.8266, + "step": 5561 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010112331106458825, + "loss": 3.81, + "step": 5562 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010104005214016837, + "loss": 3.8163, + "step": 5563 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010095681882369042, + "loss": 3.9581, + "step": 5564 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010087361112946319, + "loss": 3.824, + "step": 5565 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010079042907179092, + "loss": 3.8172, + "step": 5566 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001007072726649738, + "loss": 3.7594, + "step": 5567 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010062414192330724, + "loss": 3.7946, + "step": 5568 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010054103686108229, + "loss": 3.8902, + "step": 5569 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001004579574925859, + "loss": 3.7703, + "step": 5570 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010037490383210024, + "loss": 3.9799, + "step": 5571 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001002918758939032, + "loss": 3.8509, + "step": 5572 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001002088736922683, + "loss": 3.8267, + "step": 5573 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010012589724146443, + "loss": 3.928, + "step": 5574 + }, + { + "epoch": 0.71, + "learning_rate": 0.00010004294655575639, + "loss": 3.8604, + "step": 5575 + }, + { + "epoch": 0.71, + "learning_rate": 9.996002164940429e-05, + "loss": 3.9335, + "step": 5576 + }, + { + "epoch": 0.71, + "learning_rate": 9.98771225366639e-05, + "loss": 3.8147, + "step": 5577 + }, + { + "epoch": 0.71, + "learning_rate": 9.979424923178643e-05, + "loss": 3.826, + "step": 5578 + }, + { + "epoch": 0.71, + "learning_rate": 9.971140174901891e-05, + "loss": 3.7667, + "step": 5579 + }, + { + "epoch": 0.71, + "learning_rate": 9.962858010260376e-05, + "loss": 3.6826, + "step": 5580 + }, + { + "epoch": 0.71, + "learning_rate": 9.954578430677882e-05, + "loss": 3.7642, + "step": 5581 + }, + { + "epoch": 0.71, + "learning_rate": 9.94630143757779e-05, + "loss": 3.9135, + "step": 5582 + }, + { + "epoch": 0.71, + "learning_rate": 9.938027032382996e-05, + "loss": 3.8731, + "step": 5583 + }, + { + "epoch": 0.71, + "learning_rate": 9.92975521651597e-05, + "loss": 3.839, + "step": 5584 + }, + { + "epoch": 0.71, + "learning_rate": 9.92148599139873e-05, + "loss": 3.8638, + "step": 5585 + }, + { + "epoch": 0.72, + "learning_rate": 9.913219358452855e-05, + "loss": 3.8374, + "step": 5586 + }, + { + "epoch": 0.72, + "learning_rate": 9.904955319099462e-05, + "loss": 3.5474, + "step": 5587 + }, + { + "epoch": 0.72, + "learning_rate": 9.896693874759257e-05, + "loss": 3.8179, + "step": 5588 + }, + { + "epoch": 0.72, + "learning_rate": 9.888435026852458e-05, + "loss": 3.795, + "step": 5589 + }, + { + "epoch": 0.72, + "learning_rate": 9.880178776798876e-05, + "loss": 3.8618, + "step": 5590 + }, + { + "epoch": 0.72, + "learning_rate": 9.871925126017845e-05, + "loss": 3.7563, + "step": 5591 + }, + { + "epoch": 0.72, + "learning_rate": 9.863674075928267e-05, + "loss": 3.6676, + "step": 5592 + }, + { + "epoch": 0.72, + "learning_rate": 9.855425627948587e-05, + "loss": 3.9224, + "step": 5593 + }, + { + "epoch": 0.72, + "learning_rate": 9.847179783496815e-05, + "loss": 3.7931, + "step": 5594 + }, + { + "epoch": 0.72, + "learning_rate": 9.838936543990495e-05, + "loss": 3.9554, + "step": 5595 + }, + { + "epoch": 0.72, + "learning_rate": 9.830695910846754e-05, + "loss": 3.8124, + "step": 5596 + }, + { + "epoch": 0.72, + "learning_rate": 9.822457885482237e-05, + "loss": 3.8189, + "step": 5597 + }, + { + "epoch": 0.72, + "learning_rate": 9.814222469313166e-05, + "loss": 3.7622, + "step": 5598 + }, + { + "epoch": 0.72, + "learning_rate": 9.805989663755308e-05, + "loss": 3.8276, + "step": 5599 + }, + { + "epoch": 0.72, + "learning_rate": 9.797759470223966e-05, + "loss": 3.8183, + "step": 5600 + }, + { + "epoch": 0.72, + "learning_rate": 9.789531890134012e-05, + "loss": 3.8115, + "step": 5601 + }, + { + "epoch": 0.72, + "learning_rate": 9.781306924899852e-05, + "loss": 3.8625, + "step": 5602 + }, + { + "epoch": 0.72, + "learning_rate": 9.773084575935471e-05, + "loss": 3.728, + "step": 5603 + }, + { + "epoch": 0.72, + "learning_rate": 9.764864844654379e-05, + "loss": 3.8426, + "step": 5604 + }, + { + "epoch": 0.72, + "learning_rate": 9.756647732469636e-05, + "loss": 3.6725, + "step": 5605 + }, + { + "epoch": 0.72, + "learning_rate": 9.748433240793858e-05, + "loss": 3.9449, + "step": 5606 + }, + { + "epoch": 0.72, + "learning_rate": 9.740221371039226e-05, + "loss": 3.8359, + "step": 5607 + }, + { + "epoch": 0.72, + "learning_rate": 9.732012124617449e-05, + "loss": 3.804, + "step": 5608 + }, + { + "epoch": 0.72, + "learning_rate": 9.723805502939786e-05, + "loss": 3.9114, + "step": 5609 + }, + { + "epoch": 0.72, + "learning_rate": 9.715601507417046e-05, + "loss": 3.8014, + "step": 5610 + }, + { + "epoch": 0.72, + "learning_rate": 9.70740013945961e-05, + "loss": 3.735, + "step": 5611 + }, + { + "epoch": 0.72, + "learning_rate": 9.699201400477372e-05, + "loss": 3.7003, + "step": 5612 + }, + { + "epoch": 0.72, + "learning_rate": 9.691005291879801e-05, + "loss": 3.7938, + "step": 5613 + }, + { + "epoch": 0.72, + "learning_rate": 9.682811815075895e-05, + "loss": 3.7965, + "step": 5614 + }, + { + "epoch": 0.72, + "learning_rate": 9.674620971474202e-05, + "loss": 3.5874, + "step": 5615 + }, + { + "epoch": 0.72, + "learning_rate": 9.666432762482838e-05, + "loss": 3.8107, + "step": 5616 + }, + { + "epoch": 0.72, + "learning_rate": 9.658247189509436e-05, + "loss": 3.7729, + "step": 5617 + }, + { + "epoch": 0.72, + "learning_rate": 9.650064253961208e-05, + "loss": 3.922, + "step": 5618 + }, + { + "epoch": 0.72, + "learning_rate": 9.641883957244887e-05, + "loss": 3.858, + "step": 5619 + }, + { + "epoch": 0.72, + "learning_rate": 9.633706300766759e-05, + "loss": 3.8946, + "step": 5620 + }, + { + "epoch": 0.72, + "learning_rate": 9.62553128593266e-05, + "loss": 3.9102, + "step": 5621 + }, + { + "epoch": 0.72, + "learning_rate": 9.617358914147969e-05, + "loss": 3.6979, + "step": 5622 + }, + { + "epoch": 0.72, + "learning_rate": 9.609189186817604e-05, + "loss": 3.7835, + "step": 5623 + }, + { + "epoch": 0.72, + "learning_rate": 9.60102210534605e-05, + "loss": 3.896, + "step": 5624 + }, + { + "epoch": 0.72, + "learning_rate": 9.59285767113731e-05, + "loss": 3.8452, + "step": 5625 + }, + { + "epoch": 0.72, + "learning_rate": 9.584695885594957e-05, + "loss": 3.8408, + "step": 5626 + }, + { + "epoch": 0.72, + "learning_rate": 9.576536750122094e-05, + "loss": 3.6536, + "step": 5627 + }, + { + "epoch": 0.72, + "learning_rate": 9.568380266121366e-05, + "loss": 3.7545, + "step": 5628 + }, + { + "epoch": 0.72, + "learning_rate": 9.56022643499497e-05, + "loss": 3.8362, + "step": 5629 + }, + { + "epoch": 0.72, + "learning_rate": 9.55207525814464e-05, + "loss": 3.9063, + "step": 5630 + }, + { + "epoch": 0.72, + "learning_rate": 9.543926736971656e-05, + "loss": 3.8351, + "step": 5631 + }, + { + "epoch": 0.72, + "learning_rate": 9.535780872876857e-05, + "loss": 3.8878, + "step": 5632 + }, + { + "epoch": 0.72, + "learning_rate": 9.5276376672606e-05, + "loss": 3.7697, + "step": 5633 + }, + { + "epoch": 0.72, + "learning_rate": 9.519497121522791e-05, + "loss": 3.8612, + "step": 5634 + }, + { + "epoch": 0.72, + "learning_rate": 9.5113592370629e-05, + "loss": 3.936, + "step": 5635 + }, + { + "epoch": 0.72, + "learning_rate": 9.503224015279916e-05, + "loss": 3.7479, + "step": 5636 + }, + { + "epoch": 0.72, + "learning_rate": 9.49509145757238e-05, + "loss": 3.7652, + "step": 5637 + }, + { + "epoch": 0.72, + "learning_rate": 9.48696156533836e-05, + "loss": 3.8449, + "step": 5638 + }, + { + "epoch": 0.72, + "learning_rate": 9.478834339975498e-05, + "loss": 3.8171, + "step": 5639 + }, + { + "epoch": 0.72, + "learning_rate": 9.470709782880952e-05, + "loss": 3.8144, + "step": 5640 + }, + { + "epoch": 0.72, + "learning_rate": 9.462587895451424e-05, + "loss": 3.8403, + "step": 5641 + }, + { + "epoch": 0.72, + "learning_rate": 9.454468679083161e-05, + "loss": 3.8675, + "step": 5642 + }, + { + "epoch": 0.72, + "learning_rate": 9.446352135171943e-05, + "loss": 3.5885, + "step": 5643 + }, + { + "epoch": 0.72, + "learning_rate": 9.438238265113116e-05, + "loss": 3.7873, + "step": 5644 + }, + { + "epoch": 0.72, + "learning_rate": 9.43012707030153e-05, + "loss": 3.906, + "step": 5645 + }, + { + "epoch": 0.72, + "learning_rate": 9.422018552131611e-05, + "loss": 3.9228, + "step": 5646 + }, + { + "epoch": 0.72, + "learning_rate": 9.413912711997297e-05, + "loss": 3.9561, + "step": 5647 + }, + { + "epoch": 0.72, + "learning_rate": 9.405809551292077e-05, + "loss": 3.6751, + "step": 5648 + }, + { + "epoch": 0.72, + "learning_rate": 9.39770907140898e-05, + "loss": 3.9274, + "step": 5649 + }, + { + "epoch": 0.72, + "learning_rate": 9.38961127374057e-05, + "loss": 3.845, + "step": 5650 + }, + { + "epoch": 0.72, + "learning_rate": 9.381516159678955e-05, + "loss": 3.7293, + "step": 5651 + }, + { + "epoch": 0.72, + "learning_rate": 9.373423730615766e-05, + "loss": 3.9177, + "step": 5652 + }, + { + "epoch": 0.72, + "learning_rate": 9.365333987942199e-05, + "loss": 3.6666, + "step": 5653 + }, + { + "epoch": 0.72, + "learning_rate": 9.357246933048977e-05, + "loss": 3.9857, + "step": 5654 + }, + { + "epoch": 0.72, + "learning_rate": 9.349162567326355e-05, + "loss": 3.7522, + "step": 5655 + }, + { + "epoch": 0.72, + "learning_rate": 9.34108089216413e-05, + "loss": 3.8806, + "step": 5656 + }, + { + "epoch": 0.72, + "learning_rate": 9.333001908951633e-05, + "loss": 3.8505, + "step": 5657 + }, + { + "epoch": 0.72, + "learning_rate": 9.324925619077732e-05, + "loss": 4.1017, + "step": 5658 + }, + { + "epoch": 0.72, + "learning_rate": 9.316852023930832e-05, + "loss": 3.6103, + "step": 5659 + }, + { + "epoch": 0.72, + "learning_rate": 9.308781124898894e-05, + "loss": 3.7798, + "step": 5660 + }, + { + "epoch": 0.72, + "learning_rate": 9.300712923369387e-05, + "loss": 3.8092, + "step": 5661 + }, + { + "epoch": 0.72, + "learning_rate": 9.29264742072932e-05, + "loss": 3.6405, + "step": 5662 + }, + { + "epoch": 0.72, + "learning_rate": 9.284584618365266e-05, + "loss": 3.9397, + "step": 5663 + }, + { + "epoch": 0.72, + "learning_rate": 9.276524517663306e-05, + "loss": 3.755, + "step": 5664 + }, + { + "epoch": 0.73, + "learning_rate": 9.268467120009063e-05, + "loss": 3.8751, + "step": 5665 + }, + { + "epoch": 0.73, + "learning_rate": 9.26041242678769e-05, + "loss": 3.8151, + "step": 5666 + }, + { + "epoch": 0.73, + "learning_rate": 9.252360439383897e-05, + "loss": 3.8207, + "step": 5667 + }, + { + "epoch": 0.73, + "learning_rate": 9.244311159181906e-05, + "loss": 3.7627, + "step": 5668 + }, + { + "epoch": 0.73, + "learning_rate": 9.236264587565485e-05, + "loss": 3.9344, + "step": 5669 + }, + { + "epoch": 0.73, + "learning_rate": 9.22822072591793e-05, + "loss": 3.7443, + "step": 5670 + }, + { + "epoch": 0.73, + "learning_rate": 9.220179575622065e-05, + "loss": 3.9654, + "step": 5671 + }, + { + "epoch": 0.73, + "learning_rate": 9.212141138060273e-05, + "loss": 3.9246, + "step": 5672 + }, + { + "epoch": 0.73, + "learning_rate": 9.20410541461445e-05, + "loss": 3.9061, + "step": 5673 + }, + { + "epoch": 0.73, + "learning_rate": 9.19607240666602e-05, + "loss": 3.7868, + "step": 5674 + }, + { + "epoch": 0.73, + "learning_rate": 9.188042115595966e-05, + "loss": 3.8226, + "step": 5675 + }, + { + "epoch": 0.73, + "learning_rate": 9.180014542784779e-05, + "loss": 3.8636, + "step": 5676 + }, + { + "epoch": 0.73, + "learning_rate": 9.171989689612495e-05, + "loss": 3.7827, + "step": 5677 + }, + { + "epoch": 0.73, + "learning_rate": 9.163967557458675e-05, + "loss": 3.6058, + "step": 5678 + }, + { + "epoch": 0.73, + "learning_rate": 9.155948147702419e-05, + "loss": 3.7316, + "step": 5679 + }, + { + "epoch": 0.73, + "learning_rate": 9.14793146172235e-05, + "loss": 3.9097, + "step": 5680 + }, + { + "epoch": 0.73, + "learning_rate": 9.139917500896635e-05, + "loss": 3.8002, + "step": 5681 + }, + { + "epoch": 0.73, + "learning_rate": 9.131906266602977e-05, + "loss": 3.8136, + "step": 5682 + }, + { + "epoch": 0.73, + "learning_rate": 9.123897760218589e-05, + "loss": 3.8889, + "step": 5683 + }, + { + "epoch": 0.73, + "learning_rate": 9.115891983120228e-05, + "loss": 3.8321, + "step": 5684 + }, + { + "epoch": 0.73, + "learning_rate": 9.107888936684181e-05, + "loss": 3.8112, + "step": 5685 + }, + { + "epoch": 0.73, + "learning_rate": 9.099888622286262e-05, + "loss": 3.8083, + "step": 5686 + }, + { + "epoch": 0.73, + "learning_rate": 9.091891041301808e-05, + "loss": 3.7861, + "step": 5687 + }, + { + "epoch": 0.73, + "learning_rate": 9.083896195105718e-05, + "loss": 3.7568, + "step": 5688 + }, + { + "epoch": 0.73, + "learning_rate": 9.075904085072375e-05, + "loss": 3.7838, + "step": 5689 + }, + { + "epoch": 0.73, + "learning_rate": 9.06791471257574e-05, + "loss": 3.7787, + "step": 5690 + }, + { + "epoch": 0.73, + "learning_rate": 9.059928078989266e-05, + "loss": 3.9395, + "step": 5691 + }, + { + "epoch": 0.73, + "learning_rate": 9.051944185685948e-05, + "loss": 3.7566, + "step": 5692 + }, + { + "epoch": 0.73, + "learning_rate": 9.04396303403831e-05, + "loss": 3.7063, + "step": 5693 + }, + { + "epoch": 0.73, + "learning_rate": 9.035984625418406e-05, + "loss": 3.8995, + "step": 5694 + }, + { + "epoch": 0.73, + "learning_rate": 9.028008961197803e-05, + "loss": 3.8891, + "step": 5695 + }, + { + "epoch": 0.73, + "learning_rate": 9.020036042747637e-05, + "loss": 3.8148, + "step": 5696 + }, + { + "epoch": 0.73, + "learning_rate": 9.012065871438527e-05, + "loss": 3.9337, + "step": 5697 + }, + { + "epoch": 0.73, + "learning_rate": 9.004098448640643e-05, + "loss": 3.6891, + "step": 5698 + }, + { + "epoch": 0.73, + "learning_rate": 8.996133775723666e-05, + "loss": 3.9061, + "step": 5699 + }, + { + "epoch": 0.73, + "learning_rate": 8.988171854056837e-05, + "loss": 3.7796, + "step": 5700 + }, + { + "epoch": 0.73, + "learning_rate": 8.98021268500889e-05, + "loss": 3.7717, + "step": 5701 + }, + { + "epoch": 0.73, + "learning_rate": 8.97225626994809e-05, + "loss": 3.8517, + "step": 5702 + }, + { + "epoch": 0.73, + "learning_rate": 8.964302610242256e-05, + "loss": 3.7798, + "step": 5703 + }, + { + "epoch": 0.73, + "learning_rate": 8.956351707258705e-05, + "loss": 3.7425, + "step": 5704 + }, + { + "epoch": 0.73, + "learning_rate": 8.948403562364291e-05, + "loss": 3.8573, + "step": 5705 + }, + { + "epoch": 0.73, + "learning_rate": 8.940458176925389e-05, + "loss": 3.9132, + "step": 5706 + }, + { + "epoch": 0.73, + "learning_rate": 8.932515552307904e-05, + "loss": 3.8019, + "step": 5707 + }, + { + "epoch": 0.73, + "learning_rate": 8.92457568987726e-05, + "loss": 3.7317, + "step": 5708 + }, + { + "epoch": 0.73, + "learning_rate": 8.916638590998425e-05, + "loss": 3.9243, + "step": 5709 + }, + { + "epoch": 0.73, + "learning_rate": 8.90870425703586e-05, + "loss": 3.8529, + "step": 5710 + }, + { + "epoch": 0.73, + "learning_rate": 8.900772689353589e-05, + "loss": 3.7938, + "step": 5711 + }, + { + "epoch": 0.73, + "learning_rate": 8.892843889315133e-05, + "loss": 3.8256, + "step": 5712 + }, + { + "epoch": 0.73, + "learning_rate": 8.88491785828354e-05, + "loss": 3.9248, + "step": 5713 + }, + { + "epoch": 0.73, + "learning_rate": 8.876994597621391e-05, + "loss": 3.8384, + "step": 5714 + }, + { + "epoch": 0.73, + "learning_rate": 8.869074108690783e-05, + "loss": 3.7656, + "step": 5715 + }, + { + "epoch": 0.73, + "learning_rate": 8.861156392853334e-05, + "loss": 3.8254, + "step": 5716 + }, + { + "epoch": 0.73, + "learning_rate": 8.853241451470198e-05, + "loss": 3.7255, + "step": 5717 + }, + { + "epoch": 0.73, + "learning_rate": 8.845329285902054e-05, + "loss": 3.6724, + "step": 5718 + }, + { + "epoch": 0.73, + "learning_rate": 8.837419897509086e-05, + "loss": 3.8234, + "step": 5719 + }, + { + "epoch": 0.73, + "learning_rate": 8.829513287651011e-05, + "loss": 3.8343, + "step": 5720 + }, + { + "epoch": 0.73, + "learning_rate": 8.821609457687069e-05, + "loss": 3.7867, + "step": 5721 + }, + { + "epoch": 0.73, + "learning_rate": 8.813708408976015e-05, + "loss": 4.0941, + "step": 5722 + }, + { + "epoch": 0.73, + "learning_rate": 8.805810142876123e-05, + "loss": 3.674, + "step": 5723 + }, + { + "epoch": 0.73, + "learning_rate": 8.797914660745218e-05, + "loss": 3.7494, + "step": 5724 + }, + { + "epoch": 0.73, + "learning_rate": 8.790021963940612e-05, + "loss": 3.927, + "step": 5725 + }, + { + "epoch": 0.73, + "learning_rate": 8.782132053819145e-05, + "loss": 3.8812, + "step": 5726 + }, + { + "epoch": 0.73, + "learning_rate": 8.774244931737197e-05, + "loss": 3.7147, + "step": 5727 + }, + { + "epoch": 0.73, + "learning_rate": 8.766360599050654e-05, + "loss": 3.9641, + "step": 5728 + }, + { + "epoch": 0.73, + "learning_rate": 8.758479057114917e-05, + "loss": 3.794, + "step": 5729 + }, + { + "epoch": 0.73, + "learning_rate": 8.750600307284922e-05, + "loss": 3.8917, + "step": 5730 + }, + { + "epoch": 0.73, + "learning_rate": 8.742724350915102e-05, + "loss": 3.9366, + "step": 5731 + }, + { + "epoch": 0.73, + "learning_rate": 8.734851189359447e-05, + "loss": 3.8991, + "step": 5732 + }, + { + "epoch": 0.73, + "learning_rate": 8.726980823971434e-05, + "loss": 3.822, + "step": 5733 + }, + { + "epoch": 0.73, + "learning_rate": 8.719113256104069e-05, + "loss": 3.8277, + "step": 5734 + }, + { + "epoch": 0.73, + "learning_rate": 8.71124848710988e-05, + "loss": 3.7825, + "step": 5735 + }, + { + "epoch": 0.73, + "learning_rate": 8.703386518340902e-05, + "loss": 3.9275, + "step": 5736 + }, + { + "epoch": 0.73, + "learning_rate": 8.695527351148719e-05, + "loss": 3.8566, + "step": 5737 + }, + { + "epoch": 0.73, + "learning_rate": 8.687670986884394e-05, + "loss": 3.7921, + "step": 5738 + }, + { + "epoch": 0.73, + "learning_rate": 8.67981742689854e-05, + "loss": 3.8688, + "step": 5739 + }, + { + "epoch": 0.73, + "learning_rate": 8.671966672541273e-05, + "loss": 3.8685, + "step": 5740 + }, + { + "epoch": 0.73, + "learning_rate": 8.664118725162226e-05, + "loss": 3.9111, + "step": 5741 + }, + { + "epoch": 0.73, + "learning_rate": 8.656273586110549e-05, + "loss": 3.9813, + "step": 5742 + }, + { + "epoch": 0.74, + "learning_rate": 8.648431256734918e-05, + "loss": 3.7683, + "step": 5743 + }, + { + "epoch": 0.74, + "learning_rate": 8.640591738383507e-05, + "loss": 3.8504, + "step": 5744 + }, + { + "epoch": 0.74, + "learning_rate": 8.632755032404041e-05, + "loss": 3.9037, + "step": 5745 + }, + { + "epoch": 0.74, + "learning_rate": 8.624921140143722e-05, + "loss": 3.6839, + "step": 5746 + }, + { + "epoch": 0.74, + "learning_rate": 8.617090062949303e-05, + "loss": 3.8422, + "step": 5747 + }, + { + "epoch": 0.74, + "learning_rate": 8.609261802167029e-05, + "loss": 3.8687, + "step": 5748 + }, + { + "epoch": 0.74, + "learning_rate": 8.60143635914267e-05, + "loss": 3.8141, + "step": 5749 + }, + { + "epoch": 0.74, + "learning_rate": 8.593613735221506e-05, + "loss": 3.8337, + "step": 5750 + }, + { + "epoch": 0.74, + "learning_rate": 8.585793931748343e-05, + "loss": 3.7968, + "step": 5751 + }, + { + "epoch": 0.74, + "learning_rate": 8.577976950067484e-05, + "loss": 3.8765, + "step": 5752 + }, + { + "epoch": 0.74, + "learning_rate": 8.570162791522776e-05, + "loss": 3.9218, + "step": 5753 + }, + { + "epoch": 0.74, + "learning_rate": 8.562351457457549e-05, + "loss": 3.8688, + "step": 5754 + }, + { + "epoch": 0.74, + "learning_rate": 8.554542949214672e-05, + "loss": 3.8039, + "step": 5755 + }, + { + "epoch": 0.74, + "learning_rate": 8.546737268136518e-05, + "loss": 3.8722, + "step": 5756 + }, + { + "epoch": 0.74, + "learning_rate": 8.538934415564966e-05, + "loss": 3.8599, + "step": 5757 + }, + { + "epoch": 0.74, + "learning_rate": 8.531134392841424e-05, + "loss": 3.9517, + "step": 5758 + }, + { + "epoch": 0.74, + "learning_rate": 8.523337201306796e-05, + "loss": 3.808, + "step": 5759 + }, + { + "epoch": 0.74, + "learning_rate": 8.515542842301524e-05, + "loss": 3.6834, + "step": 5760 + }, + { + "epoch": 0.74, + "learning_rate": 8.507751317165541e-05, + "loss": 3.9323, + "step": 5761 + }, + { + "epoch": 0.74, + "learning_rate": 8.499962627238302e-05, + "loss": 3.7881, + "step": 5762 + }, + { + "epoch": 0.74, + "learning_rate": 8.492176773858765e-05, + "loss": 3.8118, + "step": 5763 + }, + { + "epoch": 0.74, + "learning_rate": 8.484393758365422e-05, + "loss": 3.8317, + "step": 5764 + }, + { + "epoch": 0.74, + "learning_rate": 8.476613582096257e-05, + "loss": 3.8491, + "step": 5765 + }, + { + "epoch": 0.74, + "learning_rate": 8.468836246388772e-05, + "loss": 3.8785, + "step": 5766 + }, + { + "epoch": 0.74, + "learning_rate": 8.461061752579976e-05, + "loss": 3.8194, + "step": 5767 + }, + { + "epoch": 0.74, + "learning_rate": 8.453290102006408e-05, + "loss": 3.7155, + "step": 5768 + }, + { + "epoch": 0.74, + "learning_rate": 8.445521296004099e-05, + "loss": 3.7774, + "step": 5769 + }, + { + "epoch": 0.74, + "learning_rate": 8.437755335908592e-05, + "loss": 3.733, + "step": 5770 + }, + { + "epoch": 0.74, + "learning_rate": 8.429992223054952e-05, + "loss": 3.8542, + "step": 5771 + }, + { + "epoch": 0.74, + "learning_rate": 8.422231958777743e-05, + "loss": 3.8953, + "step": 5772 + }, + { + "epoch": 0.74, + "learning_rate": 8.414474544411038e-05, + "loss": 3.8811, + "step": 5773 + }, + { + "epoch": 0.74, + "learning_rate": 8.406719981288436e-05, + "loss": 3.8883, + "step": 5774 + }, + { + "epoch": 0.74, + "learning_rate": 8.398968270743041e-05, + "loss": 3.8561, + "step": 5775 + }, + { + "epoch": 0.74, + "learning_rate": 8.391219414107456e-05, + "loss": 3.7498, + "step": 5776 + }, + { + "epoch": 0.74, + "learning_rate": 8.383473412713802e-05, + "loss": 3.8628, + "step": 5777 + }, + { + "epoch": 0.74, + "learning_rate": 8.375730267893703e-05, + "loss": 3.6234, + "step": 5778 + }, + { + "epoch": 0.74, + "learning_rate": 8.367989980978294e-05, + "loss": 3.8499, + "step": 5779 + }, + { + "epoch": 0.74, + "learning_rate": 8.360252553298214e-05, + "loss": 3.7203, + "step": 5780 + }, + { + "epoch": 0.74, + "learning_rate": 8.352517986183636e-05, + "loss": 3.8321, + "step": 5781 + }, + { + "epoch": 0.74, + "learning_rate": 8.344786280964197e-05, + "loss": 3.7746, + "step": 5782 + }, + { + "epoch": 0.74, + "learning_rate": 8.337057438969092e-05, + "loss": 3.7895, + "step": 5783 + }, + { + "epoch": 0.74, + "learning_rate": 8.329331461526988e-05, + "loss": 3.911, + "step": 5784 + }, + { + "epoch": 0.74, + "learning_rate": 8.321608349966065e-05, + "loss": 3.7747, + "step": 5785 + }, + { + "epoch": 0.74, + "learning_rate": 8.31388810561402e-05, + "loss": 3.7642, + "step": 5786 + }, + { + "epoch": 0.74, + "learning_rate": 8.306170729798054e-05, + "loss": 3.8016, + "step": 5787 + }, + { + "epoch": 0.74, + "learning_rate": 8.29845622384486e-05, + "loss": 3.6977, + "step": 5788 + }, + { + "epoch": 0.74, + "learning_rate": 8.290744589080674e-05, + "loss": 3.904, + "step": 5789 + }, + { + "epoch": 0.74, + "learning_rate": 8.283035826831201e-05, + "loss": 3.731, + "step": 5790 + }, + { + "epoch": 0.74, + "learning_rate": 8.275329938421658e-05, + "loss": 3.8641, + "step": 5791 + }, + { + "epoch": 0.74, + "learning_rate": 8.2676269251768e-05, + "loss": 3.7798, + "step": 5792 + }, + { + "epoch": 0.74, + "learning_rate": 8.25992678842085e-05, + "loss": 3.7251, + "step": 5793 + }, + { + "epoch": 0.74, + "learning_rate": 8.252229529477554e-05, + "loss": 3.6365, + "step": 5794 + }, + { + "epoch": 0.74, + "learning_rate": 8.244535149670148e-05, + "loss": 3.7161, + "step": 5795 + }, + { + "epoch": 0.74, + "learning_rate": 8.236843650321408e-05, + "loss": 3.8313, + "step": 5796 + }, + { + "epoch": 0.74, + "learning_rate": 8.229155032753577e-05, + "loss": 3.768, + "step": 5797 + }, + { + "epoch": 0.74, + "learning_rate": 8.221469298288419e-05, + "loss": 3.9414, + "step": 5798 + }, + { + "epoch": 0.74, + "learning_rate": 8.213786448247205e-05, + "loss": 3.8859, + "step": 5799 + }, + { + "epoch": 0.74, + "learning_rate": 8.206106483950693e-05, + "loss": 3.9244, + "step": 5800 + }, + { + "epoch": 0.74, + "learning_rate": 8.198429406719177e-05, + "loss": 3.786, + "step": 5801 + }, + { + "epoch": 0.74, + "learning_rate": 8.190755217872425e-05, + "loss": 3.9673, + "step": 5802 + }, + { + "epoch": 0.74, + "learning_rate": 8.183083918729713e-05, + "loss": 3.9238, + "step": 5803 + }, + { + "epoch": 0.74, + "learning_rate": 8.175415510609843e-05, + "loss": 3.7463, + "step": 5804 + }, + { + "epoch": 0.74, + "learning_rate": 8.167749994831092e-05, + "loss": 3.7987, + "step": 5805 + }, + { + "epoch": 0.74, + "learning_rate": 8.160087372711256e-05, + "loss": 3.7863, + "step": 5806 + }, + { + "epoch": 0.74, + "learning_rate": 8.152427645567622e-05, + "loss": 3.7694, + "step": 5807 + }, + { + "epoch": 0.74, + "learning_rate": 8.144770814716993e-05, + "loss": 3.6761, + "step": 5808 + }, + { + "epoch": 0.74, + "learning_rate": 8.137116881475653e-05, + "loss": 3.8401, + "step": 5809 + }, + { + "epoch": 0.74, + "learning_rate": 8.129465847159414e-05, + "loss": 3.8097, + "step": 5810 + }, + { + "epoch": 0.74, + "learning_rate": 8.121817713083584e-05, + "loss": 3.8854, + "step": 5811 + }, + { + "epoch": 0.74, + "learning_rate": 8.114172480562957e-05, + "loss": 3.8364, + "step": 5812 + }, + { + "epoch": 0.74, + "learning_rate": 8.106530150911837e-05, + "loss": 3.7648, + "step": 5813 + }, + { + "epoch": 0.74, + "learning_rate": 8.098890725444033e-05, + "loss": 3.7724, + "step": 5814 + }, + { + "epoch": 0.74, + "learning_rate": 8.091254205472846e-05, + "loss": 3.6865, + "step": 5815 + }, + { + "epoch": 0.74, + "learning_rate": 8.083620592311075e-05, + "loss": 3.8421, + "step": 5816 + }, + { + "epoch": 0.74, + "learning_rate": 8.075989887271043e-05, + "loss": 3.852, + "step": 5817 + }, + { + "epoch": 0.74, + "learning_rate": 8.068362091664552e-05, + "loss": 3.7748, + "step": 5818 + }, + { + "epoch": 0.74, + "learning_rate": 8.060737206802896e-05, + "loss": 3.7328, + "step": 5819 + }, + { + "epoch": 0.74, + "learning_rate": 8.0531152339969e-05, + "loss": 3.9191, + "step": 5820 + }, + { + "epoch": 0.75, + "learning_rate": 8.04549617455686e-05, + "loss": 3.7834, + "step": 5821 + }, + { + "epoch": 0.75, + "learning_rate": 8.037880029792582e-05, + "loss": 3.8286, + "step": 5822 + }, + { + "epoch": 0.75, + "learning_rate": 8.030266801013366e-05, + "loss": 3.8859, + "step": 5823 + }, + { + "epoch": 0.75, + "learning_rate": 8.022656489528013e-05, + "loss": 3.813, + "step": 5824 + }, + { + "epoch": 0.75, + "learning_rate": 8.015049096644833e-05, + "loss": 3.9336, + "step": 5825 + }, + { + "epoch": 0.75, + "learning_rate": 8.007444623671619e-05, + "loss": 3.8329, + "step": 5826 + }, + { + "epoch": 0.75, + "learning_rate": 7.999843071915671e-05, + "loss": 3.7384, + "step": 5827 + }, + { + "epoch": 0.75, + "learning_rate": 7.992244442683771e-05, + "loss": 3.7586, + "step": 5828 + }, + { + "epoch": 0.75, + "learning_rate": 7.984648737282232e-05, + "loss": 3.8453, + "step": 5829 + }, + { + "epoch": 0.75, + "learning_rate": 7.977055957016835e-05, + "loss": 3.7223, + "step": 5830 + }, + { + "epoch": 0.75, + "learning_rate": 7.969466103192858e-05, + "loss": 3.7365, + "step": 5831 + }, + { + "epoch": 0.75, + "learning_rate": 7.961879177115097e-05, + "loss": 3.7947, + "step": 5832 + }, + { + "epoch": 0.75, + "learning_rate": 7.954295180087831e-05, + "loss": 3.7737, + "step": 5833 + }, + { + "epoch": 0.75, + "learning_rate": 7.946714113414836e-05, + "loss": 3.8212, + "step": 5834 + }, + { + "epoch": 0.75, + "learning_rate": 7.939135978399382e-05, + "loss": 3.9, + "step": 5835 + }, + { + "epoch": 0.75, + "learning_rate": 7.931560776344238e-05, + "loss": 3.8743, + "step": 5836 + }, + { + "epoch": 0.75, + "learning_rate": 7.923988508551663e-05, + "loss": 3.9123, + "step": 5837 + }, + { + "epoch": 0.75, + "learning_rate": 7.916419176323428e-05, + "loss": 3.8002, + "step": 5838 + }, + { + "epoch": 0.75, + "learning_rate": 7.908852780960794e-05, + "loss": 3.8297, + "step": 5839 + }, + { + "epoch": 0.75, + "learning_rate": 7.901289323764502e-05, + "loss": 3.7997, + "step": 5840 + }, + { + "epoch": 0.75, + "learning_rate": 7.893728806034803e-05, + "loss": 3.6599, + "step": 5841 + }, + { + "epoch": 0.75, + "learning_rate": 7.886171229071434e-05, + "loss": 3.8016, + "step": 5842 + }, + { + "epoch": 0.75, + "learning_rate": 7.878616594173632e-05, + "loss": 3.809, + "step": 5843 + }, + { + "epoch": 0.75, + "learning_rate": 7.871064902640124e-05, + "loss": 3.7594, + "step": 5844 + }, + { + "epoch": 0.75, + "learning_rate": 7.863516155769129e-05, + "loss": 3.7702, + "step": 5845 + }, + { + "epoch": 0.75, + "learning_rate": 7.855970354858378e-05, + "loss": 3.7817, + "step": 5846 + }, + { + "epoch": 0.75, + "learning_rate": 7.848427501205064e-05, + "loss": 3.7437, + "step": 5847 + }, + { + "epoch": 0.75, + "learning_rate": 7.840887596105909e-05, + "loss": 3.7363, + "step": 5848 + }, + { + "epoch": 0.75, + "learning_rate": 7.833350640857101e-05, + "loss": 3.8299, + "step": 5849 + }, + { + "epoch": 0.75, + "learning_rate": 7.825816636754333e-05, + "loss": 3.9471, + "step": 5850 + }, + { + "epoch": 0.75, + "learning_rate": 7.818285585092783e-05, + "loss": 3.7334, + "step": 5851 + }, + { + "epoch": 0.75, + "learning_rate": 7.810757487167122e-05, + "loss": 3.7998, + "step": 5852 + }, + { + "epoch": 0.75, + "learning_rate": 7.803232344271532e-05, + "loss": 3.8157, + "step": 5853 + }, + { + "epoch": 0.75, + "learning_rate": 7.795710157699662e-05, + "loss": 3.7195, + "step": 5854 + }, + { + "epoch": 0.75, + "learning_rate": 7.788190928744668e-05, + "loss": 3.8798, + "step": 5855 + }, + { + "epoch": 0.75, + "learning_rate": 7.78067465869918e-05, + "loss": 3.9483, + "step": 5856 + }, + { + "epoch": 0.75, + "learning_rate": 7.773161348855349e-05, + "loss": 3.9121, + "step": 5857 + }, + { + "epoch": 0.75, + "learning_rate": 7.765651000504795e-05, + "loss": 3.7371, + "step": 5858 + }, + { + "epoch": 0.75, + "learning_rate": 7.758143614938621e-05, + "loss": 3.9206, + "step": 5859 + }, + { + "epoch": 0.75, + "learning_rate": 7.750639193447454e-05, + "loss": 3.9458, + "step": 5860 + }, + { + "epoch": 0.75, + "learning_rate": 7.743137737321381e-05, + "loss": 3.6768, + "step": 5861 + }, + { + "epoch": 0.75, + "learning_rate": 7.73563924784999e-05, + "loss": 3.8028, + "step": 5862 + }, + { + "epoch": 0.75, + "learning_rate": 7.728143726322359e-05, + "loss": 3.8632, + "step": 5863 + }, + { + "epoch": 0.75, + "learning_rate": 7.720651174027051e-05, + "loss": 3.7005, + "step": 5864 + }, + { + "epoch": 0.75, + "learning_rate": 7.713161592252121e-05, + "loss": 3.7183, + "step": 5865 + }, + { + "epoch": 0.75, + "learning_rate": 7.705674982285127e-05, + "loss": 3.7797, + "step": 5866 + }, + { + "epoch": 0.75, + "learning_rate": 7.698191345413086e-05, + "loss": 3.7159, + "step": 5867 + }, + { + "epoch": 0.75, + "learning_rate": 7.690710682922541e-05, + "loss": 3.9126, + "step": 5868 + }, + { + "epoch": 0.75, + "learning_rate": 7.683232996099498e-05, + "loss": 3.7093, + "step": 5869 + }, + { + "epoch": 0.75, + "learning_rate": 7.675758286229456e-05, + "loss": 3.7598, + "step": 5870 + }, + { + "epoch": 0.75, + "learning_rate": 7.668286554597404e-05, + "loss": 3.7609, + "step": 5871 + }, + { + "epoch": 0.75, + "learning_rate": 7.660817802487819e-05, + "loss": 3.8824, + "step": 5872 + }, + { + "epoch": 0.75, + "learning_rate": 7.65335203118466e-05, + "loss": 3.8826, + "step": 5873 + }, + { + "epoch": 0.75, + "learning_rate": 7.645889241971384e-05, + "loss": 3.7335, + "step": 5874 + }, + { + "epoch": 0.75, + "learning_rate": 7.638429436130945e-05, + "loss": 3.8045, + "step": 5875 + }, + { + "epoch": 0.75, + "learning_rate": 7.630972614945756e-05, + "loss": 3.8262, + "step": 5876 + }, + { + "epoch": 0.75, + "learning_rate": 7.623518779697733e-05, + "loss": 3.9372, + "step": 5877 + }, + { + "epoch": 0.75, + "learning_rate": 7.616067931668277e-05, + "loss": 3.7192, + "step": 5878 + }, + { + "epoch": 0.75, + "learning_rate": 7.608620072138278e-05, + "loss": 3.8266, + "step": 5879 + }, + { + "epoch": 0.75, + "learning_rate": 7.601175202388097e-05, + "loss": 3.8831, + "step": 5880 + }, + { + "epoch": 0.75, + "learning_rate": 7.593733323697613e-05, + "loss": 3.7909, + "step": 5881 + }, + { + "epoch": 0.75, + "learning_rate": 7.586294437346158e-05, + "loss": 3.7512, + "step": 5882 + }, + { + "epoch": 0.75, + "learning_rate": 7.578858544612571e-05, + "loss": 3.6676, + "step": 5883 + }, + { + "epoch": 0.75, + "learning_rate": 7.571425646775151e-05, + "loss": 3.8068, + "step": 5884 + }, + { + "epoch": 0.75, + "learning_rate": 7.563995745111724e-05, + "loss": 3.8178, + "step": 5885 + }, + { + "epoch": 0.75, + "learning_rate": 7.55656884089956e-05, + "loss": 3.746, + "step": 5886 + }, + { + "epoch": 0.75, + "learning_rate": 7.549144935415434e-05, + "loss": 3.8496, + "step": 5887 + }, + { + "epoch": 0.75, + "learning_rate": 7.541724029935596e-05, + "loss": 3.7702, + "step": 5888 + }, + { + "epoch": 0.75, + "learning_rate": 7.534306125735796e-05, + "loss": 3.6885, + "step": 5889 + }, + { + "epoch": 0.75, + "learning_rate": 7.526891224091254e-05, + "loss": 3.7955, + "step": 5890 + }, + { + "epoch": 0.75, + "learning_rate": 7.519479326276677e-05, + "loss": 3.927, + "step": 5891 + }, + { + "epoch": 0.75, + "learning_rate": 7.512070433566253e-05, + "loss": 3.8379, + "step": 5892 + }, + { + "epoch": 0.75, + "learning_rate": 7.504664547233655e-05, + "loss": 3.7674, + "step": 5893 + }, + { + "epoch": 0.75, + "learning_rate": 7.497261668552049e-05, + "loss": 3.735, + "step": 5894 + }, + { + "epoch": 0.75, + "learning_rate": 7.489861798794065e-05, + "loss": 3.8115, + "step": 5895 + }, + { + "epoch": 0.75, + "learning_rate": 7.482464939231842e-05, + "loss": 3.8476, + "step": 5896 + }, + { + "epoch": 0.75, + "learning_rate": 7.475071091136973e-05, + "loss": 3.8131, + "step": 5897 + }, + { + "epoch": 0.75, + "learning_rate": 7.467680255780555e-05, + "loss": 3.6144, + "step": 5898 + }, + { + "epoch": 0.76, + "learning_rate": 7.460292434433147e-05, + "loss": 3.6907, + "step": 5899 + }, + { + "epoch": 0.76, + "learning_rate": 7.45290762836481e-05, + "loss": 3.8777, + "step": 5900 + }, + { + "epoch": 0.76, + "learning_rate": 7.445525838845076e-05, + "loss": 3.7642, + "step": 5901 + }, + { + "epoch": 0.76, + "learning_rate": 7.43814706714295e-05, + "loss": 3.6422, + "step": 5902 + }, + { + "epoch": 0.76, + "learning_rate": 7.43077131452694e-05, + "loss": 3.9245, + "step": 5903 + }, + { + "epoch": 0.76, + "learning_rate": 7.423398582265026e-05, + "loss": 3.7911, + "step": 5904 + }, + { + "epoch": 0.76, + "learning_rate": 7.416028871624664e-05, + "loss": 3.7222, + "step": 5905 + }, + { + "epoch": 0.76, + "learning_rate": 7.408662183872786e-05, + "loss": 3.8253, + "step": 5906 + }, + { + "epoch": 0.76, + "learning_rate": 7.401298520275817e-05, + "loss": 3.8084, + "step": 5907 + }, + { + "epoch": 0.76, + "learning_rate": 7.393937882099656e-05, + "loss": 3.9095, + "step": 5908 + }, + { + "epoch": 0.76, + "learning_rate": 7.386580270609669e-05, + "loss": 3.7256, + "step": 5909 + }, + { + "epoch": 0.76, + "learning_rate": 7.379225687070734e-05, + "loss": 3.8417, + "step": 5910 + }, + { + "epoch": 0.76, + "learning_rate": 7.371874132747175e-05, + "loss": 3.8291, + "step": 5911 + }, + { + "epoch": 0.76, + "learning_rate": 7.364525608902823e-05, + "loss": 3.7198, + "step": 5912 + }, + { + "epoch": 0.76, + "learning_rate": 7.357180116800965e-05, + "loss": 3.7901, + "step": 5913 + }, + { + "epoch": 0.76, + "learning_rate": 7.349837657704378e-05, + "loss": 3.924, + "step": 5914 + }, + { + "epoch": 0.76, + "learning_rate": 7.342498232875319e-05, + "loss": 3.9054, + "step": 5915 + }, + { + "epoch": 0.76, + "learning_rate": 7.335161843575505e-05, + "loss": 3.7916, + "step": 5916 + }, + { + "epoch": 0.76, + "learning_rate": 7.327828491066169e-05, + "loss": 3.8512, + "step": 5917 + }, + { + "epoch": 0.76, + "learning_rate": 7.32049817660799e-05, + "loss": 3.7155, + "step": 5918 + }, + { + "epoch": 0.76, + "learning_rate": 7.31317090146113e-05, + "loss": 3.7539, + "step": 5919 + }, + { + "epoch": 0.76, + "learning_rate": 7.305846666885236e-05, + "loss": 3.9002, + "step": 5920 + }, + { + "epoch": 0.76, + "learning_rate": 7.298525474139419e-05, + "loss": 3.8503, + "step": 5921 + }, + { + "epoch": 0.76, + "learning_rate": 7.291207324482296e-05, + "loss": 3.8674, + "step": 5922 + }, + { + "epoch": 0.76, + "learning_rate": 7.283892219171933e-05, + "loss": 3.7517, + "step": 5923 + }, + { + "epoch": 0.76, + "learning_rate": 7.27658015946587e-05, + "loss": 3.7327, + "step": 5924 + }, + { + "epoch": 0.76, + "learning_rate": 7.269271146621153e-05, + "loss": 3.8253, + "step": 5925 + }, + { + "epoch": 0.76, + "learning_rate": 7.26196518189428e-05, + "loss": 3.8706, + "step": 5926 + }, + { + "epoch": 0.76, + "learning_rate": 7.254662266541229e-05, + "loss": 3.7851, + "step": 5927 + }, + { + "epoch": 0.76, + "learning_rate": 7.247362401817456e-05, + "loss": 3.8601, + "step": 5928 + }, + { + "epoch": 0.76, + "learning_rate": 7.24006558897789e-05, + "loss": 3.8334, + "step": 5929 + }, + { + "epoch": 0.76, + "learning_rate": 7.232771829276935e-05, + "loss": 3.7805, + "step": 5930 + }, + { + "epoch": 0.76, + "learning_rate": 7.225481123968475e-05, + "loss": 3.8323, + "step": 5931 + }, + { + "epoch": 0.76, + "learning_rate": 7.218193474305881e-05, + "loss": 3.8911, + "step": 5932 + }, + { + "epoch": 0.76, + "learning_rate": 7.21090888154197e-05, + "loss": 3.8484, + "step": 5933 + }, + { + "epoch": 0.76, + "learning_rate": 7.203627346929053e-05, + "loss": 3.8479, + "step": 5934 + }, + { + "epoch": 0.76, + "learning_rate": 7.196348871718907e-05, + "loss": 3.8398, + "step": 5935 + }, + { + "epoch": 0.76, + "learning_rate": 7.18907345716279e-05, + "loss": 3.6894, + "step": 5936 + }, + { + "epoch": 0.76, + "learning_rate": 7.181801104511417e-05, + "loss": 3.8971, + "step": 5937 + }, + { + "epoch": 0.76, + "learning_rate": 7.174531815015009e-05, + "loss": 3.8034, + "step": 5938 + }, + { + "epoch": 0.76, + "learning_rate": 7.167265589923228e-05, + "loss": 3.7905, + "step": 5939 + }, + { + "epoch": 0.76, + "learning_rate": 7.16000243048523e-05, + "loss": 3.7811, + "step": 5940 + }, + { + "epoch": 0.76, + "learning_rate": 7.152742337949636e-05, + "loss": 3.7398, + "step": 5941 + }, + { + "epoch": 0.76, + "learning_rate": 7.145485313564536e-05, + "loss": 3.7801, + "step": 5942 + }, + { + "epoch": 0.76, + "learning_rate": 7.138231358577498e-05, + "loss": 3.7264, + "step": 5943 + }, + { + "epoch": 0.76, + "learning_rate": 7.13098047423556e-05, + "loss": 3.7625, + "step": 5944 + }, + { + "epoch": 0.76, + "learning_rate": 7.12373266178523e-05, + "loss": 3.8516, + "step": 5945 + }, + { + "epoch": 0.76, + "learning_rate": 7.116487922472498e-05, + "loss": 3.7938, + "step": 5946 + }, + { + "epoch": 0.76, + "learning_rate": 7.109246257542817e-05, + "loss": 3.8639, + "step": 5947 + }, + { + "epoch": 0.76, + "learning_rate": 7.102007668241107e-05, + "loss": 3.9027, + "step": 5948 + }, + { + "epoch": 0.76, + "learning_rate": 7.094772155811774e-05, + "loss": 3.8764, + "step": 5949 + }, + { + "epoch": 0.76, + "learning_rate": 7.087539721498684e-05, + "loss": 3.7084, + "step": 5950 + }, + { + "epoch": 0.76, + "learning_rate": 7.080310366545176e-05, + "loss": 3.7603, + "step": 5951 + }, + { + "epoch": 0.76, + "learning_rate": 7.073084092194049e-05, + "loss": 3.8867, + "step": 5952 + }, + { + "epoch": 0.76, + "learning_rate": 7.065860899687604e-05, + "loss": 3.914, + "step": 5953 + }, + { + "epoch": 0.76, + "learning_rate": 7.05864079026758e-05, + "loss": 3.8177, + "step": 5954 + }, + { + "epoch": 0.76, + "learning_rate": 7.051423765175194e-05, + "loss": 3.7548, + "step": 5955 + }, + { + "epoch": 0.76, + "learning_rate": 7.044209825651143e-05, + "loss": 3.8412, + "step": 5956 + }, + { + "epoch": 0.76, + "learning_rate": 7.036998972935585e-05, + "loss": 3.8662, + "step": 5957 + }, + { + "epoch": 0.76, + "learning_rate": 7.02979120826814e-05, + "loss": 3.948, + "step": 5958 + }, + { + "epoch": 0.76, + "learning_rate": 7.022586532887926e-05, + "loss": 3.9721, + "step": 5959 + }, + { + "epoch": 0.76, + "learning_rate": 7.015384948033487e-05, + "loss": 3.6967, + "step": 5960 + }, + { + "epoch": 0.76, + "learning_rate": 7.00818645494288e-05, + "loss": 3.836, + "step": 5961 + }, + { + "epoch": 0.76, + "learning_rate": 7.0009910548536e-05, + "loss": 3.5685, + "step": 5962 + }, + { + "epoch": 0.76, + "learning_rate": 6.993798749002622e-05, + "loss": 3.9226, + "step": 5963 + }, + { + "epoch": 0.76, + "learning_rate": 6.986609538626384e-05, + "loss": 3.7485, + "step": 5964 + }, + { + "epoch": 0.76, + "learning_rate": 6.979423424960795e-05, + "loss": 3.7431, + "step": 5965 + }, + { + "epoch": 0.76, + "learning_rate": 6.972240409241224e-05, + "loss": 3.8326, + "step": 5966 + }, + { + "epoch": 0.76, + "learning_rate": 6.965060492702525e-05, + "loss": 3.847, + "step": 5967 + }, + { + "epoch": 0.76, + "learning_rate": 6.95788367657901e-05, + "loss": 3.7798, + "step": 5968 + }, + { + "epoch": 0.76, + "learning_rate": 6.950709962104454e-05, + "loss": 3.8581, + "step": 5969 + }, + { + "epoch": 0.76, + "learning_rate": 6.943539350512101e-05, + "loss": 3.8217, + "step": 5970 + }, + { + "epoch": 0.76, + "learning_rate": 6.936371843034663e-05, + "loss": 3.884, + "step": 5971 + }, + { + "epoch": 0.76, + "learning_rate": 6.929207440904318e-05, + "loss": 3.7824, + "step": 5972 + }, + { + "epoch": 0.76, + "learning_rate": 6.922046145352698e-05, + "loss": 3.7623, + "step": 5973 + }, + { + "epoch": 0.76, + "learning_rate": 6.91488795761093e-05, + "loss": 3.8691, + "step": 5974 + }, + { + "epoch": 0.76, + "learning_rate": 6.907732878909587e-05, + "loss": 3.9417, + "step": 5975 + }, + { + "epoch": 0.76, + "learning_rate": 6.900580910478693e-05, + "loss": 3.7628, + "step": 5976 + }, + { + "epoch": 0.77, + "learning_rate": 6.89343205354778e-05, + "loss": 3.7091, + "step": 5977 + }, + { + "epoch": 0.77, + "learning_rate": 6.886286309345801e-05, + "loss": 3.7387, + "step": 5978 + }, + { + "epoch": 0.77, + "learning_rate": 6.879143679101202e-05, + "loss": 3.8836, + "step": 5979 + }, + { + "epoch": 0.77, + "learning_rate": 6.872004164041878e-05, + "loss": 3.7862, + "step": 5980 + }, + { + "epoch": 0.77, + "learning_rate": 6.864867765395188e-05, + "loss": 3.8352, + "step": 5981 + }, + { + "epoch": 0.77, + "learning_rate": 6.857734484387976e-05, + "loss": 3.8159, + "step": 5982 + }, + { + "epoch": 0.77, + "learning_rate": 6.850604322246532e-05, + "loss": 3.7383, + "step": 5983 + }, + { + "epoch": 0.77, + "learning_rate": 6.843477280196609e-05, + "loss": 3.7572, + "step": 5984 + }, + { + "epoch": 0.77, + "learning_rate": 6.836353359463424e-05, + "loss": 3.897, + "step": 5985 + }, + { + "epoch": 0.77, + "learning_rate": 6.829232561271672e-05, + "loss": 3.8271, + "step": 5986 + }, + { + "epoch": 0.77, + "learning_rate": 6.822114886845498e-05, + "loss": 3.6643, + "step": 5987 + }, + { + "epoch": 0.77, + "learning_rate": 6.815000337408506e-05, + "loss": 3.8466, + "step": 5988 + }, + { + "epoch": 0.77, + "learning_rate": 6.80788891418378e-05, + "loss": 3.974, + "step": 5989 + }, + { + "epoch": 0.77, + "learning_rate": 6.800780618393851e-05, + "loss": 3.8439, + "step": 5990 + }, + { + "epoch": 0.77, + "learning_rate": 6.793675451260717e-05, + "loss": 3.8086, + "step": 5991 + }, + { + "epoch": 0.77, + "learning_rate": 6.78657341400584e-05, + "loss": 3.8919, + "step": 5992 + }, + { + "epoch": 0.77, + "learning_rate": 6.779474507850139e-05, + "loss": 3.8549, + "step": 5993 + }, + { + "epoch": 0.77, + "learning_rate": 6.772378734013992e-05, + "loss": 3.7279, + "step": 5994 + }, + { + "epoch": 0.77, + "learning_rate": 6.765286093717265e-05, + "loss": 3.9256, + "step": 5995 + }, + { + "epoch": 0.77, + "learning_rate": 6.758196588179244e-05, + "loss": 3.7623, + "step": 5996 + }, + { + "epoch": 0.77, + "learning_rate": 6.751110218618714e-05, + "loss": 3.699, + "step": 5997 + }, + { + "epoch": 0.77, + "learning_rate": 6.744026986253895e-05, + "loss": 3.7914, + "step": 5998 + }, + { + "epoch": 0.77, + "learning_rate": 6.736946892302481e-05, + "loss": 3.7924, + "step": 5999 + }, + { + "epoch": 0.77, + "learning_rate": 6.729869937981619e-05, + "loss": 3.8951, + "step": 6000 + }, + { + "epoch": 0.77, + "learning_rate": 6.72279612450792e-05, + "loss": 3.7382, + "step": 6001 + }, + { + "epoch": 0.77, + "learning_rate": 6.715725453097446e-05, + "loss": 3.8148, + "step": 6002 + }, + { + "epoch": 0.77, + "learning_rate": 6.708657924965746e-05, + "loss": 3.7479, + "step": 6003 + }, + { + "epoch": 0.77, + "learning_rate": 6.701593541327792e-05, + "loss": 3.7989, + "step": 6004 + }, + { + "epoch": 0.77, + "learning_rate": 6.694532303398048e-05, + "loss": 3.9072, + "step": 6005 + }, + { + "epoch": 0.77, + "learning_rate": 6.687474212390418e-05, + "loss": 3.7812, + "step": 6006 + }, + { + "epoch": 0.77, + "learning_rate": 6.680419269518265e-05, + "loss": 3.8939, + "step": 6007 + }, + { + "epoch": 0.77, + "learning_rate": 6.673367475994421e-05, + "loss": 3.8166, + "step": 6008 + }, + { + "epoch": 0.77, + "learning_rate": 6.66631883303116e-05, + "loss": 3.9406, + "step": 6009 + }, + { + "epoch": 0.77, + "learning_rate": 6.659273341840241e-05, + "loss": 3.7912, + "step": 6010 + }, + { + "epoch": 0.77, + "learning_rate": 6.652231003632858e-05, + "loss": 3.92, + "step": 6011 + }, + { + "epoch": 0.77, + "learning_rate": 6.645191819619672e-05, + "loss": 3.7749, + "step": 6012 + }, + { + "epoch": 0.77, + "learning_rate": 6.638155791010791e-05, + "loss": 3.7591, + "step": 6013 + }, + { + "epoch": 0.77, + "learning_rate": 6.631122919015809e-05, + "loss": 3.7144, + "step": 6014 + }, + { + "epoch": 0.77, + "learning_rate": 6.624093204843746e-05, + "loss": 3.7426, + "step": 6015 + }, + { + "epoch": 0.77, + "learning_rate": 6.61706664970309e-05, + "loss": 3.8439, + "step": 6016 + }, + { + "epoch": 0.77, + "learning_rate": 6.610043254801784e-05, + "loss": 3.843, + "step": 6017 + }, + { + "epoch": 0.77, + "learning_rate": 6.603023021347246e-05, + "loss": 3.7251, + "step": 6018 + }, + { + "epoch": 0.77, + "learning_rate": 6.596005950546327e-05, + "loss": 3.7879, + "step": 6019 + }, + { + "epoch": 0.77, + "learning_rate": 6.58899204360534e-05, + "loss": 3.7031, + "step": 6020 + }, + { + "epoch": 0.77, + "learning_rate": 6.58198130173006e-05, + "loss": 3.7738, + "step": 6021 + }, + { + "epoch": 0.77, + "learning_rate": 6.57497372612571e-05, + "loss": 3.7104, + "step": 6022 + }, + { + "epoch": 0.77, + "learning_rate": 6.567969317996982e-05, + "loss": 3.7052, + "step": 6023 + }, + { + "epoch": 0.77, + "learning_rate": 6.560968078548005e-05, + "loss": 3.8775, + "step": 6024 + }, + { + "epoch": 0.77, + "learning_rate": 6.553970008982385e-05, + "loss": 3.7543, + "step": 6025 + }, + { + "epoch": 0.77, + "learning_rate": 6.546975110503164e-05, + "loss": 3.6748, + "step": 6026 + }, + { + "epoch": 0.77, + "learning_rate": 6.53998338431285e-05, + "loss": 3.8676, + "step": 6027 + }, + { + "epoch": 0.77, + "learning_rate": 6.532994831613398e-05, + "loss": 3.9078, + "step": 6028 + }, + { + "epoch": 0.77, + "learning_rate": 6.526009453606224e-05, + "loss": 3.7438, + "step": 6029 + }, + { + "epoch": 0.77, + "learning_rate": 6.519027251492185e-05, + "loss": 3.8344, + "step": 6030 + }, + { + "epoch": 0.77, + "learning_rate": 6.512048226471617e-05, + "loss": 3.7779, + "step": 6031 + }, + { + "epoch": 0.77, + "learning_rate": 6.505072379744283e-05, + "loss": 3.7806, + "step": 6032 + }, + { + "epoch": 0.77, + "learning_rate": 6.498099712509428e-05, + "loss": 3.8699, + "step": 6033 + }, + { + "epoch": 0.77, + "learning_rate": 6.491130225965722e-05, + "loss": 3.8028, + "step": 6034 + }, + { + "epoch": 0.77, + "learning_rate": 6.484163921311306e-05, + "loss": 3.7311, + "step": 6035 + }, + { + "epoch": 0.77, + "learning_rate": 6.477200799743766e-05, + "loss": 3.7806, + "step": 6036 + }, + { + "epoch": 0.77, + "learning_rate": 6.470240862460142e-05, + "loss": 3.7395, + "step": 6037 + }, + { + "epoch": 0.77, + "learning_rate": 6.46328411065692e-05, + "loss": 3.7429, + "step": 6038 + }, + { + "epoch": 0.77, + "learning_rate": 6.456330545530065e-05, + "loss": 3.8829, + "step": 6039 + }, + { + "epoch": 0.77, + "learning_rate": 6.449380168274965e-05, + "loss": 3.7541, + "step": 6040 + }, + { + "epoch": 0.77, + "learning_rate": 6.442432980086466e-05, + "loss": 3.8465, + "step": 6041 + }, + { + "epoch": 0.77, + "learning_rate": 6.43548898215888e-05, + "loss": 3.6903, + "step": 6042 + }, + { + "epoch": 0.77, + "learning_rate": 6.428548175685958e-05, + "loss": 3.8686, + "step": 6043 + }, + { + "epoch": 0.77, + "learning_rate": 6.421610561860902e-05, + "loss": 3.7741, + "step": 6044 + }, + { + "epoch": 0.77, + "learning_rate": 6.414676141876363e-05, + "loss": 3.8075, + "step": 6045 + }, + { + "epoch": 0.77, + "learning_rate": 6.407744916924463e-05, + "loss": 3.8402, + "step": 6046 + }, + { + "epoch": 0.77, + "learning_rate": 6.400816888196751e-05, + "loss": 3.7009, + "step": 6047 + }, + { + "epoch": 0.77, + "learning_rate": 6.393892056884234e-05, + "loss": 3.7809, + "step": 6048 + }, + { + "epoch": 0.77, + "learning_rate": 6.386970424177376e-05, + "loss": 3.8183, + "step": 6049 + }, + { + "epoch": 0.77, + "learning_rate": 6.380051991266075e-05, + "loss": 3.8626, + "step": 6050 + }, + { + "epoch": 0.77, + "learning_rate": 6.373136759339704e-05, + "loss": 3.8657, + "step": 6051 + }, + { + "epoch": 0.77, + "learning_rate": 6.366224729587067e-05, + "loss": 3.5905, + "step": 6052 + }, + { + "epoch": 0.77, + "learning_rate": 6.359315903196411e-05, + "loss": 3.7851, + "step": 6053 + }, + { + "epoch": 0.77, + "learning_rate": 6.352410281355461e-05, + "loss": 3.7978, + "step": 6054 + }, + { + "epoch": 0.78, + "learning_rate": 6.345507865251366e-05, + "loss": 3.7316, + "step": 6055 + }, + { + "epoch": 0.78, + "learning_rate": 6.338608656070727e-05, + "loss": 3.7213, + "step": 6056 + }, + { + "epoch": 0.78, + "learning_rate": 6.331712654999602e-05, + "loss": 3.6906, + "step": 6057 + }, + { + "epoch": 0.78, + "learning_rate": 6.324819863223497e-05, + "loss": 3.8156, + "step": 6058 + }, + { + "epoch": 0.78, + "learning_rate": 6.317930281927348e-05, + "loss": 3.9385, + "step": 6059 + }, + { + "epoch": 0.78, + "learning_rate": 6.311043912295563e-05, + "loss": 3.7262, + "step": 6060 + }, + { + "epoch": 0.78, + "learning_rate": 6.304160755512003e-05, + "loss": 3.8744, + "step": 6061 + }, + { + "epoch": 0.78, + "learning_rate": 6.297280812759945e-05, + "loss": 3.7933, + "step": 6062 + }, + { + "epoch": 0.78, + "learning_rate": 6.29040408522214e-05, + "loss": 3.794, + "step": 6063 + }, + { + "epoch": 0.78, + "learning_rate": 6.28353057408077e-05, + "loss": 3.8456, + "step": 6064 + }, + { + "epoch": 0.78, + "learning_rate": 6.276660280517477e-05, + "loss": 3.6968, + "step": 6065 + }, + { + "epoch": 0.78, + "learning_rate": 6.269793205713331e-05, + "loss": 3.7986, + "step": 6066 + }, + { + "epoch": 0.78, + "learning_rate": 6.262929350848881e-05, + "loss": 3.7414, + "step": 6067 + }, + { + "epoch": 0.78, + "learning_rate": 6.256068717104093e-05, + "loss": 3.8634, + "step": 6068 + }, + { + "epoch": 0.78, + "learning_rate": 6.249211305658384e-05, + "loss": 3.6348, + "step": 6069 + }, + { + "epoch": 0.78, + "learning_rate": 6.242357117690639e-05, + "loss": 3.6963, + "step": 6070 + }, + { + "epoch": 0.78, + "learning_rate": 6.235506154379161e-05, + "loss": 3.7716, + "step": 6071 + }, + { + "epoch": 0.78, + "learning_rate": 6.228658416901711e-05, + "loss": 3.7886, + "step": 6072 + }, + { + "epoch": 0.78, + "learning_rate": 6.221813906435494e-05, + "loss": 3.7433, + "step": 6073 + }, + { + "epoch": 0.78, + "learning_rate": 6.214972624157159e-05, + "loss": 3.9104, + "step": 6074 + }, + { + "epoch": 0.78, + "learning_rate": 6.208134571242808e-05, + "loss": 3.646, + "step": 6075 + }, + { + "epoch": 0.78, + "learning_rate": 6.20129974886798e-05, + "loss": 3.7131, + "step": 6076 + }, + { + "epoch": 0.78, + "learning_rate": 6.194468158207659e-05, + "loss": 3.812, + "step": 6077 + }, + { + "epoch": 0.78, + "learning_rate": 6.187639800436265e-05, + "loss": 3.7984, + "step": 6078 + }, + { + "epoch": 0.78, + "learning_rate": 6.18081467672769e-05, + "loss": 3.7693, + "step": 6079 + }, + { + "epoch": 0.78, + "learning_rate": 6.17399278825524e-05, + "loss": 3.9377, + "step": 6080 + }, + { + "epoch": 0.78, + "learning_rate": 6.167174136191675e-05, + "loss": 3.829, + "step": 6081 + }, + { + "epoch": 0.78, + "learning_rate": 6.160358721709212e-05, + "loss": 3.7404, + "step": 6082 + }, + { + "epoch": 0.78, + "learning_rate": 6.15354654597949e-05, + "loss": 3.8233, + "step": 6083 + }, + { + "epoch": 0.78, + "learning_rate": 6.146737610173606e-05, + "loss": 3.8105, + "step": 6084 + }, + { + "epoch": 0.78, + "learning_rate": 6.13993191546209e-05, + "loss": 3.6954, + "step": 6085 + }, + { + "epoch": 0.78, + "learning_rate": 6.133129463014924e-05, + "loss": 3.8682, + "step": 6086 + }, + { + "epoch": 0.78, + "learning_rate": 6.126330254001522e-05, + "loss": 3.7667, + "step": 6087 + }, + { + "epoch": 0.78, + "learning_rate": 6.119534289590747e-05, + "loss": 3.6144, + "step": 6088 + }, + { + "epoch": 0.78, + "learning_rate": 6.112741570950919e-05, + "loss": 3.7972, + "step": 6089 + }, + { + "epoch": 0.78, + "learning_rate": 6.105952099249776e-05, + "loss": 3.7427, + "step": 6090 + }, + { + "epoch": 0.78, + "learning_rate": 6.099165875654503e-05, + "loss": 3.9026, + "step": 6091 + }, + { + "epoch": 0.78, + "learning_rate": 6.092382901331733e-05, + "loss": 3.705, + "step": 6092 + }, + { + "epoch": 0.78, + "learning_rate": 6.0856031774475407e-05, + "loss": 3.8432, + "step": 6093 + }, + { + "epoch": 0.78, + "learning_rate": 6.078826705167437e-05, + "loss": 3.759, + "step": 6094 + }, + { + "epoch": 0.78, + "learning_rate": 6.072053485656365e-05, + "loss": 3.8358, + "step": 6095 + }, + { + "epoch": 0.78, + "learning_rate": 6.065283520078732e-05, + "loss": 3.7799, + "step": 6096 + }, + { + "epoch": 0.78, + "learning_rate": 6.05851680959838e-05, + "loss": 3.8689, + "step": 6097 + }, + { + "epoch": 0.78, + "learning_rate": 6.051753355378578e-05, + "loss": 3.7906, + "step": 6098 + }, + { + "epoch": 0.78, + "learning_rate": 6.044993158582038e-05, + "loss": 3.78, + "step": 6099 + }, + { + "epoch": 0.78, + "learning_rate": 6.038236220370921e-05, + "loss": 3.7996, + "step": 6100 + }, + { + "epoch": 0.78, + "learning_rate": 6.031482541906821e-05, + "loss": 3.9405, + "step": 6101 + }, + { + "epoch": 0.78, + "learning_rate": 6.024732124350768e-05, + "loss": 3.741, + "step": 6102 + }, + { + "epoch": 0.78, + "learning_rate": 6.017984968863249e-05, + "loss": 3.8207, + "step": 6103 + }, + { + "epoch": 0.78, + "learning_rate": 6.0112410766041735e-05, + "loss": 3.771, + "step": 6104 + }, + { + "epoch": 0.78, + "learning_rate": 6.0045004487328917e-05, + "loss": 3.8018, + "step": 6105 + }, + { + "epoch": 0.78, + "learning_rate": 5.997763086408192e-05, + "loss": 3.7512, + "step": 6106 + }, + { + "epoch": 0.78, + "learning_rate": 5.991028990788316e-05, + "loss": 3.862, + "step": 6107 + }, + { + "epoch": 0.78, + "learning_rate": 5.984298163030929e-05, + "loss": 3.7444, + "step": 6108 + }, + { + "epoch": 0.78, + "learning_rate": 5.977570604293128e-05, + "loss": 3.8791, + "step": 6109 + }, + { + "epoch": 0.78, + "learning_rate": 5.9708463157314765e-05, + "loss": 3.9208, + "step": 6110 + }, + { + "epoch": 0.78, + "learning_rate": 5.964125298501946e-05, + "loss": 3.773, + "step": 6111 + }, + { + "epoch": 0.78, + "learning_rate": 5.957407553759961e-05, + "loss": 3.7309, + "step": 6112 + }, + { + "epoch": 0.78, + "learning_rate": 5.950693082660377e-05, + "loss": 3.8737, + "step": 6113 + }, + { + "epoch": 0.78, + "learning_rate": 5.9439818863574913e-05, + "loss": 3.6379, + "step": 6114 + }, + { + "epoch": 0.78, + "learning_rate": 5.937273966005028e-05, + "loss": 3.8125, + "step": 6115 + }, + { + "epoch": 0.78, + "learning_rate": 5.9305693227561715e-05, + "loss": 3.6638, + "step": 6116 + }, + { + "epoch": 0.78, + "learning_rate": 5.923867957763512e-05, + "loss": 3.7904, + "step": 6117 + }, + { + "epoch": 0.78, + "learning_rate": 5.917169872179109e-05, + "loss": 3.8457, + "step": 6118 + }, + { + "epoch": 0.78, + "learning_rate": 5.91047506715443e-05, + "loss": 3.8169, + "step": 6119 + }, + { + "epoch": 0.78, + "learning_rate": 5.903783543840393e-05, + "loss": 3.7245, + "step": 6120 + }, + { + "epoch": 0.78, + "learning_rate": 5.8970953033873484e-05, + "loss": 3.7089, + "step": 6121 + }, + { + "epoch": 0.78, + "learning_rate": 5.89041034694508e-05, + "loss": 3.7961, + "step": 6122 + }, + { + "epoch": 0.78, + "learning_rate": 5.8837286756628025e-05, + "loss": 3.7233, + "step": 6123 + }, + { + "epoch": 0.78, + "learning_rate": 5.877050290689182e-05, + "loss": 3.7321, + "step": 6124 + }, + { + "epoch": 0.78, + "learning_rate": 5.870375193172314e-05, + "loss": 3.8419, + "step": 6125 + }, + { + "epoch": 0.78, + "learning_rate": 5.8637033842597224e-05, + "loss": 3.8606, + "step": 6126 + }, + { + "epoch": 0.78, + "learning_rate": 5.857034865098365e-05, + "loss": 3.686, + "step": 6127 + }, + { + "epoch": 0.78, + "learning_rate": 5.8503696368346374e-05, + "loss": 3.7089, + "step": 6128 + }, + { + "epoch": 0.78, + "learning_rate": 5.84370770061437e-05, + "loss": 3.721, + "step": 6129 + }, + { + "epoch": 0.78, + "learning_rate": 5.8370490575828216e-05, + "loss": 3.7676, + "step": 6130 + }, + { + "epoch": 0.78, + "learning_rate": 5.8303937088847045e-05, + "loss": 3.7061, + "step": 6131 + }, + { + "epoch": 0.78, + "learning_rate": 5.823741655664141e-05, + "loss": 3.8954, + "step": 6132 + }, + { + "epoch": 0.79, + "learning_rate": 5.817092899064691e-05, + "loss": 3.8757, + "step": 6133 + }, + { + "epoch": 0.79, + "learning_rate": 5.810447440229366e-05, + "loss": 3.778, + "step": 6134 + }, + { + "epoch": 0.79, + "learning_rate": 5.8038052803005894e-05, + "loss": 3.7661, + "step": 6135 + }, + { + "epoch": 0.79, + "learning_rate": 5.797166420420228e-05, + "loss": 3.7924, + "step": 6136 + }, + { + "epoch": 0.79, + "learning_rate": 5.790530861729576e-05, + "loss": 3.7217, + "step": 6137 + }, + { + "epoch": 0.79, + "learning_rate": 5.783898605369356e-05, + "loss": 3.7537, + "step": 6138 + }, + { + "epoch": 0.79, + "learning_rate": 5.777269652479747e-05, + "loss": 3.733, + "step": 6139 + }, + { + "epoch": 0.79, + "learning_rate": 5.770644004200332e-05, + "loss": 3.712, + "step": 6140 + }, + { + "epoch": 0.79, + "learning_rate": 5.764021661670141e-05, + "loss": 3.6684, + "step": 6141 + }, + { + "epoch": 0.79, + "learning_rate": 5.757402626027625e-05, + "loss": 3.8592, + "step": 6142 + }, + { + "epoch": 0.79, + "learning_rate": 5.750786898410673e-05, + "loss": 3.7084, + "step": 6143 + }, + { + "epoch": 0.79, + "learning_rate": 5.744174479956615e-05, + "loss": 3.8795, + "step": 6144 + }, + { + "epoch": 0.79, + "learning_rate": 5.737565371802189e-05, + "loss": 3.7229, + "step": 6145 + }, + { + "epoch": 0.79, + "learning_rate": 5.730959575083594e-05, + "loss": 3.7866, + "step": 6146 + }, + { + "epoch": 0.79, + "learning_rate": 5.7243570909364326e-05, + "loss": 3.6287, + "step": 6147 + }, + { + "epoch": 0.79, + "learning_rate": 5.71775792049575e-05, + "loss": 3.6903, + "step": 6148 + }, + { + "epoch": 0.79, + "learning_rate": 5.711162064896019e-05, + "loss": 3.7716, + "step": 6149 + }, + { + "epoch": 0.79, + "learning_rate": 5.7045695252711475e-05, + "loss": 3.6765, + "step": 6150 + }, + { + "epoch": 0.79, + "learning_rate": 5.697980302754458e-05, + "loss": 3.8073, + "step": 6151 + }, + { + "epoch": 0.79, + "learning_rate": 5.691394398478727e-05, + "loss": 3.7271, + "step": 6152 + }, + { + "epoch": 0.79, + "learning_rate": 5.684811813576138e-05, + "loss": 3.9395, + "step": 6153 + }, + { + "epoch": 0.79, + "learning_rate": 5.678232549178328e-05, + "loss": 3.9011, + "step": 6154 + }, + { + "epoch": 0.79, + "learning_rate": 5.671656606416337e-05, + "loss": 3.7337, + "step": 6155 + }, + { + "epoch": 0.79, + "learning_rate": 5.6650839864206496e-05, + "loss": 3.8606, + "step": 6156 + }, + { + "epoch": 0.79, + "learning_rate": 5.658514690321176e-05, + "loss": 3.861, + "step": 6157 + }, + { + "epoch": 0.79, + "learning_rate": 5.651948719247252e-05, + "loss": 3.625, + "step": 6158 + }, + { + "epoch": 0.79, + "learning_rate": 5.645386074327638e-05, + "loss": 3.8591, + "step": 6159 + }, + { + "epoch": 0.79, + "learning_rate": 5.63882675669054e-05, + "loss": 3.777, + "step": 6160 + }, + { + "epoch": 0.79, + "learning_rate": 5.6322707674635736e-05, + "loss": 3.8813, + "step": 6161 + }, + { + "epoch": 0.79, + "learning_rate": 5.6257181077737975e-05, + "loss": 3.7485, + "step": 6162 + }, + { + "epoch": 0.79, + "learning_rate": 5.619168778747685e-05, + "loss": 3.7974, + "step": 6163 + }, + { + "epoch": 0.79, + "learning_rate": 5.6126227815111425e-05, + "loss": 3.8449, + "step": 6164 + }, + { + "epoch": 0.79, + "learning_rate": 5.6060801171894996e-05, + "loss": 3.7582, + "step": 6165 + }, + { + "epoch": 0.79, + "learning_rate": 5.599540786907512e-05, + "loss": 3.923, + "step": 6166 + }, + { + "epoch": 0.79, + "learning_rate": 5.59300479178938e-05, + "loss": 3.7889, + "step": 6167 + }, + { + "epoch": 0.79, + "learning_rate": 5.5864721329587084e-05, + "loss": 3.7255, + "step": 6168 + }, + { + "epoch": 0.79, + "learning_rate": 5.579942811538538e-05, + "loss": 3.8612, + "step": 6169 + }, + { + "epoch": 0.79, + "learning_rate": 5.573416828651329e-05, + "loss": 3.9012, + "step": 6170 + }, + { + "epoch": 0.79, + "learning_rate": 5.566894185418986e-05, + "loss": 3.8806, + "step": 6171 + }, + { + "epoch": 0.79, + "learning_rate": 5.560374882962821e-05, + "loss": 3.7895, + "step": 6172 + }, + { + "epoch": 0.79, + "learning_rate": 5.553858922403576e-05, + "loss": 3.8159, + "step": 6173 + }, + { + "epoch": 0.79, + "learning_rate": 5.5473463048614144e-05, + "loss": 3.8886, + "step": 6174 + }, + { + "epoch": 0.79, + "learning_rate": 5.540837031455945e-05, + "loss": 3.8509, + "step": 6175 + }, + { + "epoch": 0.79, + "learning_rate": 5.534331103306181e-05, + "loss": 3.9491, + "step": 6176 + }, + { + "epoch": 0.79, + "learning_rate": 5.527828521530562e-05, + "loss": 3.8185, + "step": 6177 + }, + { + "epoch": 0.79, + "learning_rate": 5.521329287246965e-05, + "loss": 3.7154, + "step": 6178 + }, + { + "epoch": 0.79, + "learning_rate": 5.5148334015726774e-05, + "loss": 3.7008, + "step": 6179 + }, + { + "epoch": 0.79, + "learning_rate": 5.5083408656244146e-05, + "loss": 3.7124, + "step": 6180 + }, + { + "epoch": 0.79, + "learning_rate": 5.501851680518322e-05, + "loss": 3.8196, + "step": 6181 + }, + { + "epoch": 0.79, + "learning_rate": 5.4953658473699734e-05, + "loss": 3.9498, + "step": 6182 + }, + { + "epoch": 0.79, + "learning_rate": 5.488883367294353e-05, + "loss": 3.8555, + "step": 6183 + }, + { + "epoch": 0.79, + "learning_rate": 5.482404241405875e-05, + "loss": 3.7937, + "step": 6184 + }, + { + "epoch": 0.79, + "learning_rate": 5.4759284708183755e-05, + "loss": 3.7483, + "step": 6185 + }, + { + "epoch": 0.79, + "learning_rate": 5.469456056645114e-05, + "loss": 3.8541, + "step": 6186 + }, + { + "epoch": 0.79, + "learning_rate": 5.4629869999987675e-05, + "loss": 3.7598, + "step": 6187 + }, + { + "epoch": 0.79, + "learning_rate": 5.4565213019914544e-05, + "loss": 3.7933, + "step": 6188 + }, + { + "epoch": 0.79, + "learning_rate": 5.450058963734691e-05, + "loss": 3.9947, + "step": 6189 + }, + { + "epoch": 0.79, + "learning_rate": 5.44359998633944e-05, + "loss": 3.926, + "step": 6190 + }, + { + "epoch": 0.79, + "learning_rate": 5.437144370916069e-05, + "loss": 3.8339, + "step": 6191 + }, + { + "epoch": 0.79, + "learning_rate": 5.430692118574374e-05, + "loss": 3.8065, + "step": 6192 + }, + { + "epoch": 0.79, + "learning_rate": 5.424243230423567e-05, + "loss": 3.7508, + "step": 6193 + }, + { + "epoch": 0.79, + "learning_rate": 5.417797707572294e-05, + "loss": 3.7161, + "step": 6194 + }, + { + "epoch": 0.79, + "learning_rate": 5.411355551128602e-05, + "loss": 3.7615, + "step": 6195 + }, + { + "epoch": 0.79, + "learning_rate": 5.404916762199991e-05, + "loss": 3.7991, + "step": 6196 + }, + { + "epoch": 0.79, + "learning_rate": 5.3984813418933545e-05, + "loss": 3.8679, + "step": 6197 + }, + { + "epoch": 0.79, + "learning_rate": 5.3920492913150074e-05, + "loss": 3.6576, + "step": 6198 + }, + { + "epoch": 0.79, + "learning_rate": 5.3856206115707077e-05, + "loss": 3.655, + "step": 6199 + }, + { + "epoch": 0.79, + "learning_rate": 5.379195303765616e-05, + "loss": 3.721, + "step": 6200 + }, + { + "epoch": 0.79, + "learning_rate": 5.372773369004314e-05, + "loss": 3.65, + "step": 6201 + }, + { + "epoch": 0.79, + "learning_rate": 5.366354808390803e-05, + "loss": 3.8003, + "step": 6202 + }, + { + "epoch": 0.79, + "learning_rate": 5.35993962302852e-05, + "loss": 3.743, + "step": 6203 + }, + { + "epoch": 0.79, + "learning_rate": 5.353527814020301e-05, + "loss": 3.8197, + "step": 6204 + }, + { + "epoch": 0.79, + "learning_rate": 5.347119382468413e-05, + "loss": 3.7802, + "step": 6205 + }, + { + "epoch": 0.79, + "learning_rate": 5.340714329474541e-05, + "loss": 3.6681, + "step": 6206 + }, + { + "epoch": 0.79, + "learning_rate": 5.3343126561397756e-05, + "loss": 3.7032, + "step": 6207 + }, + { + "epoch": 0.79, + "learning_rate": 5.327914363564659e-05, + "loss": 3.9164, + "step": 6208 + }, + { + "epoch": 0.79, + "learning_rate": 5.3215194528491186e-05, + "loss": 3.7483, + "step": 6209 + }, + { + "epoch": 0.79, + "learning_rate": 5.315127925092511e-05, + "loss": 3.7422, + "step": 6210 + }, + { + "epoch": 0.8, + "learning_rate": 5.308739781393629e-05, + "loss": 3.6736, + "step": 6211 + }, + { + "epoch": 0.8, + "learning_rate": 5.302355022850655e-05, + "loss": 3.7683, + "step": 6212 + }, + { + "epoch": 0.8, + "learning_rate": 5.295973650561212e-05, + "loss": 3.8878, + "step": 6213 + }, + { + "epoch": 0.8, + "learning_rate": 5.289595665622324e-05, + "loss": 3.9146, + "step": 6214 + }, + { + "epoch": 0.8, + "learning_rate": 5.283221069130442e-05, + "loss": 3.8673, + "step": 6215 + }, + { + "epoch": 0.8, + "learning_rate": 5.276849862181432e-05, + "loss": 3.8623, + "step": 6216 + }, + { + "epoch": 0.8, + "learning_rate": 5.2704820458705785e-05, + "loss": 3.7389, + "step": 6217 + }, + { + "epoch": 0.8, + "learning_rate": 5.264117621292594e-05, + "loss": 3.6763, + "step": 6218 + }, + { + "epoch": 0.8, + "learning_rate": 5.2577565895415876e-05, + "loss": 3.7323, + "step": 6219 + }, + { + "epoch": 0.8, + "learning_rate": 5.251398951711095e-05, + "loss": 3.9432, + "step": 6220 + }, + { + "epoch": 0.8, + "learning_rate": 5.245044708894067e-05, + "loss": 3.7554, + "step": 6221 + }, + { + "epoch": 0.8, + "learning_rate": 5.238693862182875e-05, + "loss": 3.6356, + "step": 6222 + }, + { + "epoch": 0.8, + "learning_rate": 5.232346412669292e-05, + "loss": 3.9193, + "step": 6223 + }, + { + "epoch": 0.8, + "learning_rate": 5.2260023614445344e-05, + "loss": 3.9411, + "step": 6224 + }, + { + "epoch": 0.8, + "learning_rate": 5.2196617095992103e-05, + "loss": 3.7613, + "step": 6225 + }, + { + "epoch": 0.8, + "learning_rate": 5.213324458223345e-05, + "loss": 3.7812, + "step": 6226 + }, + { + "epoch": 0.8, + "learning_rate": 5.2069906084064006e-05, + "loss": 3.8013, + "step": 6227 + }, + { + "epoch": 0.8, + "learning_rate": 5.200660161237228e-05, + "loss": 3.6907, + "step": 6228 + }, + { + "epoch": 0.8, + "learning_rate": 5.1943331178041094e-05, + "loss": 3.8089, + "step": 6229 + }, + { + "epoch": 0.8, + "learning_rate": 5.188009479194736e-05, + "loss": 3.8591, + "step": 6230 + }, + { + "epoch": 0.8, + "learning_rate": 5.1816892464962046e-05, + "loss": 3.8351, + "step": 6231 + }, + { + "epoch": 0.8, + "learning_rate": 5.1753724207950517e-05, + "loss": 3.856, + "step": 6232 + }, + { + "epoch": 0.8, + "learning_rate": 5.1690590031772066e-05, + "loss": 3.7529, + "step": 6233 + }, + { + "epoch": 0.8, + "learning_rate": 5.16274899472802e-05, + "loss": 3.8917, + "step": 6234 + }, + { + "epoch": 0.8, + "learning_rate": 5.156442396532246e-05, + "loss": 3.7894, + "step": 6235 + }, + { + "epoch": 0.8, + "learning_rate": 5.150139209674079e-05, + "loss": 3.8871, + "step": 6236 + }, + { + "epoch": 0.8, + "learning_rate": 5.1438394352370994e-05, + "loss": 3.8629, + "step": 6237 + }, + { + "epoch": 0.8, + "learning_rate": 5.137543074304307e-05, + "loss": 4.0107, + "step": 6238 + }, + { + "epoch": 0.8, + "learning_rate": 5.131250127958134e-05, + "loss": 3.8116, + "step": 6239 + }, + { + "epoch": 0.8, + "learning_rate": 5.124960597280401e-05, + "loss": 3.8315, + "step": 6240 + }, + { + "epoch": 0.8, + "learning_rate": 5.1186744833523533e-05, + "loss": 3.9108, + "step": 6241 + }, + { + "epoch": 0.8, + "learning_rate": 5.112391787254645e-05, + "loss": 3.8818, + "step": 6242 + }, + { + "epoch": 0.8, + "learning_rate": 5.106112510067346e-05, + "loss": 3.6727, + "step": 6243 + }, + { + "epoch": 0.8, + "learning_rate": 5.0998366528699266e-05, + "loss": 3.8349, + "step": 6244 + }, + { + "epoch": 0.8, + "learning_rate": 5.0935642167413e-05, + "loss": 4.0609, + "step": 6245 + }, + { + "epoch": 0.8, + "learning_rate": 5.087295202759751e-05, + "loss": 3.9097, + "step": 6246 + }, + { + "epoch": 0.8, + "learning_rate": 5.081029612003013e-05, + "loss": 3.7943, + "step": 6247 + }, + { + "epoch": 0.8, + "learning_rate": 5.074767445548204e-05, + "loss": 3.9104, + "step": 6248 + }, + { + "epoch": 0.8, + "learning_rate": 5.0685087044718635e-05, + "loss": 3.8366, + "step": 6249 + }, + { + "epoch": 0.8, + "learning_rate": 5.062253389849941e-05, + "loss": 3.7414, + "step": 6250 + }, + { + "epoch": 0.8, + "learning_rate": 5.056001502757801e-05, + "loss": 3.8313, + "step": 6251 + }, + { + "epoch": 0.8, + "learning_rate": 5.0497530442702086e-05, + "loss": 3.75, + "step": 6252 + }, + { + "epoch": 0.8, + "learning_rate": 5.043508015461354e-05, + "loss": 3.7855, + "step": 6253 + }, + { + "epoch": 0.8, + "learning_rate": 5.037266417404823e-05, + "loss": 3.7296, + "step": 6254 + }, + { + "epoch": 0.8, + "learning_rate": 5.031028251173628e-05, + "loss": 3.597, + "step": 6255 + }, + { + "epoch": 0.8, + "learning_rate": 5.024793517840176e-05, + "loss": 3.7646, + "step": 6256 + }, + { + "epoch": 0.8, + "learning_rate": 5.018562218476294e-05, + "loss": 3.8007, + "step": 6257 + }, + { + "epoch": 0.8, + "learning_rate": 5.012334354153208e-05, + "loss": 3.7979, + "step": 6258 + }, + { + "epoch": 0.8, + "learning_rate": 5.00610992594156e-05, + "loss": 3.9324, + "step": 6259 + }, + { + "epoch": 0.8, + "learning_rate": 4.999888934911409e-05, + "loss": 3.7467, + "step": 6260 + }, + { + "epoch": 0.8, + "learning_rate": 4.993671382132212e-05, + "loss": 3.8241, + "step": 6261 + }, + { + "epoch": 0.8, + "learning_rate": 4.987457268672837e-05, + "loss": 3.8624, + "step": 6262 + }, + { + "epoch": 0.8, + "learning_rate": 4.981246595601555e-05, + "loss": 3.7658, + "step": 6263 + }, + { + "epoch": 0.8, + "learning_rate": 4.9750393639860694e-05, + "loss": 3.8321, + "step": 6264 + }, + { + "epoch": 0.8, + "learning_rate": 4.9688355748934654e-05, + "loss": 3.7934, + "step": 6265 + }, + { + "epoch": 0.8, + "learning_rate": 4.9626352293902474e-05, + "loss": 3.9113, + "step": 6266 + }, + { + "epoch": 0.8, + "learning_rate": 4.9564383285423204e-05, + "loss": 3.8225, + "step": 6267 + }, + { + "epoch": 0.8, + "learning_rate": 4.950244873415016e-05, + "loss": 3.7097, + "step": 6268 + }, + { + "epoch": 0.8, + "learning_rate": 4.9440548650730555e-05, + "loss": 3.8325, + "step": 6269 + }, + { + "epoch": 0.8, + "learning_rate": 4.93786830458057e-05, + "loss": 3.8337, + "step": 6270 + }, + { + "epoch": 0.8, + "learning_rate": 4.931685193001106e-05, + "loss": 3.7488, + "step": 6271 + }, + { + "epoch": 0.8, + "learning_rate": 4.9255055313976003e-05, + "loss": 3.8528, + "step": 6272 + }, + { + "epoch": 0.8, + "learning_rate": 4.9193293208324246e-05, + "loss": 3.8571, + "step": 6273 + }, + { + "epoch": 0.8, + "learning_rate": 4.9131565623673266e-05, + "loss": 3.8874, + "step": 6274 + }, + { + "epoch": 0.8, + "learning_rate": 4.906987257063489e-05, + "loss": 3.9726, + "step": 6275 + }, + { + "epoch": 0.8, + "learning_rate": 4.900821405981482e-05, + "loss": 3.804, + "step": 6276 + }, + { + "epoch": 0.8, + "learning_rate": 4.894659010181282e-05, + "loss": 3.6655, + "step": 6277 + }, + { + "epoch": 0.8, + "learning_rate": 4.888500070722282e-05, + "loss": 3.8602, + "step": 6278 + }, + { + "epoch": 0.8, + "learning_rate": 4.882344588663271e-05, + "loss": 3.775, + "step": 6279 + }, + { + "epoch": 0.8, + "learning_rate": 4.87619256506244e-05, + "loss": 3.7716, + "step": 6280 + }, + { + "epoch": 0.8, + "learning_rate": 4.870044000977406e-05, + "loss": 3.6207, + "step": 6281 + }, + { + "epoch": 0.8, + "learning_rate": 4.86389889746518e-05, + "loss": 3.6548, + "step": 6282 + }, + { + "epoch": 0.8, + "learning_rate": 4.857757255582171e-05, + "loss": 3.5894, + "step": 6283 + }, + { + "epoch": 0.8, + "learning_rate": 4.851619076384201e-05, + "loss": 3.8528, + "step": 6284 + }, + { + "epoch": 0.8, + "learning_rate": 4.845484360926489e-05, + "loss": 3.7705, + "step": 6285 + }, + { + "epoch": 0.8, + "learning_rate": 4.8393531102636664e-05, + "loss": 3.8408, + "step": 6286 + }, + { + "epoch": 0.8, + "learning_rate": 4.8332253254497665e-05, + "loss": 3.7803, + "step": 6287 + }, + { + "epoch": 0.8, + "learning_rate": 4.8271010075382205e-05, + "loss": 3.6567, + "step": 6288 + }, + { + "epoch": 0.8, + "learning_rate": 4.8209801575818835e-05, + "loss": 3.6471, + "step": 6289 + }, + { + "epoch": 0.81, + "learning_rate": 4.8148627766329914e-05, + "loss": 3.9277, + "step": 6290 + }, + { + "epoch": 0.81, + "learning_rate": 4.808748865743187e-05, + "loss": 3.7834, + "step": 6291 + }, + { + "epoch": 0.81, + "learning_rate": 4.802638425963537e-05, + "loss": 3.6886, + "step": 6292 + }, + { + "epoch": 0.81, + "learning_rate": 4.796531458344491e-05, + "loss": 3.8091, + "step": 6293 + }, + { + "epoch": 0.81, + "learning_rate": 4.790427963935903e-05, + "loss": 3.7669, + "step": 6294 + }, + { + "epoch": 0.81, + "learning_rate": 4.784327943787034e-05, + "loss": 3.7885, + "step": 6295 + }, + { + "epoch": 0.81, + "learning_rate": 4.7782313989465556e-05, + "loss": 3.8237, + "step": 6296 + }, + { + "epoch": 0.81, + "learning_rate": 4.772138330462533e-05, + "loss": 3.629, + "step": 6297 + }, + { + "epoch": 0.81, + "learning_rate": 4.766048739382431e-05, + "loss": 3.831, + "step": 6298 + }, + { + "epoch": 0.81, + "learning_rate": 4.7599626267531225e-05, + "loss": 3.7643, + "step": 6299 + }, + { + "epoch": 0.81, + "learning_rate": 4.753879993620877e-05, + "loss": 3.8349, + "step": 6300 + }, + { + "epoch": 0.81, + "learning_rate": 4.7478008410313775e-05, + "loss": 3.7079, + "step": 6301 + }, + { + "epoch": 0.81, + "learning_rate": 4.741725170029693e-05, + "loss": 3.7766, + "step": 6302 + }, + { + "epoch": 0.81, + "learning_rate": 4.73565298166031e-05, + "loss": 3.8169, + "step": 6303 + }, + { + "epoch": 0.81, + "learning_rate": 4.7295842769671053e-05, + "loss": 3.8474, + "step": 6304 + }, + { + "epoch": 0.81, + "learning_rate": 4.723519056993358e-05, + "loss": 3.8788, + "step": 6305 + }, + { + "epoch": 0.81, + "learning_rate": 4.717457322781749e-05, + "loss": 3.7776, + "step": 6306 + }, + { + "epoch": 0.81, + "learning_rate": 4.711399075374362e-05, + "loss": 3.7175, + "step": 6307 + }, + { + "epoch": 0.81, + "learning_rate": 4.705344315812682e-05, + "loss": 3.7253, + "step": 6308 + }, + { + "epoch": 0.81, + "learning_rate": 4.699293045137582e-05, + "loss": 3.8473, + "step": 6309 + }, + { + "epoch": 0.81, + "learning_rate": 4.693245264389351e-05, + "loss": 3.8948, + "step": 6310 + }, + { + "epoch": 0.81, + "learning_rate": 4.687200974607683e-05, + "loss": 3.9165, + "step": 6311 + }, + { + "epoch": 0.81, + "learning_rate": 4.6811601768316556e-05, + "loss": 3.8822, + "step": 6312 + }, + { + "epoch": 0.81, + "learning_rate": 4.675122872099749e-05, + "loss": 3.7424, + "step": 6313 + }, + { + "epoch": 0.81, + "learning_rate": 4.6690890614498466e-05, + "loss": 3.6426, + "step": 6314 + }, + { + "epoch": 0.81, + "learning_rate": 4.66305874591923e-05, + "loss": 3.8876, + "step": 6315 + }, + { + "epoch": 0.81, + "learning_rate": 4.657031926544575e-05, + "loss": 3.8742, + "step": 6316 + }, + { + "epoch": 0.81, + "learning_rate": 4.651008604361975e-05, + "loss": 3.867, + "step": 6317 + }, + { + "epoch": 0.81, + "learning_rate": 4.6449887804068944e-05, + "loss": 3.8839, + "step": 6318 + }, + { + "epoch": 0.81, + "learning_rate": 4.638972455714224e-05, + "loss": 3.782, + "step": 6319 + }, + { + "epoch": 0.81, + "learning_rate": 4.632959631318234e-05, + "loss": 3.7132, + "step": 6320 + }, + { + "epoch": 0.81, + "learning_rate": 4.6269503082526e-05, + "loss": 3.7284, + "step": 6321 + }, + { + "epoch": 0.81, + "learning_rate": 4.620944487550391e-05, + "loss": 3.8567, + "step": 6322 + }, + { + "epoch": 0.81, + "learning_rate": 4.6149421702440743e-05, + "loss": 3.711, + "step": 6323 + }, + { + "epoch": 0.81, + "learning_rate": 4.6089433573655276e-05, + "loss": 3.7909, + "step": 6324 + }, + { + "epoch": 0.81, + "learning_rate": 4.6029480499460095e-05, + "loss": 3.845, + "step": 6325 + }, + { + "epoch": 0.81, + "learning_rate": 4.596956249016188e-05, + "loss": 3.7015, + "step": 6326 + }, + { + "epoch": 0.81, + "learning_rate": 4.590967955606115e-05, + "loss": 3.6571, + "step": 6327 + }, + { + "epoch": 0.81, + "learning_rate": 4.5849831707452496e-05, + "loss": 3.6569, + "step": 6328 + }, + { + "epoch": 0.81, + "learning_rate": 4.579001895462453e-05, + "loss": 3.758, + "step": 6329 + }, + { + "epoch": 0.81, + "learning_rate": 4.573024130785972e-05, + "loss": 3.7637, + "step": 6330 + }, + { + "epoch": 0.81, + "learning_rate": 4.5670498777434456e-05, + "loss": 3.7257, + "step": 6331 + }, + { + "epoch": 0.81, + "learning_rate": 4.561079137361932e-05, + "loss": 3.843, + "step": 6332 + }, + { + "epoch": 0.81, + "learning_rate": 4.555111910667861e-05, + "loss": 3.9607, + "step": 6333 + }, + { + "epoch": 0.81, + "learning_rate": 4.5491481986870726e-05, + "loss": 3.6339, + "step": 6334 + }, + { + "epoch": 0.81, + "learning_rate": 4.543188002444795e-05, + "loss": 3.8122, + "step": 6335 + }, + { + "epoch": 0.81, + "learning_rate": 4.537231322965654e-05, + "loss": 3.8421, + "step": 6336 + }, + { + "epoch": 0.81, + "learning_rate": 4.531278161273667e-05, + "loss": 3.8013, + "step": 6337 + }, + { + "epoch": 0.81, + "learning_rate": 4.5253285183922574e-05, + "loss": 3.9227, + "step": 6338 + }, + { + "epoch": 0.81, + "learning_rate": 4.519382395344246e-05, + "loss": 3.8306, + "step": 6339 + }, + { + "epoch": 0.81, + "learning_rate": 4.513439793151833e-05, + "loss": 3.8561, + "step": 6340 + }, + { + "epoch": 0.81, + "learning_rate": 4.507500712836621e-05, + "loss": 3.7228, + "step": 6341 + }, + { + "epoch": 0.81, + "learning_rate": 4.5015651554196015e-05, + "loss": 3.8397, + "step": 6342 + }, + { + "epoch": 0.81, + "learning_rate": 4.495633121921175e-05, + "loss": 3.763, + "step": 6343 + }, + { + "epoch": 0.81, + "learning_rate": 4.489704613361112e-05, + "loss": 3.9432, + "step": 6344 + }, + { + "epoch": 0.81, + "learning_rate": 4.4837796307586085e-05, + "loss": 3.8655, + "step": 6345 + }, + { + "epoch": 0.81, + "learning_rate": 4.477858175132227e-05, + "loss": 3.6266, + "step": 6346 + }, + { + "epoch": 0.81, + "learning_rate": 4.4719402474999424e-05, + "loss": 3.7807, + "step": 6347 + }, + { + "epoch": 0.81, + "learning_rate": 4.4660258488791125e-05, + "loss": 3.8081, + "step": 6348 + }, + { + "epoch": 0.81, + "learning_rate": 4.4601149802864864e-05, + "loss": 3.9151, + "step": 6349 + }, + { + "epoch": 0.81, + "learning_rate": 4.454207642738217e-05, + "loss": 3.7794, + "step": 6350 + }, + { + "epoch": 0.81, + "learning_rate": 4.4483038372498397e-05, + "loss": 3.883, + "step": 6351 + }, + { + "epoch": 0.81, + "learning_rate": 4.4424035648362836e-05, + "loss": 3.7751, + "step": 6352 + }, + { + "epoch": 0.81, + "learning_rate": 4.4365068265118825e-05, + "loss": 3.8502, + "step": 6353 + }, + { + "epoch": 0.81, + "learning_rate": 4.4306136232903524e-05, + "loss": 3.8744, + "step": 6354 + }, + { + "epoch": 0.81, + "learning_rate": 4.424723956184795e-05, + "loss": 3.7371, + "step": 6355 + }, + { + "epoch": 0.81, + "learning_rate": 4.418837826207725e-05, + "loss": 3.8591, + "step": 6356 + }, + { + "epoch": 0.81, + "learning_rate": 4.41295523437103e-05, + "loss": 3.9009, + "step": 6357 + }, + { + "epoch": 0.81, + "learning_rate": 4.407076181685996e-05, + "loss": 3.7451, + "step": 6358 + }, + { + "epoch": 0.81, + "learning_rate": 4.401200669163291e-05, + "loss": 3.8595, + "step": 6359 + }, + { + "epoch": 0.81, + "learning_rate": 4.3953286978130005e-05, + "loss": 3.7259, + "step": 6360 + }, + { + "epoch": 0.81, + "learning_rate": 4.389460268644577e-05, + "loss": 3.7806, + "step": 6361 + }, + { + "epoch": 0.81, + "learning_rate": 4.383595382666872e-05, + "loss": 3.7291, + "step": 6362 + }, + { + "epoch": 0.81, + "learning_rate": 4.3777340408881263e-05, + "loss": 3.8599, + "step": 6363 + }, + { + "epoch": 0.81, + "learning_rate": 4.3718762443159725e-05, + "loss": 3.9016, + "step": 6364 + }, + { + "epoch": 0.81, + "learning_rate": 4.366021993957428e-05, + "loss": 3.8097, + "step": 6365 + }, + { + "epoch": 0.81, + "learning_rate": 4.360171290818918e-05, + "loss": 3.8231, + "step": 6366 + }, + { + "epoch": 0.81, + "learning_rate": 4.354324135906235e-05, + "loss": 3.7783, + "step": 6367 + }, + { + "epoch": 0.82, + "learning_rate": 4.3484805302245826e-05, + "loss": 3.7641, + "step": 6368 + }, + { + "epoch": 0.82, + "learning_rate": 4.342640474778542e-05, + "loss": 3.7973, + "step": 6369 + }, + { + "epoch": 0.82, + "learning_rate": 4.3368039705720844e-05, + "loss": 3.7687, + "step": 6370 + }, + { + "epoch": 0.82, + "learning_rate": 4.33097101860857e-05, + "loss": 3.821, + "step": 6371 + }, + { + "epoch": 0.82, + "learning_rate": 4.3251416198907576e-05, + "loss": 3.8345, + "step": 6372 + }, + { + "epoch": 0.82, + "learning_rate": 4.319315775420776e-05, + "loss": 3.8223, + "step": 6373 + }, + { + "epoch": 0.82, + "learning_rate": 4.3134934862001624e-05, + "loss": 3.7088, + "step": 6374 + }, + { + "epoch": 0.82, + "learning_rate": 4.307674753229846e-05, + "loss": 3.9889, + "step": 6375 + }, + { + "epoch": 0.82, + "learning_rate": 4.301859577510123e-05, + "loss": 3.6212, + "step": 6376 + }, + { + "epoch": 0.82, + "learning_rate": 4.2960479600406917e-05, + "loss": 3.8034, + "step": 6377 + }, + { + "epoch": 0.82, + "learning_rate": 4.290239901820639e-05, + "loss": 3.8332, + "step": 6378 + }, + { + "epoch": 0.82, + "learning_rate": 4.284435403848436e-05, + "loss": 3.848, + "step": 6379 + }, + { + "epoch": 0.82, + "learning_rate": 4.2786344671219334e-05, + "loss": 3.7473, + "step": 6380 + }, + { + "epoch": 0.82, + "learning_rate": 4.2728370926383956e-05, + "loss": 3.7829, + "step": 6381 + }, + { + "epoch": 0.82, + "learning_rate": 4.267043281394453e-05, + "loss": 3.8052, + "step": 6382 + }, + { + "epoch": 0.82, + "learning_rate": 4.26125303438612e-05, + "loss": 3.8231, + "step": 6383 + }, + { + "epoch": 0.82, + "learning_rate": 4.255466352608822e-05, + "loss": 3.8465, + "step": 6384 + }, + { + "epoch": 0.82, + "learning_rate": 4.2496832370573476e-05, + "loss": 3.8233, + "step": 6385 + }, + { + "epoch": 0.82, + "learning_rate": 4.2439036887258837e-05, + "loss": 3.7667, + "step": 6386 + }, + { + "epoch": 0.82, + "learning_rate": 4.2381277086080026e-05, + "loss": 3.753, + "step": 6387 + }, + { + "epoch": 0.82, + "learning_rate": 4.2323552976966525e-05, + "loss": 3.8055, + "step": 6388 + }, + { + "epoch": 0.82, + "learning_rate": 4.226586456984191e-05, + "loss": 3.7505, + "step": 6389 + }, + { + "epoch": 0.82, + "learning_rate": 4.220821187462345e-05, + "loss": 3.7627, + "step": 6390 + }, + { + "epoch": 0.82, + "learning_rate": 4.2150594901222306e-05, + "loss": 3.7753, + "step": 6391 + }, + { + "epoch": 0.82, + "learning_rate": 4.209301365954343e-05, + "loss": 3.738, + "step": 6392 + }, + { + "epoch": 0.82, + "learning_rate": 4.2035468159485835e-05, + "loss": 3.86, + "step": 6393 + }, + { + "epoch": 0.82, + "learning_rate": 4.197795841094221e-05, + "loss": 3.77, + "step": 6394 + }, + { + "epoch": 0.82, + "learning_rate": 4.192048442379903e-05, + "loss": 3.812, + "step": 6395 + }, + { + "epoch": 0.82, + "learning_rate": 4.1863046207936934e-05, + "loss": 3.7431, + "step": 6396 + }, + { + "epoch": 0.82, + "learning_rate": 4.180564377323012e-05, + "loss": 3.7146, + "step": 6397 + }, + { + "epoch": 0.82, + "learning_rate": 4.1748277129546735e-05, + "loss": 3.8031, + "step": 6398 + }, + { + "epoch": 0.82, + "learning_rate": 4.169094628674877e-05, + "loss": 3.7139, + "step": 6399 + }, + { + "epoch": 0.82, + "learning_rate": 4.163365125469207e-05, + "loss": 3.663, + "step": 6400 + }, + { + "epoch": 0.82, + "learning_rate": 4.157639204322625e-05, + "loss": 3.7622, + "step": 6401 + }, + { + "epoch": 0.82, + "learning_rate": 4.1519168662194935e-05, + "loss": 3.8509, + "step": 6402 + }, + { + "epoch": 0.82, + "learning_rate": 4.14619811214354e-05, + "loss": 3.8799, + "step": 6403 + }, + { + "epoch": 0.82, + "learning_rate": 4.140482943077895e-05, + "loss": 3.901, + "step": 6404 + }, + { + "epoch": 0.82, + "learning_rate": 4.1347713600050547e-05, + "loss": 3.6813, + "step": 6405 + }, + { + "epoch": 0.82, + "learning_rate": 4.12906336390691e-05, + "loss": 3.6529, + "step": 6406 + }, + { + "epoch": 0.82, + "learning_rate": 4.123358955764728e-05, + "loss": 3.8381, + "step": 6407 + }, + { + "epoch": 0.82, + "learning_rate": 4.117658136559166e-05, + "loss": 3.8186, + "step": 6408 + }, + { + "epoch": 0.82, + "learning_rate": 4.1119609072702515e-05, + "loss": 3.7179, + "step": 6409 + }, + { + "epoch": 0.82, + "learning_rate": 4.10626726887742e-05, + "loss": 3.6779, + "step": 6410 + }, + { + "epoch": 0.82, + "learning_rate": 4.1005772223594583e-05, + "loss": 3.7674, + "step": 6411 + }, + { + "epoch": 0.82, + "learning_rate": 4.0948907686945675e-05, + "loss": 3.8518, + "step": 6412 + }, + { + "epoch": 0.82, + "learning_rate": 4.089207908860304e-05, + "loss": 3.8524, + "step": 6413 + }, + { + "epoch": 0.82, + "learning_rate": 4.08352864383362e-05, + "loss": 3.7845, + "step": 6414 + }, + { + "epoch": 0.82, + "learning_rate": 4.077852974590846e-05, + "loss": 3.7699, + "step": 6415 + }, + { + "epoch": 0.82, + "learning_rate": 4.0721809021076916e-05, + "loss": 3.8429, + "step": 6416 + }, + { + "epoch": 0.82, + "learning_rate": 4.066512427359262e-05, + "loss": 3.7085, + "step": 6417 + }, + { + "epoch": 0.82, + "learning_rate": 4.060847551320027e-05, + "loss": 3.7797, + "step": 6418 + }, + { + "epoch": 0.82, + "learning_rate": 4.055186274963846e-05, + "loss": 3.6704, + "step": 6419 + }, + { + "epoch": 0.82, + "learning_rate": 4.0495285992639494e-05, + "loss": 3.8104, + "step": 6420 + }, + { + "epoch": 0.82, + "learning_rate": 4.0438745251929756e-05, + "loss": 3.8126, + "step": 6421 + }, + { + "epoch": 0.82, + "learning_rate": 4.038224053722911e-05, + "loss": 3.7756, + "step": 6422 + }, + { + "epoch": 0.82, + "learning_rate": 4.0325771858251424e-05, + "loss": 3.7632, + "step": 6423 + }, + { + "epoch": 0.82, + "learning_rate": 4.026933922470427e-05, + "loss": 3.8128, + "step": 6424 + }, + { + "epoch": 0.82, + "learning_rate": 4.021294264628914e-05, + "loss": 3.7652, + "step": 6425 + }, + { + "epoch": 0.82, + "learning_rate": 4.015658213270126e-05, + "loss": 3.8323, + "step": 6426 + }, + { + "epoch": 0.82, + "learning_rate": 4.01002576936296e-05, + "loss": 3.6857, + "step": 6427 + }, + { + "epoch": 0.82, + "learning_rate": 4.004396933875701e-05, + "loss": 3.6912, + "step": 6428 + }, + { + "epoch": 0.82, + "learning_rate": 3.9987717077760054e-05, + "loss": 3.84, + "step": 6429 + }, + { + "epoch": 0.82, + "learning_rate": 3.993150092030928e-05, + "loss": 3.8359, + "step": 6430 + }, + { + "epoch": 0.82, + "learning_rate": 3.987532087606874e-05, + "loss": 3.8939, + "step": 6431 + }, + { + "epoch": 0.82, + "learning_rate": 3.981917695469658e-05, + "loss": 3.7407, + "step": 6432 + }, + { + "epoch": 0.82, + "learning_rate": 3.9763069165844546e-05, + "loss": 3.8682, + "step": 6433 + }, + { + "epoch": 0.82, + "learning_rate": 3.9706997519158185e-05, + "loss": 3.7644, + "step": 6434 + }, + { + "epoch": 0.82, + "learning_rate": 3.965096202427687e-05, + "loss": 3.7025, + "step": 6435 + }, + { + "epoch": 0.82, + "learning_rate": 3.959496269083376e-05, + "loss": 3.9399, + "step": 6436 + }, + { + "epoch": 0.82, + "learning_rate": 3.953899952845572e-05, + "loss": 3.7257, + "step": 6437 + }, + { + "epoch": 0.82, + "learning_rate": 3.9483072546763615e-05, + "loss": 3.773, + "step": 6438 + }, + { + "epoch": 0.82, + "learning_rate": 3.9427181755371774e-05, + "loss": 3.7701, + "step": 6439 + }, + { + "epoch": 0.82, + "learning_rate": 3.937132716388864e-05, + "loss": 3.7899, + "step": 6440 + }, + { + "epoch": 0.82, + "learning_rate": 3.931550878191617e-05, + "loss": 3.7478, + "step": 6441 + }, + { + "epoch": 0.82, + "learning_rate": 3.9259726619050225e-05, + "loss": 3.7984, + "step": 6442 + }, + { + "epoch": 0.82, + "learning_rate": 3.920398068488037e-05, + "loss": 3.7534, + "step": 6443 + }, + { + "epoch": 0.82, + "learning_rate": 3.914827098898999e-05, + "loss": 3.8785, + "step": 6444 + }, + { + "epoch": 0.82, + "learning_rate": 3.909259754095617e-05, + "loss": 3.8122, + "step": 6445 + }, + { + "epoch": 0.83, + "learning_rate": 3.9036960350349954e-05, + "loss": 3.8702, + "step": 6446 + }, + { + "epoch": 0.83, + "learning_rate": 3.8981359426735945e-05, + "loss": 3.7934, + "step": 6447 + }, + { + "epoch": 0.83, + "learning_rate": 3.892579477967253e-05, + "loss": 3.857, + "step": 6448 + }, + { + "epoch": 0.83, + "learning_rate": 3.887026641871203e-05, + "loss": 3.7532, + "step": 6449 + }, + { + "epoch": 0.83, + "learning_rate": 3.8814774353400335e-05, + "loss": 3.8798, + "step": 6450 + }, + { + "epoch": 0.83, + "learning_rate": 3.875931859327722e-05, + "loss": 3.8414, + "step": 6451 + }, + { + "epoch": 0.83, + "learning_rate": 3.870389914787609e-05, + "loss": 3.8651, + "step": 6452 + }, + { + "epoch": 0.83, + "learning_rate": 3.8648516026724314e-05, + "loss": 3.8354, + "step": 6453 + }, + { + "epoch": 0.83, + "learning_rate": 3.859316923934284e-05, + "loss": 3.7695, + "step": 6454 + }, + { + "epoch": 0.83, + "learning_rate": 3.8537858795246404e-05, + "loss": 3.7279, + "step": 6455 + }, + { + "epoch": 0.83, + "learning_rate": 3.84825847039435e-05, + "loss": 3.7679, + "step": 6456 + }, + { + "epoch": 0.83, + "learning_rate": 3.8427346974936355e-05, + "loss": 3.8512, + "step": 6457 + }, + { + "epoch": 0.83, + "learning_rate": 3.837214561772109e-05, + "loss": 3.794, + "step": 6458 + }, + { + "epoch": 0.83, + "learning_rate": 3.83169806417874e-05, + "loss": 3.8217, + "step": 6459 + }, + { + "epoch": 0.83, + "learning_rate": 3.826185205661872e-05, + "loss": 3.8032, + "step": 6460 + }, + { + "epoch": 0.83, + "learning_rate": 3.820675987169239e-05, + "loss": 3.6325, + "step": 6461 + }, + { + "epoch": 0.83, + "learning_rate": 3.815170409647939e-05, + "loss": 3.931, + "step": 6462 + }, + { + "epoch": 0.83, + "learning_rate": 3.8096684740444395e-05, + "loss": 3.7827, + "step": 6463 + }, + { + "epoch": 0.83, + "learning_rate": 3.804170181304587e-05, + "loss": 3.771, + "step": 6464 + }, + { + "epoch": 0.83, + "learning_rate": 3.798675532373605e-05, + "loss": 3.775, + "step": 6465 + }, + { + "epoch": 0.83, + "learning_rate": 3.7931845281960807e-05, + "loss": 3.708, + "step": 6466 + }, + { + "epoch": 0.83, + "learning_rate": 3.787697169715984e-05, + "loss": 3.8888, + "step": 6467 + }, + { + "epoch": 0.83, + "learning_rate": 3.7822134578766654e-05, + "loss": 3.8332, + "step": 6468 + }, + { + "epoch": 0.83, + "learning_rate": 3.776733393620832e-05, + "loss": 3.7729, + "step": 6469 + }, + { + "epoch": 0.83, + "learning_rate": 3.771256977890569e-05, + "loss": 3.8058, + "step": 6470 + }, + { + "epoch": 0.83, + "learning_rate": 3.765784211627335e-05, + "loss": 3.7204, + "step": 6471 + }, + { + "epoch": 0.83, + "learning_rate": 3.760315095771966e-05, + "loss": 3.8174, + "step": 6472 + }, + { + "epoch": 0.83, + "learning_rate": 3.754849631264653e-05, + "loss": 3.8198, + "step": 6473 + }, + { + "epoch": 0.83, + "learning_rate": 3.749387819044994e-05, + "loss": 3.8992, + "step": 6474 + }, + { + "epoch": 0.83, + "learning_rate": 3.743929660051923e-05, + "loss": 3.7484, + "step": 6475 + }, + { + "epoch": 0.83, + "learning_rate": 3.738475155223761e-05, + "loss": 3.7923, + "step": 6476 + }, + { + "epoch": 0.83, + "learning_rate": 3.7330243054982116e-05, + "loss": 3.8103, + "step": 6477 + }, + { + "epoch": 0.83, + "learning_rate": 3.7275771118123305e-05, + "loss": 3.7409, + "step": 6478 + }, + { + "epoch": 0.83, + "learning_rate": 3.722133575102554e-05, + "loss": 3.8302, + "step": 6479 + }, + { + "epoch": 0.83, + "learning_rate": 3.7166936963046933e-05, + "loss": 3.711, + "step": 6480 + }, + { + "epoch": 0.83, + "learning_rate": 3.711257476353916e-05, + "loss": 3.8424, + "step": 6481 + }, + { + "epoch": 0.83, + "learning_rate": 3.705824916184783e-05, + "loss": 3.7816, + "step": 6482 + }, + { + "epoch": 0.83, + "learning_rate": 3.700396016731214e-05, + "loss": 3.7038, + "step": 6483 + }, + { + "epoch": 0.83, + "learning_rate": 3.694970778926493e-05, + "loss": 3.823, + "step": 6484 + }, + { + "epoch": 0.83, + "learning_rate": 3.6895492037032766e-05, + "loss": 3.7559, + "step": 6485 + }, + { + "epoch": 0.83, + "learning_rate": 3.684131291993614e-05, + "loss": 3.7268, + "step": 6486 + }, + { + "epoch": 0.83, + "learning_rate": 3.678717044728894e-05, + "loss": 3.7835, + "step": 6487 + }, + { + "epoch": 0.83, + "learning_rate": 3.6733064628398874e-05, + "loss": 3.7269, + "step": 6488 + }, + { + "epoch": 0.83, + "learning_rate": 3.667899547256745e-05, + "loss": 3.7946, + "step": 6489 + }, + { + "epoch": 0.83, + "learning_rate": 3.6624962989089736e-05, + "loss": 3.8813, + "step": 6490 + }, + { + "epoch": 0.83, + "learning_rate": 3.657096718725456e-05, + "loss": 3.8159, + "step": 6491 + }, + { + "epoch": 0.83, + "learning_rate": 3.651700807634439e-05, + "loss": 3.9329, + "step": 6492 + }, + { + "epoch": 0.83, + "learning_rate": 3.646308566563541e-05, + "loss": 3.8228, + "step": 6493 + }, + { + "epoch": 0.83, + "learning_rate": 3.640919996439751e-05, + "loss": 3.8256, + "step": 6494 + }, + { + "epoch": 0.83, + "learning_rate": 3.63553509818943e-05, + "loss": 3.6365, + "step": 6495 + }, + { + "epoch": 0.83, + "learning_rate": 3.6301538727383094e-05, + "loss": 3.8976, + "step": 6496 + }, + { + "epoch": 0.83, + "learning_rate": 3.624776321011478e-05, + "loss": 3.7653, + "step": 6497 + }, + { + "epoch": 0.83, + "learning_rate": 3.619402443933398e-05, + "loss": 3.705, + "step": 6498 + }, + { + "epoch": 0.83, + "learning_rate": 3.614032242427903e-05, + "loss": 3.8266, + "step": 6499 + }, + { + "epoch": 0.83, + "learning_rate": 3.608665717418197e-05, + "loss": 3.8908, + "step": 6500 + }, + { + "epoch": 0.83, + "learning_rate": 3.603302869826841e-05, + "loss": 3.8503, + "step": 6501 + }, + { + "epoch": 0.83, + "learning_rate": 3.5979437005757674e-05, + "loss": 3.642, + "step": 6502 + }, + { + "epoch": 0.83, + "learning_rate": 3.592588210586287e-05, + "loss": 3.8951, + "step": 6503 + }, + { + "epoch": 0.83, + "learning_rate": 3.587236400779073e-05, + "loss": 3.5837, + "step": 6504 + }, + { + "epoch": 0.83, + "learning_rate": 3.58188827207416e-05, + "loss": 3.7815, + "step": 6505 + }, + { + "epoch": 0.83, + "learning_rate": 3.576543825390954e-05, + "loss": 3.895, + "step": 6506 + }, + { + "epoch": 0.83, + "learning_rate": 3.571203061648226e-05, + "loss": 3.8488, + "step": 6507 + }, + { + "epoch": 0.83, + "learning_rate": 3.565865981764116e-05, + "loss": 3.7359, + "step": 6508 + }, + { + "epoch": 0.83, + "learning_rate": 3.560532586656126e-05, + "loss": 3.8897, + "step": 6509 + }, + { + "epoch": 0.83, + "learning_rate": 3.555202877241134e-05, + "loss": 3.7375, + "step": 6510 + }, + { + "epoch": 0.83, + "learning_rate": 3.54987685443538e-05, + "loss": 3.8853, + "step": 6511 + }, + { + "epoch": 0.83, + "learning_rate": 3.544554519154464e-05, + "loss": 3.7052, + "step": 6512 + }, + { + "epoch": 0.83, + "learning_rate": 3.539235872313354e-05, + "loss": 3.8606, + "step": 6513 + }, + { + "epoch": 0.83, + "learning_rate": 3.533920914826397e-05, + "loss": 3.7747, + "step": 6514 + }, + { + "epoch": 0.83, + "learning_rate": 3.528609647607289e-05, + "loss": 3.7601, + "step": 6515 + }, + { + "epoch": 0.83, + "learning_rate": 3.523302071569098e-05, + "loss": 3.7337, + "step": 6516 + }, + { + "epoch": 0.83, + "learning_rate": 3.5179981876242634e-05, + "loss": 3.8258, + "step": 6517 + }, + { + "epoch": 0.83, + "learning_rate": 3.5126979966845826e-05, + "loss": 3.8742, + "step": 6518 + }, + { + "epoch": 0.83, + "learning_rate": 3.50740149966122e-05, + "loss": 3.8586, + "step": 6519 + }, + { + "epoch": 0.83, + "learning_rate": 3.502108697464701e-05, + "loss": 3.7421, + "step": 6520 + }, + { + "epoch": 0.83, + "learning_rate": 3.496819591004921e-05, + "loss": 3.8243, + "step": 6521 + }, + { + "epoch": 0.83, + "learning_rate": 3.491534181191136e-05, + "loss": 3.7728, + "step": 6522 + }, + { + "epoch": 0.83, + "learning_rate": 3.4862524689319776e-05, + "loss": 3.7739, + "step": 6523 + }, + { + "epoch": 0.84, + "learning_rate": 3.480974455135422e-05, + "loss": 3.6352, + "step": 6524 + }, + { + "epoch": 0.84, + "learning_rate": 3.4757001407088346e-05, + "loss": 3.7624, + "step": 6525 + }, + { + "epoch": 0.84, + "learning_rate": 3.470429526558921e-05, + "loss": 3.7529, + "step": 6526 + }, + { + "epoch": 0.84, + "learning_rate": 3.465162613591769e-05, + "loss": 3.8813, + "step": 6527 + }, + { + "epoch": 0.84, + "learning_rate": 3.459899402712813e-05, + "loss": 3.7038, + "step": 6528 + }, + { + "epoch": 0.84, + "learning_rate": 3.4546398948268665e-05, + "loss": 3.8986, + "step": 6529 + }, + { + "epoch": 0.84, + "learning_rate": 3.4493840908380895e-05, + "loss": 3.7772, + "step": 6530 + }, + { + "epoch": 0.84, + "learning_rate": 3.444131991650024e-05, + "loss": 3.7666, + "step": 6531 + }, + { + "epoch": 0.84, + "learning_rate": 3.4388835981655737e-05, + "loss": 3.7241, + "step": 6532 + }, + { + "epoch": 0.84, + "learning_rate": 3.433638911286987e-05, + "loss": 3.7106, + "step": 6533 + }, + { + "epoch": 0.84, + "learning_rate": 3.428397931915894e-05, + "loss": 3.7018, + "step": 6534 + }, + { + "epoch": 0.84, + "learning_rate": 3.423160660953276e-05, + "loss": 3.7634, + "step": 6535 + }, + { + "epoch": 0.84, + "learning_rate": 3.417927099299478e-05, + "loss": 3.708, + "step": 6536 + }, + { + "epoch": 0.84, + "learning_rate": 3.4126972478542076e-05, + "loss": 3.8018, + "step": 6537 + }, + { + "epoch": 0.84, + "learning_rate": 3.407471107516549e-05, + "loss": 3.7482, + "step": 6538 + }, + { + "epoch": 0.84, + "learning_rate": 3.402248679184927e-05, + "loss": 3.6902, + "step": 6539 + }, + { + "epoch": 0.84, + "learning_rate": 3.397029963757134e-05, + "loss": 3.8252, + "step": 6540 + }, + { + "epoch": 0.84, + "learning_rate": 3.391814962130341e-05, + "loss": 3.6135, + "step": 6541 + }, + { + "epoch": 0.84, + "learning_rate": 3.3866036752010585e-05, + "loss": 3.8033, + "step": 6542 + }, + { + "epoch": 0.84, + "learning_rate": 3.381396103865167e-05, + "loss": 3.6367, + "step": 6543 + }, + { + "epoch": 0.84, + "learning_rate": 3.376192249017912e-05, + "loss": 3.9358, + "step": 6544 + }, + { + "epoch": 0.84, + "learning_rate": 3.370992111553886e-05, + "loss": 3.7235, + "step": 6545 + }, + { + "epoch": 0.84, + "learning_rate": 3.365795692367069e-05, + "loss": 3.6734, + "step": 6546 + }, + { + "epoch": 0.84, + "learning_rate": 3.360602992350775e-05, + "loss": 3.8326, + "step": 6547 + }, + { + "epoch": 0.84, + "learning_rate": 3.3554140123976954e-05, + "loss": 3.878, + "step": 6548 + }, + { + "epoch": 0.84, + "learning_rate": 3.35022875339987e-05, + "loss": 3.8415, + "step": 6549 + }, + { + "epoch": 0.84, + "learning_rate": 3.345047216248703e-05, + "loss": 3.7217, + "step": 6550 + }, + { + "epoch": 0.84, + "learning_rate": 3.3398694018349715e-05, + "loss": 3.7577, + "step": 6551 + }, + { + "epoch": 0.84, + "learning_rate": 3.334695311048788e-05, + "loss": 3.9114, + "step": 6552 + }, + { + "epoch": 0.84, + "learning_rate": 3.329524944779655e-05, + "loss": 3.8694, + "step": 6553 + }, + { + "epoch": 0.84, + "learning_rate": 3.3243583039164054e-05, + "loss": 3.7895, + "step": 6554 + }, + { + "epoch": 0.84, + "learning_rate": 3.319195389347251e-05, + "loss": 3.7286, + "step": 6555 + }, + { + "epoch": 0.84, + "learning_rate": 3.314036201959755e-05, + "loss": 3.7649, + "step": 6556 + }, + { + "epoch": 0.84, + "learning_rate": 3.3088807426408434e-05, + "loss": 3.6232, + "step": 6557 + }, + { + "epoch": 0.84, + "learning_rate": 3.3037290122767873e-05, + "loss": 3.765, + "step": 6558 + }, + { + "epoch": 0.84, + "learning_rate": 3.298581011753246e-05, + "loss": 3.9207, + "step": 6559 + }, + { + "epoch": 0.84, + "learning_rate": 3.293436741955208e-05, + "loss": 3.6628, + "step": 6560 + }, + { + "epoch": 0.84, + "learning_rate": 3.288296203767044e-05, + "loss": 3.7989, + "step": 6561 + }, + { + "epoch": 0.84, + "learning_rate": 3.2831593980724664e-05, + "loss": 3.9898, + "step": 6562 + }, + { + "epoch": 0.84, + "learning_rate": 3.278026325754552e-05, + "loss": 3.7712, + "step": 6563 + }, + { + "epoch": 0.84, + "learning_rate": 3.272896987695734e-05, + "loss": 3.8624, + "step": 6564 + }, + { + "epoch": 0.84, + "learning_rate": 3.267771384777804e-05, + "loss": 3.9399, + "step": 6565 + }, + { + "epoch": 0.84, + "learning_rate": 3.2626495178819134e-05, + "loss": 3.7058, + "step": 6566 + }, + { + "epoch": 0.84, + "learning_rate": 3.257531387888574e-05, + "loss": 3.8116, + "step": 6567 + }, + { + "epoch": 0.84, + "learning_rate": 3.252416995677646e-05, + "loss": 3.8053, + "step": 6568 + }, + { + "epoch": 0.84, + "learning_rate": 3.247306342128359e-05, + "loss": 3.7267, + "step": 6569 + }, + { + "epoch": 0.84, + "learning_rate": 3.2421994281192915e-05, + "loss": 3.8317, + "step": 6570 + }, + { + "epoch": 0.84, + "learning_rate": 3.23709625452838e-05, + "loss": 3.7473, + "step": 6571 + }, + { + "epoch": 0.84, + "learning_rate": 3.2319968222329216e-05, + "loss": 3.6036, + "step": 6572 + }, + { + "epoch": 0.84, + "learning_rate": 3.226901132109558e-05, + "loss": 3.6942, + "step": 6573 + }, + { + "epoch": 0.84, + "learning_rate": 3.221809185034311e-05, + "loss": 3.7523, + "step": 6574 + }, + { + "epoch": 0.84, + "learning_rate": 3.21672098188254e-05, + "loss": 3.7914, + "step": 6575 + }, + { + "epoch": 0.84, + "learning_rate": 3.211636523528966e-05, + "loss": 3.6639, + "step": 6576 + }, + { + "epoch": 0.84, + "learning_rate": 3.2065558108476615e-05, + "loss": 3.8903, + "step": 6577 + }, + { + "epoch": 0.84, + "learning_rate": 3.201478844712069e-05, + "loss": 3.8794, + "step": 6578 + }, + { + "epoch": 0.84, + "learning_rate": 3.196405625994972e-05, + "loss": 3.7537, + "step": 6579 + }, + { + "epoch": 0.84, + "learning_rate": 3.1913361555685196e-05, + "loss": 3.6921, + "step": 6580 + }, + { + "epoch": 0.84, + "learning_rate": 3.1862704343042e-05, + "loss": 3.7759, + "step": 6581 + }, + { + "epoch": 0.84, + "learning_rate": 3.181208463072888e-05, + "loss": 3.7825, + "step": 6582 + }, + { + "epoch": 0.84, + "learning_rate": 3.1761502427447855e-05, + "loss": 3.8374, + "step": 6583 + }, + { + "epoch": 0.84, + "learning_rate": 3.1710957741894614e-05, + "loss": 3.7563, + "step": 6584 + }, + { + "epoch": 0.84, + "learning_rate": 3.166045058275835e-05, + "loss": 3.8282, + "step": 6585 + }, + { + "epoch": 0.84, + "learning_rate": 3.160998095872183e-05, + "loss": 3.8724, + "step": 6586 + }, + { + "epoch": 0.84, + "learning_rate": 3.1559548878461325e-05, + "loss": 3.6569, + "step": 6587 + }, + { + "epoch": 0.84, + "learning_rate": 3.1509154350646745e-05, + "loss": 3.8269, + "step": 6588 + }, + { + "epoch": 0.84, + "learning_rate": 3.145879738394156e-05, + "loss": 3.8346, + "step": 6589 + }, + { + "epoch": 0.84, + "learning_rate": 3.140847798700267e-05, + "loss": 3.6794, + "step": 6590 + }, + { + "epoch": 0.84, + "learning_rate": 3.1358196168480515e-05, + "loss": 3.8748, + "step": 6591 + }, + { + "epoch": 0.84, + "learning_rate": 3.130795193701916e-05, + "loss": 3.7739, + "step": 6592 + }, + { + "epoch": 0.84, + "learning_rate": 3.1257745301256165e-05, + "loss": 3.8358, + "step": 6593 + }, + { + "epoch": 0.84, + "learning_rate": 3.1207576269822566e-05, + "loss": 3.754, + "step": 6594 + }, + { + "epoch": 0.84, + "learning_rate": 3.115744485134314e-05, + "loss": 3.6855, + "step": 6595 + }, + { + "epoch": 0.84, + "learning_rate": 3.1107351054435906e-05, + "loss": 3.921, + "step": 6596 + }, + { + "epoch": 0.84, + "learning_rate": 3.105729488771272e-05, + "loss": 3.8959, + "step": 6597 + }, + { + "epoch": 0.84, + "learning_rate": 3.100727635977873e-05, + "loss": 3.9382, + "step": 6598 + }, + { + "epoch": 0.84, + "learning_rate": 3.095729547923273e-05, + "loss": 3.7159, + "step": 6599 + }, + { + "epoch": 0.84, + "learning_rate": 3.0907352254666985e-05, + "loss": 3.8144, + "step": 6600 + }, + { + "epoch": 0.84, + "learning_rate": 3.085744669466733e-05, + "loss": 3.7454, + "step": 6601 + }, + { + "epoch": 0.85, + "learning_rate": 3.080757880781307e-05, + "loss": 3.82, + "step": 6602 + }, + { + "epoch": 0.85, + "learning_rate": 3.075774860267716e-05, + "loss": 3.7933, + "step": 6603 + }, + { + "epoch": 0.85, + "learning_rate": 3.0707956087825923e-05, + "loss": 3.7487, + "step": 6604 + }, + { + "epoch": 0.85, + "learning_rate": 3.065820127181923e-05, + "loss": 3.7112, + "step": 6605 + }, + { + "epoch": 0.85, + "learning_rate": 3.060848416321063e-05, + "loss": 3.8549, + "step": 6606 + }, + { + "epoch": 0.85, + "learning_rate": 3.055880477054701e-05, + "loss": 3.7136, + "step": 6607 + }, + { + "epoch": 0.85, + "learning_rate": 3.0509163102368815e-05, + "loss": 3.8369, + "step": 6608 + }, + { + "epoch": 0.85, + "learning_rate": 3.0459559167209993e-05, + "loss": 3.9519, + "step": 6609 + }, + { + "epoch": 0.85, + "learning_rate": 3.0409992973598145e-05, + "loss": 3.897, + "step": 6610 + }, + { + "epoch": 0.85, + "learning_rate": 3.03604645300542e-05, + "loss": 3.8559, + "step": 6611 + }, + { + "epoch": 0.85, + "learning_rate": 3.031097384509271e-05, + "loss": 3.9195, + "step": 6612 + }, + { + "epoch": 0.85, + "learning_rate": 3.0261520927221647e-05, + "loss": 3.8789, + "step": 6613 + }, + { + "epoch": 0.85, + "learning_rate": 3.021210578494249e-05, + "loss": 3.8044, + "step": 6614 + }, + { + "epoch": 0.85, + "learning_rate": 3.016272842675044e-05, + "loss": 3.8262, + "step": 6615 + }, + { + "epoch": 0.85, + "learning_rate": 3.0113388861133907e-05, + "loss": 3.7789, + "step": 6616 + }, + { + "epoch": 0.85, + "learning_rate": 3.006408709657496e-05, + "loss": 3.7281, + "step": 6617 + }, + { + "epoch": 0.85, + "learning_rate": 3.0014823141549186e-05, + "loss": 3.8329, + "step": 6618 + }, + { + "epoch": 0.85, + "learning_rate": 2.9965597004525614e-05, + "loss": 3.8224, + "step": 6619 + }, + { + "epoch": 0.85, + "learning_rate": 2.991640869396675e-05, + "loss": 3.8186, + "step": 6620 + }, + { + "epoch": 0.85, + "learning_rate": 2.9867258218328668e-05, + "loss": 3.8251, + "step": 6621 + }, + { + "epoch": 0.85, + "learning_rate": 2.9818145586060912e-05, + "loss": 3.8639, + "step": 6622 + }, + { + "epoch": 0.85, + "learning_rate": 2.976907080560645e-05, + "loss": 3.6356, + "step": 6623 + }, + { + "epoch": 0.85, + "learning_rate": 2.9720033885401816e-05, + "loss": 3.7625, + "step": 6624 + }, + { + "epoch": 0.85, + "learning_rate": 2.9671034833877147e-05, + "loss": 3.6964, + "step": 6625 + }, + { + "epoch": 0.85, + "learning_rate": 2.962207365945585e-05, + "loss": 3.8219, + "step": 6626 + }, + { + "epoch": 0.85, + "learning_rate": 2.9573150370554942e-05, + "loss": 3.7258, + "step": 6627 + }, + { + "epoch": 0.85, + "learning_rate": 2.9524264975584887e-05, + "loss": 3.7854, + "step": 6628 + }, + { + "epoch": 0.85, + "learning_rate": 2.9475417482949657e-05, + "loss": 3.7209, + "step": 6629 + }, + { + "epoch": 0.85, + "learning_rate": 2.9426607901046622e-05, + "loss": 3.9188, + "step": 6630 + }, + { + "epoch": 0.85, + "learning_rate": 2.937783623826687e-05, + "loss": 3.7214, + "step": 6631 + }, + { + "epoch": 0.85, + "learning_rate": 2.9329102502994753e-05, + "loss": 3.8686, + "step": 6632 + }, + { + "epoch": 0.85, + "learning_rate": 2.92804067036081e-05, + "loss": 3.8855, + "step": 6633 + }, + { + "epoch": 0.85, + "learning_rate": 2.9231748848478373e-05, + "loss": 3.6641, + "step": 6634 + }, + { + "epoch": 0.85, + "learning_rate": 2.918312894597039e-05, + "loss": 3.763, + "step": 6635 + }, + { + "epoch": 0.85, + "learning_rate": 2.9134547004442456e-05, + "loss": 3.8289, + "step": 6636 + }, + { + "epoch": 0.85, + "learning_rate": 2.90860030322464e-05, + "loss": 3.8732, + "step": 6637 + }, + { + "epoch": 0.85, + "learning_rate": 2.903749703772743e-05, + "loss": 3.6836, + "step": 6638 + }, + { + "epoch": 0.85, + "learning_rate": 2.8989029029224374e-05, + "loss": 3.9226, + "step": 6639 + }, + { + "epoch": 0.85, + "learning_rate": 2.8940599015069403e-05, + "loss": 3.6902, + "step": 6640 + }, + { + "epoch": 0.85, + "learning_rate": 2.8892207003588218e-05, + "loss": 3.6039, + "step": 6641 + }, + { + "epoch": 0.85, + "learning_rate": 2.8843853003099885e-05, + "loss": 3.7933, + "step": 6642 + }, + { + "epoch": 0.85, + "learning_rate": 2.8795537021917144e-05, + "loss": 3.7119, + "step": 6643 + }, + { + "epoch": 0.85, + "learning_rate": 2.874725906834602e-05, + "loss": 3.7554, + "step": 6644 + }, + { + "epoch": 0.85, + "learning_rate": 2.8699019150685958e-05, + "loss": 3.7614, + "step": 6645 + }, + { + "epoch": 0.85, + "learning_rate": 2.8650817277230123e-05, + "loss": 3.8478, + "step": 6646 + }, + { + "epoch": 0.85, + "learning_rate": 2.8602653456264893e-05, + "loss": 3.7724, + "step": 6647 + }, + { + "epoch": 0.85, + "learning_rate": 2.85545276960702e-05, + "loss": 3.7284, + "step": 6648 + }, + { + "epoch": 0.85, + "learning_rate": 2.850644000491942e-05, + "loss": 3.7801, + "step": 6649 + }, + { + "epoch": 0.85, + "learning_rate": 2.8458390391079365e-05, + "loss": 3.7832, + "step": 6650 + }, + { + "epoch": 0.85, + "learning_rate": 2.8410378862810255e-05, + "loss": 3.7846, + "step": 6651 + }, + { + "epoch": 0.85, + "learning_rate": 2.836240542836599e-05, + "loss": 3.9534, + "step": 6652 + }, + { + "epoch": 0.85, + "learning_rate": 2.831447009599361e-05, + "loss": 3.7184, + "step": 6653 + }, + { + "epoch": 0.85, + "learning_rate": 2.826657287393389e-05, + "loss": 3.7213, + "step": 6654 + }, + { + "epoch": 0.85, + "learning_rate": 2.8218713770420816e-05, + "loss": 3.844, + "step": 6655 + }, + { + "epoch": 0.85, + "learning_rate": 2.8170892793681984e-05, + "loss": 3.7773, + "step": 6656 + }, + { + "epoch": 0.85, + "learning_rate": 2.812310995193834e-05, + "loss": 3.7123, + "step": 6657 + }, + { + "epoch": 0.85, + "learning_rate": 2.807536525340429e-05, + "loss": 3.7366, + "step": 6658 + }, + { + "epoch": 0.85, + "learning_rate": 2.8027658706287677e-05, + "loss": 3.9464, + "step": 6659 + }, + { + "epoch": 0.85, + "learning_rate": 2.7979990318789922e-05, + "loss": 3.6496, + "step": 6660 + }, + { + "epoch": 0.85, + "learning_rate": 2.7932360099105658e-05, + "loss": 3.8664, + "step": 6661 + }, + { + "epoch": 0.85, + "learning_rate": 2.7884768055423172e-05, + "loss": 3.7033, + "step": 6662 + }, + { + "epoch": 0.85, + "learning_rate": 2.7837214195924027e-05, + "loss": 3.8613, + "step": 6663 + }, + { + "epoch": 0.85, + "learning_rate": 2.7789698528783323e-05, + "loss": 3.8176, + "step": 6664 + }, + { + "epoch": 0.85, + "learning_rate": 2.7742221062169505e-05, + "loss": 3.9033, + "step": 6665 + }, + { + "epoch": 0.85, + "learning_rate": 2.769478180424445e-05, + "loss": 3.7583, + "step": 6666 + }, + { + "epoch": 0.85, + "learning_rate": 2.7647380763163673e-05, + "loss": 3.8087, + "step": 6667 + }, + { + "epoch": 0.85, + "learning_rate": 2.760001794707584e-05, + "loss": 3.6541, + "step": 6668 + }, + { + "epoch": 0.85, + "learning_rate": 2.755269336412322e-05, + "loss": 3.7052, + "step": 6669 + }, + { + "epoch": 0.85, + "learning_rate": 2.750540702244139e-05, + "loss": 3.6845, + "step": 6670 + }, + { + "epoch": 0.85, + "learning_rate": 2.7458158930159516e-05, + "loss": 3.7736, + "step": 6671 + }, + { + "epoch": 0.85, + "learning_rate": 2.7410949095400067e-05, + "loss": 3.6012, + "step": 6672 + }, + { + "epoch": 0.85, + "learning_rate": 2.7363777526278915e-05, + "loss": 3.74, + "step": 6673 + }, + { + "epoch": 0.85, + "learning_rate": 2.731664423090541e-05, + "loss": 3.7434, + "step": 6674 + }, + { + "epoch": 0.85, + "learning_rate": 2.726954921738234e-05, + "loss": 3.7481, + "step": 6675 + }, + { + "epoch": 0.85, + "learning_rate": 2.72224924938059e-05, + "loss": 3.6948, + "step": 6676 + }, + { + "epoch": 0.85, + "learning_rate": 2.7175474068265648e-05, + "loss": 3.7299, + "step": 6677 + }, + { + "epoch": 0.85, + "learning_rate": 2.7128493948844617e-05, + "loss": 3.8671, + "step": 6678 + }, + { + "epoch": 0.85, + "learning_rate": 2.708155214361918e-05, + "loss": 3.611, + "step": 6679 + }, + { + "epoch": 0.86, + "learning_rate": 2.7034648660659246e-05, + "loss": 3.8667, + "step": 6680 + }, + { + "epoch": 0.86, + "learning_rate": 2.6987783508028023e-05, + "loss": 3.7092, + "step": 6681 + }, + { + "epoch": 0.86, + "learning_rate": 2.6940956693782215e-05, + "loss": 3.8587, + "step": 6682 + }, + { + "epoch": 0.86, + "learning_rate": 2.6894168225971876e-05, + "loss": 3.7823, + "step": 6683 + }, + { + "epoch": 0.86, + "learning_rate": 2.684741811264052e-05, + "loss": 3.6933, + "step": 6684 + }, + { + "epoch": 0.86, + "learning_rate": 2.6800706361824966e-05, + "loss": 3.7352, + "step": 6685 + }, + { + "epoch": 0.86, + "learning_rate": 2.6754032981555544e-05, + "loss": 3.7236, + "step": 6686 + }, + { + "epoch": 0.86, + "learning_rate": 2.6707397979855885e-05, + "loss": 3.7475, + "step": 6687 + }, + { + "epoch": 0.86, + "learning_rate": 2.6660801364743138e-05, + "loss": 3.8834, + "step": 6688 + }, + { + "epoch": 0.86, + "learning_rate": 2.661424314422789e-05, + "loss": 3.6723, + "step": 6689 + }, + { + "epoch": 0.86, + "learning_rate": 2.656772332631391e-05, + "loss": 3.866, + "step": 6690 + }, + { + "epoch": 0.86, + "learning_rate": 2.6521241918998572e-05, + "loss": 3.7758, + "step": 6691 + }, + { + "epoch": 0.86, + "learning_rate": 2.647479893027252e-05, + "loss": 3.7155, + "step": 6692 + }, + { + "epoch": 0.86, + "learning_rate": 2.642839436811986e-05, + "loss": 3.6589, + "step": 6693 + }, + { + "epoch": 0.86, + "learning_rate": 2.638202824051808e-05, + "loss": 3.8583, + "step": 6694 + }, + { + "epoch": 0.86, + "learning_rate": 2.633570055543802e-05, + "loss": 3.7544, + "step": 6695 + }, + { + "epoch": 0.86, + "learning_rate": 2.6289411320843974e-05, + "loss": 3.787, + "step": 6696 + }, + { + "epoch": 0.86, + "learning_rate": 2.624316054469364e-05, + "loss": 3.7311, + "step": 6697 + }, + { + "epoch": 0.86, + "learning_rate": 2.6196948234937955e-05, + "loss": 3.7085, + "step": 6698 + }, + { + "epoch": 0.86, + "learning_rate": 2.615077439952146e-05, + "loss": 3.7722, + "step": 6699 + }, + { + "epoch": 0.86, + "learning_rate": 2.6104639046381912e-05, + "loss": 3.7658, + "step": 6700 + }, + { + "epoch": 0.86, + "learning_rate": 2.605854218345055e-05, + "loss": 3.786, + "step": 6701 + }, + { + "epoch": 0.86, + "learning_rate": 2.60124838186519e-05, + "loss": 3.7488, + "step": 6702 + }, + { + "epoch": 0.86, + "learning_rate": 2.5966463959904013e-05, + "loss": 3.8521, + "step": 6703 + }, + { + "epoch": 0.86, + "learning_rate": 2.5920482615118173e-05, + "loss": 3.8672, + "step": 6704 + }, + { + "epoch": 0.86, + "learning_rate": 2.5874539792199142e-05, + "loss": 3.8267, + "step": 6705 + }, + { + "epoch": 0.86, + "learning_rate": 2.582863549904499e-05, + "loss": 3.6817, + "step": 6706 + }, + { + "epoch": 0.86, + "learning_rate": 2.578276974354718e-05, + "loss": 3.7111, + "step": 6707 + }, + { + "epoch": 0.86, + "learning_rate": 2.573694253359063e-05, + "loss": 3.6424, + "step": 6708 + }, + { + "epoch": 0.86, + "learning_rate": 2.5691153877053564e-05, + "loss": 3.728, + "step": 6709 + }, + { + "epoch": 0.86, + "learning_rate": 2.5645403781807492e-05, + "loss": 3.8341, + "step": 6710 + }, + { + "epoch": 0.86, + "learning_rate": 2.5599692255717512e-05, + "loss": 3.6683, + "step": 6711 + }, + { + "epoch": 0.86, + "learning_rate": 2.555401930664189e-05, + "loss": 3.745, + "step": 6712 + }, + { + "epoch": 0.86, + "learning_rate": 2.550838494243235e-05, + "loss": 3.8325, + "step": 6713 + }, + { + "epoch": 0.86, + "learning_rate": 2.5462789170933976e-05, + "loss": 3.7591, + "step": 6714 + }, + { + "epoch": 0.86, + "learning_rate": 2.541723199998522e-05, + "loss": 3.8492, + "step": 6715 + }, + { + "epoch": 0.86, + "learning_rate": 2.5371713437417787e-05, + "loss": 3.6641, + "step": 6716 + }, + { + "epoch": 0.86, + "learning_rate": 2.5326233491056948e-05, + "loss": 3.8159, + "step": 6717 + }, + { + "epoch": 0.86, + "learning_rate": 2.5280792168721257e-05, + "loss": 3.8242, + "step": 6718 + }, + { + "epoch": 0.86, + "learning_rate": 2.523538947822257e-05, + "loss": 3.8921, + "step": 6719 + }, + { + "epoch": 0.86, + "learning_rate": 2.519002542736612e-05, + "loss": 3.6232, + "step": 6720 + }, + { + "epoch": 0.86, + "learning_rate": 2.5144700023950527e-05, + "loss": 3.7158, + "step": 6721 + }, + { + "epoch": 0.86, + "learning_rate": 2.509941327576773e-05, + "loss": 3.7449, + "step": 6722 + }, + { + "epoch": 0.86, + "learning_rate": 2.5054165190603022e-05, + "loss": 3.7423, + "step": 6723 + }, + { + "epoch": 0.86, + "learning_rate": 2.500895577623516e-05, + "loss": 3.6616, + "step": 6724 + }, + { + "epoch": 0.86, + "learning_rate": 2.4963785040436088e-05, + "loss": 3.9745, + "step": 6725 + }, + { + "epoch": 0.86, + "learning_rate": 2.4918652990971236e-05, + "loss": 3.749, + "step": 6726 + }, + { + "epoch": 0.86, + "learning_rate": 2.4873559635599313e-05, + "loss": 3.804, + "step": 6727 + }, + { + "epoch": 0.86, + "learning_rate": 2.4828504982072397e-05, + "loss": 3.722, + "step": 6728 + }, + { + "epoch": 0.86, + "learning_rate": 2.4783489038135847e-05, + "loss": 3.7975, + "step": 6729 + }, + { + "epoch": 0.86, + "learning_rate": 2.47385118115285e-05, + "loss": 3.7941, + "step": 6730 + }, + { + "epoch": 0.86, + "learning_rate": 2.4693573309982342e-05, + "loss": 3.8304, + "step": 6731 + }, + { + "epoch": 0.86, + "learning_rate": 2.4648673541222994e-05, + "loss": 3.7734, + "step": 6732 + }, + { + "epoch": 0.86, + "learning_rate": 2.4603812512969142e-05, + "loss": 3.7689, + "step": 6733 + }, + { + "epoch": 0.86, + "learning_rate": 2.4558990232932955e-05, + "loss": 3.7888, + "step": 6734 + }, + { + "epoch": 0.86, + "learning_rate": 2.451420670881982e-05, + "loss": 3.8742, + "step": 6735 + }, + { + "epoch": 0.86, + "learning_rate": 2.4469461948328635e-05, + "loss": 3.7281, + "step": 6736 + }, + { + "epoch": 0.86, + "learning_rate": 2.4424755959151558e-05, + "loss": 3.7497, + "step": 6737 + }, + { + "epoch": 0.86, + "learning_rate": 2.4380088748973933e-05, + "loss": 3.831, + "step": 6738 + }, + { + "epoch": 0.86, + "learning_rate": 2.4335460325474736e-05, + "loss": 3.8387, + "step": 6739 + }, + { + "epoch": 0.86, + "learning_rate": 2.4290870696326046e-05, + "loss": 3.8883, + "step": 6740 + }, + { + "epoch": 0.86, + "learning_rate": 2.424631986919332e-05, + "loss": 4.0161, + "step": 6741 + }, + { + "epoch": 0.86, + "learning_rate": 2.420180785173534e-05, + "loss": 3.8928, + "step": 6742 + }, + { + "epoch": 0.86, + "learning_rate": 2.4157334651604302e-05, + "loss": 3.7983, + "step": 6743 + }, + { + "epoch": 0.86, + "learning_rate": 2.411290027644558e-05, + "loss": 3.827, + "step": 6744 + }, + { + "epoch": 0.86, + "learning_rate": 2.4068504733897988e-05, + "loss": 3.7983, + "step": 6745 + }, + { + "epoch": 0.86, + "learning_rate": 2.4024148031593723e-05, + "loss": 3.7651, + "step": 6746 + }, + { + "epoch": 0.86, + "learning_rate": 2.397983017715813e-05, + "loss": 3.6927, + "step": 6747 + }, + { + "epoch": 0.86, + "learning_rate": 2.3935551178210004e-05, + "loss": 3.7593, + "step": 6748 + }, + { + "epoch": 0.86, + "learning_rate": 2.3891311042361364e-05, + "loss": 3.7976, + "step": 6749 + }, + { + "epoch": 0.86, + "learning_rate": 2.3847109777217658e-05, + "loss": 3.7542, + "step": 6750 + }, + { + "epoch": 0.86, + "learning_rate": 2.3802947390377554e-05, + "loss": 3.618, + "step": 6751 + }, + { + "epoch": 0.86, + "learning_rate": 2.375882388943307e-05, + "loss": 3.729, + "step": 6752 + }, + { + "epoch": 0.86, + "learning_rate": 2.3714739281969545e-05, + "loss": 3.9195, + "step": 6753 + }, + { + "epoch": 0.86, + "learning_rate": 2.3670693575565726e-05, + "loss": 3.6151, + "step": 6754 + }, + { + "epoch": 0.86, + "learning_rate": 2.3626686777793503e-05, + "loss": 3.7459, + "step": 6755 + }, + { + "epoch": 0.86, + "learning_rate": 2.3582718896218185e-05, + "loss": 3.7755, + "step": 6756 + }, + { + "epoch": 0.86, + "learning_rate": 2.3538789938398335e-05, + "loss": 3.9736, + "step": 6757 + }, + { + "epoch": 0.87, + "learning_rate": 2.3494899911885857e-05, + "loss": 3.8219, + "step": 6758 + }, + { + "epoch": 0.87, + "learning_rate": 2.3451048824225912e-05, + "loss": 3.7495, + "step": 6759 + }, + { + "epoch": 0.87, + "learning_rate": 2.3407236682957106e-05, + "loss": 3.7275, + "step": 6760 + }, + { + "epoch": 0.87, + "learning_rate": 2.336346349561119e-05, + "loss": 3.7239, + "step": 6761 + }, + { + "epoch": 0.87, + "learning_rate": 2.3319729269713263e-05, + "loss": 3.7208, + "step": 6762 + }, + { + "epoch": 0.87, + "learning_rate": 2.3276034012781803e-05, + "loss": 3.7267, + "step": 6763 + }, + { + "epoch": 0.87, + "learning_rate": 2.323237773232853e-05, + "loss": 3.733, + "step": 6764 + }, + { + "epoch": 0.87, + "learning_rate": 2.3188760435858436e-05, + "loss": 3.9151, + "step": 6765 + }, + { + "epoch": 0.87, + "learning_rate": 2.3145182130869772e-05, + "loss": 3.9271, + "step": 6766 + }, + { + "epoch": 0.87, + "learning_rate": 2.3101642824854302e-05, + "loss": 3.8846, + "step": 6767 + }, + { + "epoch": 0.87, + "learning_rate": 2.3058142525296864e-05, + "loss": 3.7166, + "step": 6768 + }, + { + "epoch": 0.87, + "learning_rate": 2.301468123967565e-05, + "loss": 3.826, + "step": 6769 + }, + { + "epoch": 0.87, + "learning_rate": 2.297125897546215e-05, + "loss": 3.856, + "step": 6770 + }, + { + "epoch": 0.87, + "learning_rate": 2.29278757401212e-05, + "loss": 3.7298, + "step": 6771 + }, + { + "epoch": 0.87, + "learning_rate": 2.288453154111081e-05, + "loss": 3.7613, + "step": 6772 + }, + { + "epoch": 0.87, + "learning_rate": 2.284122638588243e-05, + "loss": 3.909, + "step": 6773 + }, + { + "epoch": 0.87, + "learning_rate": 2.2797960281880664e-05, + "loss": 3.8271, + "step": 6774 + }, + { + "epoch": 0.87, + "learning_rate": 2.2754733236543506e-05, + "loss": 3.7632, + "step": 6775 + }, + { + "epoch": 0.87, + "learning_rate": 2.2711545257302152e-05, + "loss": 3.7306, + "step": 6776 + }, + { + "epoch": 0.87, + "learning_rate": 2.2668396351581134e-05, + "loss": 3.8614, + "step": 6777 + }, + { + "epoch": 0.87, + "learning_rate": 2.262528652679824e-05, + "loss": 3.8179, + "step": 6778 + }, + { + "epoch": 0.87, + "learning_rate": 2.2582215790364542e-05, + "loss": 3.9825, + "step": 6779 + }, + { + "epoch": 0.87, + "learning_rate": 2.2539184149684338e-05, + "loss": 3.7122, + "step": 6780 + }, + { + "epoch": 0.87, + "learning_rate": 2.2496191612155355e-05, + "loss": 3.8513, + "step": 6781 + }, + { + "epoch": 0.87, + "learning_rate": 2.2453238185168505e-05, + "loss": 3.8145, + "step": 6782 + }, + { + "epoch": 0.87, + "learning_rate": 2.2410323876107974e-05, + "loss": 3.8239, + "step": 6783 + }, + { + "epoch": 0.87, + "learning_rate": 2.2367448692351216e-05, + "loss": 3.7495, + "step": 6784 + }, + { + "epoch": 0.87, + "learning_rate": 2.2324612641268975e-05, + "loss": 3.6903, + "step": 6785 + }, + { + "epoch": 0.87, + "learning_rate": 2.2281815730225252e-05, + "loss": 3.8608, + "step": 6786 + }, + { + "epoch": 0.87, + "learning_rate": 2.2239057966577297e-05, + "loss": 3.7529, + "step": 6787 + }, + { + "epoch": 0.87, + "learning_rate": 2.219633935767576e-05, + "loss": 3.8295, + "step": 6788 + }, + { + "epoch": 0.87, + "learning_rate": 2.215365991086443e-05, + "loss": 3.6739, + "step": 6789 + }, + { + "epoch": 0.87, + "learning_rate": 2.2111019633480306e-05, + "loss": 3.8496, + "step": 6790 + }, + { + "epoch": 0.87, + "learning_rate": 2.2068418532853878e-05, + "loss": 3.7481, + "step": 6791 + }, + { + "epoch": 0.87, + "learning_rate": 2.202585661630871e-05, + "loss": 3.6331, + "step": 6792 + }, + { + "epoch": 0.87, + "learning_rate": 2.198333389116172e-05, + "loss": 3.7213, + "step": 6793 + }, + { + "epoch": 0.87, + "learning_rate": 2.1940850364723014e-05, + "loss": 3.7752, + "step": 6794 + }, + { + "epoch": 0.87, + "learning_rate": 2.1898406044295964e-05, + "loss": 3.6994, + "step": 6795 + }, + { + "epoch": 0.87, + "learning_rate": 2.185600093717735e-05, + "loss": 3.9137, + "step": 6796 + }, + { + "epoch": 0.87, + "learning_rate": 2.1813635050657032e-05, + "loss": 3.6894, + "step": 6797 + }, + { + "epoch": 0.87, + "learning_rate": 2.1771308392018213e-05, + "loss": 3.8197, + "step": 6798 + }, + { + "epoch": 0.87, + "learning_rate": 2.1729020968537296e-05, + "loss": 3.7904, + "step": 6799 + }, + { + "epoch": 0.87, + "learning_rate": 2.1686772787484072e-05, + "loss": 3.7543, + "step": 6800 + }, + { + "epoch": 0.87, + "learning_rate": 2.164456385612143e-05, + "loss": 3.7503, + "step": 6801 + }, + { + "epoch": 0.87, + "learning_rate": 2.1602394181705564e-05, + "loss": 3.7584, + "step": 6802 + }, + { + "epoch": 0.87, + "learning_rate": 2.1560263771485983e-05, + "loss": 3.9011, + "step": 6803 + }, + { + "epoch": 0.87, + "learning_rate": 2.1518172632705334e-05, + "loss": 3.7691, + "step": 6804 + }, + { + "epoch": 0.87, + "learning_rate": 2.1476120772599613e-05, + "loss": 3.7712, + "step": 6805 + }, + { + "epoch": 0.87, + "learning_rate": 2.1434108198398027e-05, + "loss": 3.8585, + "step": 6806 + }, + { + "epoch": 0.87, + "learning_rate": 2.1392134917322975e-05, + "loss": 3.6162, + "step": 6807 + }, + { + "epoch": 0.87, + "learning_rate": 2.1350200936590152e-05, + "loss": 3.798, + "step": 6808 + }, + { + "epoch": 0.87, + "learning_rate": 2.130830626340857e-05, + "loss": 3.7534, + "step": 6809 + }, + { + "epoch": 0.87, + "learning_rate": 2.1266450904980335e-05, + "loss": 3.77, + "step": 6810 + }, + { + "epoch": 0.87, + "learning_rate": 2.1224634868500902e-05, + "loss": 3.7745, + "step": 6811 + }, + { + "epoch": 0.87, + "learning_rate": 2.1182858161158947e-05, + "loss": 3.8446, + "step": 6812 + }, + { + "epoch": 0.87, + "learning_rate": 2.114112079013636e-05, + "loss": 3.9078, + "step": 6813 + }, + { + "epoch": 0.87, + "learning_rate": 2.1099422762608262e-05, + "loss": 3.8759, + "step": 6814 + }, + { + "epoch": 0.87, + "learning_rate": 2.105776408574303e-05, + "loss": 3.6914, + "step": 6815 + }, + { + "epoch": 0.87, + "learning_rate": 2.1016144766702217e-05, + "loss": 3.8367, + "step": 6816 + }, + { + "epoch": 0.87, + "learning_rate": 2.097456481264079e-05, + "loss": 3.7213, + "step": 6817 + }, + { + "epoch": 0.87, + "learning_rate": 2.0933024230706726e-05, + "loss": 3.7975, + "step": 6818 + }, + { + "epoch": 0.87, + "learning_rate": 2.08915230280414e-05, + "loss": 3.7572, + "step": 6819 + }, + { + "epoch": 0.87, + "learning_rate": 2.085006121177932e-05, + "loss": 3.7705, + "step": 6820 + }, + { + "epoch": 0.87, + "learning_rate": 2.0808638789048263e-05, + "loss": 3.7061, + "step": 6821 + }, + { + "epoch": 0.87, + "learning_rate": 2.0767255766969195e-05, + "loss": 3.7396, + "step": 6822 + }, + { + "epoch": 0.87, + "learning_rate": 2.0725912152656317e-05, + "loss": 3.6974, + "step": 6823 + }, + { + "epoch": 0.87, + "learning_rate": 2.0684607953217164e-05, + "loss": 3.9105, + "step": 6824 + }, + { + "epoch": 0.87, + "learning_rate": 2.0643343175752367e-05, + "loss": 3.7845, + "step": 6825 + }, + { + "epoch": 0.87, + "learning_rate": 2.060211782735577e-05, + "loss": 3.841, + "step": 6826 + }, + { + "epoch": 0.87, + "learning_rate": 2.0560931915114518e-05, + "loss": 3.8899, + "step": 6827 + }, + { + "epoch": 0.87, + "learning_rate": 2.0519785446108996e-05, + "loss": 3.797, + "step": 6828 + }, + { + "epoch": 0.87, + "learning_rate": 2.0478678427412718e-05, + "loss": 3.8456, + "step": 6829 + }, + { + "epoch": 0.87, + "learning_rate": 2.0437610866092442e-05, + "loss": 3.8634, + "step": 6830 + }, + { + "epoch": 0.87, + "learning_rate": 2.0396582769208137e-05, + "loss": 3.7776, + "step": 6831 + }, + { + "epoch": 0.87, + "learning_rate": 2.0355594143813078e-05, + "loss": 3.735, + "step": 6832 + }, + { + "epoch": 0.87, + "learning_rate": 2.031464499695368e-05, + "loss": 3.7006, + "step": 6833 + }, + { + "epoch": 0.87, + "learning_rate": 2.0273735335669536e-05, + "loss": 3.8148, + "step": 6834 + }, + { + "epoch": 0.87, + "learning_rate": 2.0232865166993492e-05, + "loss": 3.7207, + "step": 6835 + }, + { + "epoch": 0.88, + "learning_rate": 2.0192034497951566e-05, + "loss": 3.7475, + "step": 6836 + }, + { + "epoch": 0.88, + "learning_rate": 2.0151243335563146e-05, + "loss": 3.8111, + "step": 6837 + }, + { + "epoch": 0.88, + "learning_rate": 2.0110491686840564e-05, + "loss": 3.7866, + "step": 6838 + }, + { + "epoch": 0.88, + "learning_rate": 2.006977955878961e-05, + "loss": 3.7086, + "step": 6839 + }, + { + "epoch": 0.88, + "learning_rate": 2.0029106958409148e-05, + "loss": 3.6405, + "step": 6840 + }, + { + "epoch": 0.88, + "learning_rate": 1.9988473892691235e-05, + "loss": 3.7285, + "step": 6841 + }, + { + "epoch": 0.88, + "learning_rate": 1.994788036862119e-05, + "loss": 3.7052, + "step": 6842 + }, + { + "epoch": 0.88, + "learning_rate": 1.990732639317752e-05, + "loss": 3.8258, + "step": 6843 + }, + { + "epoch": 0.88, + "learning_rate": 1.9866811973331846e-05, + "loss": 3.877, + "step": 6844 + }, + { + "epoch": 0.88, + "learning_rate": 1.982633711604917e-05, + "loss": 3.9248, + "step": 6845 + }, + { + "epoch": 0.88, + "learning_rate": 1.978590182828749e-05, + "loss": 3.8546, + "step": 6846 + }, + { + "epoch": 0.88, + "learning_rate": 1.9745506116998214e-05, + "loss": 3.7485, + "step": 6847 + }, + { + "epoch": 0.88, + "learning_rate": 1.970514998912576e-05, + "loss": 3.7632, + "step": 6848 + }, + { + "epoch": 0.88, + "learning_rate": 1.9664833451607856e-05, + "loss": 3.6968, + "step": 6849 + }, + { + "epoch": 0.88, + "learning_rate": 1.9624556511375347e-05, + "loss": 3.7701, + "step": 6850 + }, + { + "epoch": 0.88, + "learning_rate": 1.9584319175352307e-05, + "loss": 3.7863, + "step": 6851 + }, + { + "epoch": 0.88, + "learning_rate": 1.9544121450455976e-05, + "loss": 3.7037, + "step": 6852 + }, + { + "epoch": 0.88, + "learning_rate": 1.950396334359686e-05, + "loss": 3.8165, + "step": 6853 + }, + { + "epoch": 0.88, + "learning_rate": 1.9463844861678627e-05, + "loss": 3.8242, + "step": 6854 + }, + { + "epoch": 0.88, + "learning_rate": 1.942376601159798e-05, + "loss": 3.7158, + "step": 6855 + }, + { + "epoch": 0.88, + "learning_rate": 1.9383726800245083e-05, + "loss": 3.7769, + "step": 6856 + }, + { + "epoch": 0.88, + "learning_rate": 1.934372723450309e-05, + "loss": 3.8883, + "step": 6857 + }, + { + "epoch": 0.88, + "learning_rate": 1.9303767321248388e-05, + "loss": 3.7329, + "step": 6858 + }, + { + "epoch": 0.88, + "learning_rate": 1.926384706735049e-05, + "loss": 3.8233, + "step": 6859 + }, + { + "epoch": 0.88, + "learning_rate": 1.9223966479672255e-05, + "loss": 3.8669, + "step": 6860 + }, + { + "epoch": 0.88, + "learning_rate": 1.9184125565069543e-05, + "loss": 3.6843, + "step": 6861 + }, + { + "epoch": 0.88, + "learning_rate": 1.914432433039151e-05, + "loss": 3.7339, + "step": 6862 + }, + { + "epoch": 0.88, + "learning_rate": 1.9104562782480434e-05, + "loss": 3.8115, + "step": 6863 + }, + { + "epoch": 0.88, + "learning_rate": 1.906484092817176e-05, + "loss": 3.7264, + "step": 6864 + }, + { + "epoch": 0.88, + "learning_rate": 1.902515877429417e-05, + "loss": 3.7106, + "step": 6865 + }, + { + "epoch": 0.88, + "learning_rate": 1.8985516327669512e-05, + "loss": 3.8243, + "step": 6866 + }, + { + "epoch": 0.88, + "learning_rate": 1.894591359511269e-05, + "loss": 3.7721, + "step": 6867 + }, + { + "epoch": 0.88, + "learning_rate": 1.890635058343196e-05, + "loss": 3.5808, + "step": 6868 + }, + { + "epoch": 0.88, + "learning_rate": 1.886682729942865e-05, + "loss": 3.8858, + "step": 6869 + }, + { + "epoch": 0.88, + "learning_rate": 1.8827343749897224e-05, + "loss": 3.8635, + "step": 6870 + }, + { + "epoch": 0.88, + "learning_rate": 1.8787899941625413e-05, + "loss": 3.7507, + "step": 6871 + }, + { + "epoch": 0.88, + "learning_rate": 1.8748495881394046e-05, + "loss": 3.7902, + "step": 6872 + }, + { + "epoch": 0.88, + "learning_rate": 1.870913157597709e-05, + "loss": 3.7529, + "step": 6873 + }, + { + "epoch": 0.88, + "learning_rate": 1.866980703214177e-05, + "loss": 3.7797, + "step": 6874 + }, + { + "epoch": 0.88, + "learning_rate": 1.8630522256648463e-05, + "loss": 3.7336, + "step": 6875 + }, + { + "epoch": 0.88, + "learning_rate": 1.8591277256250648e-05, + "loss": 3.6834, + "step": 6876 + }, + { + "epoch": 0.88, + "learning_rate": 1.855207203769499e-05, + "loss": 3.8071, + "step": 6877 + }, + { + "epoch": 0.88, + "learning_rate": 1.8512906607721342e-05, + "loss": 3.79, + "step": 6878 + }, + { + "epoch": 0.88, + "learning_rate": 1.8473780973062655e-05, + "loss": 3.7195, + "step": 6879 + }, + { + "epoch": 0.88, + "learning_rate": 1.8434695140445074e-05, + "loss": 3.623, + "step": 6880 + }, + { + "epoch": 0.88, + "learning_rate": 1.8395649116587974e-05, + "loss": 3.7989, + "step": 6881 + }, + { + "epoch": 0.88, + "learning_rate": 1.8356642908203767e-05, + "loss": 3.7986, + "step": 6882 + }, + { + "epoch": 0.88, + "learning_rate": 1.8317676521998033e-05, + "loss": 3.8762, + "step": 6883 + }, + { + "epoch": 0.88, + "learning_rate": 1.827874996466966e-05, + "loss": 3.8471, + "step": 6884 + }, + { + "epoch": 0.88, + "learning_rate": 1.823986324291052e-05, + "loss": 3.8284, + "step": 6885 + }, + { + "epoch": 0.88, + "learning_rate": 1.8201016363405653e-05, + "loss": 3.8785, + "step": 6886 + }, + { + "epoch": 0.88, + "learning_rate": 1.816220933283336e-05, + "loss": 3.7982, + "step": 6887 + }, + { + "epoch": 0.88, + "learning_rate": 1.8123442157864907e-05, + "loss": 3.6419, + "step": 6888 + }, + { + "epoch": 0.88, + "learning_rate": 1.8084714845164912e-05, + "loss": 3.7795, + "step": 6889 + }, + { + "epoch": 0.88, + "learning_rate": 1.804602740139105e-05, + "loss": 3.801, + "step": 6890 + }, + { + "epoch": 0.88, + "learning_rate": 1.8007379833194142e-05, + "loss": 3.7437, + "step": 6891 + }, + { + "epoch": 0.88, + "learning_rate": 1.7968772147218067e-05, + "loss": 3.7594, + "step": 6892 + }, + { + "epoch": 0.88, + "learning_rate": 1.793020435010004e-05, + "loss": 3.9223, + "step": 6893 + }, + { + "epoch": 0.88, + "learning_rate": 1.7891676448470255e-05, + "loss": 3.8826, + "step": 6894 + }, + { + "epoch": 0.88, + "learning_rate": 1.785318844895209e-05, + "loss": 3.786, + "step": 6895 + }, + { + "epoch": 0.88, + "learning_rate": 1.7814740358162136e-05, + "loss": 3.7945, + "step": 6896 + }, + { + "epoch": 0.88, + "learning_rate": 1.7776332182710047e-05, + "loss": 3.9002, + "step": 6897 + }, + { + "epoch": 0.88, + "learning_rate": 1.773796392919863e-05, + "loss": 3.8681, + "step": 6898 + }, + { + "epoch": 0.88, + "learning_rate": 1.769963560422383e-05, + "loss": 3.8799, + "step": 6899 + }, + { + "epoch": 0.88, + "learning_rate": 1.7661347214374706e-05, + "loss": 3.798, + "step": 6900 + }, + { + "epoch": 0.88, + "learning_rate": 1.762309876623347e-05, + "loss": 3.7151, + "step": 6901 + }, + { + "epoch": 0.88, + "learning_rate": 1.7584890266375552e-05, + "loss": 3.9241, + "step": 6902 + }, + { + "epoch": 0.88, + "learning_rate": 1.7546721721369314e-05, + "loss": 3.9345, + "step": 6903 + }, + { + "epoch": 0.88, + "learning_rate": 1.7508593137776503e-05, + "loss": 3.8501, + "step": 6904 + }, + { + "epoch": 0.88, + "learning_rate": 1.7470504522151792e-05, + "loss": 3.884, + "step": 6905 + }, + { + "epoch": 0.88, + "learning_rate": 1.7432455881043085e-05, + "loss": 3.8085, + "step": 6906 + }, + { + "epoch": 0.88, + "learning_rate": 1.7394447220991342e-05, + "loss": 3.9303, + "step": 6907 + }, + { + "epoch": 0.88, + "learning_rate": 1.735647854853073e-05, + "loss": 3.7412, + "step": 6908 + }, + { + "epoch": 0.88, + "learning_rate": 1.7318549870188468e-05, + "loss": 3.7932, + "step": 6909 + }, + { + "epoch": 0.88, + "learning_rate": 1.728066119248492e-05, + "loss": 3.715, + "step": 6910 + }, + { + "epoch": 0.88, + "learning_rate": 1.724281252193369e-05, + "loss": 3.7171, + "step": 6911 + }, + { + "epoch": 0.88, + "learning_rate": 1.7205003865041342e-05, + "loss": 3.6434, + "step": 6912 + }, + { + "epoch": 0.88, + "learning_rate": 1.7167235228307627e-05, + "loss": 3.6617, + "step": 6913 + }, + { + "epoch": 0.88, + "learning_rate": 1.7129506618225376e-05, + "loss": 3.8395, + "step": 6914 + }, + { + "epoch": 0.89, + "learning_rate": 1.7091818041280626e-05, + "loss": 3.875, + "step": 6915 + }, + { + "epoch": 0.89, + "learning_rate": 1.7054169503952415e-05, + "loss": 3.8359, + "step": 6916 + }, + { + "epoch": 0.89, + "learning_rate": 1.701656101271304e-05, + "loss": 3.8591, + "step": 6917 + }, + { + "epoch": 0.89, + "learning_rate": 1.6978992574027824e-05, + "loss": 3.8934, + "step": 6918 + }, + { + "epoch": 0.89, + "learning_rate": 1.6941464194355188e-05, + "loss": 3.8509, + "step": 6919 + }, + { + "epoch": 0.89, + "learning_rate": 1.6903975880146638e-05, + "loss": 3.7626, + "step": 6920 + }, + { + "epoch": 0.89, + "learning_rate": 1.686652763784699e-05, + "loss": 3.7459, + "step": 6921 + }, + { + "epoch": 0.89, + "learning_rate": 1.6829119473893927e-05, + "loss": 3.8066, + "step": 6922 + }, + { + "epoch": 0.89, + "learning_rate": 1.679175139471839e-05, + "loss": 3.7662, + "step": 6923 + }, + { + "epoch": 0.89, + "learning_rate": 1.6754423406744323e-05, + "loss": 3.8379, + "step": 6924 + }, + { + "epoch": 0.89, + "learning_rate": 1.6717135516388925e-05, + "loss": 3.7536, + "step": 6925 + }, + { + "epoch": 0.89, + "learning_rate": 1.6679887730062404e-05, + "loss": 3.7178, + "step": 6926 + }, + { + "epoch": 0.89, + "learning_rate": 1.6642680054168026e-05, + "loss": 3.889, + "step": 6927 + }, + { + "epoch": 0.89, + "learning_rate": 1.6605512495102282e-05, + "loss": 3.7978, + "step": 6928 + }, + { + "epoch": 0.89, + "learning_rate": 1.656838505925462e-05, + "loss": 3.7968, + "step": 6929 + }, + { + "epoch": 0.89, + "learning_rate": 1.6531297753007795e-05, + "loss": 3.7784, + "step": 6930 + }, + { + "epoch": 0.89, + "learning_rate": 1.649425058273743e-05, + "loss": 3.7524, + "step": 6931 + }, + { + "epoch": 0.89, + "learning_rate": 1.6457243554812486e-05, + "loss": 3.7521, + "step": 6932 + }, + { + "epoch": 0.89, + "learning_rate": 1.6420276675594814e-05, + "loss": 3.8727, + "step": 6933 + }, + { + "epoch": 0.89, + "learning_rate": 1.6383349951439475e-05, + "loss": 3.7787, + "step": 6934 + }, + { + "epoch": 0.89, + "learning_rate": 1.634646338869461e-05, + "loss": 3.8184, + "step": 6935 + }, + { + "epoch": 0.89, + "learning_rate": 1.6309616993701426e-05, + "loss": 3.8304, + "step": 6936 + }, + { + "epoch": 0.89, + "learning_rate": 1.6272810772794218e-05, + "loss": 3.7613, + "step": 6937 + }, + { + "epoch": 0.89, + "learning_rate": 1.623604473230042e-05, + "loss": 3.8587, + "step": 6938 + }, + { + "epoch": 0.89, + "learning_rate": 1.6199318878540593e-05, + "loss": 3.7619, + "step": 6939 + }, + { + "epoch": 0.89, + "learning_rate": 1.616263321782832e-05, + "loss": 3.8242, + "step": 6940 + }, + { + "epoch": 0.89, + "learning_rate": 1.6125987756470257e-05, + "loss": 3.8489, + "step": 6941 + }, + { + "epoch": 0.89, + "learning_rate": 1.6089382500766193e-05, + "loss": 3.8005, + "step": 6942 + }, + { + "epoch": 0.89, + "learning_rate": 1.605281745700904e-05, + "loss": 3.7835, + "step": 6943 + }, + { + "epoch": 0.89, + "learning_rate": 1.6016292631484684e-05, + "loss": 3.8686, + "step": 6944 + }, + { + "epoch": 0.89, + "learning_rate": 1.5979808030472164e-05, + "loss": 3.608, + "step": 6945 + }, + { + "epoch": 0.89, + "learning_rate": 1.5943363660243655e-05, + "loss": 3.7491, + "step": 6946 + }, + { + "epoch": 0.89, + "learning_rate": 1.5906959527064334e-05, + "loss": 3.7341, + "step": 6947 + }, + { + "epoch": 0.89, + "learning_rate": 1.5870595637192535e-05, + "loss": 3.8064, + "step": 6948 + }, + { + "epoch": 0.89, + "learning_rate": 1.5834271996879644e-05, + "loss": 3.658, + "step": 6949 + }, + { + "epoch": 0.89, + "learning_rate": 1.579798861237003e-05, + "loss": 3.6603, + "step": 6950 + }, + { + "epoch": 0.89, + "learning_rate": 1.5761745489901307e-05, + "loss": 3.7989, + "step": 6951 + }, + { + "epoch": 0.89, + "learning_rate": 1.5725542635704026e-05, + "loss": 3.6858, + "step": 6952 + }, + { + "epoch": 0.89, + "learning_rate": 1.5689380056001927e-05, + "loss": 3.7427, + "step": 6953 + }, + { + "epoch": 0.89, + "learning_rate": 1.5653257757011763e-05, + "loss": 3.9148, + "step": 6954 + }, + { + "epoch": 0.89, + "learning_rate": 1.5617175744943368e-05, + "loss": 3.847, + "step": 6955 + }, + { + "epoch": 0.89, + "learning_rate": 1.5581134025999644e-05, + "loss": 3.8128, + "step": 6956 + }, + { + "epoch": 0.89, + "learning_rate": 1.5545132606376605e-05, + "loss": 3.765, + "step": 6957 + }, + { + "epoch": 0.89, + "learning_rate": 1.5509171492263302e-05, + "loss": 3.7992, + "step": 6958 + }, + { + "epoch": 0.89, + "learning_rate": 1.5473250689841843e-05, + "loss": 3.8225, + "step": 6959 + }, + { + "epoch": 0.89, + "learning_rate": 1.5437370205287515e-05, + "loss": 3.8553, + "step": 6960 + }, + { + "epoch": 0.89, + "learning_rate": 1.540153004476852e-05, + "loss": 3.6431, + "step": 6961 + }, + { + "epoch": 0.89, + "learning_rate": 1.5365730214446204e-05, + "loss": 3.6572, + "step": 6962 + }, + { + "epoch": 0.89, + "learning_rate": 1.5329970720474985e-05, + "loss": 3.8462, + "step": 6963 + }, + { + "epoch": 0.89, + "learning_rate": 1.52942515690023e-05, + "loss": 3.8414, + "step": 6964 + }, + { + "epoch": 0.89, + "learning_rate": 1.5258572766168738e-05, + "loss": 3.8922, + "step": 6965 + }, + { + "epoch": 0.89, + "learning_rate": 1.5222934318107839e-05, + "loss": 3.922, + "step": 6966 + }, + { + "epoch": 0.89, + "learning_rate": 1.5187336230946285e-05, + "loss": 3.801, + "step": 6967 + }, + { + "epoch": 0.89, + "learning_rate": 1.5151778510803877e-05, + "loss": 3.8063, + "step": 6968 + }, + { + "epoch": 0.89, + "learning_rate": 1.5116261163793332e-05, + "loss": 3.6389, + "step": 6969 + }, + { + "epoch": 0.89, + "learning_rate": 1.5080784196020491e-05, + "loss": 3.7532, + "step": 6970 + }, + { + "epoch": 0.89, + "learning_rate": 1.5045347613584253e-05, + "loss": 3.8692, + "step": 6971 + }, + { + "epoch": 0.89, + "learning_rate": 1.5009951422576607e-05, + "loss": 3.7863, + "step": 6972 + }, + { + "epoch": 0.89, + "learning_rate": 1.4974595629082488e-05, + "loss": 3.7731, + "step": 6973 + }, + { + "epoch": 0.89, + "learning_rate": 1.4939280239180091e-05, + "loss": 3.8417, + "step": 6974 + }, + { + "epoch": 0.89, + "learning_rate": 1.4904005258940424e-05, + "loss": 3.8538, + "step": 6975 + }, + { + "epoch": 0.89, + "learning_rate": 1.4868770694427768e-05, + "loss": 3.769, + "step": 6976 + }, + { + "epoch": 0.89, + "learning_rate": 1.4833576551699285e-05, + "loss": 3.8149, + "step": 6977 + }, + { + "epoch": 0.89, + "learning_rate": 1.4798422836805298e-05, + "loss": 3.8263, + "step": 6978 + }, + { + "epoch": 0.89, + "learning_rate": 1.4763309555789111e-05, + "loss": 3.844, + "step": 6979 + }, + { + "epoch": 0.89, + "learning_rate": 1.4728236714687066e-05, + "loss": 3.883, + "step": 6980 + }, + { + "epoch": 0.89, + "learning_rate": 1.4693204319528696e-05, + "loss": 3.7681, + "step": 6981 + }, + { + "epoch": 0.89, + "learning_rate": 1.4658212376336384e-05, + "loss": 3.7723, + "step": 6982 + }, + { + "epoch": 0.89, + "learning_rate": 1.46232608911257e-05, + "loss": 3.9345, + "step": 6983 + }, + { + "epoch": 0.89, + "learning_rate": 1.4588349869905149e-05, + "loss": 3.9161, + "step": 6984 + }, + { + "epoch": 0.89, + "learning_rate": 1.4553479318676398e-05, + "loss": 3.841, + "step": 6985 + }, + { + "epoch": 0.89, + "learning_rate": 1.45186492434341e-05, + "loss": 3.8676, + "step": 6986 + }, + { + "epoch": 0.89, + "learning_rate": 1.4483859650165937e-05, + "loss": 3.8548, + "step": 6987 + }, + { + "epoch": 0.89, + "learning_rate": 1.4449110544852596e-05, + "loss": 3.8573, + "step": 6988 + }, + { + "epoch": 0.89, + "learning_rate": 1.4414401933467907e-05, + "loss": 3.8507, + "step": 6989 + }, + { + "epoch": 0.89, + "learning_rate": 1.4379733821978686e-05, + "loss": 3.8376, + "step": 6990 + }, + { + "epoch": 0.89, + "learning_rate": 1.4345106216344772e-05, + "loss": 3.7498, + "step": 6991 + }, + { + "epoch": 0.89, + "learning_rate": 1.4310519122519045e-05, + "loss": 3.8726, + "step": 6992 + }, + { + "epoch": 0.9, + "learning_rate": 1.4275972546447412e-05, + "loss": 3.851, + "step": 6993 + }, + { + "epoch": 0.9, + "learning_rate": 1.4241466494068822e-05, + "loss": 3.7224, + "step": 6994 + }, + { + "epoch": 0.9, + "learning_rate": 1.4207000971315276e-05, + "loss": 3.7658, + "step": 6995 + }, + { + "epoch": 0.9, + "learning_rate": 1.4172575984111869e-05, + "loss": 3.6515, + "step": 6996 + }, + { + "epoch": 0.9, + "learning_rate": 1.4138191538376587e-05, + "loss": 3.7983, + "step": 6997 + }, + { + "epoch": 0.9, + "learning_rate": 1.4103847640020511e-05, + "loss": 3.7949, + "step": 6998 + }, + { + "epoch": 0.9, + "learning_rate": 1.4069544294947779e-05, + "loss": 3.682, + "step": 6999 + }, + { + "epoch": 0.9, + "learning_rate": 1.4035281509055531e-05, + "loss": 3.8062, + "step": 7000 + }, + { + "epoch": 0.9, + "learning_rate": 1.4001059288233892e-05, + "loss": 3.7976, + "step": 7001 + }, + { + "epoch": 0.9, + "learning_rate": 1.3966877638366127e-05, + "loss": 3.9641, + "step": 7002 + }, + { + "epoch": 0.9, + "learning_rate": 1.3932736565328396e-05, + "loss": 3.7495, + "step": 7003 + }, + { + "epoch": 0.9, + "learning_rate": 1.389863607498998e-05, + "loss": 3.8535, + "step": 7004 + }, + { + "epoch": 0.9, + "learning_rate": 1.3864576173213183e-05, + "loss": 3.7691, + "step": 7005 + }, + { + "epoch": 0.9, + "learning_rate": 1.3830556865853244e-05, + "loss": 3.822, + "step": 7006 + }, + { + "epoch": 0.9, + "learning_rate": 1.3796578158758483e-05, + "loss": 3.7104, + "step": 7007 + }, + { + "epoch": 0.9, + "learning_rate": 1.3762640057770253e-05, + "loss": 3.8315, + "step": 7008 + }, + { + "epoch": 0.9, + "learning_rate": 1.3728742568722864e-05, + "loss": 3.6958, + "step": 7009 + }, + { + "epoch": 0.9, + "learning_rate": 1.369488569744376e-05, + "loss": 3.6883, + "step": 7010 + }, + { + "epoch": 0.9, + "learning_rate": 1.366106944975326e-05, + "loss": 3.7616, + "step": 7011 + }, + { + "epoch": 0.9, + "learning_rate": 1.3627293831464771e-05, + "loss": 3.8325, + "step": 7012 + }, + { + "epoch": 0.9, + "learning_rate": 1.3593558848384785e-05, + "loss": 3.8534, + "step": 7013 + }, + { + "epoch": 0.9, + "learning_rate": 1.3559864506312691e-05, + "loss": 3.5978, + "step": 7014 + }, + { + "epoch": 0.9, + "learning_rate": 1.352621081104094e-05, + "loss": 3.778, + "step": 7015 + }, + { + "epoch": 0.9, + "learning_rate": 1.3492597768354959e-05, + "loss": 3.8734, + "step": 7016 + }, + { + "epoch": 0.9, + "learning_rate": 1.3459025384033264e-05, + "loss": 3.6801, + "step": 7017 + }, + { + "epoch": 0.9, + "learning_rate": 1.3425493663847349e-05, + "loss": 3.6619, + "step": 7018 + }, + { + "epoch": 0.9, + "learning_rate": 1.339200261356166e-05, + "loss": 3.7534, + "step": 7019 + }, + { + "epoch": 0.9, + "learning_rate": 1.335855223893373e-05, + "loss": 3.7448, + "step": 7020 + }, + { + "epoch": 0.9, + "learning_rate": 1.3325142545714014e-05, + "loss": 3.7778, + "step": 7021 + }, + { + "epoch": 0.9, + "learning_rate": 1.3291773539646112e-05, + "loss": 3.6148, + "step": 7022 + }, + { + "epoch": 0.9, + "learning_rate": 1.3258445226466464e-05, + "loss": 3.6763, + "step": 7023 + }, + { + "epoch": 0.9, + "learning_rate": 1.3225157611904625e-05, + "loss": 3.8562, + "step": 7024 + }, + { + "epoch": 0.9, + "learning_rate": 1.3191910701683129e-05, + "loss": 3.8145, + "step": 7025 + }, + { + "epoch": 0.9, + "learning_rate": 1.3158704501517516e-05, + "loss": 3.8075, + "step": 7026 + }, + { + "epoch": 0.9, + "learning_rate": 1.31255390171163e-05, + "loss": 3.8241, + "step": 7027 + }, + { + "epoch": 0.9, + "learning_rate": 1.3092414254181006e-05, + "loss": 3.7819, + "step": 7028 + }, + { + "epoch": 0.9, + "learning_rate": 1.3059330218406162e-05, + "loss": 3.7672, + "step": 7029 + }, + { + "epoch": 0.9, + "learning_rate": 1.3026286915479273e-05, + "loss": 3.8022, + "step": 7030 + }, + { + "epoch": 0.9, + "learning_rate": 1.2993284351080909e-05, + "loss": 3.7269, + "step": 7031 + }, + { + "epoch": 0.9, + "learning_rate": 1.296032253088461e-05, + "loss": 3.7996, + "step": 7032 + }, + { + "epoch": 0.9, + "learning_rate": 1.2927401460556876e-05, + "loss": 3.7822, + "step": 7033 + }, + { + "epoch": 0.9, + "learning_rate": 1.2894521145757205e-05, + "loss": 3.8324, + "step": 7034 + }, + { + "epoch": 0.9, + "learning_rate": 1.2861681592138103e-05, + "loss": 3.6805, + "step": 7035 + }, + { + "epoch": 0.9, + "learning_rate": 1.282888280534511e-05, + "loss": 3.7919, + "step": 7036 + }, + { + "epoch": 0.9, + "learning_rate": 1.2796124791016605e-05, + "loss": 3.6723, + "step": 7037 + }, + { + "epoch": 0.9, + "learning_rate": 1.2763407554784223e-05, + "loss": 3.7785, + "step": 7038 + }, + { + "epoch": 0.9, + "learning_rate": 1.2730731102272352e-05, + "loss": 3.8313, + "step": 7039 + }, + { + "epoch": 0.9, + "learning_rate": 1.2698095439098445e-05, + "loss": 3.8506, + "step": 7040 + }, + { + "epoch": 0.9, + "learning_rate": 1.2665500570872984e-05, + "loss": 3.7685, + "step": 7041 + }, + { + "epoch": 0.9, + "learning_rate": 1.2632946503199406e-05, + "loss": 3.7385, + "step": 7042 + }, + { + "epoch": 0.9, + "learning_rate": 1.260043324167412e-05, + "loss": 3.8121, + "step": 7043 + }, + { + "epoch": 0.9, + "learning_rate": 1.2567960791886518e-05, + "loss": 3.8681, + "step": 7044 + }, + { + "epoch": 0.9, + "learning_rate": 1.2535529159418968e-05, + "loss": 3.7609, + "step": 7045 + }, + { + "epoch": 0.9, + "learning_rate": 1.2503138349846926e-05, + "loss": 3.8959, + "step": 7046 + }, + { + "epoch": 0.9, + "learning_rate": 1.2470788368738717e-05, + "loss": 3.5907, + "step": 7047 + }, + { + "epoch": 0.9, + "learning_rate": 1.2438479221655641e-05, + "loss": 3.7824, + "step": 7048 + }, + { + "epoch": 0.9, + "learning_rate": 1.2406210914152005e-05, + "loss": 3.6283, + "step": 7049 + }, + { + "epoch": 0.9, + "learning_rate": 1.2373983451775179e-05, + "loss": 3.6487, + "step": 7050 + }, + { + "epoch": 0.9, + "learning_rate": 1.2341796840065366e-05, + "loss": 3.7998, + "step": 7051 + }, + { + "epoch": 0.9, + "learning_rate": 1.230965108455584e-05, + "loss": 3.8405, + "step": 7052 + }, + { + "epoch": 0.9, + "learning_rate": 1.227754619077287e-05, + "loss": 3.8646, + "step": 7053 + }, + { + "epoch": 0.9, + "learning_rate": 1.2245482164235627e-05, + "loss": 3.8139, + "step": 7054 + }, + { + "epoch": 0.9, + "learning_rate": 1.2213459010456285e-05, + "loss": 3.7553, + "step": 7055 + }, + { + "epoch": 0.9, + "learning_rate": 1.2181476734939968e-05, + "loss": 3.858, + "step": 7056 + }, + { + "epoch": 0.9, + "learning_rate": 1.2149535343184858e-05, + "loss": 3.6585, + "step": 7057 + }, + { + "epoch": 0.9, + "learning_rate": 1.2117634840681984e-05, + "loss": 3.7914, + "step": 7058 + }, + { + "epoch": 0.9, + "learning_rate": 1.2085775232915485e-05, + "loss": 3.8701, + "step": 7059 + }, + { + "epoch": 0.9, + "learning_rate": 1.2053956525362314e-05, + "loss": 3.809, + "step": 7060 + }, + { + "epoch": 0.9, + "learning_rate": 1.2022178723492566e-05, + "loss": 3.8554, + "step": 7061 + }, + { + "epoch": 0.9, + "learning_rate": 1.1990441832769178e-05, + "loss": 3.8713, + "step": 7062 + }, + { + "epoch": 0.9, + "learning_rate": 1.195874585864809e-05, + "loss": 3.7704, + "step": 7063 + }, + { + "epoch": 0.9, + "learning_rate": 1.1927090806578195e-05, + "loss": 3.7674, + "step": 7064 + }, + { + "epoch": 0.9, + "learning_rate": 1.189547668200136e-05, + "loss": 3.7262, + "step": 7065 + }, + { + "epoch": 0.9, + "learning_rate": 1.1863903490352379e-05, + "loss": 3.9567, + "step": 7066 + }, + { + "epoch": 0.9, + "learning_rate": 1.1832371237059158e-05, + "loss": 3.7887, + "step": 7067 + }, + { + "epoch": 0.9, + "learning_rate": 1.1800879927542335e-05, + "loss": 3.7455, + "step": 7068 + }, + { + "epoch": 0.9, + "learning_rate": 1.1769429567215773e-05, + "loss": 3.6178, + "step": 7069 + }, + { + "epoch": 0.9, + "learning_rate": 1.1738020161486035e-05, + "loss": 3.6272, + "step": 7070 + }, + { + "epoch": 0.91, + "learning_rate": 1.1706651715752803e-05, + "loss": 3.8026, + "step": 7071 + }, + { + "epoch": 0.91, + "learning_rate": 1.1675324235408707e-05, + "loss": 3.7327, + "step": 7072 + }, + { + "epoch": 0.91, + "learning_rate": 1.164403772583919e-05, + "loss": 3.8305, + "step": 7073 + }, + { + "epoch": 0.91, + "learning_rate": 1.1612792192422922e-05, + "loss": 3.8555, + "step": 7074 + }, + { + "epoch": 0.91, + "learning_rate": 1.1581587640531272e-05, + "loss": 3.8346, + "step": 7075 + }, + { + "epoch": 0.91, + "learning_rate": 1.1550424075528697e-05, + "loss": 3.7362, + "step": 7076 + }, + { + "epoch": 0.91, + "learning_rate": 1.1519301502772556e-05, + "loss": 3.7945, + "step": 7077 + }, + { + "epoch": 0.91, + "learning_rate": 1.1488219927613202e-05, + "loss": 3.7712, + "step": 7078 + }, + { + "epoch": 0.91, + "learning_rate": 1.145717935539392e-05, + "loss": 3.8328, + "step": 7079 + }, + { + "epoch": 0.91, + "learning_rate": 1.1426179791450913e-05, + "loss": 3.8198, + "step": 7080 + }, + { + "epoch": 0.91, + "learning_rate": 1.1395221241113363e-05, + "loss": 3.7413, + "step": 7081 + }, + { + "epoch": 0.91, + "learning_rate": 1.1364303709703482e-05, + "loss": 3.891, + "step": 7082 + }, + { + "epoch": 0.91, + "learning_rate": 1.1333427202536273e-05, + "loss": 3.6513, + "step": 7083 + }, + { + "epoch": 0.91, + "learning_rate": 1.1302591724919791e-05, + "loss": 3.7911, + "step": 7084 + }, + { + "epoch": 0.91, + "learning_rate": 1.1271797282154994e-05, + "loss": 3.7372, + "step": 7085 + }, + { + "epoch": 0.91, + "learning_rate": 1.1241043879535811e-05, + "loss": 3.7131, + "step": 7086 + }, + { + "epoch": 0.91, + "learning_rate": 1.1210331522349126e-05, + "loss": 3.8133, + "step": 7087 + }, + { + "epoch": 0.91, + "learning_rate": 1.1179660215874715e-05, + "loss": 3.693, + "step": 7088 + }, + { + "epoch": 0.91, + "learning_rate": 1.1149029965385416e-05, + "loss": 3.7012, + "step": 7089 + }, + { + "epoch": 0.91, + "learning_rate": 1.1118440776146821e-05, + "loss": 3.7118, + "step": 7090 + }, + { + "epoch": 0.91, + "learning_rate": 1.1087892653417642e-05, + "loss": 3.7171, + "step": 7091 + }, + { + "epoch": 0.91, + "learning_rate": 1.10573856024494e-05, + "loss": 3.6541, + "step": 7092 + }, + { + "epoch": 0.91, + "learning_rate": 1.1026919628486647e-05, + "loss": 3.8104, + "step": 7093 + }, + { + "epoch": 0.91, + "learning_rate": 1.0996494736766782e-05, + "loss": 3.7958, + "step": 7094 + }, + { + "epoch": 0.91, + "learning_rate": 1.0966110932520285e-05, + "loss": 3.6561, + "step": 7095 + }, + { + "epoch": 0.91, + "learning_rate": 1.0935768220970393e-05, + "loss": 3.7729, + "step": 7096 + }, + { + "epoch": 0.91, + "learning_rate": 1.0905466607333465e-05, + "loss": 3.8192, + "step": 7097 + }, + { + "epoch": 0.91, + "learning_rate": 1.0875206096818607e-05, + "loss": 3.7776, + "step": 7098 + }, + { + "epoch": 0.91, + "learning_rate": 1.0844986694628022e-05, + "loss": 3.8332, + "step": 7099 + }, + { + "epoch": 0.91, + "learning_rate": 1.081480840595675e-05, + "loss": 3.7545, + "step": 7100 + }, + { + "epoch": 0.91, + "learning_rate": 1.0784671235992777e-05, + "loss": 3.8703, + "step": 7101 + }, + { + "epoch": 0.91, + "learning_rate": 1.0754575189917015e-05, + "loss": 3.7753, + "step": 7102 + }, + { + "epoch": 0.91, + "learning_rate": 1.0724520272903382e-05, + "loss": 3.8399, + "step": 7103 + }, + { + "epoch": 0.91, + "learning_rate": 1.0694506490118632e-05, + "loss": 3.7146, + "step": 7104 + }, + { + "epoch": 0.91, + "learning_rate": 1.0664533846722447e-05, + "loss": 3.681, + "step": 7105 + }, + { + "epoch": 0.91, + "learning_rate": 1.0634602347867533e-05, + "loss": 3.6763, + "step": 7106 + }, + { + "epoch": 0.91, + "learning_rate": 1.0604711998699445e-05, + "loss": 3.7097, + "step": 7107 + }, + { + "epoch": 0.91, + "learning_rate": 1.0574862804356683e-05, + "loss": 3.797, + "step": 7108 + }, + { + "epoch": 0.91, + "learning_rate": 1.0545054769970614e-05, + "loss": 3.9525, + "step": 7109 + }, + { + "epoch": 0.91, + "learning_rate": 1.0515287900665666e-05, + "loss": 3.7056, + "step": 7110 + }, + { + "epoch": 0.91, + "learning_rate": 1.0485562201559079e-05, + "loss": 3.7754, + "step": 7111 + }, + { + "epoch": 0.91, + "learning_rate": 1.0455877677761044e-05, + "loss": 3.7289, + "step": 7112 + }, + { + "epoch": 0.91, + "learning_rate": 1.0426234334374647e-05, + "loss": 3.8013, + "step": 7113 + }, + { + "epoch": 0.91, + "learning_rate": 1.0396632176495946e-05, + "loss": 3.7556, + "step": 7114 + }, + { + "epoch": 0.91, + "learning_rate": 1.0367071209213902e-05, + "loss": 3.9237, + "step": 7115 + }, + { + "epoch": 0.91, + "learning_rate": 1.0337551437610365e-05, + "loss": 3.8471, + "step": 7116 + }, + { + "epoch": 0.91, + "learning_rate": 1.0308072866760137e-05, + "loss": 3.7412, + "step": 7117 + }, + { + "epoch": 0.91, + "learning_rate": 1.027863550173097e-05, + "loss": 3.9068, + "step": 7118 + }, + { + "epoch": 0.91, + "learning_rate": 1.0249239347583428e-05, + "loss": 3.8022, + "step": 7119 + }, + { + "epoch": 0.91, + "learning_rate": 1.0219884409371077e-05, + "loss": 3.7354, + "step": 7120 + }, + { + "epoch": 0.91, + "learning_rate": 1.0190570692140355e-05, + "loss": 3.7924, + "step": 7121 + }, + { + "epoch": 0.91, + "learning_rate": 1.0161298200930647e-05, + "loss": 3.8658, + "step": 7122 + }, + { + "epoch": 0.91, + "learning_rate": 1.0132066940774203e-05, + "loss": 3.7256, + "step": 7123 + }, + { + "epoch": 0.91, + "learning_rate": 1.010287691669623e-05, + "loss": 3.7412, + "step": 7124 + }, + { + "epoch": 0.91, + "learning_rate": 1.0073728133714877e-05, + "loss": 3.7997, + "step": 7125 + }, + { + "epoch": 0.91, + "learning_rate": 1.0044620596841136e-05, + "loss": 3.642, + "step": 7126 + }, + { + "epoch": 0.91, + "learning_rate": 1.0015554311078895e-05, + "loss": 3.8261, + "step": 7127 + }, + { + "epoch": 0.91, + "learning_rate": 9.986529281425016e-06, + "loss": 3.887, + "step": 7128 + }, + { + "epoch": 0.91, + "learning_rate": 9.957545512869231e-06, + "loss": 3.7206, + "step": 7129 + }, + { + "epoch": 0.91, + "learning_rate": 9.928603010394138e-06, + "loss": 3.7671, + "step": 7130 + }, + { + "epoch": 0.91, + "learning_rate": 9.899701778975395e-06, + "loss": 3.7355, + "step": 7131 + }, + { + "epoch": 0.91, + "learning_rate": 9.870841823581362e-06, + "loss": 3.6606, + "step": 7132 + }, + { + "epoch": 0.91, + "learning_rate": 9.842023149173428e-06, + "loss": 3.8721, + "step": 7133 + }, + { + "epoch": 0.91, + "learning_rate": 9.813245760705886e-06, + "loss": 3.8157, + "step": 7134 + }, + { + "epoch": 0.91, + "learning_rate": 9.784509663125884e-06, + "loss": 3.8451, + "step": 7135 + }, + { + "epoch": 0.91, + "learning_rate": 9.755814861373502e-06, + "loss": 3.6316, + "step": 7136 + }, + { + "epoch": 0.91, + "learning_rate": 9.727161360381681e-06, + "loss": 3.8385, + "step": 7137 + }, + { + "epoch": 0.91, + "learning_rate": 9.69854916507626e-06, + "loss": 3.7598, + "step": 7138 + }, + { + "epoch": 0.91, + "learning_rate": 9.669978280376107e-06, + "loss": 3.7062, + "step": 7139 + }, + { + "epoch": 0.91, + "learning_rate": 9.641448711192796e-06, + "loss": 3.6504, + "step": 7140 + }, + { + "epoch": 0.91, + "learning_rate": 9.61296046243096e-06, + "loss": 3.7158, + "step": 7141 + }, + { + "epoch": 0.91, + "learning_rate": 9.584513538987983e-06, + "loss": 3.7964, + "step": 7142 + }, + { + "epoch": 0.91, + "learning_rate": 9.556107945754316e-06, + "loss": 3.8351, + "step": 7143 + }, + { + "epoch": 0.91, + "learning_rate": 9.527743687613138e-06, + "loss": 3.8549, + "step": 7144 + }, + { + "epoch": 0.91, + "learning_rate": 9.499420769440576e-06, + "loss": 3.7222, + "step": 7145 + }, + { + "epoch": 0.91, + "learning_rate": 9.471139196105732e-06, + "loss": 3.8699, + "step": 7146 + }, + { + "epoch": 0.91, + "learning_rate": 9.442898972470526e-06, + "loss": 3.6953, + "step": 7147 + }, + { + "epoch": 0.91, + "learning_rate": 9.414700103389768e-06, + "loss": 3.6937, + "step": 7148 + }, + { + "epoch": 0.92, + "learning_rate": 9.386542593711162e-06, + "loss": 3.7926, + "step": 7149 + }, + { + "epoch": 0.92, + "learning_rate": 9.358426448275309e-06, + "loss": 3.6927, + "step": 7150 + }, + { + "epoch": 0.92, + "learning_rate": 9.330351671915676e-06, + "loss": 3.8386, + "step": 7151 + }, + { + "epoch": 0.92, + "learning_rate": 9.302318269458682e-06, + "loss": 3.6895, + "step": 7152 + }, + { + "epoch": 0.92, + "learning_rate": 9.274326245723607e-06, + "loss": 3.7359, + "step": 7153 + }, + { + "epoch": 0.92, + "learning_rate": 9.246375605522578e-06, + "loss": 3.9849, + "step": 7154 + }, + { + "epoch": 0.92, + "learning_rate": 9.218466353660637e-06, + "loss": 3.7498, + "step": 7155 + }, + { + "epoch": 0.92, + "learning_rate": 9.190598494935726e-06, + "loss": 3.7156, + "step": 7156 + }, + { + "epoch": 0.92, + "learning_rate": 9.162772034138623e-06, + "loss": 3.8779, + "step": 7157 + }, + { + "epoch": 0.92, + "learning_rate": 9.134986976053028e-06, + "loss": 3.6743, + "step": 7158 + }, + { + "epoch": 0.92, + "learning_rate": 9.10724332545551e-06, + "loss": 3.8159, + "step": 7159 + }, + { + "epoch": 0.92, + "learning_rate": 9.079541087115506e-06, + "loss": 3.8364, + "step": 7160 + }, + { + "epoch": 0.92, + "learning_rate": 9.051880265795426e-06, + "loss": 3.75, + "step": 7161 + }, + { + "epoch": 0.92, + "learning_rate": 9.024260866250439e-06, + "loss": 3.7904, + "step": 7162 + }, + { + "epoch": 0.92, + "learning_rate": 8.996682893228609e-06, + "loss": 3.5639, + "step": 7163 + }, + { + "epoch": 0.92, + "learning_rate": 8.969146351470974e-06, + "loss": 3.7978, + "step": 7164 + }, + { + "epoch": 0.92, + "learning_rate": 8.941651245711336e-06, + "loss": 3.7112, + "step": 7165 + }, + { + "epoch": 0.92, + "learning_rate": 8.914197580676409e-06, + "loss": 3.7915, + "step": 7166 + }, + { + "epoch": 0.92, + "learning_rate": 8.886785361085865e-06, + "loss": 3.7359, + "step": 7167 + }, + { + "epoch": 0.92, + "learning_rate": 8.859414591652126e-06, + "loss": 3.6478, + "step": 7168 + }, + { + "epoch": 0.92, + "learning_rate": 8.832085277080571e-06, + "loss": 3.8182, + "step": 7169 + }, + { + "epoch": 0.92, + "learning_rate": 8.804797422069383e-06, + "loss": 3.8241, + "step": 7170 + }, + { + "epoch": 0.92, + "learning_rate": 8.777551031309727e-06, + "loss": 3.6135, + "step": 7171 + }, + { + "epoch": 0.92, + "learning_rate": 8.750346109485525e-06, + "loss": 3.7558, + "step": 7172 + }, + { + "epoch": 0.92, + "learning_rate": 8.723182661273615e-06, + "loss": 3.8778, + "step": 7173 + }, + { + "epoch": 0.92, + "learning_rate": 8.696060691343765e-06, + "loss": 3.7108, + "step": 7174 + }, + { + "epoch": 0.92, + "learning_rate": 8.668980204358496e-06, + "loss": 3.9782, + "step": 7175 + }, + { + "epoch": 0.92, + "learning_rate": 8.641941204973274e-06, + "loss": 3.73, + "step": 7176 + }, + { + "epoch": 0.92, + "learning_rate": 8.61494369783644e-06, + "loss": 3.8133, + "step": 7177 + }, + { + "epoch": 0.92, + "learning_rate": 8.587987687589172e-06, + "loss": 3.8452, + "step": 7178 + }, + { + "epoch": 0.92, + "learning_rate": 8.561073178865453e-06, + "loss": 3.8329, + "step": 7179 + }, + { + "epoch": 0.92, + "learning_rate": 8.534200176292305e-06, + "loss": 3.813, + "step": 7180 + }, + { + "epoch": 0.92, + "learning_rate": 8.507368684489397e-06, + "loss": 3.7229, + "step": 7181 + }, + { + "epoch": 0.92, + "learning_rate": 8.48057870806951e-06, + "loss": 3.8201, + "step": 7182 + }, + { + "epoch": 0.92, + "learning_rate": 8.45383025163804e-06, + "loss": 3.9386, + "step": 7183 + }, + { + "epoch": 0.92, + "learning_rate": 8.427123319793395e-06, + "loss": 3.8652, + "step": 7184 + }, + { + "epoch": 0.92, + "learning_rate": 8.400457917126819e-06, + "loss": 3.6895, + "step": 7185 + }, + { + "epoch": 0.92, + "learning_rate": 8.373834048222394e-06, + "loss": 3.795, + "step": 7186 + }, + { + "epoch": 0.92, + "learning_rate": 8.347251717657018e-06, + "loss": 3.8512, + "step": 7187 + }, + { + "epoch": 0.92, + "learning_rate": 8.320710930000586e-06, + "loss": 3.8533, + "step": 7188 + }, + { + "epoch": 0.92, + "learning_rate": 8.294211689815729e-06, + "loss": 3.8131, + "step": 7189 + }, + { + "epoch": 0.92, + "learning_rate": 8.267754001657969e-06, + "loss": 3.7011, + "step": 7190 + }, + { + "epoch": 0.92, + "learning_rate": 8.241337870075721e-06, + "loss": 3.775, + "step": 7191 + }, + { + "epoch": 0.92, + "learning_rate": 8.214963299610189e-06, + "loss": 3.9052, + "step": 7192 + }, + { + "epoch": 0.92, + "learning_rate": 8.188630294795469e-06, + "loss": 3.6664, + "step": 7193 + }, + { + "epoch": 0.92, + "learning_rate": 8.16233886015852e-06, + "loss": 3.8111, + "step": 7194 + }, + { + "epoch": 0.92, + "learning_rate": 8.136089000219144e-06, + "loss": 3.8138, + "step": 7195 + }, + { + "epoch": 0.92, + "learning_rate": 8.10988071949001e-06, + "loss": 3.8243, + "step": 7196 + }, + { + "epoch": 0.92, + "learning_rate": 8.083714022476568e-06, + "loss": 3.792, + "step": 7197 + }, + { + "epoch": 0.92, + "learning_rate": 8.057588913677277e-06, + "loss": 3.631, + "step": 7198 + }, + { + "epoch": 0.92, + "learning_rate": 8.031505397583267e-06, + "loss": 3.7947, + "step": 7199 + }, + { + "epoch": 0.92, + "learning_rate": 8.005463478678615e-06, + "loss": 3.7741, + "step": 7200 + }, + { + "epoch": 0.92, + "learning_rate": 7.979463161440242e-06, + "loss": 3.616, + "step": 7201 + }, + { + "epoch": 0.92, + "learning_rate": 7.953504450337879e-06, + "loss": 3.8145, + "step": 7202 + }, + { + "epoch": 0.92, + "learning_rate": 7.927587349834148e-06, + "loss": 3.7471, + "step": 7203 + }, + { + "epoch": 0.92, + "learning_rate": 7.901711864384515e-06, + "loss": 3.7566, + "step": 7204 + }, + { + "epoch": 0.92, + "learning_rate": 7.875877998437226e-06, + "loss": 3.7206, + "step": 7205 + }, + { + "epoch": 0.92, + "learning_rate": 7.850085756433478e-06, + "loss": 3.7335, + "step": 7206 + }, + { + "epoch": 0.92, + "learning_rate": 7.824335142807198e-06, + "loss": 3.7756, + "step": 7207 + }, + { + "epoch": 0.92, + "learning_rate": 7.79862616198529e-06, + "loss": 3.73, + "step": 7208 + }, + { + "epoch": 0.92, + "learning_rate": 7.772958818387326e-06, + "loss": 3.8731, + "step": 7209 + }, + { + "epoch": 0.92, + "learning_rate": 7.747333116425947e-06, + "loss": 3.7851, + "step": 7210 + }, + { + "epoch": 0.92, + "learning_rate": 7.721749060506406e-06, + "loss": 3.6103, + "step": 7211 + }, + { + "epoch": 0.92, + "learning_rate": 7.696206655026933e-06, + "loss": 3.8412, + "step": 7212 + }, + { + "epoch": 0.92, + "learning_rate": 7.670705904378572e-06, + "loss": 3.7335, + "step": 7213 + }, + { + "epoch": 0.92, + "learning_rate": 7.645246812945206e-06, + "loss": 3.8024, + "step": 7214 + }, + { + "epoch": 0.92, + "learning_rate": 7.6198293851034715e-06, + "loss": 3.6234, + "step": 7215 + }, + { + "epoch": 0.92, + "learning_rate": 7.594453625223013e-06, + "loss": 3.6869, + "step": 7216 + }, + { + "epoch": 0.92, + "learning_rate": 7.569119537666175e-06, + "loss": 3.7603, + "step": 7217 + }, + { + "epoch": 0.92, + "learning_rate": 7.543827126788194e-06, + "loss": 3.7792, + "step": 7218 + }, + { + "epoch": 0.92, + "learning_rate": 7.5185763969371215e-06, + "loss": 3.5845, + "step": 7219 + }, + { + "epoch": 0.92, + "learning_rate": 7.493367352453873e-06, + "loss": 3.7628, + "step": 7220 + }, + { + "epoch": 0.92, + "learning_rate": 7.468199997672148e-06, + "loss": 3.7596, + "step": 7221 + }, + { + "epoch": 0.92, + "learning_rate": 7.443074336918487e-06, + "loss": 3.7752, + "step": 7222 + }, + { + "epoch": 0.92, + "learning_rate": 7.417990374512296e-06, + "loss": 3.8097, + "step": 7223 + }, + { + "epoch": 0.92, + "learning_rate": 7.392948114765846e-06, + "loss": 3.6945, + "step": 7224 + }, + { + "epoch": 0.92, + "learning_rate": 7.36794756198414e-06, + "loss": 3.8158, + "step": 7225 + }, + { + "epoch": 0.92, + "learning_rate": 7.3429887204650994e-06, + "loss": 3.7734, + "step": 7226 + }, + { + "epoch": 0.93, + "learning_rate": 7.318071594499403e-06, + "loss": 3.8279, + "step": 7227 + }, + { + "epoch": 0.93, + "learning_rate": 7.293196188370627e-06, + "loss": 3.8466, + "step": 7228 + }, + { + "epoch": 0.93, + "learning_rate": 7.268362506355125e-06, + "loss": 3.8524, + "step": 7229 + }, + { + "epoch": 0.93, + "learning_rate": 7.243570552722067e-06, + "loss": 3.7006, + "step": 7230 + }, + { + "epoch": 0.93, + "learning_rate": 7.2188203317335165e-06, + "loss": 3.7804, + "step": 7231 + }, + { + "epoch": 0.93, + "learning_rate": 7.194111847644347e-06, + "loss": 3.6605, + "step": 7232 + }, + { + "epoch": 0.93, + "learning_rate": 7.16944510470216e-06, + "loss": 3.7473, + "step": 7233 + }, + { + "epoch": 0.93, + "learning_rate": 7.144820107147482e-06, + "loss": 3.8263, + "step": 7234 + }, + { + "epoch": 0.93, + "learning_rate": 7.120236859213674e-06, + "loss": 3.8297, + "step": 7235 + }, + { + "epoch": 0.93, + "learning_rate": 7.095695365126858e-06, + "loss": 3.6798, + "step": 7236 + }, + { + "epoch": 0.93, + "learning_rate": 7.071195629105992e-06, + "loss": 3.7131, + "step": 7237 + }, + { + "epoch": 0.93, + "learning_rate": 7.046737655362845e-06, + "loss": 3.8709, + "step": 7238 + }, + { + "epoch": 0.93, + "learning_rate": 7.0223214481020535e-06, + "loss": 3.7871, + "step": 7239 + }, + { + "epoch": 0.93, + "learning_rate": 6.997947011521067e-06, + "loss": 3.8618, + "step": 7240 + }, + { + "epoch": 0.93, + "learning_rate": 6.973614349810115e-06, + "loss": 3.7174, + "step": 7241 + }, + { + "epoch": 0.93, + "learning_rate": 6.949323467152269e-06, + "loss": 3.6721, + "step": 7242 + }, + { + "epoch": 0.93, + "learning_rate": 6.925074367723383e-06, + "loss": 3.7548, + "step": 7243 + }, + { + "epoch": 0.93, + "learning_rate": 6.90086705569215e-06, + "loss": 3.8047, + "step": 7244 + }, + { + "epoch": 0.93, + "learning_rate": 6.876701535220131e-06, + "loss": 3.69, + "step": 7245 + }, + { + "epoch": 0.93, + "learning_rate": 6.8525778104616685e-06, + "loss": 3.8755, + "step": 7246 + }, + { + "epoch": 0.93, + "learning_rate": 6.82849588556389e-06, + "loss": 3.7141, + "step": 7247 + }, + { + "epoch": 0.93, + "learning_rate": 6.804455764666733e-06, + "loss": 3.9453, + "step": 7248 + }, + { + "epoch": 0.93, + "learning_rate": 6.7804574519030325e-06, + "loss": 3.7177, + "step": 7249 + }, + { + "epoch": 0.93, + "learning_rate": 6.75650095139832e-06, + "loss": 3.8223, + "step": 7250 + }, + { + "epoch": 0.93, + "learning_rate": 6.732586267270968e-06, + "loss": 3.8399, + "step": 7251 + }, + { + "epoch": 0.93, + "learning_rate": 6.708713403632299e-06, + "loss": 3.8236, + "step": 7252 + }, + { + "epoch": 0.93, + "learning_rate": 6.684882364586226e-06, + "loss": 3.7427, + "step": 7253 + }, + { + "epoch": 0.93, + "learning_rate": 6.661093154229636e-06, + "loss": 3.7461, + "step": 7254 + }, + { + "epoch": 0.93, + "learning_rate": 6.637345776652176e-06, + "loss": 3.8771, + "step": 7255 + }, + { + "epoch": 0.93, + "learning_rate": 6.613640235936275e-06, + "loss": 3.9385, + "step": 7256 + }, + { + "epoch": 0.93, + "learning_rate": 6.589976536157199e-06, + "loss": 3.7231, + "step": 7257 + }, + { + "epoch": 0.93, + "learning_rate": 6.566354681383002e-06, + "loss": 3.8066, + "step": 7258 + }, + { + "epoch": 0.93, + "learning_rate": 6.542774675674546e-06, + "loss": 3.6467, + "step": 7259 + }, + { + "epoch": 0.93, + "learning_rate": 6.519236523085531e-06, + "loss": 3.8369, + "step": 7260 + }, + { + "epoch": 0.93, + "learning_rate": 6.495740227662445e-06, + "loss": 3.8672, + "step": 7261 + }, + { + "epoch": 0.93, + "learning_rate": 6.472285793444499e-06, + "loss": 3.7506, + "step": 7262 + }, + { + "epoch": 0.93, + "learning_rate": 6.448873224463914e-06, + "loss": 3.7605, + "step": 7263 + }, + { + "epoch": 0.93, + "learning_rate": 6.4255025247454694e-06, + "loss": 3.6282, + "step": 7264 + }, + { + "epoch": 0.93, + "learning_rate": 6.402173698306924e-06, + "loss": 3.9194, + "step": 7265 + }, + { + "epoch": 0.93, + "learning_rate": 6.378886749158708e-06, + "loss": 3.7261, + "step": 7266 + }, + { + "epoch": 0.93, + "learning_rate": 6.355641681304175e-06, + "loss": 3.8967, + "step": 7267 + }, + { + "epoch": 0.93, + "learning_rate": 6.332438498739435e-06, + "loss": 3.7967, + "step": 7268 + }, + { + "epoch": 0.93, + "learning_rate": 6.309277205453323e-06, + "loss": 3.7105, + "step": 7269 + }, + { + "epoch": 0.93, + "learning_rate": 6.2861578054276e-06, + "loss": 3.8607, + "step": 7270 + }, + { + "epoch": 0.93, + "learning_rate": 6.26308030263667e-06, + "loss": 3.7413, + "step": 7271 + }, + { + "epoch": 0.93, + "learning_rate": 6.240044701047915e-06, + "loss": 3.8441, + "step": 7272 + }, + { + "epoch": 0.93, + "learning_rate": 6.21705100462136e-06, + "loss": 3.9115, + "step": 7273 + }, + { + "epoch": 0.93, + "learning_rate": 6.194099217309901e-06, + "loss": 3.7225, + "step": 7274 + }, + { + "epoch": 0.93, + "learning_rate": 6.171189343059269e-06, + "loss": 3.8544, + "step": 7275 + }, + { + "epoch": 0.93, + "learning_rate": 6.14832138580787e-06, + "loss": 3.8602, + "step": 7276 + }, + { + "epoch": 0.93, + "learning_rate": 6.125495349487003e-06, + "loss": 3.8389, + "step": 7277 + }, + { + "epoch": 0.93, + "learning_rate": 6.102711238020725e-06, + "loss": 3.5983, + "step": 7278 + }, + { + "epoch": 0.93, + "learning_rate": 6.079969055325901e-06, + "loss": 3.6755, + "step": 7279 + }, + { + "epoch": 0.93, + "learning_rate": 6.057268805312127e-06, + "loss": 3.5535, + "step": 7280 + }, + { + "epoch": 0.93, + "learning_rate": 6.0346104918818645e-06, + "loss": 3.849, + "step": 7281 + }, + { + "epoch": 0.93, + "learning_rate": 6.011994118930358e-06, + "loss": 3.7742, + "step": 7282 + }, + { + "epoch": 0.93, + "learning_rate": 5.9894196903456376e-06, + "loss": 3.7696, + "step": 7283 + }, + { + "epoch": 0.93, + "learning_rate": 5.966887210008487e-06, + "loss": 3.7965, + "step": 7284 + }, + { + "epoch": 0.93, + "learning_rate": 5.944396681792474e-06, + "loss": 3.8432, + "step": 7285 + }, + { + "epoch": 0.93, + "learning_rate": 5.921948109564035e-06, + "loss": 3.8676, + "step": 7286 + }, + { + "epoch": 0.93, + "learning_rate": 5.899541497182276e-06, + "loss": 3.7339, + "step": 7287 + }, + { + "epoch": 0.93, + "learning_rate": 5.8771768484992e-06, + "loss": 3.8071, + "step": 7288 + }, + { + "epoch": 0.93, + "learning_rate": 5.854854167359564e-06, + "loss": 3.7705, + "step": 7289 + }, + { + "epoch": 0.93, + "learning_rate": 5.832573457600826e-06, + "loss": 3.7616, + "step": 7290 + }, + { + "epoch": 0.93, + "learning_rate": 5.810334723053367e-06, + "loss": 3.8875, + "step": 7291 + }, + { + "epoch": 0.93, + "learning_rate": 5.788137967540269e-06, + "loss": 3.7011, + "step": 7292 + }, + { + "epoch": 0.93, + "learning_rate": 5.765983194877394e-06, + "loss": 3.7444, + "step": 7293 + }, + { + "epoch": 0.93, + "learning_rate": 5.743870408873419e-06, + "loss": 3.7382, + "step": 7294 + }, + { + "epoch": 0.93, + "learning_rate": 5.7217996133297476e-06, + "loss": 3.6566, + "step": 7295 + }, + { + "epoch": 0.93, + "learning_rate": 5.6997708120406775e-06, + "loss": 3.7079, + "step": 7296 + }, + { + "epoch": 0.93, + "learning_rate": 5.677784008793152e-06, + "loss": 3.7671, + "step": 7297 + }, + { + "epoch": 0.93, + "learning_rate": 5.655839207367008e-06, + "loss": 3.7577, + "step": 7298 + }, + { + "epoch": 0.93, + "learning_rate": 5.633936411534729e-06, + "loss": 3.7141, + "step": 7299 + }, + { + "epoch": 0.93, + "learning_rate": 5.612075625061774e-06, + "loss": 3.6233, + "step": 7300 + }, + { + "epoch": 0.93, + "learning_rate": 5.590256851706193e-06, + "loss": 3.8479, + "step": 7301 + }, + { + "epoch": 0.93, + "learning_rate": 5.5684800952188755e-06, + "loss": 3.8001, + "step": 7302 + }, + { + "epoch": 0.93, + "learning_rate": 5.546745359343547e-06, + "loss": 3.6549, + "step": 7303 + }, + { + "epoch": 0.93, + "learning_rate": 5.525052647816636e-06, + "loss": 3.9529, + "step": 7304 + }, + { + "epoch": 0.94, + "learning_rate": 5.503401964367355e-06, + "loss": 3.9849, + "step": 7305 + }, + { + "epoch": 0.94, + "learning_rate": 5.481793312717753e-06, + "loss": 3.8126, + "step": 7306 + }, + { + "epoch": 0.94, + "learning_rate": 5.460226696582555e-06, + "loss": 3.8244, + "step": 7307 + }, + { + "epoch": 0.94, + "learning_rate": 5.43870211966932e-06, + "loss": 3.7572, + "step": 7308 + }, + { + "epoch": 0.94, + "learning_rate": 5.417219585678423e-06, + "loss": 3.8694, + "step": 7309 + }, + { + "epoch": 0.94, + "learning_rate": 5.395779098302911e-06, + "loss": 3.8893, + "step": 7310 + }, + { + "epoch": 0.94, + "learning_rate": 5.374380661228667e-06, + "loss": 3.7232, + "step": 7311 + }, + { + "epoch": 0.94, + "learning_rate": 5.353024278134333e-06, + "loss": 3.8252, + "step": 7312 + }, + { + "epoch": 0.94, + "learning_rate": 5.331709952691333e-06, + "loss": 3.6811, + "step": 7313 + }, + { + "epoch": 0.94, + "learning_rate": 5.3104376885638185e-06, + "loss": 3.9994, + "step": 7314 + }, + { + "epoch": 0.94, + "learning_rate": 5.289207489408754e-06, + "loss": 3.7604, + "step": 7315 + }, + { + "epoch": 0.94, + "learning_rate": 5.268019358875803e-06, + "loss": 3.8785, + "step": 7316 + }, + { + "epoch": 0.94, + "learning_rate": 5.246873300607552e-06, + "loss": 3.8044, + "step": 7317 + }, + { + "epoch": 0.94, + "learning_rate": 5.225769318239177e-06, + "loss": 3.7729, + "step": 7318 + }, + { + "epoch": 0.94, + "learning_rate": 5.2047074153987465e-06, + "loss": 3.7035, + "step": 7319 + }, + { + "epoch": 0.94, + "learning_rate": 5.183687595707032e-06, + "loss": 3.7238, + "step": 7320 + }, + { + "epoch": 0.94, + "learning_rate": 5.1627098627775594e-06, + "loss": 3.7253, + "step": 7321 + }, + { + "epoch": 0.94, + "learning_rate": 5.1417742202166665e-06, + "loss": 3.8329, + "step": 7322 + }, + { + "epoch": 0.94, + "learning_rate": 5.12088067162339e-06, + "loss": 3.7205, + "step": 7323 + }, + { + "epoch": 0.94, + "learning_rate": 5.100029220589636e-06, + "loss": 3.938, + "step": 7324 + }, + { + "epoch": 0.94, + "learning_rate": 5.07921987069998e-06, + "loss": 3.8814, + "step": 7325 + }, + { + "epoch": 0.94, + "learning_rate": 5.058452625531812e-06, + "loss": 3.8022, + "step": 7326 + }, + { + "epoch": 0.94, + "learning_rate": 5.037727488655192e-06, + "loss": 3.8995, + "step": 7327 + }, + { + "epoch": 0.94, + "learning_rate": 5.0170444636331025e-06, + "loss": 3.8039, + "step": 7328 + }, + { + "epoch": 0.94, + "learning_rate": 4.996403554021145e-06, + "loss": 3.7274, + "step": 7329 + }, + { + "epoch": 0.94, + "learning_rate": 4.975804763367758e-06, + "loss": 3.8867, + "step": 7330 + }, + { + "epoch": 0.94, + "learning_rate": 4.955248095214082e-06, + "loss": 3.7436, + "step": 7331 + }, + { + "epoch": 0.94, + "learning_rate": 4.934733553094068e-06, + "loss": 3.8657, + "step": 7332 + }, + { + "epoch": 0.94, + "learning_rate": 4.914261140534393e-06, + "loss": 3.787, + "step": 7333 + }, + { + "epoch": 0.94, + "learning_rate": 4.8938308610544935e-06, + "loss": 3.8689, + "step": 7334 + }, + { + "epoch": 0.94, + "learning_rate": 4.8734427181666156e-06, + "loss": 3.7174, + "step": 7335 + }, + { + "epoch": 0.94, + "learning_rate": 4.853096715375649e-06, + "loss": 3.8261, + "step": 7336 + }, + { + "epoch": 0.94, + "learning_rate": 4.8327928561793525e-06, + "loss": 4.0014, + "step": 7337 + }, + { + "epoch": 0.94, + "learning_rate": 4.812531144068183e-06, + "loss": 3.7843, + "step": 7338 + }, + { + "epoch": 0.94, + "learning_rate": 4.792311582525383e-06, + "loss": 3.743, + "step": 7339 + }, + { + "epoch": 0.94, + "learning_rate": 4.772134175026921e-06, + "loss": 3.8817, + "step": 7340 + }, + { + "epoch": 0.94, + "learning_rate": 4.751998925041495e-06, + "loss": 3.7584, + "step": 7341 + }, + { + "epoch": 0.94, + "learning_rate": 4.731905836030642e-06, + "loss": 3.7285, + "step": 7342 + }, + { + "epoch": 0.94, + "learning_rate": 4.711854911448543e-06, + "loss": 3.8223, + "step": 7343 + }, + { + "epoch": 0.94, + "learning_rate": 4.691846154742191e-06, + "loss": 3.8993, + "step": 7344 + }, + { + "epoch": 0.94, + "learning_rate": 4.671879569351362e-06, + "loss": 3.5336, + "step": 7345 + }, + { + "epoch": 0.94, + "learning_rate": 4.651955158708532e-06, + "loss": 3.7469, + "step": 7346 + }, + { + "epoch": 0.94, + "learning_rate": 4.632072926238934e-06, + "loss": 3.8303, + "step": 7347 + }, + { + "epoch": 0.94, + "learning_rate": 4.612232875360528e-06, + "loss": 3.7153, + "step": 7348 + }, + { + "epoch": 0.94, + "learning_rate": 4.592435009484086e-06, + "loss": 3.974, + "step": 7349 + }, + { + "epoch": 0.94, + "learning_rate": 4.57267933201308e-06, + "loss": 3.928, + "step": 7350 + }, + { + "epoch": 0.94, + "learning_rate": 4.552965846343709e-06, + "loss": 3.8354, + "step": 7351 + }, + { + "epoch": 0.94, + "learning_rate": 4.533294555864986e-06, + "loss": 3.8695, + "step": 7352 + }, + { + "epoch": 0.94, + "learning_rate": 4.513665463958621e-06, + "loss": 3.6994, + "step": 7353 + }, + { + "epoch": 0.94, + "learning_rate": 4.494078573999111e-06, + "loss": 3.8235, + "step": 7354 + }, + { + "epoch": 0.94, + "learning_rate": 4.474533889353594e-06, + "loss": 3.741, + "step": 7355 + }, + { + "epoch": 0.94, + "learning_rate": 4.455031413382104e-06, + "loss": 3.8109, + "step": 7356 + }, + { + "epoch": 0.94, + "learning_rate": 4.435571149437323e-06, + "loss": 3.7749, + "step": 7357 + }, + { + "epoch": 0.94, + "learning_rate": 4.416153100864684e-06, + "loss": 3.7768, + "step": 7358 + }, + { + "epoch": 0.94, + "learning_rate": 4.396777271002378e-06, + "loss": 3.6775, + "step": 7359 + }, + { + "epoch": 0.94, + "learning_rate": 4.377443663181324e-06, + "loss": 3.938, + "step": 7360 + }, + { + "epoch": 0.94, + "learning_rate": 4.358152280725225e-06, + "loss": 3.7881, + "step": 7361 + }, + { + "epoch": 0.94, + "learning_rate": 4.338903126950483e-06, + "loss": 3.7206, + "step": 7362 + }, + { + "epoch": 0.94, + "learning_rate": 4.3196962051662014e-06, + "loss": 3.6543, + "step": 7363 + }, + { + "epoch": 0.94, + "learning_rate": 4.300531518674322e-06, + "loss": 3.7776, + "step": 7364 + }, + { + "epoch": 0.94, + "learning_rate": 4.281409070769487e-06, + "loss": 3.8116, + "step": 7365 + }, + { + "epoch": 0.94, + "learning_rate": 4.26232886473904e-06, + "loss": 3.8424, + "step": 7366 + }, + { + "epoch": 0.94, + "learning_rate": 4.2432909038630785e-06, + "loss": 3.7173, + "step": 7367 + }, + { + "epoch": 0.94, + "learning_rate": 4.224295191414512e-06, + "loss": 3.7685, + "step": 7368 + }, + { + "epoch": 0.94, + "learning_rate": 4.205341730658841e-06, + "loss": 3.7349, + "step": 7369 + }, + { + "epoch": 0.94, + "learning_rate": 4.186430524854429e-06, + "loss": 3.7757, + "step": 7370 + }, + { + "epoch": 0.94, + "learning_rate": 4.167561577252343e-06, + "loss": 3.7796, + "step": 7371 + }, + { + "epoch": 0.94, + "learning_rate": 4.148734891096351e-06, + "loss": 3.7695, + "step": 7372 + }, + { + "epoch": 0.94, + "learning_rate": 4.129950469622945e-06, + "loss": 3.967, + "step": 7373 + }, + { + "epoch": 0.94, + "learning_rate": 4.1112083160614326e-06, + "loss": 3.7375, + "step": 7374 + }, + { + "epoch": 0.94, + "learning_rate": 4.0925084336338455e-06, + "loss": 3.6652, + "step": 7375 + }, + { + "epoch": 0.94, + "learning_rate": 4.073850825554837e-06, + "loss": 3.809, + "step": 7376 + }, + { + "epoch": 0.94, + "learning_rate": 4.0552354950318944e-06, + "loss": 3.6663, + "step": 7377 + }, + { + "epoch": 0.94, + "learning_rate": 4.03666244526521e-06, + "loss": 3.9111, + "step": 7378 + }, + { + "epoch": 0.94, + "learning_rate": 4.018131679447701e-06, + "loss": 3.7087, + "step": 7379 + }, + { + "epoch": 0.94, + "learning_rate": 3.999643200764985e-06, + "loss": 3.9128, + "step": 7380 + }, + { + "epoch": 0.94, + "learning_rate": 3.981197012395521e-06, + "loss": 3.7881, + "step": 7381 + }, + { + "epoch": 0.94, + "learning_rate": 3.962793117510383e-06, + "loss": 3.7314, + "step": 7382 + }, + { + "epoch": 0.95, + "learning_rate": 3.944431519273401e-06, + "loss": 3.8487, + "step": 7383 + }, + { + "epoch": 0.95, + "learning_rate": 3.926112220841188e-06, + "loss": 3.8333, + "step": 7384 + }, + { + "epoch": 0.95, + "learning_rate": 3.907835225363005e-06, + "loss": 3.7134, + "step": 7385 + }, + { + "epoch": 0.95, + "learning_rate": 3.889600535980892e-06, + "loss": 3.9159, + "step": 7386 + }, + { + "epoch": 0.95, + "learning_rate": 3.8714081558295925e-06, + "loss": 3.8194, + "step": 7387 + }, + { + "epoch": 0.95, + "learning_rate": 3.853258088036604e-06, + "loss": 3.7952, + "step": 7388 + }, + { + "epoch": 0.95, + "learning_rate": 3.835150335722154e-06, + "loss": 3.6667, + "step": 7389 + }, + { + "epoch": 0.95, + "learning_rate": 3.817084901999113e-06, + "loss": 3.8123, + "step": 7390 + }, + { + "epoch": 0.95, + "learning_rate": 3.7990617899731904e-06, + "loss": 3.7537, + "step": 7391 + }, + { + "epoch": 0.95, + "learning_rate": 3.7810810027427424e-06, + "loss": 3.892, + "step": 7392 + }, + { + "epoch": 0.95, + "learning_rate": 3.7631425433989062e-06, + "loss": 3.8526, + "step": 7393 + }, + { + "epoch": 0.95, + "learning_rate": 3.745246415025466e-06, + "loss": 3.9181, + "step": 7394 + }, + { + "epoch": 0.95, + "learning_rate": 3.727392620699016e-06, + "loss": 3.7274, + "step": 7395 + }, + { + "epoch": 0.95, + "learning_rate": 3.7095811634887956e-06, + "loss": 3.6728, + "step": 7396 + }, + { + "epoch": 0.95, + "learning_rate": 3.691812046456827e-06, + "loss": 3.6878, + "step": 7397 + }, + { + "epoch": 0.95, + "learning_rate": 3.6740852726578067e-06, + "loss": 3.7781, + "step": 7398 + }, + { + "epoch": 0.95, + "learning_rate": 3.6564008451392127e-06, + "loss": 3.6808, + "step": 7399 + }, + { + "epoch": 0.95, + "learning_rate": 3.6387587669411416e-06, + "loss": 3.7872, + "step": 7400 + }, + { + "epoch": 0.95, + "learning_rate": 3.6211590410965e-06, + "loss": 3.8748, + "step": 7401 + }, + { + "epoch": 0.95, + "learning_rate": 3.603601670630896e-06, + "loss": 3.8814, + "step": 7402 + }, + { + "epoch": 0.95, + "learning_rate": 3.5860866585626373e-06, + "loss": 3.8343, + "step": 7403 + }, + { + "epoch": 0.95, + "learning_rate": 3.5686140079027598e-06, + "loss": 3.7189, + "step": 7404 + }, + { + "epoch": 0.95, + "learning_rate": 3.551183721655027e-06, + "loss": 3.7001, + "step": 7405 + }, + { + "epoch": 0.95, + "learning_rate": 3.5337958028158754e-06, + "loss": 3.6993, + "step": 7406 + }, + { + "epoch": 0.95, + "learning_rate": 3.5164502543745257e-06, + "loss": 3.7139, + "step": 7407 + }, + { + "epoch": 0.95, + "learning_rate": 3.499147079312842e-06, + "loss": 3.7969, + "step": 7408 + }, + { + "epoch": 0.95, + "learning_rate": 3.481886280605445e-06, + "loss": 3.7937, + "step": 7409 + }, + { + "epoch": 0.95, + "learning_rate": 3.4646678612196837e-06, + "loss": 3.7968, + "step": 7410 + }, + { + "epoch": 0.95, + "learning_rate": 3.447491824115606e-06, + "loss": 3.8353, + "step": 7411 + }, + { + "epoch": 0.95, + "learning_rate": 3.43035817224599e-06, + "loss": 3.8298, + "step": 7412 + }, + { + "epoch": 0.95, + "learning_rate": 3.413266908556256e-06, + "loss": 3.8772, + "step": 7413 + }, + { + "epoch": 0.95, + "learning_rate": 3.3962180359846386e-06, + "loss": 3.8375, + "step": 7414 + }, + { + "epoch": 0.95, + "learning_rate": 3.379211557462014e-06, + "loss": 3.8353, + "step": 7415 + }, + { + "epoch": 0.95, + "learning_rate": 3.362247475911989e-06, + "loss": 3.6878, + "step": 7416 + }, + { + "epoch": 0.95, + "learning_rate": 3.3453257942508972e-06, + "loss": 3.7896, + "step": 7417 + }, + { + "epoch": 0.95, + "learning_rate": 3.328446515387773e-06, + "loss": 3.7636, + "step": 7418 + }, + { + "epoch": 0.95, + "learning_rate": 3.3116096422243225e-06, + "loss": 3.6465, + "step": 7419 + }, + { + "epoch": 0.95, + "learning_rate": 3.294815177655064e-06, + "loss": 3.7124, + "step": 7420 + }, + { + "epoch": 0.95, + "learning_rate": 3.2780631245671043e-06, + "loss": 3.7558, + "step": 7421 + }, + { + "epoch": 0.95, + "learning_rate": 3.2613534858403625e-06, + "loss": 3.8648, + "step": 7422 + }, + { + "epoch": 0.95, + "learning_rate": 3.244686264347374e-06, + "loss": 3.8158, + "step": 7423 + }, + { + "epoch": 0.95, + "learning_rate": 3.2280614629534853e-06, + "loss": 3.9133, + "step": 7424 + }, + { + "epoch": 0.95, + "learning_rate": 3.211479084516633e-06, + "loss": 3.9158, + "step": 7425 + }, + { + "epoch": 0.95, + "learning_rate": 3.194939131887564e-06, + "loss": 3.6282, + "step": 7426 + }, + { + "epoch": 0.95, + "learning_rate": 3.1784416079096433e-06, + "loss": 3.8076, + "step": 7427 + }, + { + "epoch": 0.95, + "learning_rate": 3.1619865154190186e-06, + "loss": 3.8349, + "step": 7428 + }, + { + "epoch": 0.95, + "learning_rate": 3.1455738572444824e-06, + "loss": 3.9626, + "step": 7429 + }, + { + "epoch": 0.95, + "learning_rate": 3.1292036362076117e-06, + "loss": 3.7583, + "step": 7430 + }, + { + "epoch": 0.95, + "learning_rate": 3.1128758551225988e-06, + "loss": 3.7519, + "step": 7431 + }, + { + "epoch": 0.95, + "learning_rate": 3.0965905167963935e-06, + "loss": 3.7487, + "step": 7432 + }, + { + "epoch": 0.95, + "learning_rate": 3.0803476240286178e-06, + "loss": 3.6964, + "step": 7433 + }, + { + "epoch": 0.95, + "learning_rate": 3.064147179611648e-06, + "loss": 3.6876, + "step": 7434 + }, + { + "epoch": 0.95, + "learning_rate": 3.04798918633048e-06, + "loss": 3.7963, + "step": 7435 + }, + { + "epoch": 0.95, + "learning_rate": 3.0318736469628906e-06, + "loss": 3.6972, + "step": 7436 + }, + { + "epoch": 0.95, + "learning_rate": 3.015800564279303e-06, + "loss": 3.7405, + "step": 7437 + }, + { + "epoch": 0.95, + "learning_rate": 2.9997699410428956e-06, + "loss": 3.6733, + "step": 7438 + }, + { + "epoch": 0.95, + "learning_rate": 2.9837817800095203e-06, + "loss": 3.9392, + "step": 7439 + }, + { + "epoch": 0.95, + "learning_rate": 2.9678360839277276e-06, + "loss": 3.683, + "step": 7440 + }, + { + "epoch": 0.95, + "learning_rate": 2.9519328555387417e-06, + "loss": 3.9322, + "step": 7441 + }, + { + "epoch": 0.95, + "learning_rate": 2.9360720975765144e-06, + "loss": 3.7969, + "step": 7442 + }, + { + "epoch": 0.95, + "learning_rate": 2.920253812767698e-06, + "loss": 3.8475, + "step": 7443 + }, + { + "epoch": 0.95, + "learning_rate": 2.9044780038316456e-06, + "loss": 3.8266, + "step": 7444 + }, + { + "epoch": 0.95, + "learning_rate": 2.888744673480437e-06, + "loss": 3.7389, + "step": 7445 + }, + { + "epoch": 0.95, + "learning_rate": 2.873053824418742e-06, + "loss": 3.6987, + "step": 7446 + }, + { + "epoch": 0.95, + "learning_rate": 2.8574054593440423e-06, + "loss": 3.6898, + "step": 7447 + }, + { + "epoch": 0.95, + "learning_rate": 2.841799580946464e-06, + "loss": 3.718, + "step": 7448 + }, + { + "epoch": 0.95, + "learning_rate": 2.82623619190886e-06, + "loss": 3.738, + "step": 7449 + }, + { + "epoch": 0.95, + "learning_rate": 2.8107152949067295e-06, + "loss": 3.9378, + "step": 7450 + }, + { + "epoch": 0.95, + "learning_rate": 2.795236892608327e-06, + "loss": 3.6391, + "step": 7451 + }, + { + "epoch": 0.95, + "learning_rate": 2.779800987674497e-06, + "loss": 3.7172, + "step": 7452 + }, + { + "epoch": 0.95, + "learning_rate": 2.764407582758921e-06, + "loss": 3.7363, + "step": 7453 + }, + { + "epoch": 0.95, + "learning_rate": 2.7490566805078996e-06, + "loss": 3.7874, + "step": 7454 + }, + { + "epoch": 0.95, + "learning_rate": 2.7337482835604056e-06, + "loss": 3.7602, + "step": 7455 + }, + { + "epoch": 0.95, + "learning_rate": 2.7184823945481105e-06, + "loss": 3.7645, + "step": 7456 + }, + { + "epoch": 0.95, + "learning_rate": 2.7032590160954716e-06, + "loss": 3.7819, + "step": 7457 + }, + { + "epoch": 0.95, + "learning_rate": 2.6880781508194784e-06, + "loss": 3.7808, + "step": 7458 + }, + { + "epoch": 0.95, + "learning_rate": 2.67293980132996e-06, + "loss": 3.7886, + "step": 7459 + }, + { + "epoch": 0.95, + "learning_rate": 2.6578439702293344e-06, + "loss": 3.7013, + "step": 7460 + }, + { + "epoch": 0.96, + "learning_rate": 2.642790660112776e-06, + "loss": 3.7411, + "step": 7461 + }, + { + "epoch": 0.96, + "learning_rate": 2.6277798735681315e-06, + "loss": 3.8303, + "step": 7462 + }, + { + "epoch": 0.96, + "learning_rate": 2.6128116131758918e-06, + "loss": 3.7863, + "step": 7463 + }, + { + "epoch": 0.96, + "learning_rate": 2.5978858815093045e-06, + "loss": 3.8036, + "step": 7464 + }, + { + "epoch": 0.96, + "learning_rate": 2.5830026811342335e-06, + "loss": 3.7158, + "step": 7465 + }, + { + "epoch": 0.96, + "learning_rate": 2.5681620146093266e-06, + "loss": 3.8918, + "step": 7466 + }, + { + "epoch": 0.96, + "learning_rate": 2.5533638844858486e-06, + "loss": 3.6657, + "step": 7467 + }, + { + "epoch": 0.96, + "learning_rate": 2.538608293307765e-06, + "loss": 3.8017, + "step": 7468 + }, + { + "epoch": 0.96, + "learning_rate": 2.52389524361174e-06, + "loss": 3.8688, + "step": 7469 + }, + { + "epoch": 0.96, + "learning_rate": 2.5092247379271126e-06, + "loss": 3.8103, + "step": 7470 + }, + { + "epoch": 0.96, + "learning_rate": 2.494596778775893e-06, + "loss": 3.83, + "step": 7471 + }, + { + "epoch": 0.96, + "learning_rate": 2.4800113686728467e-06, + "loss": 3.906, + "step": 7472 + }, + { + "epoch": 0.96, + "learning_rate": 2.4654685101253014e-06, + "loss": 3.7763, + "step": 7473 + }, + { + "epoch": 0.96, + "learning_rate": 2.450968205633425e-06, + "loss": 3.8132, + "step": 7474 + }, + { + "epoch": 0.96, + "learning_rate": 2.436510457689917e-06, + "loss": 3.7325, + "step": 7475 + }, + { + "epoch": 0.96, + "learning_rate": 2.4220952687802887e-06, + "loss": 3.931, + "step": 7476 + }, + { + "epoch": 0.96, + "learning_rate": 2.4077226413826426e-06, + "loss": 3.7655, + "step": 7477 + }, + { + "epoch": 0.96, + "learning_rate": 2.3933925779678346e-06, + "loss": 3.7693, + "step": 7478 + }, + { + "epoch": 0.96, + "learning_rate": 2.379105080999311e-06, + "loss": 3.8441, + "step": 7479 + }, + { + "epoch": 0.96, + "learning_rate": 2.3648601529333014e-06, + "loss": 3.7045, + "step": 7480 + }, + { + "epoch": 0.96, + "learning_rate": 2.35065779621868e-06, + "loss": 3.8062, + "step": 7481 + }, + { + "epoch": 0.96, + "learning_rate": 2.336498013296967e-06, + "loss": 3.7271, + "step": 7482 + }, + { + "epoch": 0.96, + "learning_rate": 2.3223808066024086e-06, + "loss": 3.6767, + "step": 7483 + }, + { + "epoch": 0.96, + "learning_rate": 2.3083061785618977e-06, + "loss": 3.8506, + "step": 7484 + }, + { + "epoch": 0.96, + "learning_rate": 2.2942741315950534e-06, + "loss": 3.7806, + "step": 7485 + }, + { + "epoch": 0.96, + "learning_rate": 2.2802846681141685e-06, + "loss": 3.6411, + "step": 7486 + }, + { + "epoch": 0.96, + "learning_rate": 2.2663377905241257e-06, + "loss": 3.8267, + "step": 7487 + }, + { + "epoch": 0.96, + "learning_rate": 2.2524335012225894e-06, + "loss": 3.7157, + "step": 7488 + }, + { + "epoch": 0.96, + "learning_rate": 2.238571802599898e-06, + "loss": 3.7938, + "step": 7489 + }, + { + "epoch": 0.96, + "learning_rate": 2.2247526970390064e-06, + "loss": 3.7375, + "step": 7490 + }, + { + "epoch": 0.96, + "learning_rate": 2.2109761869155697e-06, + "loss": 3.6913, + "step": 7491 + }, + { + "epoch": 0.96, + "learning_rate": 2.1972422745979436e-06, + "loss": 3.858, + "step": 7492 + }, + { + "epoch": 0.96, + "learning_rate": 2.1835509624471562e-06, + "loss": 3.9506, + "step": 7493 + }, + { + "epoch": 0.96, + "learning_rate": 2.1699022528168797e-06, + "loss": 3.7435, + "step": 7494 + }, + { + "epoch": 0.96, + "learning_rate": 2.156296148053488e-06, + "loss": 3.8904, + "step": 7495 + }, + { + "epoch": 0.96, + "learning_rate": 2.142732650496082e-06, + "loss": 3.739, + "step": 7496 + }, + { + "epoch": 0.96, + "learning_rate": 2.1292117624763243e-06, + "loss": 3.7504, + "step": 7497 + }, + { + "epoch": 0.96, + "learning_rate": 2.115733486318605e-06, + "loss": 3.7153, + "step": 7498 + }, + { + "epoch": 0.96, + "learning_rate": 2.102297824340044e-06, + "loss": 3.79, + "step": 7499 + }, + { + "epoch": 0.96, + "learning_rate": 2.0889047788503755e-06, + "loss": 3.8062, + "step": 7500 + }, + { + "epoch": 0.96, + "learning_rate": 2.0755543521519815e-06, + "loss": 3.8125, + "step": 7501 + }, + { + "epoch": 0.96, + "learning_rate": 2.06224654653997e-06, + "loss": 3.834, + "step": 7502 + }, + { + "epoch": 0.96, + "learning_rate": 2.0489813643021493e-06, + "loss": 3.7438, + "step": 7503 + }, + { + "epoch": 0.96, + "learning_rate": 2.035758807718918e-06, + "loss": 3.8167, + "step": 7504 + }, + { + "epoch": 0.96, + "learning_rate": 2.0225788790633746e-06, + "loss": 3.7171, + "step": 7505 + }, + { + "epoch": 0.96, + "learning_rate": 2.0094415806013454e-06, + "loss": 4.0504, + "step": 7506 + }, + { + "epoch": 0.96, + "learning_rate": 1.996346914591274e-06, + "loss": 3.6916, + "step": 7507 + }, + { + "epoch": 0.96, + "learning_rate": 1.983294883284248e-06, + "loss": 3.8414, + "step": 7508 + }, + { + "epoch": 0.96, + "learning_rate": 1.970285488924084e-06, + "loss": 3.9525, + "step": 7509 + }, + { + "epoch": 0.96, + "learning_rate": 1.957318733747271e-06, + "loss": 3.8744, + "step": 7510 + }, + { + "epoch": 0.96, + "learning_rate": 1.944394619982942e-06, + "loss": 3.6807, + "step": 7511 + }, + { + "epoch": 0.96, + "learning_rate": 1.931513149852848e-06, + "loss": 3.6617, + "step": 7512 + }, + { + "epoch": 0.96, + "learning_rate": 1.9186743255714955e-06, + "loss": 3.6991, + "step": 7513 + }, + { + "epoch": 0.96, + "learning_rate": 1.905878149346063e-06, + "loss": 3.7712, + "step": 7514 + }, + { + "epoch": 0.96, + "learning_rate": 1.893124623376319e-06, + "loss": 3.8237, + "step": 7515 + }, + { + "epoch": 0.96, + "learning_rate": 1.8804137498547592e-06, + "loss": 3.6712, + "step": 7516 + }, + { + "epoch": 0.96, + "learning_rate": 1.8677455309664971e-06, + "loss": 3.7244, + "step": 7517 + }, + { + "epoch": 0.96, + "learning_rate": 1.8551199688894016e-06, + "loss": 3.7662, + "step": 7518 + }, + { + "epoch": 0.96, + "learning_rate": 1.842537065793931e-06, + "loss": 3.8095, + "step": 7519 + }, + { + "epoch": 0.96, + "learning_rate": 1.8299968238432163e-06, + "loss": 3.7726, + "step": 7520 + }, + { + "epoch": 0.96, + "learning_rate": 1.8174992451930605e-06, + "loss": 3.864, + "step": 7521 + }, + { + "epoch": 0.96, + "learning_rate": 1.805044331991995e-06, + "loss": 3.8649, + "step": 7522 + }, + { + "epoch": 0.96, + "learning_rate": 1.7926320863811129e-06, + "loss": 3.742, + "step": 7523 + }, + { + "epoch": 0.96, + "learning_rate": 1.7802625104942627e-06, + "loss": 3.7498, + "step": 7524 + }, + { + "epoch": 0.96, + "learning_rate": 1.7679356064578821e-06, + "loss": 3.8534, + "step": 7525 + }, + { + "epoch": 0.96, + "learning_rate": 1.7556513763911096e-06, + "loss": 3.6646, + "step": 7526 + }, + { + "epoch": 0.96, + "learning_rate": 1.7434098224057838e-06, + "loss": 3.793, + "step": 7527 + }, + { + "epoch": 0.96, + "learning_rate": 1.7312109466063597e-06, + "loss": 3.6247, + "step": 7528 + }, + { + "epoch": 0.96, + "learning_rate": 1.71905475108991e-06, + "loss": 3.9322, + "step": 7529 + }, + { + "epoch": 0.96, + "learning_rate": 1.7069412379462911e-06, + "loss": 3.699, + "step": 7530 + }, + { + "epoch": 0.96, + "learning_rate": 1.6948704092579205e-06, + "loss": 3.8069, + "step": 7531 + }, + { + "epoch": 0.96, + "learning_rate": 1.6828422670999432e-06, + "loss": 3.7831, + "step": 7532 + }, + { + "epoch": 0.96, + "learning_rate": 1.6708568135401225e-06, + "loss": 3.7274, + "step": 7533 + }, + { + "epoch": 0.96, + "learning_rate": 1.6589140506388933e-06, + "loss": 3.7909, + "step": 7534 + }, + { + "epoch": 0.96, + "learning_rate": 1.6470139804493357e-06, + "loss": 3.8829, + "step": 7535 + }, + { + "epoch": 0.96, + "learning_rate": 1.6351566050172573e-06, + "loss": 3.6919, + "step": 7536 + }, + { + "epoch": 0.96, + "learning_rate": 1.6233419263810278e-06, + "loss": 3.815, + "step": 7537 + }, + { + "epoch": 0.96, + "learning_rate": 1.611569946571745e-06, + "loss": 3.713, + "step": 7538 + }, + { + "epoch": 0.96, + "learning_rate": 1.5998406676131783e-06, + "loss": 3.877, + "step": 7539 + }, + { + "epoch": 0.97, + "learning_rate": 1.5881540915216875e-06, + "loss": 3.5976, + "step": 7540 + }, + { + "epoch": 0.97, + "learning_rate": 1.5765102203063596e-06, + "loss": 3.7785, + "step": 7541 + }, + { + "epoch": 0.97, + "learning_rate": 1.5649090559688716e-06, + "loss": 3.7329, + "step": 7542 + }, + { + "epoch": 0.97, + "learning_rate": 1.5533506005036557e-06, + "loss": 3.8711, + "step": 7543 + }, + { + "epoch": 0.97, + "learning_rate": 1.5418348558977058e-06, + "loss": 3.8419, + "step": 7544 + }, + { + "epoch": 0.97, + "learning_rate": 1.5303618241306883e-06, + "loss": 3.6931, + "step": 7545 + }, + { + "epoch": 0.97, + "learning_rate": 1.5189315071749977e-06, + "loss": 3.7061, + "step": 7546 + }, + { + "epoch": 0.97, + "learning_rate": 1.5075439069956453e-06, + "loss": 3.6598, + "step": 7547 + }, + { + "epoch": 0.97, + "learning_rate": 1.4961990255502323e-06, + "loss": 3.7859, + "step": 7548 + }, + { + "epoch": 0.97, + "learning_rate": 1.4848968647891148e-06, + "loss": 3.761, + "step": 7549 + }, + { + "epoch": 0.97, + "learning_rate": 1.4736374266552943e-06, + "loss": 3.6912, + "step": 7550 + }, + { + "epoch": 0.97, + "learning_rate": 1.4624207130843336e-06, + "loss": 3.8012, + "step": 7551 + }, + { + "epoch": 0.97, + "learning_rate": 1.4512467260045514e-06, + "loss": 3.7668, + "step": 7552 + }, + { + "epoch": 0.97, + "learning_rate": 1.4401154673368833e-06, + "loss": 3.9691, + "step": 7553 + }, + { + "epoch": 0.97, + "learning_rate": 1.4290269389949095e-06, + "loss": 3.727, + "step": 7554 + }, + { + "epoch": 0.97, + "learning_rate": 1.417981142884911e-06, + "loss": 3.766, + "step": 7555 + }, + { + "epoch": 0.97, + "learning_rate": 1.4069780809057575e-06, + "loss": 3.733, + "step": 7556 + }, + { + "epoch": 0.97, + "learning_rate": 1.3960177549489917e-06, + "loss": 3.9367, + "step": 7557 + }, + { + "epoch": 0.97, + "learning_rate": 1.3851001668988562e-06, + "loss": 3.7773, + "step": 7558 + }, + { + "epoch": 0.97, + "learning_rate": 1.3742253186321829e-06, + "loss": 3.7985, + "step": 7559 + }, + { + "epoch": 0.97, + "learning_rate": 1.3633932120184766e-06, + "loss": 3.8891, + "step": 7560 + }, + { + "epoch": 0.97, + "learning_rate": 1.3526038489199421e-06, + "loss": 3.7782, + "step": 7561 + }, + { + "epoch": 0.97, + "learning_rate": 1.3418572311913735e-06, + "loss": 3.8025, + "step": 7562 + }, + { + "epoch": 0.97, + "learning_rate": 1.3311533606802651e-06, + "loss": 3.7722, + "step": 7563 + }, + { + "epoch": 0.97, + "learning_rate": 1.3204922392266728e-06, + "loss": 3.8366, + "step": 7564 + }, + { + "epoch": 0.97, + "learning_rate": 1.309873868663436e-06, + "loss": 3.8287, + "step": 7565 + }, + { + "epoch": 0.97, + "learning_rate": 1.2992982508159e-06, + "loss": 3.7919, + "step": 7566 + }, + { + "epoch": 0.97, + "learning_rate": 1.2887653875021944e-06, + "loss": 3.8995, + "step": 7567 + }, + { + "epoch": 0.97, + "learning_rate": 1.2782752805330366e-06, + "loss": 3.7152, + "step": 7568 + }, + { + "epoch": 0.97, + "learning_rate": 1.2678279317117903e-06, + "loss": 3.7314, + "step": 7569 + }, + { + "epoch": 0.97, + "learning_rate": 1.2574233428344905e-06, + "loss": 3.9658, + "step": 7570 + }, + { + "epoch": 0.97, + "learning_rate": 1.2470615156897624e-06, + "loss": 3.8585, + "step": 7571 + }, + { + "epoch": 0.97, + "learning_rate": 1.2367424520589588e-06, + "loss": 3.8665, + "step": 7572 + }, + { + "epoch": 0.97, + "learning_rate": 1.2264661537160492e-06, + "loss": 3.7813, + "step": 7573 + }, + { + "epoch": 0.97, + "learning_rate": 1.216232622427621e-06, + "loss": 3.7754, + "step": 7574 + }, + { + "epoch": 0.97, + "learning_rate": 1.206041859952961e-06, + "loss": 3.7868, + "step": 7575 + }, + { + "epoch": 0.97, + "learning_rate": 1.1958938680439736e-06, + "loss": 3.6823, + "step": 7576 + }, + { + "epoch": 0.97, + "learning_rate": 1.1857886484452073e-06, + "loss": 3.7493, + "step": 7577 + }, + { + "epoch": 0.97, + "learning_rate": 1.1757262028938842e-06, + "loss": 3.9146, + "step": 7578 + }, + { + "epoch": 0.97, + "learning_rate": 1.1657065331198425e-06, + "loss": 3.6884, + "step": 7579 + }, + { + "epoch": 0.97, + "learning_rate": 1.1557296408455932e-06, + "loss": 3.8046, + "step": 7580 + }, + { + "epoch": 0.97, + "learning_rate": 1.1457955277862641e-06, + "loss": 3.8562, + "step": 7581 + }, + { + "epoch": 0.97, + "learning_rate": 1.1359041956496286e-06, + "loss": 3.7193, + "step": 7582 + }, + { + "epoch": 0.97, + "learning_rate": 1.1260556461361592e-06, + "loss": 3.796, + "step": 7583 + }, + { + "epoch": 0.97, + "learning_rate": 1.1162498809389188e-06, + "loss": 3.7223, + "step": 7584 + }, + { + "epoch": 0.97, + "learning_rate": 1.106486901743642e-06, + "loss": 3.7322, + "step": 7585 + }, + { + "epoch": 0.97, + "learning_rate": 1.0967667102286527e-06, + "loss": 3.7828, + "step": 7586 + }, + { + "epoch": 0.97, + "learning_rate": 1.0870893080650313e-06, + "loss": 3.6291, + "step": 7587 + }, + { + "epoch": 0.97, + "learning_rate": 1.0774546969163912e-06, + "loss": 3.7519, + "step": 7588 + }, + { + "epoch": 0.97, + "learning_rate": 1.0678628784390466e-06, + "loss": 3.8178, + "step": 7589 + }, + { + "epoch": 0.97, + "learning_rate": 1.0583138542819558e-06, + "loss": 3.752, + "step": 7590 + }, + { + "epoch": 0.97, + "learning_rate": 1.0488076260866952e-06, + "loss": 3.7396, + "step": 7591 + }, + { + "epoch": 0.97, + "learning_rate": 1.0393441954874849e-06, + "loss": 3.8565, + "step": 7592 + }, + { + "epoch": 0.97, + "learning_rate": 1.0299235641111904e-06, + "loss": 3.7184, + "step": 7593 + }, + { + "epoch": 0.97, + "learning_rate": 1.0205457335773493e-06, + "loss": 3.7489, + "step": 7594 + }, + { + "epoch": 0.97, + "learning_rate": 1.0112107054981167e-06, + "loss": 3.7669, + "step": 7595 + }, + { + "epoch": 0.97, + "learning_rate": 1.0019184814782923e-06, + "loss": 3.8019, + "step": 7596 + }, + { + "epoch": 0.97, + "learning_rate": 9.9266906311532e-07, + "loss": 3.9677, + "step": 7597 + }, + { + "epoch": 0.97, + "learning_rate": 9.834624519992897e-07, + "loss": 3.7832, + "step": 7598 + }, + { + "epoch": 0.97, + "learning_rate": 9.742986497128792e-07, + "loss": 3.8185, + "step": 7599 + }, + { + "epoch": 0.97, + "learning_rate": 9.65177657831523e-07, + "loss": 3.7281, + "step": 7600 + }, + { + "epoch": 0.97, + "learning_rate": 9.560994779231613e-07, + "loss": 3.7621, + "step": 7601 + }, + { + "epoch": 0.97, + "learning_rate": 9.470641115484624e-07, + "loss": 3.7657, + "step": 7602 + }, + { + "epoch": 0.97, + "learning_rate": 9.380715602607115e-07, + "loss": 3.8466, + "step": 7603 + }, + { + "epoch": 0.97, + "learning_rate": 9.291218256058387e-07, + "loss": 3.8412, + "step": 7604 + }, + { + "epoch": 0.97, + "learning_rate": 9.20214909122391e-07, + "loss": 3.602, + "step": 7605 + }, + { + "epoch": 0.97, + "learning_rate": 9.113508123415881e-07, + "loss": 3.6721, + "step": 7606 + }, + { + "epoch": 0.97, + "learning_rate": 9.025295367872665e-07, + "loss": 3.8213, + "step": 7607 + }, + { + "epoch": 0.97, + "learning_rate": 8.937510839759078e-07, + "loss": 3.7272, + "step": 7608 + }, + { + "epoch": 0.97, + "learning_rate": 8.850154554166101e-07, + "loss": 3.7466, + "step": 7609 + }, + { + "epoch": 0.97, + "learning_rate": 8.763226526111723e-07, + "loss": 3.6658, + "step": 7610 + }, + { + "epoch": 0.97, + "learning_rate": 8.676726770539267e-07, + "loss": 3.8284, + "step": 7611 + }, + { + "epoch": 0.97, + "learning_rate": 8.590655302319616e-07, + "loss": 3.8071, + "step": 7612 + }, + { + "epoch": 0.97, + "learning_rate": 8.505012136249268e-07, + "loss": 3.847, + "step": 7613 + }, + { + "epoch": 0.97, + "learning_rate": 8.41979728705089e-07, + "loss": 3.7687, + "step": 7614 + }, + { + "epoch": 0.97, + "learning_rate": 8.335010769374429e-07, + "loss": 3.785, + "step": 7615 + }, + { + "epoch": 0.97, + "learning_rate": 8.25065259779545e-07, + "loss": 3.7606, + "step": 7616 + }, + { + "epoch": 0.97, + "learning_rate": 8.166722786816239e-07, + "loss": 3.7443, + "step": 7617 + }, + { + "epoch": 0.98, + "learning_rate": 8.083221350865256e-07, + "loss": 3.7807, + "step": 7618 + }, + { + "epoch": 0.98, + "learning_rate": 8.000148304297128e-07, + "loss": 3.7756, + "step": 7619 + }, + { + "epoch": 0.98, + "learning_rate": 7.917503661393211e-07, + "loss": 3.6436, + "step": 7620 + }, + { + "epoch": 0.98, + "learning_rate": 7.835287436361305e-07, + "loss": 3.7639, + "step": 7621 + }, + { + "epoch": 0.98, + "learning_rate": 7.753499643334827e-07, + "loss": 3.7029, + "step": 7622 + }, + { + "epoch": 0.98, + "learning_rate": 7.672140296374475e-07, + "loss": 3.7041, + "step": 7623 + }, + { + "epoch": 0.98, + "learning_rate": 7.591209409466837e-07, + "loss": 3.7896, + "step": 7624 + }, + { + "epoch": 0.98, + "learning_rate": 7.510706996524675e-07, + "loss": 3.788, + "step": 7625 + }, + { + "epoch": 0.98, + "learning_rate": 7.430633071387749e-07, + "loss": 3.8834, + "step": 7626 + }, + { + "epoch": 0.98, + "learning_rate": 7.350987647820884e-07, + "loss": 3.7625, + "step": 7627 + }, + { + "epoch": 0.98, + "learning_rate": 7.271770739516737e-07, + "loss": 3.7064, + "step": 7628 + }, + { + "epoch": 0.98, + "learning_rate": 7.19298236009358e-07, + "loss": 3.7617, + "step": 7629 + }, + { + "epoch": 0.98, + "learning_rate": 7.114622523095305e-07, + "loss": 3.6013, + "step": 7630 + }, + { + "epoch": 0.98, + "learning_rate": 7.036691241993909e-07, + "loss": 3.7747, + "step": 7631 + }, + { + "epoch": 0.98, + "learning_rate": 6.9591885301859e-07, + "loss": 3.8626, + "step": 7632 + }, + { + "epoch": 0.98, + "learning_rate": 6.882114400995343e-07, + "loss": 3.7138, + "step": 7633 + }, + { + "epoch": 0.98, + "learning_rate": 6.805468867672193e-07, + "loss": 3.6938, + "step": 7634 + }, + { + "epoch": 0.98, + "learning_rate": 6.729251943392301e-07, + "loss": 3.787, + "step": 7635 + }, + { + "epoch": 0.98, + "learning_rate": 6.653463641258517e-07, + "loss": 3.7155, + "step": 7636 + }, + { + "epoch": 0.98, + "learning_rate": 6.578103974299588e-07, + "loss": 3.7906, + "step": 7637 + }, + { + "epoch": 0.98, + "learning_rate": 6.503172955470982e-07, + "loss": 3.8754, + "step": 7638 + }, + { + "epoch": 0.98, + "learning_rate": 6.428670597654062e-07, + "loss": 3.7733, + "step": 7639 + }, + { + "epoch": 0.98, + "learning_rate": 6.354596913656363e-07, + "loss": 3.7151, + "step": 7640 + }, + { + "epoch": 0.98, + "learning_rate": 6.280951916212418e-07, + "loss": 3.7342, + "step": 7641 + }, + { + "epoch": 0.98, + "learning_rate": 6.207735617982657e-07, + "loss": 3.9003, + "step": 7642 + }, + { + "epoch": 0.98, + "learning_rate": 6.134948031553678e-07, + "loss": 3.7802, + "step": 7643 + }, + { + "epoch": 0.98, + "learning_rate": 6.062589169438248e-07, + "loss": 3.9116, + "step": 7644 + }, + { + "epoch": 0.98, + "learning_rate": 5.990659044076141e-07, + "loss": 3.686, + "step": 7645 + }, + { + "epoch": 0.98, + "learning_rate": 5.919157667832464e-07, + "loss": 3.7774, + "step": 7646 + }, + { + "epoch": 0.98, + "learning_rate": 5.848085052999885e-07, + "loss": 3.7688, + "step": 7647 + }, + { + "epoch": 0.98, + "learning_rate": 5.777441211795853e-07, + "loss": 3.8536, + "step": 7648 + }, + { + "epoch": 0.98, + "learning_rate": 5.707226156365375e-07, + "loss": 3.788, + "step": 7649 + }, + { + "epoch": 0.98, + "learning_rate": 5.637439898779073e-07, + "loss": 3.7338, + "step": 7650 + }, + { + "epoch": 0.98, + "learning_rate": 5.56808245103374e-07, + "loss": 3.8908, + "step": 7651 + }, + { + "epoch": 0.98, + "learning_rate": 5.499153825053171e-07, + "loss": 3.8328, + "step": 7652 + }, + { + "epoch": 0.98, + "learning_rate": 5.430654032686777e-07, + "loss": 3.7849, + "step": 7653 + }, + { + "epoch": 0.98, + "learning_rate": 5.362583085710416e-07, + "loss": 3.7341, + "step": 7654 + }, + { + "epoch": 0.98, + "learning_rate": 5.294940995826392e-07, + "loss": 3.857, + "step": 7655 + }, + { + "epoch": 0.98, + "learning_rate": 5.227727774663182e-07, + "loss": 3.6349, + "step": 7656 + }, + { + "epoch": 0.98, + "learning_rate": 5.160943433775434e-07, + "loss": 3.7684, + "step": 7657 + }, + { + "epoch": 0.98, + "learning_rate": 5.094587984643962e-07, + "loss": 3.7424, + "step": 7658 + }, + { + "epoch": 0.98, + "learning_rate": 5.028661438676308e-07, + "loss": 3.9306, + "step": 7659 + }, + { + "epoch": 0.98, + "learning_rate": 4.963163807205906e-07, + "loss": 3.8185, + "step": 7660 + }, + { + "epoch": 0.98, + "learning_rate": 4.898095101492916e-07, + "loss": 3.8078, + "step": 7661 + }, + { + "epoch": 0.98, + "learning_rate": 4.833455332722836e-07, + "loss": 3.7852, + "step": 7662 + }, + { + "epoch": 0.98, + "learning_rate": 4.769244512008164e-07, + "loss": 3.6883, + "step": 7663 + }, + { + "epoch": 0.98, + "learning_rate": 4.7054626503878484e-07, + "loss": 3.9141, + "step": 7664 + }, + { + "epoch": 0.98, + "learning_rate": 4.642109758826174e-07, + "loss": 3.8678, + "step": 7665 + }, + { + "epoch": 0.98, + "learning_rate": 4.579185848214429e-07, + "loss": 3.6859, + "step": 7666 + }, + { + "epoch": 0.98, + "learning_rate": 4.5166909293703487e-07, + "loss": 3.8279, + "step": 7667 + }, + { + "epoch": 0.98, + "learning_rate": 4.454625013037006e-07, + "loss": 3.5381, + "step": 7668 + }, + { + "epoch": 0.98, + "learning_rate": 4.392988109884477e-07, + "loss": 3.9076, + "step": 7669 + }, + { + "epoch": 0.98, + "learning_rate": 4.331780230509008e-07, + "loss": 3.8529, + "step": 7670 + }, + { + "epoch": 0.98, + "learning_rate": 4.271001385432738e-07, + "loss": 3.9424, + "step": 7671 + }, + { + "epoch": 0.98, + "learning_rate": 4.2106515851042524e-07, + "loss": 3.638, + "step": 7672 + }, + { + "epoch": 0.98, + "learning_rate": 4.150730839898309e-07, + "loss": 3.7737, + "step": 7673 + }, + { + "epoch": 0.98, + "learning_rate": 4.0912391601161115e-07, + "loss": 3.841, + "step": 7674 + }, + { + "epoch": 0.98, + "learning_rate": 4.032176555985034e-07, + "loss": 3.7855, + "step": 7675 + }, + { + "epoch": 0.98, + "learning_rate": 3.9735430376586224e-07, + "loss": 3.7496, + "step": 7676 + }, + { + "epoch": 0.98, + "learning_rate": 3.9153386152165905e-07, + "loss": 3.6742, + "step": 7677 + }, + { + "epoch": 0.98, + "learning_rate": 3.8575632986648236e-07, + "loss": 3.7342, + "step": 7678 + }, + { + "epoch": 0.98, + "learning_rate": 3.800217097935932e-07, + "loss": 3.7819, + "step": 7679 + }, + { + "epoch": 0.98, + "learning_rate": 3.7433000228878635e-07, + "loss": 3.7696, + "step": 7680 + }, + { + "epoch": 0.98, + "learning_rate": 3.6868120833055686e-07, + "loss": 3.8087, + "step": 7681 + }, + { + "epoch": 0.98, + "learning_rate": 3.630753288900446e-07, + "loss": 3.8768, + "step": 7682 + }, + { + "epoch": 0.98, + "learning_rate": 3.575123649308953e-07, + "loss": 3.9069, + "step": 7683 + }, + { + "epoch": 0.98, + "learning_rate": 3.5199231740945525e-07, + "loss": 3.8876, + "step": 7684 + }, + { + "epoch": 0.98, + "learning_rate": 3.4651518727474294e-07, + "loss": 3.8273, + "step": 7685 + }, + { + "epoch": 0.98, + "learning_rate": 3.410809754682831e-07, + "loss": 3.7639, + "step": 7686 + }, + { + "epoch": 0.98, + "learning_rate": 3.356896829243006e-07, + "loss": 3.6048, + "step": 7687 + }, + { + "epoch": 0.98, + "learning_rate": 3.303413105696096e-07, + "loss": 3.8714, + "step": 7688 + }, + { + "epoch": 0.98, + "learning_rate": 3.250358593236691e-07, + "loss": 3.7834, + "step": 7689 + }, + { + "epoch": 0.98, + "learning_rate": 3.19773330098555e-07, + "loss": 3.8046, + "step": 7690 + }, + { + "epoch": 0.98, + "learning_rate": 3.1455372379893267e-07, + "loss": 3.6281, + "step": 7691 + }, + { + "epoch": 0.98, + "learning_rate": 3.0937704132213975e-07, + "loss": 3.6473, + "step": 7692 + }, + { + "epoch": 0.98, + "learning_rate": 3.0424328355810326e-07, + "loss": 3.7461, + "step": 7693 + }, + { + "epoch": 0.98, + "learning_rate": 2.9915245138933957e-07, + "loss": 3.8854, + "step": 7694 + }, + { + "epoch": 0.98, + "learning_rate": 2.9410454569106516e-07, + "loss": 3.8593, + "step": 7695 + }, + { + "epoch": 0.99, + "learning_rate": 2.8909956733105815e-07, + "loss": 3.8373, + "step": 7696 + }, + { + "epoch": 0.99, + "learning_rate": 2.841375171697413e-07, + "loss": 3.8125, + "step": 7697 + }, + { + "epoch": 0.99, + "learning_rate": 2.792183960601269e-07, + "loss": 3.6259, + "step": 7698 + }, + { + "epoch": 0.99, + "learning_rate": 2.743422048478994e-07, + "loss": 3.8114, + "step": 7699 + }, + { + "epoch": 0.99, + "learning_rate": 2.69508944371305e-07, + "loss": 3.7806, + "step": 7700 + }, + { + "epoch": 0.99, + "learning_rate": 2.6471861546123465e-07, + "loss": 3.6973, + "step": 7701 + }, + { + "epoch": 0.99, + "learning_rate": 2.59971218941224e-07, + "loss": 3.7885, + "step": 7702 + }, + { + "epoch": 0.99, + "learning_rate": 2.55266755627398e-07, + "loss": 3.6802, + "step": 7703 + }, + { + "epoch": 0.99, + "learning_rate": 2.506052263284986e-07, + "loss": 3.9439, + "step": 7704 + }, + { + "epoch": 0.99, + "learning_rate": 2.4598663184591254e-07, + "loss": 3.8338, + "step": 7705 + }, + { + "epoch": 0.99, + "learning_rate": 2.414109729736158e-07, + "loss": 3.9326, + "step": 7706 + }, + { + "epoch": 0.99, + "learning_rate": 2.368782504982292e-07, + "loss": 3.7927, + "step": 7707 + }, + { + "epoch": 0.99, + "learning_rate": 2.3238846519896274e-07, + "loss": 3.8017, + "step": 7708 + }, + { + "epoch": 0.99, + "learning_rate": 2.2794161784769896e-07, + "loss": 3.7654, + "step": 7709 + }, + { + "epoch": 0.99, + "learning_rate": 2.235377092088542e-07, + "loss": 3.9008, + "step": 7710 + }, + { + "epoch": 0.99, + "learning_rate": 2.1917674003954502e-07, + "loss": 3.9163, + "step": 7711 + }, + { + "epoch": 0.99, + "learning_rate": 2.1485871108944955e-07, + "loss": 3.6522, + "step": 7712 + }, + { + "epoch": 0.99, + "learning_rate": 2.1058362310091837e-07, + "loss": 3.7589, + "step": 7713 + }, + { + "epoch": 0.99, + "learning_rate": 2.0635147680886368e-07, + "loss": 3.6927, + "step": 7714 + }, + { + "epoch": 0.99, + "learning_rate": 2.0216227294084234e-07, + "loss": 3.8532, + "step": 7715 + }, + { + "epoch": 0.99, + "learning_rate": 1.9801601221702825e-07, + "loss": 3.7318, + "step": 7716 + }, + { + "epoch": 0.99, + "learning_rate": 1.939126953502124e-07, + "loss": 3.7271, + "step": 7717 + }, + { + "epoch": 0.99, + "learning_rate": 1.8985232304580268e-07, + "loss": 3.8359, + "step": 7718 + }, + { + "epoch": 0.99, + "learning_rate": 1.8583489600182413e-07, + "loss": 3.7676, + "step": 7719 + }, + { + "epoch": 0.99, + "learning_rate": 1.8186041490894646e-07, + "loss": 3.7889, + "step": 7720 + }, + { + "epoch": 0.99, + "learning_rate": 1.7792888045037315e-07, + "loss": 3.9409, + "step": 7721 + }, + { + "epoch": 0.99, + "learning_rate": 1.7404029330203574e-07, + "loss": 3.7362, + "step": 7722 + }, + { + "epoch": 0.99, + "learning_rate": 1.7019465413239955e-07, + "loss": 3.9302, + "step": 7723 + }, + { + "epoch": 0.99, + "learning_rate": 1.6639196360257458e-07, + "loss": 3.8408, + "step": 7724 + }, + { + "epoch": 0.99, + "learning_rate": 1.6263222236628794e-07, + "loss": 3.7437, + "step": 7725 + }, + { + "epoch": 0.99, + "learning_rate": 1.589154310698837e-07, + "loss": 3.7766, + "step": 7726 + }, + { + "epoch": 0.99, + "learning_rate": 1.55241590352323e-07, + "loss": 3.8576, + "step": 7727 + }, + { + "epoch": 0.99, + "learning_rate": 1.5161070084518392e-07, + "loss": 3.8126, + "step": 7728 + }, + { + "epoch": 0.99, + "learning_rate": 1.4802276317266162e-07, + "loss": 3.7693, + "step": 7729 + }, + { + "epoch": 0.99, + "learning_rate": 1.444777779515405e-07, + "loss": 3.7129, + "step": 7730 + }, + { + "epoch": 0.99, + "learning_rate": 1.4097574579127749e-07, + "loss": 3.7025, + "step": 7731 + }, + { + "epoch": 0.99, + "learning_rate": 1.37516667293891e-07, + "loss": 3.78, + "step": 7732 + }, + { + "epoch": 0.99, + "learning_rate": 1.3410054305404428e-07, + "loss": 3.8601, + "step": 7733 + }, + { + "epoch": 0.99, + "learning_rate": 1.3072737365901755e-07, + "loss": 3.8019, + "step": 7734 + }, + { + "epoch": 0.99, + "learning_rate": 1.2739715968868028e-07, + "loss": 3.8185, + "step": 7735 + }, + { + "epoch": 0.99, + "learning_rate": 1.241099017155467e-07, + "loss": 3.7546, + "step": 7736 + }, + { + "epoch": 0.99, + "learning_rate": 1.2086560030474813e-07, + "loss": 3.8167, + "step": 7737 + }, + { + "epoch": 0.99, + "learning_rate": 1.1766425601397734e-07, + "loss": 3.7473, + "step": 7738 + }, + { + "epoch": 0.99, + "learning_rate": 1.1450586939362739e-07, + "loss": 3.7835, + "step": 7739 + }, + { + "epoch": 0.99, + "learning_rate": 1.1139044098662509e-07, + "loss": 3.7979, + "step": 7740 + }, + { + "epoch": 0.99, + "learning_rate": 1.0831797132854204e-07, + "loss": 3.7944, + "step": 7741 + }, + { + "epoch": 0.99, + "learning_rate": 1.0528846094762234e-07, + "loss": 3.7879, + "step": 7742 + }, + { + "epoch": 0.99, + "learning_rate": 1.0230191036464388e-07, + "loss": 3.7399, + "step": 7743 + }, + { + "epoch": 0.99, + "learning_rate": 9.93583200930015e-08, + "loss": 3.7913, + "step": 7744 + }, + { + "epoch": 0.99, + "learning_rate": 9.645769063879039e-08, + "loss": 3.76, + "step": 7745 + }, + { + "epoch": 0.99, + "learning_rate": 9.360002250061172e-08, + "loss": 3.7591, + "step": 7746 + }, + { + "epoch": 0.99, + "learning_rate": 9.078531616976693e-08, + "loss": 3.8491, + "step": 7747 + }, + { + "epoch": 0.99, + "learning_rate": 8.801357213011896e-08, + "loss": 3.72, + "step": 7748 + }, + { + "epoch": 0.99, + "learning_rate": 8.528479085817554e-08, + "loss": 3.7403, + "step": 7749 + }, + { + "epoch": 0.99, + "learning_rate": 8.259897282303363e-08, + "loss": 3.7846, + "step": 7750 + }, + { + "epoch": 0.99, + "learning_rate": 7.995611848640728e-08, + "loss": 3.7714, + "step": 7751 + }, + { + "epoch": 0.99, + "learning_rate": 7.735622830265521e-08, + "loss": 3.7554, + "step": 7752 + }, + { + "epoch": 0.99, + "learning_rate": 7.479930271869773e-08, + "loss": 3.7115, + "step": 7753 + }, + { + "epoch": 0.99, + "learning_rate": 7.228534217415539e-08, + "loss": 3.81, + "step": 7754 + }, + { + "epoch": 0.99, + "learning_rate": 6.981434710115475e-08, + "loss": 3.7057, + "step": 7755 + }, + { + "epoch": 0.99, + "learning_rate": 6.738631792452266e-08, + "loss": 3.6999, + "step": 7756 + }, + { + "epoch": 0.99, + "learning_rate": 6.500125506161969e-08, + "loss": 3.9116, + "step": 7757 + }, + { + "epoch": 0.99, + "learning_rate": 6.265915892253448e-08, + "loss": 3.6489, + "step": 7758 + }, + { + "epoch": 0.99, + "learning_rate": 6.036002990983391e-08, + "loss": 3.6771, + "step": 7759 + }, + { + "epoch": 0.99, + "learning_rate": 5.810386841878512e-08, + "loss": 3.7006, + "step": 7760 + }, + { + "epoch": 0.99, + "learning_rate": 5.5890674837272285e-08, + "loss": 3.9269, + "step": 7761 + }, + { + "epoch": 0.99, + "learning_rate": 5.3720449545768826e-08, + "loss": 3.9267, + "step": 7762 + }, + { + "epoch": 0.99, + "learning_rate": 5.159319291733744e-08, + "loss": 3.6684, + "step": 7763 + }, + { + "epoch": 0.99, + "learning_rate": 4.950890531765784e-08, + "loss": 3.8438, + "step": 7764 + }, + { + "epoch": 0.99, + "learning_rate": 4.746758710511001e-08, + "loss": 3.8451, + "step": 7765 + }, + { + "epoch": 0.99, + "learning_rate": 4.546923863055219e-08, + "loss": 3.7874, + "step": 7766 + }, + { + "epoch": 0.99, + "learning_rate": 4.351386023757064e-08, + "loss": 3.8315, + "step": 7767 + }, + { + "epoch": 0.99, + "learning_rate": 4.1601452262313155e-08, + "loss": 3.771, + "step": 7768 + }, + { + "epoch": 0.99, + "learning_rate": 3.973201503351675e-08, + "loss": 3.9003, + "step": 7769 + }, + { + "epoch": 0.99, + "learning_rate": 3.790554887256326e-08, + "loss": 3.7849, + "step": 7770 + }, + { + "epoch": 0.99, + "learning_rate": 3.612205409347924e-08, + "loss": 3.7542, + "step": 7771 + }, + { + "epoch": 0.99, + "learning_rate": 3.438153100282504e-08, + "loss": 3.8397, + "step": 7772 + }, + { + "epoch": 0.99, + "learning_rate": 3.2683979899833514e-08, + "loss": 3.684, + "step": 7773 + }, + { + "epoch": 1.0, + "learning_rate": 3.1029401076354546e-08, + "loss": 3.6976, + "step": 7774 + }, + { + "epoch": 1.0, + "learning_rate": 2.9417794816799515e-08, + "loss": 3.7305, + "step": 7775 + }, + { + "epoch": 1.0, + "learning_rate": 2.7849161398224575e-08, + "loss": 3.7053, + "step": 7776 + }, + { + "epoch": 1.0, + "learning_rate": 2.632350109033066e-08, + "loss": 3.7377, + "step": 7777 + }, + { + "epoch": 1.0, + "learning_rate": 2.484081415535244e-08, + "loss": 3.7276, + "step": 7778 + }, + { + "epoch": 1.0, + "learning_rate": 2.3401100848197132e-08, + "loss": 3.7678, + "step": 7779 + }, + { + "epoch": 1.0, + "learning_rate": 2.2004361416361195e-08, + "loss": 3.8288, + "step": 7780 + }, + { + "epoch": 1.0, + "learning_rate": 2.0650596099985874e-08, + "loss": 3.7373, + "step": 7781 + }, + { + "epoch": 1.0, + "learning_rate": 1.9339805131773912e-08, + "loss": 3.7388, + "step": 7782 + }, + { + "epoch": 1.0, + "learning_rate": 1.8071988737100585e-08, + "loss": 3.7721, + "step": 7783 + }, + { + "epoch": 1.0, + "learning_rate": 1.6847147133847163e-08, + "loss": 3.74, + "step": 7784 + }, + { + "epoch": 1.0, + "learning_rate": 1.5665280532650705e-08, + "loss": 3.8119, + "step": 7785 + }, + { + "epoch": 1.0, + "learning_rate": 1.4526389136654273e-08, + "loss": 3.823, + "step": 7786 + }, + { + "epoch": 1.0, + "learning_rate": 1.3430473141645694e-08, + "loss": 3.7782, + "step": 7787 + }, + { + "epoch": 1.0, + "learning_rate": 1.2377532736057572e-08, + "loss": 3.8449, + "step": 7788 + }, + { + "epoch": 1.0, + "learning_rate": 1.1367568100856262e-08, + "loss": 3.779, + "step": 7789 + }, + { + "epoch": 1.0, + "learning_rate": 1.0400579409680643e-08, + "loss": 3.7362, + "step": 7790 + }, + { + "epoch": 1.0, + "learning_rate": 9.476566828786615e-09, + "loss": 3.8614, + "step": 7791 + }, + { + "epoch": 1.0, + "learning_rate": 8.595530516991579e-09, + "loss": 3.6482, + "step": 7792 + }, + { + "epoch": 1.0, + "learning_rate": 7.757470625785467e-09, + "loss": 3.7911, + "step": 7793 + }, + { + "epoch": 1.0, + "learning_rate": 6.962387299219719e-09, + "loss": 3.8028, + "step": 7794 + }, + { + "epoch": 1.0, + "learning_rate": 6.210280674018298e-09, + "loss": 3.7795, + "step": 7795 + }, + { + "epoch": 1.0, + "learning_rate": 5.501150879411165e-09, + "loss": 3.9167, + "step": 7796 + }, + { + "epoch": 1.0, + "learning_rate": 4.8349980373563195e-09, + "loss": 3.7024, + "step": 7797 + }, + { + "epoch": 1.0, + "learning_rate": 4.2118222623455105e-09, + "loss": 3.6901, + "step": 7798 + }, + { + "epoch": 1.0, + "learning_rate": 3.6316236615430154e-09, + "loss": 3.743, + "step": 7799 + }, + { + "epoch": 1.0, + "learning_rate": 3.0944023346746175e-09, + "loss": 3.697, + "step": 7800 + }, + { + "epoch": 1.0, + "learning_rate": 2.6001583740553615e-09, + "loss": 3.8118, + "step": 7801 + }, + { + "epoch": 1.0, + "learning_rate": 2.1488918647283307e-09, + "loss": 3.8384, + "step": 7802 + }, + { + "epoch": 1.0, + "learning_rate": 1.7406028842148481e-09, + "loss": 3.8372, + "step": 7803 + }, + { + "epoch": 1.0, + "learning_rate": 1.3752915027087642e-09, + "loss": 3.7894, + "step": 7804 + }, + { + "epoch": 1.0, + "learning_rate": 1.0529577830209468e-09, + "loss": 3.702, + "step": 7805 + }, + { + "epoch": 1.0, + "learning_rate": 7.736017805792805e-10, + "loss": 3.7799, + "step": 7806 + }, + { + "epoch": 1.0, + "learning_rate": 5.37223543400911e-10, + "loss": 3.7676, + "step": 7807 + }, + { + "epoch": 1.0, + "learning_rate": 3.4382311209224526e-10, + "loss": 3.8968, + "step": 7808 + }, + { + "epoch": 1.0, + "learning_rate": 1.9340051995997422e-10, + "loss": 3.6531, + "step": 7809 + }, + { + "epoch": 1.0, + "learning_rate": 8.595579281678311e-11, + "loss": 3.7579, + "step": 7810 + }, + { + "epoch": 1.0, + "learning_rate": 2.1488949120129775e-11, + "loss": 3.7537, + "step": 7811 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 3.8606, + "step": 7812 + }, + { + "epoch": 1.0, + "step": 7812, + "total_flos": 8.618721262174208e+17, + "train_loss": 3.9615137033809225, + "train_runtime": 30945.2367, + "train_samples_per_second": 64.63, + "train_steps_per_second": 0.252 + } + ], + "logging_steps": 1.0, + "max_steps": 7812, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 24000, + "total_flos": 8.618721262174208e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}