{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.800225415610031, "eval_steps": 178, "global_step": 710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-05, "loss": 2.0913, "step": 1 }, { "epoch": 0.0, "eval_loss": 2.086364507675171, "eval_runtime": 70.9467, "eval_samples_per_second": 7.386, "eval_steps_per_second": 1.846, "step": 1 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 1.8593, "step": 2 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 1.9531, "step": 3 }, { "epoch": 0.0, "learning_rate": 8e-05, "loss": 1.497, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 1.371, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.00012, "loss": 0.9074, "step": 6 }, { "epoch": 0.01, "learning_rate": 0.00014, "loss": 0.6526, "step": 7 }, { "epoch": 0.01, "learning_rate": 0.00016, "loss": 0.5406, "step": 8 }, { "epoch": 0.01, "learning_rate": 0.00018, "loss": 0.4498, "step": 9 }, { "epoch": 0.01, "learning_rate": 0.0002, "loss": 0.3852, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.00019999996057660117, "loss": 0.3148, "step": 11 }, { "epoch": 0.01, "learning_rate": 0.0001999998423064356, "loss": 0.294, "step": 12 }, { "epoch": 0.01, "learning_rate": 0.00019999964518959668, "loss": 0.2364, "step": 13 }, { "epoch": 0.02, "learning_rate": 0.00019999936922623977, "loss": 0.263, "step": 14 }, { "epoch": 0.02, "learning_rate": 0.0001999990144165825, "loss": 0.2718, "step": 15 }, { "epoch": 0.02, "learning_rate": 0.00019999858076090457, "loss": 0.2188, "step": 16 }, { "epoch": 0.02, "learning_rate": 0.00019999806825954792, "loss": 0.1855, "step": 17 }, { "epoch": 0.02, "learning_rate": 0.00019999747691291665, "loss": 0.1666, "step": 18 }, { "epoch": 0.02, "learning_rate": 0.000199996806721477, "loss": 0.2055, "step": 19 }, { "epoch": 0.02, "learning_rate": 0.00019999605768575743, "loss": 0.2022, "step": 20 }, { "epoch": 0.02, "learning_rate": 0.0001999952298063485, "loss": 0.1747, "step": 21 }, { "epoch": 0.02, "learning_rate": 0.000199994323083903, "loss": 0.1447, "step": 22 }, { "epoch": 0.03, "learning_rate": 0.0001999933375191358, "loss": 0.1613, "step": 23 }, { "epoch": 0.03, "learning_rate": 0.00019999227311282404, "loss": 0.1555, "step": 24 }, { "epoch": 0.03, "learning_rate": 0.00019999112986580694, "loss": 0.1488, "step": 25 }, { "epoch": 0.03, "learning_rate": 0.00019998990777898593, "loss": 0.1672, "step": 26 }, { "epoch": 0.03, "learning_rate": 0.00019998860685332455, "loss": 0.1647, "step": 27 }, { "epoch": 0.03, "learning_rate": 0.00019998722708984855, "loss": 0.1565, "step": 28 }, { "epoch": 0.03, "learning_rate": 0.00019998576848964588, "loss": 0.1471, "step": 29 }, { "epoch": 0.03, "learning_rate": 0.00019998423105386655, "loss": 0.1508, "step": 30 }, { "epoch": 0.03, "learning_rate": 0.0001999826147837228, "loss": 0.1482, "step": 31 }, { "epoch": 0.04, "learning_rate": 0.00019998091968048898, "loss": 0.1314, "step": 32 }, { "epoch": 0.04, "learning_rate": 0.00019997914574550166, "loss": 0.1445, "step": 33 }, { "epoch": 0.04, "learning_rate": 0.0001999772929801595, "loss": 0.1411, "step": 34 }, { "epoch": 0.04, "learning_rate": 0.00019997536138592335, "loss": 0.1338, "step": 35 }, { "epoch": 0.04, "learning_rate": 0.00019997335096431625, "loss": 0.1447, "step": 36 }, { "epoch": 0.04, "learning_rate": 0.00019997126171692332, "loss": 0.127, "step": 37 }, { "epoch": 0.04, "learning_rate": 0.00019996909364539185, "loss": 0.1258, "step": 38 }, { "epoch": 0.04, "learning_rate": 0.0001999668467514313, "loss": 0.1446, "step": 39 }, { "epoch": 0.05, "learning_rate": 0.00019996452103681334, "loss": 0.1319, "step": 40 }, { "epoch": 0.05, "learning_rate": 0.00019996211650337163, "loss": 0.1254, "step": 41 }, { "epoch": 0.05, "learning_rate": 0.00019995963315300213, "loss": 0.1415, "step": 42 }, { "epoch": 0.05, "learning_rate": 0.0001999570709876628, "loss": 0.1248, "step": 43 }, { "epoch": 0.05, "learning_rate": 0.00019995443000937394, "loss": 0.1091, "step": 44 }, { "epoch": 0.05, "learning_rate": 0.00019995171022021784, "loss": 0.1261, "step": 45 }, { "epoch": 0.05, "learning_rate": 0.00019994891162233887, "loss": 0.1454, "step": 46 }, { "epoch": 0.05, "learning_rate": 0.00019994603421794377, "loss": 0.1234, "step": 47 }, { "epoch": 0.05, "learning_rate": 0.00019994307800930123, "loss": 0.128, "step": 48 }, { "epoch": 0.06, "learning_rate": 0.00019994004299874209, "loss": 0.134, "step": 49 }, { "epoch": 0.06, "learning_rate": 0.0001999369291886594, "loss": 0.1134, "step": 50 }, { "epoch": 0.06, "learning_rate": 0.00019993373658150828, "loss": 0.1282, "step": 51 }, { "epoch": 0.06, "learning_rate": 0.00019993046517980602, "loss": 0.1212, "step": 52 }, { "epoch": 0.06, "learning_rate": 0.000199927114986132, "loss": 0.1017, "step": 53 }, { "epoch": 0.06, "learning_rate": 0.00019992368600312772, "loss": 0.1125, "step": 54 }, { "epoch": 0.06, "learning_rate": 0.00019992017823349686, "loss": 0.0959, "step": 55 }, { "epoch": 0.06, "learning_rate": 0.00019991659168000516, "loss": 0.138, "step": 56 }, { "epoch": 0.06, "learning_rate": 0.00019991292634548053, "loss": 0.1299, "step": 57 }, { "epoch": 0.07, "learning_rate": 0.00019990918223281295, "loss": 0.1105, "step": 58 }, { "epoch": 0.07, "learning_rate": 0.0001999053593449545, "loss": 0.1405, "step": 59 }, { "epoch": 0.07, "learning_rate": 0.00019990145768491947, "loss": 0.1021, "step": 60 }, { "epoch": 0.07, "learning_rate": 0.00019989747725578417, "loss": 0.1204, "step": 61 }, { "epoch": 0.07, "learning_rate": 0.000199893418060687, "loss": 0.131, "step": 62 }, { "epoch": 0.07, "learning_rate": 0.00019988928010282857, "loss": 0.1047, "step": 63 }, { "epoch": 0.07, "learning_rate": 0.00019988506338547147, "loss": 0.1332, "step": 64 }, { "epoch": 0.07, "learning_rate": 0.00019988076791194051, "loss": 0.1341, "step": 65 }, { "epoch": 0.07, "learning_rate": 0.00019987639368562244, "loss": 0.1104, "step": 66 }, { "epoch": 0.08, "learning_rate": 0.00019987194070996632, "loss": 0.1162, "step": 67 }, { "epoch": 0.08, "learning_rate": 0.00019986740898848306, "loss": 0.1267, "step": 68 }, { "epoch": 0.08, "learning_rate": 0.00019986279852474584, "loss": 0.1259, "step": 69 }, { "epoch": 0.08, "learning_rate": 0.00019985810932238987, "loss": 0.1146, "step": 70 }, { "epoch": 0.08, "learning_rate": 0.00019985334138511237, "loss": 0.099, "step": 71 }, { "epoch": 0.08, "learning_rate": 0.00019984849471667282, "loss": 0.1424, "step": 72 }, { "epoch": 0.08, "learning_rate": 0.00019984356932089257, "loss": 0.115, "step": 73 }, { "epoch": 0.08, "learning_rate": 0.00019983856520165512, "loss": 0.1114, "step": 74 }, { "epoch": 0.08, "learning_rate": 0.00019983348236290616, "loss": 0.1192, "step": 75 }, { "epoch": 0.09, "learning_rate": 0.00019982832080865328, "loss": 0.1216, "step": 76 }, { "epoch": 0.09, "learning_rate": 0.0001998230805429662, "loss": 0.1095, "step": 77 }, { "epoch": 0.09, "learning_rate": 0.00019981776156997668, "loss": 0.0877, "step": 78 }, { "epoch": 0.09, "learning_rate": 0.0001998123638938786, "loss": 0.12, "step": 79 }, { "epoch": 0.09, "learning_rate": 0.0001998068875189279, "loss": 0.1013, "step": 80 }, { "epoch": 0.09, "learning_rate": 0.00019980133244944243, "loss": 0.1093, "step": 81 }, { "epoch": 0.09, "learning_rate": 0.0001997956986898022, "loss": 0.112, "step": 82 }, { "epoch": 0.09, "learning_rate": 0.00019978998624444927, "loss": 0.0877, "step": 83 }, { "epoch": 0.09, "learning_rate": 0.00019978419511788777, "loss": 0.1113, "step": 84 }, { "epoch": 0.1, "learning_rate": 0.00019977832531468376, "loss": 0.1091, "step": 85 }, { "epoch": 0.1, "learning_rate": 0.00019977237683946536, "loss": 0.1018, "step": 86 }, { "epoch": 0.1, "learning_rate": 0.00019976634969692282, "loss": 0.1021, "step": 87 }, { "epoch": 0.1, "learning_rate": 0.00019976024389180833, "loss": 0.1225, "step": 88 }, { "epoch": 0.1, "learning_rate": 0.00019975405942893612, "loss": 0.1249, "step": 89 }, { "epoch": 0.1, "learning_rate": 0.00019974779631318242, "loss": 0.1151, "step": 90 }, { "epoch": 0.1, "learning_rate": 0.00019974145454948553, "loss": 0.0959, "step": 91 }, { "epoch": 0.1, "learning_rate": 0.0001997350341428457, "loss": 0.1044, "step": 92 }, { "epoch": 0.1, "learning_rate": 0.00019972853509832525, "loss": 0.1159, "step": 93 }, { "epoch": 0.11, "learning_rate": 0.00019972195742104846, "loss": 0.1115, "step": 94 }, { "epoch": 0.11, "learning_rate": 0.00019971530111620155, "loss": 0.1073, "step": 95 }, { "epoch": 0.11, "learning_rate": 0.00019970856618903285, "loss": 0.1157, "step": 96 }, { "epoch": 0.11, "learning_rate": 0.00019970175264485266, "loss": 0.1047, "step": 97 }, { "epoch": 0.11, "learning_rate": 0.00019969486048903323, "loss": 0.1005, "step": 98 }, { "epoch": 0.11, "learning_rate": 0.0001996878897270088, "loss": 0.1104, "step": 99 }, { "epoch": 0.11, "learning_rate": 0.0001996808403642756, "loss": 0.1195, "step": 100 }, { "epoch": 0.11, "learning_rate": 0.00019967371240639178, "loss": 0.0993, "step": 101 }, { "epoch": 0.11, "learning_rate": 0.00019966650585897755, "loss": 0.1076, "step": 102 }, { "epoch": 0.12, "learning_rate": 0.00019965922072771503, "loss": 0.1244, "step": 103 }, { "epoch": 0.12, "learning_rate": 0.0001996518570183483, "loss": 0.1237, "step": 104 }, { "epoch": 0.12, "learning_rate": 0.00019964441473668346, "loss": 0.1069, "step": 105 }, { "epoch": 0.12, "learning_rate": 0.00019963689388858847, "loss": 0.1278, "step": 106 }, { "epoch": 0.12, "learning_rate": 0.0001996292944799933, "loss": 0.1058, "step": 107 }, { "epoch": 0.12, "learning_rate": 0.0001996216165168898, "loss": 0.1296, "step": 108 }, { "epoch": 0.12, "learning_rate": 0.00019961386000533183, "loss": 0.1021, "step": 109 }, { "epoch": 0.12, "learning_rate": 0.00019960602495143514, "loss": 0.1018, "step": 110 }, { "epoch": 0.13, "learning_rate": 0.00019959811136137742, "loss": 0.1041, "step": 111 }, { "epoch": 0.13, "learning_rate": 0.00019959011924139828, "loss": 0.098, "step": 112 }, { "epoch": 0.13, "learning_rate": 0.00019958204859779925, "loss": 0.1016, "step": 113 }, { "epoch": 0.13, "learning_rate": 0.00019957389943694377, "loss": 0.1036, "step": 114 }, { "epoch": 0.13, "learning_rate": 0.00019956567176525724, "loss": 0.1001, "step": 115 }, { "epoch": 0.13, "learning_rate": 0.00019955736558922683, "loss": 0.1144, "step": 116 }, { "epoch": 0.13, "learning_rate": 0.0001995489809154018, "loss": 0.1071, "step": 117 }, { "epoch": 0.13, "learning_rate": 0.0001995405177503931, "loss": 0.1013, "step": 118 }, { "epoch": 0.13, "learning_rate": 0.0001995319761008737, "loss": 0.101, "step": 119 }, { "epoch": 0.14, "learning_rate": 0.00019952335597357842, "loss": 0.1129, "step": 120 }, { "epoch": 0.14, "learning_rate": 0.00019951465737530396, "loss": 0.0942, "step": 121 }, { "epoch": 0.14, "learning_rate": 0.00019950588031290888, "loss": 0.1028, "step": 122 }, { "epoch": 0.14, "learning_rate": 0.00019949702479331362, "loss": 0.0941, "step": 123 }, { "epoch": 0.14, "learning_rate": 0.0001994880908235005, "loss": 0.1053, "step": 124 }, { "epoch": 0.14, "learning_rate": 0.00019947907841051356, "loss": 0.0921, "step": 125 }, { "epoch": 0.14, "learning_rate": 0.0001994699875614589, "loss": 0.0998, "step": 126 }, { "epoch": 0.14, "learning_rate": 0.00019946081828350437, "loss": 0.1009, "step": 127 }, { "epoch": 0.14, "learning_rate": 0.00019945157058387958, "loss": 0.1124, "step": 128 }, { "epoch": 0.15, "learning_rate": 0.00019944224446987612, "loss": 0.1214, "step": 129 }, { "epoch": 0.15, "learning_rate": 0.00019943283994884728, "loss": 0.1042, "step": 130 }, { "epoch": 0.15, "learning_rate": 0.00019942335702820824, "loss": 0.0921, "step": 131 }, { "epoch": 0.15, "learning_rate": 0.00019941379571543596, "loss": 0.1032, "step": 132 }, { "epoch": 0.15, "learning_rate": 0.0001994041560180693, "loss": 0.1243, "step": 133 }, { "epoch": 0.15, "learning_rate": 0.00019939443794370876, "loss": 0.0999, "step": 134 }, { "epoch": 0.15, "learning_rate": 0.00019938464150001678, "loss": 0.0953, "step": 135 }, { "epoch": 0.15, "learning_rate": 0.00019937476669471753, "loss": 0.1074, "step": 136 }, { "epoch": 0.15, "learning_rate": 0.00019936481353559699, "loss": 0.0931, "step": 137 }, { "epoch": 0.16, "learning_rate": 0.00019935478203050288, "loss": 0.1016, "step": 138 }, { "epoch": 0.16, "learning_rate": 0.00019934467218734477, "loss": 0.1032, "step": 139 }, { "epoch": 0.16, "learning_rate": 0.00019933448401409393, "loss": 0.1125, "step": 140 }, { "epoch": 0.16, "learning_rate": 0.00019932421751878336, "loss": 0.1009, "step": 141 }, { "epoch": 0.16, "learning_rate": 0.0001993138727095079, "loss": 0.0957, "step": 142 }, { "epoch": 0.16, "learning_rate": 0.0001993034495944241, "loss": 0.1153, "step": 143 }, { "epoch": 0.16, "learning_rate": 0.00019929294818175027, "loss": 0.1029, "step": 144 }, { "epoch": 0.16, "learning_rate": 0.0001992823684797664, "loss": 0.0956, "step": 145 }, { "epoch": 0.16, "learning_rate": 0.00019927171049681427, "loss": 0.1208, "step": 146 }, { "epoch": 0.17, "learning_rate": 0.00019926097424129738, "loss": 0.0972, "step": 147 }, { "epoch": 0.17, "learning_rate": 0.0001992501597216809, "loss": 0.0949, "step": 148 }, { "epoch": 0.17, "learning_rate": 0.00019923926694649167, "loss": 0.105, "step": 149 }, { "epoch": 0.17, "learning_rate": 0.00019922829592431839, "loss": 0.102, "step": 150 }, { "epoch": 0.17, "learning_rate": 0.00019921724666381132, "loss": 0.0956, "step": 151 }, { "epoch": 0.17, "learning_rate": 0.00019920611917368248, "loss": 0.092, "step": 152 }, { "epoch": 0.17, "learning_rate": 0.0001991949134627055, "loss": 0.1078, "step": 153 }, { "epoch": 0.17, "learning_rate": 0.00019918362953971573, "loss": 0.0964, "step": 154 }, { "epoch": 0.17, "learning_rate": 0.00019917226741361015, "loss": 0.1044, "step": 155 }, { "epoch": 0.18, "learning_rate": 0.00019916082709334752, "loss": 0.1089, "step": 156 }, { "epoch": 0.18, "learning_rate": 0.00019914930858794808, "loss": 0.0999, "step": 157 }, { "epoch": 0.18, "learning_rate": 0.00019913771190649388, "loss": 0.1075, "step": 158 }, { "epoch": 0.18, "learning_rate": 0.00019912603705812847, "loss": 0.1032, "step": 159 }, { "epoch": 0.18, "learning_rate": 0.0001991142840520571, "loss": 0.1141, "step": 160 }, { "epoch": 0.18, "learning_rate": 0.0001991024528975467, "loss": 0.103, "step": 161 }, { "epoch": 0.18, "learning_rate": 0.00019909054360392567, "loss": 0.1116, "step": 162 }, { "epoch": 0.18, "learning_rate": 0.00019907855618058414, "loss": 0.0906, "step": 163 }, { "epoch": 0.18, "learning_rate": 0.00019906649063697386, "loss": 0.1002, "step": 164 }, { "epoch": 0.19, "learning_rate": 0.00019905434698260806, "loss": 0.1024, "step": 165 }, { "epoch": 0.19, "learning_rate": 0.00019904212522706165, "loss": 0.0892, "step": 166 }, { "epoch": 0.19, "learning_rate": 0.00019902982537997108, "loss": 0.1029, "step": 167 }, { "epoch": 0.19, "learning_rate": 0.00019901744745103438, "loss": 0.0924, "step": 168 }, { "epoch": 0.19, "learning_rate": 0.00019900499145001117, "loss": 0.0965, "step": 169 }, { "epoch": 0.19, "learning_rate": 0.00019899245738672262, "loss": 0.0785, "step": 170 }, { "epoch": 0.19, "learning_rate": 0.0001989798452710514, "loss": 0.0897, "step": 171 }, { "epoch": 0.19, "learning_rate": 0.00019896715511294176, "loss": 0.1162, "step": 172 }, { "epoch": 0.19, "learning_rate": 0.00019895438692239955, "loss": 0.1108, "step": 173 }, { "epoch": 0.2, "learning_rate": 0.000198941540709492, "loss": 0.1195, "step": 174 }, { "epoch": 0.2, "learning_rate": 0.00019892861648434796, "loss": 0.0956, "step": 175 }, { "epoch": 0.2, "learning_rate": 0.0001989156142571578, "loss": 0.0976, "step": 176 }, { "epoch": 0.2, "learning_rate": 0.00019890253403817334, "loss": 0.1009, "step": 177 }, { "epoch": 0.2, "learning_rate": 0.0001988893758377079, "loss": 0.0992, "step": 178 }, { "epoch": 0.2, "eval_loss": 0.10378821939229965, "eval_runtime": 71.6357, "eval_samples_per_second": 7.315, "eval_steps_per_second": 1.829, "step": 178 }, { "epoch": 0.2, "learning_rate": 0.0001988761396661363, "loss": 0.1052, "step": 179 }, { "epoch": 0.2, "learning_rate": 0.00019886282553389486, "loss": 0.096, "step": 180 }, { "epoch": 0.2, "learning_rate": 0.0001988494334514813, "loss": 0.1077, "step": 181 }, { "epoch": 0.21, "learning_rate": 0.0001988359634294549, "loss": 0.1049, "step": 182 }, { "epoch": 0.21, "learning_rate": 0.00019882241547843635, "loss": 0.115, "step": 183 }, { "epoch": 0.21, "learning_rate": 0.00019880878960910772, "loss": 0.0944, "step": 184 }, { "epoch": 0.21, "learning_rate": 0.0001987950858322126, "loss": 0.1106, "step": 185 }, { "epoch": 0.21, "learning_rate": 0.00019878130415855601, "loss": 0.1091, "step": 186 }, { "epoch": 0.21, "learning_rate": 0.0001987674445990043, "loss": 0.0884, "step": 187 }, { "epoch": 0.21, "learning_rate": 0.00019875350716448532, "loss": 0.0992, "step": 188 }, { "epoch": 0.21, "learning_rate": 0.00019873949186598828, "loss": 0.1075, "step": 189 }, { "epoch": 0.21, "learning_rate": 0.00019872539871456383, "loss": 0.1061, "step": 190 }, { "epoch": 0.22, "learning_rate": 0.00019871122772132388, "loss": 0.1174, "step": 191 }, { "epoch": 0.22, "learning_rate": 0.00019869697889744192, "loss": 0.1215, "step": 192 }, { "epoch": 0.22, "learning_rate": 0.00019868265225415265, "loss": 0.1092, "step": 193 }, { "epoch": 0.22, "learning_rate": 0.00019866824780275208, "loss": 0.1141, "step": 194 }, { "epoch": 0.22, "learning_rate": 0.0001986537655545978, "loss": 0.0987, "step": 195 }, { "epoch": 0.22, "learning_rate": 0.0001986392055211085, "loss": 0.1023, "step": 196 }, { "epoch": 0.22, "learning_rate": 0.00019862456771376437, "loss": 0.0952, "step": 197 }, { "epoch": 0.22, "learning_rate": 0.00019860985214410678, "loss": 0.1053, "step": 198 }, { "epoch": 0.22, "learning_rate": 0.00019859505882373853, "loss": 0.083, "step": 199 }, { "epoch": 0.23, "learning_rate": 0.00019858018776432366, "loss": 0.1012, "step": 200 }, { "epoch": 0.23, "learning_rate": 0.00019856523897758755, "loss": 0.1132, "step": 201 }, { "epoch": 0.23, "learning_rate": 0.00019855021247531683, "loss": 0.1091, "step": 202 }, { "epoch": 0.23, "learning_rate": 0.00019853510826935936, "loss": 0.1017, "step": 203 }, { "epoch": 0.23, "learning_rate": 0.00019851992637162443, "loss": 0.0791, "step": 204 }, { "epoch": 0.23, "learning_rate": 0.0001985046667940824, "loss": 0.0922, "step": 205 }, { "epoch": 0.23, "learning_rate": 0.00019848932954876498, "loss": 0.1002, "step": 206 }, { "epoch": 0.23, "learning_rate": 0.0001984739146477651, "loss": 0.1047, "step": 207 }, { "epoch": 0.23, "learning_rate": 0.0001984584221032369, "loss": 0.0827, "step": 208 }, { "epoch": 0.24, "learning_rate": 0.0001984428519273958, "loss": 0.109, "step": 209 }, { "epoch": 0.24, "learning_rate": 0.0001984272041325183, "loss": 0.1173, "step": 210 }, { "epoch": 0.24, "learning_rate": 0.0001984114787309423, "loss": 0.1018, "step": 211 }, { "epoch": 0.24, "learning_rate": 0.00019839567573506667, "loss": 0.1004, "step": 212 }, { "epoch": 0.24, "learning_rate": 0.00019837979515735166, "loss": 0.0815, "step": 213 }, { "epoch": 0.24, "learning_rate": 0.00019836383701031852, "loss": 0.1091, "step": 214 }, { "epoch": 0.24, "learning_rate": 0.00019834780130654976, "loss": 0.1019, "step": 215 }, { "epoch": 0.24, "learning_rate": 0.000198331688058689, "loss": 0.0967, "step": 216 }, { "epoch": 0.24, "learning_rate": 0.00019831549727944108, "loss": 0.0992, "step": 217 }, { "epoch": 0.25, "learning_rate": 0.00019829922898157188, "loss": 0.1007, "step": 218 }, { "epoch": 0.25, "learning_rate": 0.0001982828831779084, "loss": 0.1103, "step": 219 }, { "epoch": 0.25, "learning_rate": 0.00019826645988133882, "loss": 0.1117, "step": 220 }, { "epoch": 0.25, "learning_rate": 0.00019824995910481237, "loss": 0.1013, "step": 221 }, { "epoch": 0.25, "learning_rate": 0.0001982333808613394, "loss": 0.0922, "step": 222 }, { "epoch": 0.25, "learning_rate": 0.0001982167251639913, "loss": 0.0933, "step": 223 }, { "epoch": 0.25, "learning_rate": 0.00019819999202590058, "loss": 0.0991, "step": 224 }, { "epoch": 0.25, "learning_rate": 0.00019818318146026077, "loss": 0.1012, "step": 225 }, { "epoch": 0.25, "learning_rate": 0.00019816629348032644, "loss": 0.1109, "step": 226 }, { "epoch": 0.26, "learning_rate": 0.00019814932809941326, "loss": 0.0801, "step": 227 }, { "epoch": 0.26, "learning_rate": 0.00019813228533089786, "loss": 0.0932, "step": 228 }, { "epoch": 0.26, "learning_rate": 0.00019811516518821796, "loss": 0.0994, "step": 229 }, { "epoch": 0.26, "learning_rate": 0.00019809796768487217, "loss": 0.1059, "step": 230 }, { "epoch": 0.26, "learning_rate": 0.00019808069283442023, "loss": 0.0955, "step": 231 }, { "epoch": 0.26, "learning_rate": 0.00019806334065048282, "loss": 0.1049, "step": 232 }, { "epoch": 0.26, "learning_rate": 0.00019804591114674152, "loss": 0.1112, "step": 233 }, { "epoch": 0.26, "learning_rate": 0.00019802840433693895, "loss": 0.1001, "step": 234 }, { "epoch": 0.26, "learning_rate": 0.00019801082023487873, "loss": 0.1045, "step": 235 }, { "epoch": 0.27, "learning_rate": 0.00019799315885442531, "loss": 0.104, "step": 236 }, { "epoch": 0.27, "learning_rate": 0.00019797542020950413, "loss": 0.0907, "step": 237 }, { "epoch": 0.27, "learning_rate": 0.00019795760431410154, "loss": 0.1032, "step": 238 }, { "epoch": 0.27, "learning_rate": 0.00019793971118226483, "loss": 0.0952, "step": 239 }, { "epoch": 0.27, "learning_rate": 0.0001979217408281021, "loss": 0.1036, "step": 240 }, { "epoch": 0.27, "learning_rate": 0.00019790369326578243, "loss": 0.1087, "step": 241 }, { "epoch": 0.27, "learning_rate": 0.0001978855685095358, "loss": 0.091, "step": 242 }, { "epoch": 0.27, "learning_rate": 0.00019786736657365292, "loss": 0.0881, "step": 243 }, { "epoch": 0.28, "learning_rate": 0.00019784908747248547, "loss": 0.1187, "step": 244 }, { "epoch": 0.28, "learning_rate": 0.00019783073122044595, "loss": 0.1191, "step": 245 }, { "epoch": 0.28, "learning_rate": 0.00019781229783200766, "loss": 0.1029, "step": 246 }, { "epoch": 0.28, "learning_rate": 0.0001977937873217047, "loss": 0.1013, "step": 247 }, { "epoch": 0.28, "learning_rate": 0.0001977751997041321, "loss": 0.1031, "step": 248 }, { "epoch": 0.28, "learning_rate": 0.00019775653499394552, "loss": 0.0953, "step": 249 }, { "epoch": 0.28, "learning_rate": 0.00019773779320586153, "loss": 0.1028, "step": 250 }, { "epoch": 0.28, "learning_rate": 0.00019771897435465742, "loss": 0.1004, "step": 251 }, { "epoch": 0.28, "learning_rate": 0.00019770007845517124, "loss": 0.0998, "step": 252 }, { "epoch": 0.29, "learning_rate": 0.0001976811055223018, "loss": 0.1056, "step": 253 }, { "epoch": 0.29, "learning_rate": 0.00019766205557100868, "loss": 0.1111, "step": 254 }, { "epoch": 0.29, "learning_rate": 0.00019764292861631212, "loss": 0.0967, "step": 255 }, { "epoch": 0.29, "learning_rate": 0.00019762372467329315, "loss": 0.0989, "step": 256 }, { "epoch": 0.29, "learning_rate": 0.00019760444375709345, "loss": 0.1048, "step": 257 }, { "epoch": 0.29, "learning_rate": 0.00019758508588291535, "loss": 0.0932, "step": 258 }, { "epoch": 0.29, "learning_rate": 0.000197565651066022, "loss": 0.0996, "step": 259 }, { "epoch": 0.29, "learning_rate": 0.00019754613932173709, "loss": 0.0898, "step": 260 }, { "epoch": 0.29, "learning_rate": 0.000197526550665445, "loss": 0.1129, "step": 261 }, { "epoch": 0.3, "learning_rate": 0.0001975068851125908, "loss": 0.0919, "step": 262 }, { "epoch": 0.3, "learning_rate": 0.00019748714267868006, "loss": 0.0943, "step": 263 }, { "epoch": 0.3, "learning_rate": 0.00019746732337927916, "loss": 0.1023, "step": 264 }, { "epoch": 0.3, "learning_rate": 0.0001974474272300149, "loss": 0.1056, "step": 265 }, { "epoch": 0.3, "learning_rate": 0.00019742745424657478, "loss": 0.1017, "step": 266 }, { "epoch": 0.3, "learning_rate": 0.00019740740444470692, "loss": 0.0954, "step": 267 }, { "epoch": 0.3, "learning_rate": 0.00019738727784021984, "loss": 0.102, "step": 268 }, { "epoch": 0.3, "learning_rate": 0.00019736707444898284, "loss": 0.0854, "step": 269 }, { "epoch": 0.3, "learning_rate": 0.00019734679428692556, "loss": 0.0969, "step": 270 }, { "epoch": 0.31, "learning_rate": 0.00019732643737003827, "loss": 0.0991, "step": 271 }, { "epoch": 0.31, "learning_rate": 0.00019730600371437176, "loss": 0.1092, "step": 272 }, { "epoch": 0.31, "learning_rate": 0.0001972854933360373, "loss": 0.0971, "step": 273 }, { "epoch": 0.31, "learning_rate": 0.00019726490625120673, "loss": 0.0877, "step": 274 }, { "epoch": 0.31, "learning_rate": 0.00019724424247611225, "loss": 0.0904, "step": 275 }, { "epoch": 0.31, "learning_rate": 0.00019722350202704657, "loss": 0.1058, "step": 276 }, { "epoch": 0.31, "learning_rate": 0.00019720268492036288, "loss": 0.0898, "step": 277 }, { "epoch": 0.31, "learning_rate": 0.00019718179117247483, "loss": 0.1093, "step": 278 }, { "epoch": 0.31, "learning_rate": 0.00019716082079985648, "loss": 0.0985, "step": 279 }, { "epoch": 0.32, "learning_rate": 0.00019713977381904227, "loss": 0.0889, "step": 280 }, { "epoch": 0.32, "learning_rate": 0.00019711865024662706, "loss": 0.0889, "step": 281 }, { "epoch": 0.32, "learning_rate": 0.00019709745009926613, "loss": 0.0953, "step": 282 }, { "epoch": 0.32, "learning_rate": 0.0001970761733936751, "loss": 0.102, "step": 283 }, { "epoch": 0.32, "learning_rate": 0.00019705482014662997, "loss": 0.1006, "step": 284 }, { "epoch": 0.32, "learning_rate": 0.00019703339037496713, "loss": 0.0914, "step": 285 }, { "epoch": 0.32, "learning_rate": 0.00019701188409558326, "loss": 0.0996, "step": 286 }, { "epoch": 0.32, "learning_rate": 0.00019699030132543533, "loss": 0.1041, "step": 287 }, { "epoch": 0.32, "learning_rate": 0.0001969686420815407, "loss": 0.1136, "step": 288 }, { "epoch": 0.33, "learning_rate": 0.00019694690638097698, "loss": 0.0929, "step": 289 }, { "epoch": 0.33, "learning_rate": 0.00019692509424088203, "loss": 0.1037, "step": 290 }, { "epoch": 0.33, "learning_rate": 0.0001969032056784541, "loss": 0.1002, "step": 291 }, { "epoch": 0.33, "learning_rate": 0.0001968812407109516, "loss": 0.0955, "step": 292 }, { "epoch": 0.33, "learning_rate": 0.0001968591993556932, "loss": 0.0979, "step": 293 }, { "epoch": 0.33, "learning_rate": 0.00019683708163005777, "loss": 0.0935, "step": 294 }, { "epoch": 0.33, "learning_rate": 0.00019681488755148445, "loss": 0.0955, "step": 295 }, { "epoch": 0.33, "learning_rate": 0.00019679261713747255, "loss": 0.0885, "step": 296 }, { "epoch": 0.33, "learning_rate": 0.0001967702704055816, "loss": 0.0804, "step": 297 }, { "epoch": 0.34, "learning_rate": 0.00019674784737343132, "loss": 0.0994, "step": 298 }, { "epoch": 0.34, "learning_rate": 0.00019672534805870143, "loss": 0.084, "step": 299 }, { "epoch": 0.34, "learning_rate": 0.00019670277247913205, "loss": 0.1002, "step": 300 }, { "epoch": 0.34, "learning_rate": 0.00019668012065252323, "loss": 0.0861, "step": 301 }, { "epoch": 0.34, "learning_rate": 0.00019665739259673522, "loss": 0.0948, "step": 302 }, { "epoch": 0.34, "learning_rate": 0.00019663458832968838, "loss": 0.1003, "step": 303 }, { "epoch": 0.34, "learning_rate": 0.00019661170786936313, "loss": 0.0839, "step": 304 }, { "epoch": 0.34, "learning_rate": 0.00019658875123379998, "loss": 0.0916, "step": 305 }, { "epoch": 0.34, "learning_rate": 0.00019656571844109953, "loss": 0.096, "step": 306 }, { "epoch": 0.35, "learning_rate": 0.00019654260950942236, "loss": 0.0946, "step": 307 }, { "epoch": 0.35, "learning_rate": 0.00019651942445698915, "loss": 0.0902, "step": 308 }, { "epoch": 0.35, "learning_rate": 0.00019649616330208054, "loss": 0.1117, "step": 309 }, { "epoch": 0.35, "learning_rate": 0.00019647282606303723, "loss": 0.0988, "step": 310 }, { "epoch": 0.35, "learning_rate": 0.0001964494127582599, "loss": 0.0946, "step": 311 }, { "epoch": 0.35, "learning_rate": 0.00019642592340620915, "loss": 0.0852, "step": 312 }, { "epoch": 0.35, "learning_rate": 0.00019640235802540564, "loss": 0.0939, "step": 313 }, { "epoch": 0.35, "learning_rate": 0.00019637871663442984, "loss": 0.0962, "step": 314 }, { "epoch": 0.36, "learning_rate": 0.0001963549992519223, "loss": 0.0841, "step": 315 }, { "epoch": 0.36, "learning_rate": 0.0001963312058965834, "loss": 0.0987, "step": 316 }, { "epoch": 0.36, "learning_rate": 0.0001963073365871734, "loss": 0.1084, "step": 317 }, { "epoch": 0.36, "learning_rate": 0.00019628339134251252, "loss": 0.1001, "step": 318 }, { "epoch": 0.36, "learning_rate": 0.00019625937018148085, "loss": 0.0935, "step": 319 }, { "epoch": 0.36, "learning_rate": 0.00019623527312301824, "loss": 0.0965, "step": 320 }, { "epoch": 0.36, "learning_rate": 0.0001962111001861245, "loss": 0.0968, "step": 321 }, { "epoch": 0.36, "learning_rate": 0.00019618685138985913, "loss": 0.1054, "step": 322 }, { "epoch": 0.36, "learning_rate": 0.00019616252675334165, "loss": 0.0902, "step": 323 }, { "epoch": 0.37, "learning_rate": 0.0001961381262957512, "loss": 0.0956, "step": 324 }, { "epoch": 0.37, "learning_rate": 0.00019611365003632675, "loss": 0.1103, "step": 325 }, { "epoch": 0.37, "learning_rate": 0.00019608909799436706, "loss": 0.1136, "step": 326 }, { "epoch": 0.37, "learning_rate": 0.00019606447018923062, "loss": 0.1059, "step": 327 }, { "epoch": 0.37, "learning_rate": 0.00019603976664033567, "loss": 0.093, "step": 328 }, { "epoch": 0.37, "learning_rate": 0.00019601498736716017, "loss": 0.1275, "step": 329 }, { "epoch": 0.37, "learning_rate": 0.00019599013238924182, "loss": 0.095, "step": 330 }, { "epoch": 0.37, "learning_rate": 0.0001959652017261779, "loss": 0.0972, "step": 331 }, { "epoch": 0.37, "learning_rate": 0.00019594019539762546, "loss": 0.0954, "step": 332 }, { "epoch": 0.38, "learning_rate": 0.00019591511342330125, "loss": 0.0948, "step": 333 }, { "epoch": 0.38, "learning_rate": 0.00019588995582298153, "loss": 0.0992, "step": 334 }, { "epoch": 0.38, "learning_rate": 0.0001958647226165023, "loss": 0.0989, "step": 335 }, { "epoch": 0.38, "learning_rate": 0.00019583941382375915, "loss": 0.0943, "step": 336 }, { "epoch": 0.38, "learning_rate": 0.0001958140294647072, "loss": 0.0964, "step": 337 }, { "epoch": 0.38, "learning_rate": 0.00019578856955936123, "loss": 0.0943, "step": 338 }, { "epoch": 0.38, "learning_rate": 0.00019576303412779556, "loss": 0.1042, "step": 339 }, { "epoch": 0.38, "learning_rate": 0.0001957374231901441, "loss": 0.0912, "step": 340 }, { "epoch": 0.38, "learning_rate": 0.0001957117367666002, "loss": 0.1139, "step": 341 }, { "epoch": 0.39, "learning_rate": 0.00019568597487741677, "loss": 0.1012, "step": 342 }, { "epoch": 0.39, "learning_rate": 0.0001956601375429063, "loss": 0.0825, "step": 343 }, { "epoch": 0.39, "learning_rate": 0.00019563422478344067, "loss": 0.093, "step": 344 }, { "epoch": 0.39, "learning_rate": 0.00019560823661945125, "loss": 0.0831, "step": 345 }, { "epoch": 0.39, "learning_rate": 0.00019558217307142885, "loss": 0.0976, "step": 346 }, { "epoch": 0.39, "learning_rate": 0.0001955560341599238, "loss": 0.0986, "step": 347 }, { "epoch": 0.39, "learning_rate": 0.0001955298199055458, "loss": 0.1019, "step": 348 }, { "epoch": 0.39, "learning_rate": 0.00019550353032896388, "loss": 0.0949, "step": 349 }, { "epoch": 0.39, "learning_rate": 0.00019547716545090658, "loss": 0.1027, "step": 350 }, { "epoch": 0.4, "learning_rate": 0.00019545072529216176, "loss": 0.1066, "step": 351 }, { "epoch": 0.4, "learning_rate": 0.0001954242098735766, "loss": 0.0923, "step": 352 }, { "epoch": 0.4, "learning_rate": 0.00019539761921605773, "loss": 0.1062, "step": 353 }, { "epoch": 0.4, "learning_rate": 0.00019537095334057095, "loss": 0.0958, "step": 354 }, { "epoch": 0.4, "learning_rate": 0.00019534421226814153, "loss": 0.0855, "step": 355 }, { "epoch": 0.4, "learning_rate": 0.00019531739601985387, "loss": 0.0923, "step": 356 }, { "epoch": 0.4, "eval_loss": 0.09572399407625198, "eval_runtime": 71.9205, "eval_samples_per_second": 7.286, "eval_steps_per_second": 1.821, "step": 356 }, { "epoch": 0.4, "learning_rate": 0.00019529050461685179, "loss": 0.0843, "step": 357 }, { "epoch": 0.4, "learning_rate": 0.00019526353808033825, "loss": 0.1011, "step": 358 }, { "epoch": 0.4, "learning_rate": 0.00019523649643157556, "loss": 0.0999, "step": 359 }, { "epoch": 0.41, "learning_rate": 0.00019520937969188512, "loss": 0.1039, "step": 360 }, { "epoch": 0.41, "learning_rate": 0.00019518218788264763, "loss": 0.0872, "step": 361 }, { "epoch": 0.41, "learning_rate": 0.00019515492102530302, "loss": 0.0977, "step": 362 }, { "epoch": 0.41, "learning_rate": 0.00019512757914135026, "loss": 0.0954, "step": 363 }, { "epoch": 0.41, "learning_rate": 0.00019510016225234762, "loss": 0.0918, "step": 364 }, { "epoch": 0.41, "learning_rate": 0.00019507267037991238, "loss": 0.0958, "step": 365 }, { "epoch": 0.41, "learning_rate": 0.00019504510354572104, "loss": 0.1108, "step": 366 }, { "epoch": 0.41, "learning_rate": 0.00019501746177150913, "loss": 0.083, "step": 367 }, { "epoch": 0.41, "learning_rate": 0.0001949897450790713, "loss": 0.0842, "step": 368 }, { "epoch": 0.42, "learning_rate": 0.0001949619534902613, "loss": 0.0974, "step": 369 }, { "epoch": 0.42, "learning_rate": 0.00019493408702699194, "loss": 0.0944, "step": 370 }, { "epoch": 0.42, "learning_rate": 0.00019490614571123497, "loss": 0.0946, "step": 371 }, { "epoch": 0.42, "learning_rate": 0.00019487812956502128, "loss": 0.0913, "step": 372 }, { "epoch": 0.42, "learning_rate": 0.00019485003861044066, "loss": 0.0892, "step": 373 }, { "epoch": 0.42, "learning_rate": 0.0001948218728696419, "loss": 0.0919, "step": 374 }, { "epoch": 0.42, "learning_rate": 0.00019479363236483285, "loss": 0.1004, "step": 375 }, { "epoch": 0.42, "learning_rate": 0.00019476531711828027, "loss": 0.0967, "step": 376 }, { "epoch": 0.42, "learning_rate": 0.00019473692715230972, "loss": 0.1035, "step": 377 }, { "epoch": 0.43, "learning_rate": 0.00019470846248930587, "loss": 0.088, "step": 378 }, { "epoch": 0.43, "learning_rate": 0.00019467992315171215, "loss": 0.0987, "step": 379 }, { "epoch": 0.43, "learning_rate": 0.0001946513091620309, "loss": 0.0867, "step": 380 }, { "epoch": 0.43, "learning_rate": 0.00019462262054282338, "loss": 0.1074, "step": 381 }, { "epoch": 0.43, "learning_rate": 0.00019459385731670963, "loss": 0.0884, "step": 382 }, { "epoch": 0.43, "learning_rate": 0.00019456501950636853, "loss": 0.0904, "step": 383 }, { "epoch": 0.43, "learning_rate": 0.00019453610713453777, "loss": 0.0878, "step": 384 }, { "epoch": 0.43, "learning_rate": 0.0001945071202240138, "loss": 0.0928, "step": 385 }, { "epoch": 0.44, "learning_rate": 0.00019447805879765193, "loss": 0.0998, "step": 386 }, { "epoch": 0.44, "learning_rate": 0.00019444892287836613, "loss": 0.1027, "step": 387 }, { "epoch": 0.44, "learning_rate": 0.00019441971248912915, "loss": 0.0897, "step": 388 }, { "epoch": 0.44, "learning_rate": 0.00019439042765297243, "loss": 0.0899, "step": 389 }, { "epoch": 0.44, "learning_rate": 0.00019436106839298614, "loss": 0.0969, "step": 390 }, { "epoch": 0.44, "learning_rate": 0.0001943316347323191, "loss": 0.0952, "step": 391 }, { "epoch": 0.44, "learning_rate": 0.0001943021266941788, "loss": 0.091, "step": 392 }, { "epoch": 0.44, "learning_rate": 0.00019427254430183144, "loss": 0.1004, "step": 393 }, { "epoch": 0.44, "learning_rate": 0.00019424288757860175, "loss": 0.1029, "step": 394 }, { "epoch": 0.45, "learning_rate": 0.0001942131565478731, "loss": 0.0916, "step": 395 }, { "epoch": 0.45, "learning_rate": 0.00019418335123308746, "loss": 0.0913, "step": 396 }, { "epoch": 0.45, "learning_rate": 0.00019415347165774538, "loss": 0.1005, "step": 397 }, { "epoch": 0.45, "learning_rate": 0.0001941235178454059, "loss": 0.0922, "step": 398 }, { "epoch": 0.45, "learning_rate": 0.0001940934898196867, "loss": 0.0858, "step": 399 }, { "epoch": 0.45, "learning_rate": 0.00019406338760426394, "loss": 0.1029, "step": 400 }, { "epoch": 0.45, "learning_rate": 0.00019403321122287218, "loss": 0.0997, "step": 401 }, { "epoch": 0.45, "learning_rate": 0.00019400296069930456, "loss": 0.1042, "step": 402 }, { "epoch": 0.45, "learning_rate": 0.00019397263605741265, "loss": 0.0902, "step": 403 }, { "epoch": 0.46, "learning_rate": 0.00019394223732110649, "loss": 0.107, "step": 404 }, { "epoch": 0.46, "learning_rate": 0.00019391176451435445, "loss": 0.1093, "step": 405 }, { "epoch": 0.46, "learning_rate": 0.00019388121766118337, "loss": 0.113, "step": 406 }, { "epoch": 0.46, "learning_rate": 0.00019385059678567851, "loss": 0.0849, "step": 407 }, { "epoch": 0.46, "learning_rate": 0.00019381990191198343, "loss": 0.0701, "step": 408 }, { "epoch": 0.46, "learning_rate": 0.00019378913306430004, "loss": 0.0926, "step": 409 }, { "epoch": 0.46, "learning_rate": 0.00019375829026688863, "loss": 0.0866, "step": 410 }, { "epoch": 0.46, "learning_rate": 0.0001937273735440677, "loss": 0.0957, "step": 411 }, { "epoch": 0.46, "learning_rate": 0.00019369638292021413, "loss": 0.0965, "step": 412 }, { "epoch": 0.47, "learning_rate": 0.00019366531841976303, "loss": 0.1165, "step": 413 }, { "epoch": 0.47, "learning_rate": 0.0001936341800672078, "loss": 0.0888, "step": 414 }, { "epoch": 0.47, "learning_rate": 0.00019360296788709994, "loss": 0.1102, "step": 415 }, { "epoch": 0.47, "learning_rate": 0.00019357168190404936, "loss": 0.0878, "step": 416 }, { "epoch": 0.47, "learning_rate": 0.00019354032214272403, "loss": 0.105, "step": 417 }, { "epoch": 0.47, "learning_rate": 0.00019350888862785005, "loss": 0.1062, "step": 418 }, { "epoch": 0.47, "learning_rate": 0.00019347738138421181, "loss": 0.0928, "step": 419 }, { "epoch": 0.47, "learning_rate": 0.00019344580043665174, "loss": 0.0963, "step": 420 }, { "epoch": 0.47, "learning_rate": 0.00019341414581007039, "loss": 0.1015, "step": 421 }, { "epoch": 0.48, "learning_rate": 0.00019338241752942642, "loss": 0.0968, "step": 422 }, { "epoch": 0.48, "learning_rate": 0.00019335061561973662, "loss": 0.0896, "step": 423 }, { "epoch": 0.48, "learning_rate": 0.0001933187401060757, "loss": 0.0951, "step": 424 }, { "epoch": 0.48, "learning_rate": 0.00019328679101357653, "loss": 0.0748, "step": 425 }, { "epoch": 0.48, "learning_rate": 0.00019325476836742995, "loss": 0.0897, "step": 426 }, { "epoch": 0.48, "learning_rate": 0.00019322267219288474, "loss": 0.0914, "step": 427 }, { "epoch": 0.48, "learning_rate": 0.00019319050251524777, "loss": 0.0893, "step": 428 }, { "epoch": 0.48, "learning_rate": 0.00019315825935988377, "loss": 0.0919, "step": 429 }, { "epoch": 0.48, "learning_rate": 0.00019312594275221542, "loss": 0.0945, "step": 430 }, { "epoch": 0.49, "learning_rate": 0.00019309355271772335, "loss": 0.092, "step": 431 }, { "epoch": 0.49, "learning_rate": 0.00019306108928194608, "loss": 0.0792, "step": 432 }, { "epoch": 0.49, "learning_rate": 0.00019302855247047996, "loss": 0.1039, "step": 433 }, { "epoch": 0.49, "learning_rate": 0.00019299594230897925, "loss": 0.1154, "step": 434 }, { "epoch": 0.49, "learning_rate": 0.00019296325882315598, "loss": 0.0862, "step": 435 }, { "epoch": 0.49, "learning_rate": 0.00019293050203878005, "loss": 0.0926, "step": 436 }, { "epoch": 0.49, "learning_rate": 0.00019289767198167916, "loss": 0.0822, "step": 437 }, { "epoch": 0.49, "learning_rate": 0.00019286476867773873, "loss": 0.0746, "step": 438 }, { "epoch": 0.49, "learning_rate": 0.00019283179215290197, "loss": 0.0926, "step": 439 }, { "epoch": 0.5, "learning_rate": 0.0001927987424331698, "loss": 0.0903, "step": 440 }, { "epoch": 0.5, "learning_rate": 0.00019276561954460094, "loss": 0.0952, "step": 441 }, { "epoch": 0.5, "learning_rate": 0.00019273242351331162, "loss": 0.0921, "step": 442 }, { "epoch": 0.5, "learning_rate": 0.0001926991543654759, "loss": 0.0837, "step": 443 }, { "epoch": 0.5, "learning_rate": 0.00019266581212732544, "loss": 0.0904, "step": 444 }, { "epoch": 0.5, "learning_rate": 0.00019263239682514952, "loss": 0.1003, "step": 445 }, { "epoch": 0.5, "learning_rate": 0.000192598908485295, "loss": 0.0961, "step": 446 }, { "epoch": 0.5, "learning_rate": 0.00019256534713416645, "loss": 0.0862, "step": 447 }, { "epoch": 0.5, "learning_rate": 0.00019253171279822584, "loss": 0.1226, "step": 448 }, { "epoch": 0.51, "learning_rate": 0.0001924980055039928, "loss": 0.0949, "step": 449 }, { "epoch": 0.51, "learning_rate": 0.00019246422527804448, "loss": 0.0913, "step": 450 }, { "epoch": 0.51, "learning_rate": 0.00019243037214701546, "loss": 0.0921, "step": 451 }, { "epoch": 0.51, "learning_rate": 0.00019239644613759787, "loss": 0.0871, "step": 452 }, { "epoch": 0.51, "learning_rate": 0.00019236244727654126, "loss": 0.1031, "step": 453 }, { "epoch": 0.51, "learning_rate": 0.00019232837559065265, "loss": 0.0934, "step": 454 }, { "epoch": 0.51, "learning_rate": 0.0001922942311067965, "loss": 0.1027, "step": 455 }, { "epoch": 0.51, "learning_rate": 0.00019226001385189463, "loss": 0.0933, "step": 456 }, { "epoch": 0.52, "learning_rate": 0.00019222572385292624, "loss": 0.094, "step": 457 }, { "epoch": 0.52, "learning_rate": 0.00019219136113692787, "loss": 0.0847, "step": 458 }, { "epoch": 0.52, "learning_rate": 0.0001921569257309935, "loss": 0.0942, "step": 459 }, { "epoch": 0.52, "learning_rate": 0.0001921224176622743, "loss": 0.0842, "step": 460 }, { "epoch": 0.52, "learning_rate": 0.00019208783695797875, "loss": 0.0923, "step": 461 }, { "epoch": 0.52, "learning_rate": 0.00019205318364537267, "loss": 0.099, "step": 462 }, { "epoch": 0.52, "learning_rate": 0.00019201845775177904, "loss": 0.0879, "step": 463 }, { "epoch": 0.52, "learning_rate": 0.00019198365930457814, "loss": 0.0958, "step": 464 }, { "epoch": 0.52, "learning_rate": 0.0001919487883312075, "loss": 0.0812, "step": 465 }, { "epoch": 0.53, "learning_rate": 0.00019191384485916165, "loss": 0.0913, "step": 466 }, { "epoch": 0.53, "learning_rate": 0.00019187882891599247, "loss": 0.0952, "step": 467 }, { "epoch": 0.53, "learning_rate": 0.00019184374052930888, "loss": 0.0889, "step": 468 }, { "epoch": 0.53, "learning_rate": 0.00019180857972677696, "loss": 0.0874, "step": 469 }, { "epoch": 0.53, "learning_rate": 0.0001917733465361199, "loss": 0.0862, "step": 470 }, { "epoch": 0.53, "learning_rate": 0.0001917380409851179, "loss": 0.0861, "step": 471 }, { "epoch": 0.53, "learning_rate": 0.00019170266310160827, "loss": 0.0894, "step": 472 }, { "epoch": 0.53, "learning_rate": 0.00019166721291348537, "loss": 0.0813, "step": 473 }, { "epoch": 0.53, "learning_rate": 0.0001916316904487005, "loss": 0.1084, "step": 474 }, { "epoch": 0.54, "learning_rate": 0.000191596095735262, "loss": 0.0923, "step": 475 }, { "epoch": 0.54, "learning_rate": 0.00019156042880123512, "loss": 0.0995, "step": 476 }, { "epoch": 0.54, "learning_rate": 0.00019152468967474217, "loss": 0.0941, "step": 477 }, { "epoch": 0.54, "learning_rate": 0.00019148887838396227, "loss": 0.0929, "step": 478 }, { "epoch": 0.54, "learning_rate": 0.0001914529949571315, "loss": 0.0977, "step": 479 }, { "epoch": 0.54, "learning_rate": 0.00019141703942254275, "loss": 0.0933, "step": 480 }, { "epoch": 0.54, "learning_rate": 0.00019138101180854583, "loss": 0.0881, "step": 481 }, { "epoch": 0.54, "learning_rate": 0.00019134491214354736, "loss": 0.0926, "step": 482 }, { "epoch": 0.54, "learning_rate": 0.00019130874045601075, "loss": 0.1111, "step": 483 }, { "epoch": 0.55, "learning_rate": 0.00019127249677445627, "loss": 0.0886, "step": 484 }, { "epoch": 0.55, "learning_rate": 0.00019123618112746083, "loss": 0.0972, "step": 485 }, { "epoch": 0.55, "learning_rate": 0.00019119979354365822, "loss": 0.0881, "step": 486 }, { "epoch": 0.55, "learning_rate": 0.00019116333405173885, "loss": 0.0941, "step": 487 }, { "epoch": 0.55, "learning_rate": 0.00019112680268044987, "loss": 0.0987, "step": 488 }, { "epoch": 0.55, "learning_rate": 0.0001910901994585951, "loss": 0.0782, "step": 489 }, { "epoch": 0.55, "learning_rate": 0.000191053524415035, "loss": 0.0905, "step": 490 }, { "epoch": 0.55, "learning_rate": 0.00019101677757868668, "loss": 0.081, "step": 491 }, { "epoch": 0.55, "learning_rate": 0.00019097995897852382, "loss": 0.0884, "step": 492 }, { "epoch": 0.56, "learning_rate": 0.00019094306864357675, "loss": 0.077, "step": 493 }, { "epoch": 0.56, "learning_rate": 0.00019090610660293226, "loss": 0.0842, "step": 494 }, { "epoch": 0.56, "learning_rate": 0.00019086907288573378, "loss": 0.1004, "step": 495 }, { "epoch": 0.56, "learning_rate": 0.00019083196752118118, "loss": 0.1001, "step": 496 }, { "epoch": 0.56, "learning_rate": 0.00019079479053853087, "loss": 0.0879, "step": 497 }, { "epoch": 0.56, "learning_rate": 0.00019075754196709572, "loss": 0.0954, "step": 498 }, { "epoch": 0.56, "learning_rate": 0.00019072022183624503, "loss": 0.0971, "step": 499 }, { "epoch": 0.56, "learning_rate": 0.00019068283017540448, "loss": 0.1095, "step": 500 }, { "epoch": 0.56, "learning_rate": 0.00019064536701405627, "loss": 0.1002, "step": 501 }, { "epoch": 0.57, "learning_rate": 0.0001906078323817389, "loss": 0.0952, "step": 502 }, { "epoch": 0.57, "learning_rate": 0.00019057022630804716, "loss": 0.0966, "step": 503 }, { "epoch": 0.57, "learning_rate": 0.0001905325488226323, "loss": 0.0928, "step": 504 }, { "epoch": 0.57, "learning_rate": 0.00019049479995520175, "loss": 0.0933, "step": 505 }, { "epoch": 0.57, "learning_rate": 0.00019045697973551934, "loss": 0.0962, "step": 506 }, { "epoch": 0.57, "learning_rate": 0.00019041908819340505, "loss": 0.0968, "step": 507 }, { "epoch": 0.57, "learning_rate": 0.0001903811253587352, "loss": 0.0977, "step": 508 }, { "epoch": 0.57, "learning_rate": 0.00019034309126144224, "loss": 0.092, "step": 509 }, { "epoch": 0.57, "learning_rate": 0.00019030498593151484, "loss": 0.0897, "step": 510 }, { "epoch": 0.58, "learning_rate": 0.00019026680939899783, "loss": 0.0884, "step": 511 }, { "epoch": 0.58, "learning_rate": 0.0001902285616939922, "loss": 0.0961, "step": 512 }, { "epoch": 0.58, "learning_rate": 0.00019019024284665505, "loss": 0.0882, "step": 513 }, { "epoch": 0.58, "learning_rate": 0.0001901518528871995, "loss": 0.0835, "step": 514 }, { "epoch": 0.58, "learning_rate": 0.00019011339184589487, "loss": 0.0861, "step": 515 }, { "epoch": 0.58, "learning_rate": 0.00019007485975306644, "loss": 0.0824, "step": 516 }, { "epoch": 0.58, "learning_rate": 0.00019003625663909554, "loss": 0.0913, "step": 517 }, { "epoch": 0.58, "learning_rate": 0.00018999758253441946, "loss": 0.1271, "step": 518 }, { "epoch": 0.58, "learning_rate": 0.00018995883746953152, "loss": 0.0795, "step": 519 }, { "epoch": 0.59, "learning_rate": 0.00018992002147498094, "loss": 0.0943, "step": 520 }, { "epoch": 0.59, "learning_rate": 0.0001898811345813729, "loss": 0.0863, "step": 521 }, { "epoch": 0.59, "learning_rate": 0.00018984217681936846, "loss": 0.0936, "step": 522 }, { "epoch": 0.59, "learning_rate": 0.00018980314821968463, "loss": 0.0872, "step": 523 }, { "epoch": 0.59, "learning_rate": 0.00018976404881309412, "loss": 0.0924, "step": 524 }, { "epoch": 0.59, "learning_rate": 0.00018972487863042563, "loss": 0.0838, "step": 525 }, { "epoch": 0.59, "learning_rate": 0.00018968563770256354, "loss": 0.0872, "step": 526 }, { "epoch": 0.59, "learning_rate": 0.0001896463260604481, "loss": 0.0979, "step": 527 }, { "epoch": 0.6, "learning_rate": 0.00018960694373507526, "loss": 0.088, "step": 528 }, { "epoch": 0.6, "learning_rate": 0.00018956749075749673, "loss": 0.0865, "step": 529 }, { "epoch": 0.6, "learning_rate": 0.00018952796715881995, "loss": 0.0936, "step": 530 }, { "epoch": 0.6, "learning_rate": 0.000189488372970208, "loss": 0.0842, "step": 531 }, { "epoch": 0.6, "learning_rate": 0.00018944870822287956, "loss": 0.1191, "step": 532 }, { "epoch": 0.6, "learning_rate": 0.0001894089729481091, "loss": 0.0981, "step": 533 }, { "epoch": 0.6, "learning_rate": 0.0001893691671772266, "loss": 0.0847, "step": 534 }, { "epoch": 0.6, "eval_loss": 0.09380102157592773, "eval_runtime": 71.8003, "eval_samples_per_second": 7.298, "eval_steps_per_second": 1.825, "step": 534 }, { "epoch": 0.6, "learning_rate": 0.00018932929094161758, "loss": 0.0837, "step": 535 }, { "epoch": 0.6, "learning_rate": 0.00018928934427272321, "loss": 0.0954, "step": 536 }, { "epoch": 0.61, "learning_rate": 0.00018924932720204018, "loss": 0.0755, "step": 537 }, { "epoch": 0.61, "learning_rate": 0.00018920923976112065, "loss": 0.0949, "step": 538 }, { "epoch": 0.61, "learning_rate": 0.0001891690819815723, "loss": 0.0949, "step": 539 }, { "epoch": 0.61, "learning_rate": 0.0001891288538950582, "loss": 0.0849, "step": 540 }, { "epoch": 0.61, "learning_rate": 0.00018908855553329695, "loss": 0.0971, "step": 541 }, { "epoch": 0.61, "learning_rate": 0.00018904818692806253, "loss": 0.0847, "step": 542 }, { "epoch": 0.61, "learning_rate": 0.00018900774811118424, "loss": 0.0864, "step": 543 }, { "epoch": 0.61, "learning_rate": 0.00018896723911454686, "loss": 0.0839, "step": 544 }, { "epoch": 0.61, "learning_rate": 0.0001889266599700904, "loss": 0.0981, "step": 545 }, { "epoch": 0.62, "learning_rate": 0.0001888860107098102, "loss": 0.0922, "step": 546 }, { "epoch": 0.62, "learning_rate": 0.00018884529136575692, "loss": 0.0957, "step": 547 }, { "epoch": 0.62, "learning_rate": 0.00018880450197003646, "loss": 0.0953, "step": 548 }, { "epoch": 0.62, "learning_rate": 0.00018876364255480997, "loss": 0.0927, "step": 549 }, { "epoch": 0.62, "learning_rate": 0.00018872271315229373, "loss": 0.0761, "step": 550 }, { "epoch": 0.62, "learning_rate": 0.00018868171379475933, "loss": 0.0912, "step": 551 }, { "epoch": 0.62, "learning_rate": 0.00018864064451453341, "loss": 0.1042, "step": 552 }, { "epoch": 0.62, "learning_rate": 0.0001885995053439978, "loss": 0.0942, "step": 553 }, { "epoch": 0.62, "learning_rate": 0.00018855829631558946, "loss": 0.0873, "step": 554 }, { "epoch": 0.63, "learning_rate": 0.00018851701746180031, "loss": 0.0939, "step": 555 }, { "epoch": 0.63, "learning_rate": 0.0001884756688151774, "loss": 0.0848, "step": 556 }, { "epoch": 0.63, "learning_rate": 0.00018843425040832292, "loss": 0.0916, "step": 557 }, { "epoch": 0.63, "learning_rate": 0.00018839276227389386, "loss": 0.1135, "step": 558 }, { "epoch": 0.63, "learning_rate": 0.0001883512044446023, "loss": 0.0883, "step": 559 }, { "epoch": 0.63, "learning_rate": 0.00018830957695321527, "loss": 0.1102, "step": 560 }, { "epoch": 0.63, "learning_rate": 0.00018826787983255473, "loss": 0.0954, "step": 561 }, { "epoch": 0.63, "learning_rate": 0.00018822611311549754, "loss": 0.0943, "step": 562 }, { "epoch": 0.63, "learning_rate": 0.00018818427683497534, "loss": 0.0694, "step": 563 }, { "epoch": 0.64, "learning_rate": 0.00018814237102397478, "loss": 0.0818, "step": 564 }, { "epoch": 0.64, "learning_rate": 0.0001881003957155372, "loss": 0.0934, "step": 565 }, { "epoch": 0.64, "learning_rate": 0.00018805835094275883, "loss": 0.0902, "step": 566 }, { "epoch": 0.64, "learning_rate": 0.0001880162367387906, "loss": 0.0841, "step": 567 }, { "epoch": 0.64, "learning_rate": 0.00018797405313683818, "loss": 0.0946, "step": 568 }, { "epoch": 0.64, "learning_rate": 0.00018793180017016202, "loss": 0.0871, "step": 569 }, { "epoch": 0.64, "learning_rate": 0.00018788947787207728, "loss": 0.0833, "step": 570 }, { "epoch": 0.64, "learning_rate": 0.00018784708627595363, "loss": 0.084, "step": 571 }, { "epoch": 0.64, "learning_rate": 0.0001878046254152156, "loss": 0.0908, "step": 572 }, { "epoch": 0.65, "learning_rate": 0.00018776209532334214, "loss": 0.0919, "step": 573 }, { "epoch": 0.65, "learning_rate": 0.0001877194960338669, "loss": 0.0851, "step": 574 }, { "epoch": 0.65, "learning_rate": 0.00018767682758037802, "loss": 0.1096, "step": 575 }, { "epoch": 0.65, "learning_rate": 0.00018763408999651824, "loss": 0.0959, "step": 576 }, { "epoch": 0.65, "learning_rate": 0.0001875912833159848, "loss": 0.0969, "step": 577 }, { "epoch": 0.65, "learning_rate": 0.00018754840757252933, "loss": 0.0971, "step": 578 }, { "epoch": 0.65, "learning_rate": 0.00018750546279995805, "loss": 0.1048, "step": 579 }, { "epoch": 0.65, "learning_rate": 0.0001874624490321315, "loss": 0.0874, "step": 580 }, { "epoch": 0.65, "learning_rate": 0.00018741936630296466, "loss": 0.0753, "step": 581 }, { "epoch": 0.66, "learning_rate": 0.00018737621464642688, "loss": 0.0956, "step": 582 }, { "epoch": 0.66, "learning_rate": 0.0001873329940965419, "loss": 0.0831, "step": 583 }, { "epoch": 0.66, "learning_rate": 0.00018728970468738764, "loss": 0.106, "step": 584 }, { "epoch": 0.66, "learning_rate": 0.00018724634645309656, "loss": 0.0888, "step": 585 }, { "epoch": 0.66, "learning_rate": 0.0001872029194278551, "loss": 0.1054, "step": 586 }, { "epoch": 0.66, "learning_rate": 0.00018715942364590417, "loss": 0.0943, "step": 587 }, { "epoch": 0.66, "learning_rate": 0.00018711585914153874, "loss": 0.0985, "step": 588 }, { "epoch": 0.66, "learning_rate": 0.0001870722259491081, "loss": 0.0857, "step": 589 }, { "epoch": 0.66, "learning_rate": 0.00018702852410301554, "loss": 0.1067, "step": 590 }, { "epoch": 0.67, "learning_rate": 0.00018698475363771862, "loss": 0.0879, "step": 591 }, { "epoch": 0.67, "learning_rate": 0.00018694091458772892, "loss": 0.0838, "step": 592 }, { "epoch": 0.67, "learning_rate": 0.00018689700698761217, "loss": 0.1042, "step": 593 }, { "epoch": 0.67, "learning_rate": 0.00018685303087198808, "loss": 0.0993, "step": 594 }, { "epoch": 0.67, "learning_rate": 0.00018680898627553038, "loss": 0.088, "step": 595 }, { "epoch": 0.67, "learning_rate": 0.00018676487323296688, "loss": 0.0862, "step": 596 }, { "epoch": 0.67, "learning_rate": 0.00018672069177907928, "loss": 0.1039, "step": 597 }, { "epoch": 0.67, "learning_rate": 0.0001866764419487032, "loss": 0.0858, "step": 598 }, { "epoch": 0.68, "learning_rate": 0.00018663212377672829, "loss": 0.0855, "step": 599 }, { "epoch": 0.68, "learning_rate": 0.00018658773729809792, "loss": 0.0897, "step": 600 }, { "epoch": 0.68, "learning_rate": 0.0001865432825478095, "loss": 0.0898, "step": 601 }, { "epoch": 0.68, "learning_rate": 0.00018649875956091414, "loss": 0.0981, "step": 602 }, { "epoch": 0.68, "learning_rate": 0.00018645416837251676, "loss": 0.0891, "step": 603 }, { "epoch": 0.68, "learning_rate": 0.00018640950901777613, "loss": 0.0928, "step": 604 }, { "epoch": 0.68, "learning_rate": 0.00018636478153190466, "loss": 0.0865, "step": 605 }, { "epoch": 0.68, "learning_rate": 0.00018631998595016862, "loss": 0.0897, "step": 606 }, { "epoch": 0.68, "learning_rate": 0.00018627512230788785, "loss": 0.081, "step": 607 }, { "epoch": 0.69, "learning_rate": 0.00018623019064043585, "loss": 0.0997, "step": 608 }, { "epoch": 0.69, "learning_rate": 0.00018618519098323986, "loss": 0.0856, "step": 609 }, { "epoch": 0.69, "learning_rate": 0.00018614012337178068, "loss": 0.1042, "step": 610 }, { "epoch": 0.69, "learning_rate": 0.00018609498784159266, "loss": 0.095, "step": 611 }, { "epoch": 0.69, "learning_rate": 0.00018604978442826367, "loss": 0.0814, "step": 612 }, { "epoch": 0.69, "learning_rate": 0.00018600451316743525, "loss": 0.0855, "step": 613 }, { "epoch": 0.69, "learning_rate": 0.00018595917409480227, "loss": 0.0971, "step": 614 }, { "epoch": 0.69, "learning_rate": 0.00018591376724611317, "loss": 0.0899, "step": 615 }, { "epoch": 0.69, "learning_rate": 0.00018586829265716977, "loss": 0.0902, "step": 616 }, { "epoch": 0.7, "learning_rate": 0.00018582275036382732, "loss": 0.1028, "step": 617 }, { "epoch": 0.7, "learning_rate": 0.00018577714040199448, "loss": 0.0919, "step": 618 }, { "epoch": 0.7, "learning_rate": 0.00018573146280763324, "loss": 0.0951, "step": 619 }, { "epoch": 0.7, "learning_rate": 0.00018568571761675893, "loss": 0.085, "step": 620 }, { "epoch": 0.7, "learning_rate": 0.00018563990486544017, "loss": 0.0873, "step": 621 }, { "epoch": 0.7, "learning_rate": 0.0001855940245897988, "loss": 0.0938, "step": 622 }, { "epoch": 0.7, "learning_rate": 0.00018554807682601005, "loss": 0.0822, "step": 623 }, { "epoch": 0.7, "learning_rate": 0.00018550206161030216, "loss": 0.0894, "step": 624 }, { "epoch": 0.7, "learning_rate": 0.0001854559789789567, "loss": 0.0938, "step": 625 }, { "epoch": 0.71, "learning_rate": 0.00018540982896830834, "loss": 0.0791, "step": 626 }, { "epoch": 0.71, "learning_rate": 0.00018536361161474485, "loss": 0.0873, "step": 627 }, { "epoch": 0.71, "learning_rate": 0.00018531732695470723, "loss": 0.0903, "step": 628 }, { "epoch": 0.71, "learning_rate": 0.00018527097502468934, "loss": 0.087, "step": 629 }, { "epoch": 0.71, "learning_rate": 0.00018522455586123825, "loss": 0.0921, "step": 630 }, { "epoch": 0.71, "learning_rate": 0.000185178069500954, "loss": 0.1115, "step": 631 }, { "epoch": 0.71, "learning_rate": 0.00018513151598048956, "loss": 0.1017, "step": 632 }, { "epoch": 0.71, "learning_rate": 0.0001850848953365509, "loss": 0.088, "step": 633 }, { "epoch": 0.71, "learning_rate": 0.0001850382076058969, "loss": 0.0903, "step": 634 }, { "epoch": 0.72, "learning_rate": 0.0001849914528253394, "loss": 0.0888, "step": 635 }, { "epoch": 0.72, "learning_rate": 0.00018494463103174296, "loss": 0.0961, "step": 636 }, { "epoch": 0.72, "learning_rate": 0.0001848977422620251, "loss": 0.0853, "step": 637 }, { "epoch": 0.72, "learning_rate": 0.00018485078655315612, "loss": 0.0918, "step": 638 }, { "epoch": 0.72, "learning_rate": 0.00018480376394215908, "loss": 0.0778, "step": 639 }, { "epoch": 0.72, "learning_rate": 0.0001847566744661098, "loss": 0.0748, "step": 640 }, { "epoch": 0.72, "learning_rate": 0.00018470951816213686, "loss": 0.0901, "step": 641 }, { "epoch": 0.72, "learning_rate": 0.00018466229506742147, "loss": 0.0891, "step": 642 }, { "epoch": 0.72, "learning_rate": 0.00018461500521919753, "loss": 0.0926, "step": 643 }, { "epoch": 0.73, "learning_rate": 0.00018456764865475153, "loss": 0.0991, "step": 644 }, { "epoch": 0.73, "learning_rate": 0.00018452022541142268, "loss": 0.0947, "step": 645 }, { "epoch": 0.73, "learning_rate": 0.00018447273552660266, "loss": 0.0861, "step": 646 }, { "epoch": 0.73, "learning_rate": 0.0001844251790377357, "loss": 0.0908, "step": 647 }, { "epoch": 0.73, "learning_rate": 0.00018437755598231856, "loss": 0.0939, "step": 648 }, { "epoch": 0.73, "learning_rate": 0.00018432986639790056, "loss": 0.1003, "step": 649 }, { "epoch": 0.73, "learning_rate": 0.00018428211032208336, "loss": 0.0959, "step": 650 }, { "epoch": 0.73, "learning_rate": 0.00018423428779252107, "loss": 0.0868, "step": 651 }, { "epoch": 0.73, "learning_rate": 0.0001841863988469203, "loss": 0.094, "step": 652 }, { "epoch": 0.74, "learning_rate": 0.00018413844352303991, "loss": 0.0926, "step": 653 }, { "epoch": 0.74, "learning_rate": 0.0001840904218586911, "loss": 0.0785, "step": 654 }, { "epoch": 0.74, "learning_rate": 0.00018404233389173746, "loss": 0.1018, "step": 655 }, { "epoch": 0.74, "learning_rate": 0.00018399417966009484, "loss": 0.0924, "step": 656 }, { "epoch": 0.74, "learning_rate": 0.00018394595920173123, "loss": 0.0941, "step": 657 }, { "epoch": 0.74, "learning_rate": 0.00018389767255466697, "loss": 0.0779, "step": 658 }, { "epoch": 0.74, "learning_rate": 0.00018384931975697451, "loss": 0.0982, "step": 659 }, { "epoch": 0.74, "learning_rate": 0.00018380090084677854, "loss": 0.0945, "step": 660 }, { "epoch": 0.74, "learning_rate": 0.00018375241586225576, "loss": 0.0759, "step": 661 }, { "epoch": 0.75, "learning_rate": 0.00018370386484163503, "loss": 0.082, "step": 662 }, { "epoch": 0.75, "learning_rate": 0.0001836552478231973, "loss": 0.0918, "step": 663 }, { "epoch": 0.75, "learning_rate": 0.0001836065648452755, "loss": 0.086, "step": 664 }, { "epoch": 0.75, "learning_rate": 0.00018355781594625467, "loss": 0.0996, "step": 665 }, { "epoch": 0.75, "learning_rate": 0.00018350900116457166, "loss": 0.0928, "step": 666 }, { "epoch": 0.75, "learning_rate": 0.0001834601205387154, "loss": 0.0992, "step": 667 }, { "epoch": 0.75, "learning_rate": 0.00018341117410722676, "loss": 0.0871, "step": 668 }, { "epoch": 0.75, "learning_rate": 0.00018336216190869835, "loss": 0.0909, "step": 669 }, { "epoch": 0.76, "learning_rate": 0.00018331308398177477, "loss": 0.0818, "step": 670 }, { "epoch": 0.76, "learning_rate": 0.00018326394036515236, "loss": 0.0904, "step": 671 }, { "epoch": 0.76, "learning_rate": 0.0001832147310975793, "loss": 0.0944, "step": 672 }, { "epoch": 0.76, "learning_rate": 0.0001831654562178555, "loss": 0.0814, "step": 673 }, { "epoch": 0.76, "learning_rate": 0.00018311611576483268, "loss": 0.0935, "step": 674 }, { "epoch": 0.76, "learning_rate": 0.00018306670977741418, "loss": 0.0925, "step": 675 }, { "epoch": 0.76, "learning_rate": 0.000183017238294555, "loss": 0.0942, "step": 676 }, { "epoch": 0.76, "learning_rate": 0.0001829677013552619, "loss": 0.0934, "step": 677 }, { "epoch": 0.76, "learning_rate": 0.0001829180989985931, "loss": 0.0911, "step": 678 }, { "epoch": 0.77, "learning_rate": 0.0001828684312636585, "loss": 0.0781, "step": 679 }, { "epoch": 0.77, "learning_rate": 0.00018281869818961952, "loss": 0.0843, "step": 680 }, { "epoch": 0.77, "learning_rate": 0.00018276889981568906, "loss": 0.0821, "step": 681 }, { "epoch": 0.77, "learning_rate": 0.0001827190361811316, "loss": 0.0944, "step": 682 }, { "epoch": 0.77, "learning_rate": 0.00018266910732526296, "loss": 0.095, "step": 683 }, { "epoch": 0.77, "learning_rate": 0.00018261911328745051, "loss": 0.0927, "step": 684 }, { "epoch": 0.77, "learning_rate": 0.0001825690541071129, "loss": 0.0844, "step": 685 }, { "epoch": 0.77, "learning_rate": 0.00018251892982372022, "loss": 0.0947, "step": 686 }, { "epoch": 0.77, "learning_rate": 0.00018246874047679384, "loss": 0.0718, "step": 687 }, { "epoch": 0.78, "learning_rate": 0.00018241848610590645, "loss": 0.0756, "step": 688 }, { "epoch": 0.78, "learning_rate": 0.00018236816675068203, "loss": 0.0861, "step": 689 }, { "epoch": 0.78, "learning_rate": 0.00018231778245079573, "loss": 0.0884, "step": 690 }, { "epoch": 0.78, "learning_rate": 0.00018226733324597406, "loss": 0.0953, "step": 691 }, { "epoch": 0.78, "learning_rate": 0.00018221681917599453, "loss": 0.0883, "step": 692 }, { "epoch": 0.78, "learning_rate": 0.00018216624028068585, "loss": 0.0894, "step": 693 }, { "epoch": 0.78, "learning_rate": 0.0001821155965999279, "loss": 0.0988, "step": 694 }, { "epoch": 0.78, "learning_rate": 0.0001820648881736516, "loss": 0.1041, "step": 695 }, { "epoch": 0.78, "learning_rate": 0.00018201411504183888, "loss": 0.0812, "step": 696 }, { "epoch": 0.79, "learning_rate": 0.00018196327724452277, "loss": 0.0829, "step": 697 }, { "epoch": 0.79, "learning_rate": 0.00018191237482178724, "loss": 0.0861, "step": 698 }, { "epoch": 0.79, "learning_rate": 0.00018186140781376722, "loss": 0.0853, "step": 699 }, { "epoch": 0.79, "learning_rate": 0.00018181037626064853, "loss": 0.102, "step": 700 }, { "epoch": 0.79, "learning_rate": 0.00018175928020266796, "loss": 0.1039, "step": 701 }, { "epoch": 0.79, "learning_rate": 0.0001817081196801131, "loss": 0.0826, "step": 702 }, { "epoch": 0.79, "learning_rate": 0.00018165689473332238, "loss": 0.0881, "step": 703 }, { "epoch": 0.79, "learning_rate": 0.00018160560540268504, "loss": 0.0887, "step": 704 }, { "epoch": 0.79, "learning_rate": 0.00018155425172864106, "loss": 0.0849, "step": 705 }, { "epoch": 0.8, "learning_rate": 0.00018150283375168114, "loss": 0.0922, "step": 706 }, { "epoch": 0.8, "learning_rate": 0.00018145135151234677, "loss": 0.1013, "step": 707 }, { "epoch": 0.8, "learning_rate": 0.00018139980505123003, "loss": 0.0775, "step": 708 }, { "epoch": 0.8, "learning_rate": 0.0001813481944089736, "loss": 0.0842, "step": 709 }, { "epoch": 0.8, "learning_rate": 0.0001812965196262709, "loss": 0.0792, "step": 710 } ], "logging_steps": 1, "max_steps": 3548, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 355, "total_flos": 1.3009661217545912e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }